Skip to content

Commit

Permalink
fix: KurtCache handling of schemas - serialize as JSON Schema
Browse files Browse the repository at this point in the history
Prior to this commit, KurtCache would raise an unhandled YAML
serialization exception when storing a cache entry that included
structured data, because it was trying to serialize the Zod
schema object directly. This commit fixes that problem by
converting the schema to JSON Schema (which is cleanly serializable)
before writing the YAML cache entry.
  • Loading branch information
jemc committed Dec 4, 2024
1 parent d0fc4cc commit aca061a
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 32 deletions.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
messages:
- role: user
text: Was this cached?
sampling:
maxOutputTokens: 4096
temperature: 0.5
topP: 0.95
tools:
structured_data:
name: structured_data
description: ""
parameters:
type: object
properties:
cached:
type: boolean
required:
- cached
additionalProperties: false
$schema: http://json-schema.org/draft-07/schema#
forceTool: structured_data
response:
- chunk: '{"cached":'
- chunk: true}
- finished: true
text: '{"cached":true}'
data:
cached: true
3 changes: 2 additions & 1 deletion packages/kurt-cache/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
"semantic-release-monorepo": "^8.0.2",
"ts-jest": "^29.1.2",
"type-fest": "^4.30.0",
"typescript": "^5.4.5"
"typescript": "^5.4.5",
"zod": "^3.23.8"
}
}
45 changes: 31 additions & 14 deletions packages/kurt-cache/spec/KurtCache.spec.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { describe, test, expect } from "@jest/globals"
import { z } from "zod"
import {
existsSync,
readFileSync,
Expand Down Expand Up @@ -29,8 +30,17 @@ const cacheDir = `${__dirname}/../.kurt-cache/test`
const cacheDirRetain = `${cacheDir}-retain`

// A convenience function to make the test cases succinct one-liners.
const gen = async (kurt: Kurt, prompt: string) =>
(await kurt.generateNaturalLanguage({ prompt }).result).text
/**
 * Test helper: runs a single generation against `kurt` and resolves to the
 * part of the final result that the test cares about.
 *
 * @param kurt - The instance used to run the generation.
 * @param prompt - The prompt passed through to the generation call.
 * @param schema - Optional. When provided, `generateStructuredData` is called
 *   with it and the result's `data` is returned; when omitted,
 *   `generateNaturalLanguage` is called and the result's `text` is returned.
 * @returns The awaited result's `data` (with a schema) or `text` (without).
 */
async function gen(
  kurt: Kurt,
  prompt: string,
  schema?: KurtSchema<KurtSchemaInner>
) {
  // Structured-data path: only taken when the caller supplied a schema.
  if (schema) {
    const structured = await kurt.generateStructuredData({ prompt, schema })
      .result
    return structured.data
  }

  // Natural-language path: no schema, so only the final text is relevant.
  const natural = await kurt.generateNaturalLanguage({ prompt }).result
  return natural.text
}

describe("KurtCache", () => {
test("when cache misses, runs the adapter setup fn just once", async () => {
Expand Down Expand Up @@ -74,31 +84,33 @@ describe("KurtCache", () => {
// We compare with a hard-coded hash here to test that the hash function is
// stable/deterministic across library versions of KurtCache.
//
// If you find yourself needing to change this hash value, it means
// If you find yourself needing to change this hash value when you didn't
// change the prompt or schema used in the test, it probably means
// that you're breaking all existing cache entries, which is a breaking
// change for users of KurtCache who rely on it for their test suites.
const hash =
"ad557ba1818e8013f9e2fbc9598c034e263c96c5fe7edd491e75be8ce450f5c9"
"b8a5a99fa499ef332c4a599d5b1eff433fc0ee7cd6995e86fb7dbfd8b9ffe999"
const filePath = `${cacheDirRetain}/stub-${hash}.yaml`

// Assert that the cache file entry already exists (it has been
// committed into the repo and retained there)
const cached = readFileSync(filePath, "utf8")
expect(cached).toContain("text: This was cached on disk")
expect(cached).toContain("text: '{\"cached\":true}'")

// Use the cache adapter configured appropriately to find the cache entry.
let adapterFnCallCount = 0
const kurt = new Kurt(
new KurtCache(cacheDirRetain, "stub", () => {
adapterFnCallCount++
return new StubAdapter([["This was cached", " on disk"]])
return new StubAdapter([['{"cached":', "true}"]])
})
)

// Expect the cache hit to return the result text from the file.
expect(await gen(kurt, "Was this cached?")).toEqual(
"This was cached on disk"
)
const schema = z.object({ cached: z.boolean() }).strict()
expect(await gen(kurt, "Was this cached?", schema)).toEqual({
cached: true,
})

// Expect that the adapter setup function was never called.
expect(adapterFnCallCount).toEqual(0)
Expand All @@ -108,9 +120,9 @@ describe("KurtCache", () => {

// Delete the cache file and prove that it regenerates exactly the same.
rmSync(filePath)
expect(await gen(kurt, "Was this cached?")).toEqual(
"This was cached on disk"
)
expect(await gen(kurt, "Was this cached?", schema)).toEqual({
cached: true,
})
expect(adapterFnCallCount).toEqual(1)
expect(readFileSync(filePath, "utf8")).toEqual(cached)
})
Expand Down Expand Up @@ -198,11 +210,16 @@ class StubAdapter
yield { finished: true, text, data: undefined }
}

transformStructuredDataFromRawEvents<I extends KurtSchemaInner>(
async *transformStructuredDataFromRawEvents<I extends KurtSchemaInner>(
schema: KurtSchema<I>,
rawEvents: AsyncIterable<{ bytes: string }>
): AsyncIterable<KurtStreamEvent<KurtSchemaResult<I>>> {
throw new Error("Not implemented because tests here don't use it")
let text = ""
for await (const { bytes } of rawEvents) {
text += bytes
yield { chunk: bytes }
}
yield { finished: true, text, data: schema.parse(JSON.parse(text)) }
}

transformWithOptionalToolsFromRawEvents<I extends KurtSchemaInnerMap>(
Expand Down
29 changes: 25 additions & 4 deletions packages/kurt-cache/src/KurtCache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import type {
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"
import { createHash, type Hash } from "node:crypto"
import { stringify as stringifyYAML, parse as parseYAML } from "yaml"
import { zodToJsonSchema } from "zod-to-json-schema"
import { zodToJsonSchema, type JsonSchema7ObjectType } from "zod-to-json-schema"

type CacheData = {
messages: KurtMessage[]
Expand All @@ -23,7 +23,7 @@ type CacheData = {
[key: string]: {
name: string
description: string
parameters: KurtSchema<KurtSchemaInner>
parameters: JsonSchema7ObjectType
}
}
forceTool?: string
Expand Down Expand Up @@ -100,7 +100,15 @@ export class KurtCache<A extends KurtAdapter>
}

generateRawEvents(
options: Omit<CacheData, "response">
options: Omit<CacheData, "response" | "tools"> & {
tools: {
[key: string]: {
name: string
description: string
parameters: KurtSchema<KurtSchemaInner>
}
}
}
): AsyncIterable<AdapterRawEvent<A>> {
// Hash the incoming options to determine the cache key.
const digest = createHash("sha256")
Expand All @@ -120,7 +128,20 @@ export class KurtCache<A extends KurtAdapter>
const adapter = this.lazyAdapter()
return new ResponseEventsShouldCache(
cacheFilePath,
options,
{
...options,
tools: Object.fromEntries(
Object.entries(options.tools).map(([name, tool]) => [
name,
{
...tool,
parameters: zodToJsonSchema(
tool.parameters
) as JsonSchema7ObjectType,
},
])
),
},
adapter.generateRawEvents({
messages: adapter.transformToRawMessages(options.messages),
sampling: options.sampling,
Expand Down
3 changes: 3 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit aca061a

Please sign in to comment.