From aca061a4d1c53fddc45f41ce86a78ba75c40bc50 Mon Sep 17 00:00:00 2001 From: Joe McIlvain Date: Wed, 4 Dec 2024 14:32:54 -0800 Subject: [PATCH] fix: kurtCache handling of schemas - serialize as JSON Schema Prior to this commit, KurtCache would raise an unhandled YAML serializing exception when storing a cache entry that included structured data, because it was trying to serialize the Zod schema object directly. This commit fixes that problem by converting the schema to JSON schema (which is cleanly serializable) before writing the YAML cache entry. --- ...c034e263c96c5fe7edd491e75be8ce450f5c9.yaml | 13 ------ ...eff433fc0ee7cd6995e86fb7dbfd8b9ffe999.yaml | 28 ++++++++++++ packages/kurt-cache/package.json | 3 +- packages/kurt-cache/spec/KurtCache.spec.ts | 45 +++++++++++++------ packages/kurt-cache/src/KurtCache.ts | 29 ++++++++++-- pnpm-lock.yaml | 3 ++ 6 files changed, 89 insertions(+), 32 deletions(-) delete mode 100644 packages/kurt-cache/.kurt-cache/test-retain/stub-ad557ba1818e8013f9e2fbc9598c034e263c96c5fe7edd491e75be8ce450f5c9.yaml create mode 100644 packages/kurt-cache/.kurt-cache/test-retain/stub-b8a5a99fa499ef332c4a599d5b1eff433fc0ee7cd6995e86fb7dbfd8b9ffe999.yaml diff --git a/packages/kurt-cache/.kurt-cache/test-retain/stub-ad557ba1818e8013f9e2fbc9598c034e263c96c5fe7edd491e75be8ce450f5c9.yaml b/packages/kurt-cache/.kurt-cache/test-retain/stub-ad557ba1818e8013f9e2fbc9598c034e263c96c5fe7edd491e75be8ce450f5c9.yaml deleted file mode 100644 index 87147c3..0000000 --- a/packages/kurt-cache/.kurt-cache/test-retain/stub-ad557ba1818e8013f9e2fbc9598c034e263c96c5fe7edd491e75be8ce450f5c9.yaml +++ /dev/null @@ -1,13 +0,0 @@ -messages: - - role: user - text: Was this cached? -sampling: - maxOutputTokens: 4096 - temperature: 0.5 - topP: 0.95 -tools: {} -response: - - chunk: This was cached - - chunk: " on disk" - - finished: true - text: This was cached on disk diff --git a/packages/kurt-cache/.kurt-cache/test-retain/stub-b8a5a99fa499ef332c4a599d5b1eff433fc0ee7cd6995e86fb7dbfd8b9ffe999.yaml b/packages/kurt-cache/.kurt-cache/test-retain/stub-b8a5a99fa499ef332c4a599d5b1eff433fc0ee7cd6995e86fb7dbfd8b9ffe999.yaml new file mode 100644 index 0000000..41722fa --- /dev/null +++ b/packages/kurt-cache/.kurt-cache/test-retain/stub-b8a5a99fa499ef332c4a599d5b1eff433fc0ee7cd6995e86fb7dbfd8b9ffe999.yaml @@ -0,0 +1,28 @@ +messages: + - role: user + text: Was this cached? +sampling: + maxOutputTokens: 4096 + temperature: 0.5 + topP: 0.95 +tools: + structured_data: + name: structured_data + description: "" + parameters: + type: object + properties: + cached: + type: boolean + required: + - cached + additionalProperties: false + $schema: http://json-schema.org/draft-07/schema# +forceTool: structured_data +response: + - chunk: '{"cached":' + - chunk: true} + - finished: true + text: '{"cached":true}' + data: + cached: true diff --git a/packages/kurt-cache/package.json b/packages/kurt-cache/package.json index d309c99..118328f 100644 --- a/packages/kurt-cache/package.json +++ b/packages/kurt-cache/package.json @@ -39,6 +39,7 @@ "semantic-release-monorepo": "^8.0.2", "ts-jest": "^29.1.2", "type-fest": "^4.30.0", - "typescript": "^5.4.5" + "typescript": "^5.4.5", + "zod": "^3.23.8" } } diff --git a/packages/kurt-cache/spec/KurtCache.spec.ts b/packages/kurt-cache/spec/KurtCache.spec.ts index 8d7eb9c..1a288c0 100644 --- a/packages/kurt-cache/spec/KurtCache.spec.ts +++ b/packages/kurt-cache/spec/KurtCache.spec.ts @@ -1,4 +1,5 @@ import { describe, test, expect } from "@jest/globals" +import { z } from "zod" import { existsSync, readFileSync, @@ -29,8 +30,17 @@ const cacheDir = `${__dirname}/../.kurt-cache/test` const cacheDirRetain = `${cacheDir}-retain` // A convenience function to make the test cases succinct one-liners. -const gen = async (kurt: Kurt, prompt: string) => - (await kurt.generateNaturalLanguage({ prompt }).result).text +async function gen( + kurt: Kurt, + prompt: string, + schema?: KurtSchema +) { + const stream = schema + ? kurt.generateStructuredData({ prompt, schema }) + : kurt.generateNaturalLanguage({ prompt }) + const result = await stream.result + return schema ? result.data : result.text +} describe("KurtCache", () => { test("when cache misses, runs the adapter setup fn just once", async () => { @@ -74,31 +84,33 @@ describe("KurtCache", () => { // We compare with a hard-coded hash here to test that the hash function is // stable/deterministic across library versions of KurtCache. // - // If you find yourself needing to change this hash value, it means + // If you find yourself needing to change this hash value when you didn't + // change the prompt or schema used in the test, it probably means // that you're breaking all existing cache entries, which is a breaking // change for users of KurtCache who rely on it for their test suites. const hash = - "ad557ba1818e8013f9e2fbc9598c034e263c96c5fe7edd491e75be8ce450f5c9" + "b8a5a99fa499ef332c4a599d5b1eff433fc0ee7cd6995e86fb7dbfd8b9ffe999" const filePath = `${cacheDirRetain}/stub-${hash}.yaml` // Assert that the cache file entry already exists (it has been // committed into the repo and retained there) const cached = readFileSync(filePath, "utf8") - expect(cached).toContain("text: This was cached on disk") + expect(cached).toContain("text: '{\"cached\":true}'") // Use the cache adapter configured appropriately to find the cache entry. let adapterFnCallCount = 0 const kurt = new Kurt( new KurtCache(cacheDirRetain, "stub", () => { adapterFnCallCount++ - return new StubAdapter([["This was cached", " on disk"]]) + return new StubAdapter([['{"cached":', "true}"]]) }) ) // Expect the cache hit to return the result text from the file. - expect(await gen(kurt, "Was this cached?")).toEqual( - "This was cached on disk" - ) + const schema = z.object({ cached: z.boolean() }).strict() + expect(await gen(kurt, "Was this cached?", schema)).toEqual({ + cached: true, + }) // Expect that the adapter setup function was never called. expect(adapterFnCallCount).toEqual(0) @@ -108,9 +120,9 @@ describe("KurtCache", () => { // Delete the cache file and prove that it regenerates exactly the same. rmSync(filePath) - expect(await gen(kurt, "Was this cached?")).toEqual( - "This was cached on disk" - ) + expect(await gen(kurt, "Was this cached?", schema)).toEqual({ + cached: true, + }) expect(adapterFnCallCount).toEqual(1) expect(readFileSync(filePath, "utf8")).toEqual(cached) }) @@ -198,11 +210,16 @@ class StubAdapter yield { finished: true, text, data: undefined } } - transformStructuredDataFromRawEvents( + async *transformStructuredDataFromRawEvents( schema: KurtSchema, rawEvents: AsyncIterable<{ bytes: string }> ): AsyncIterable>> { - throw new Error("Not implemented because tests here don't use it") + let text = "" + for await (const { bytes } of rawEvents) { + text += bytes + yield { chunk: bytes } + } + yield { finished: true, text, data: schema.parse(JSON.parse(text)) } } transformWithOptionalToolsFromRawEvents( diff --git a/packages/kurt-cache/src/KurtCache.ts b/packages/kurt-cache/src/KurtCache.ts index fdbb9c5..00d4116 100644 --- a/packages/kurt-cache/src/KurtCache.ts +++ b/packages/kurt-cache/src/KurtCache.ts @@ -14,7 +14,7 @@ import type { import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs" import { createHash, type Hash } from "node:crypto" import { stringify as stringifyYAML, parse as parseYAML } from "yaml" -import { zodToJsonSchema } from "zod-to-json-schema" +import { zodToJsonSchema, type JsonSchema7ObjectType } from "zod-to-json-schema" type CacheData = { messages: KurtMessage[] @@ -23,7 +23,7 @@ type CacheData = { [key: string]: { name: string description: string - parameters: KurtSchema + parameters: JsonSchema7ObjectType } } forceTool?: string @@ -100,7 +100,15 @@ export class KurtCache } generateRawEvents( - options: Omit + options: Omit & { + tools: { + [key: string]: { + name: string + description: string + parameters: KurtSchema + } + } + } ): AsyncIterable> { // Hash the incoming options to determine the cache key. const digest = createHash("sha256") @@ -120,7 +128,20 @@ export class KurtCache const adapter = this.lazyAdapter() return new ResponseEventsShouldCache( cacheFilePath, - options, + { + ...options, + tools: Object.fromEntries( + Object.entries(options.tools).map(([name, tool]) => [ + name, + { + ...tool, + parameters: zodToJsonSchema( + tool.parameters + ) as JsonSchema7ObjectType, + }, + ]) + ), + }, adapter.generateRawEvents({ messages: adapter.transformToRawMessages(options.messages), sampling: options.sampling, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 44aba9f..e771ed1 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -146,6 +146,9 @@ importers: typescript: specifier: ^5.4.5 version: 5.4.5 + zod: + specifier: ^3.23.8 + version: 3.23.8 packages/kurt-open-ai: dependencies: