Skip to content

Commit

Permalink
Merge pull request #59 from FormulaMonks/fix/kurt-cache-schema
Browse files Browse the repository at this point in the history
fix: KurtCache handling of schemas - serialize as JSON Schema
  • Loading branch information
jemc authored Dec 5, 2024
2 parents d0fc4cc + aca061a commit 65a209d
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 32 deletions.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
messages:
- role: user
text: Was this cached?
sampling:
maxOutputTokens: 4096
temperature: 0.5
topP: 0.95
tools:
structured_data:
name: structured_data
description: ""
parameters:
type: object
properties:
cached:
type: boolean
required:
- cached
additionalProperties: false
$schema: http://json-schema.org/draft-07/schema#
forceTool: structured_data
response:
- chunk: '{"cached":'
- chunk: true}
- finished: true
text: '{"cached":true}'
data:
cached: true
3 changes: 2 additions & 1 deletion packages/kurt-cache/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
"semantic-release-monorepo": "^8.0.2",
"ts-jest": "^29.1.2",
"type-fest": "^4.30.0",
"typescript": "^5.4.5"
"typescript": "^5.4.5",
"zod": "^3.23.8"
}
}
45 changes: 31 additions & 14 deletions packages/kurt-cache/spec/KurtCache.spec.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { describe, test, expect } from "@jest/globals"
import { z } from "zod"
import {
existsSync,
readFileSync,
Expand Down Expand Up @@ -29,8 +30,17 @@ const cacheDir = `${__dirname}/../.kurt-cache/test`
const cacheDirRetain = `${cacheDir}-retain`

// A convenience function to make the test cases succinct one-liners.
const gen = async (kurt: Kurt, prompt: string) =>
(await kurt.generateNaturalLanguage({ prompt }).result).text
/**
 * Test helper that runs a single prompt through the given Kurt instance and
 * resolves to the final result payload.
 *
 * @param kurt - The Kurt instance to generate with.
 * @param prompt - The prompt passed to the generation call.
 * @param schema - Optional schema. When given, structured data generation is
 *   used and the result's `data` is returned; otherwise natural language
 *   generation is used and the result's `text` is returned.
 */
async function gen(
  kurt: Kurt,
  prompt: string,
  schema?: KurtSchema<KurtSchemaInner>
) {
  // Structured path: only taken when the caller supplied a schema.
  if (schema) {
    const structured = await kurt.generateStructuredData({ prompt, schema })
      .result
    return structured.data
  }

  // Plain-text path.
  const natural = await kurt.generateNaturalLanguage({ prompt }).result
  return natural.text
}

describe("KurtCache", () => {
test("when cache misses, runs the adapter setup fn just once", async () => {
Expand Down Expand Up @@ -74,31 +84,33 @@ describe("KurtCache", () => {
// We compare with a hard-coded hash here to test that the hash function is
// stable/deterministic across library versions of KurtCache.
//
// If you find yourself needing to change this hash value, it means
// If you find yourself needing to change this hash value when you didn't
// change the prompt or schema used in the test, it probably means
// that you're breaking all existing cache entries, which is a breaking
// change for users of KurtCache who rely on it for their test suites.
const hash =
"ad557ba1818e8013f9e2fbc9598c034e263c96c5fe7edd491e75be8ce450f5c9"
"b8a5a99fa499ef332c4a599d5b1eff433fc0ee7cd6995e86fb7dbfd8b9ffe999"
const filePath = `${cacheDirRetain}/stub-${hash}.yaml`

// Assert that the cache file entry already exists (it has been
// committed into the repo and retained there)
const cached = readFileSync(filePath, "utf8")
expect(cached).toContain("text: This was cached on disk")
expect(cached).toContain("text: '{\"cached\":true}'")

// Use the cache adapter configured appropriately to find the cache entry.
let adapterFnCallCount = 0
const kurt = new Kurt(
new KurtCache(cacheDirRetain, "stub", () => {
adapterFnCallCount++
return new StubAdapter([["This was cached", " on disk"]])
return new StubAdapter([['{"cached":', "true}"]])
})
)

// Expect the cache hit to return the structured result data from the file.
expect(await gen(kurt, "Was this cached?")).toEqual(
"This was cached on disk"
)
const schema = z.object({ cached: z.boolean() }).strict()
expect(await gen(kurt, "Was this cached?", schema)).toEqual({
cached: true,
})

// Expect that the adapter setup function was never called.
expect(adapterFnCallCount).toEqual(0)
Expand All @@ -108,9 +120,9 @@ describe("KurtCache", () => {

// Delete the cache file and prove that it regenerates exactly the same.
rmSync(filePath)
expect(await gen(kurt, "Was this cached?")).toEqual(
"This was cached on disk"
)
expect(await gen(kurt, "Was this cached?", schema)).toEqual({
cached: true,
})
expect(adapterFnCallCount).toEqual(1)
expect(readFileSync(filePath, "utf8")).toEqual(cached)
})
Expand Down Expand Up @@ -198,11 +210,16 @@ class StubAdapter
yield { finished: true, text, data: undefined }
}

transformStructuredDataFromRawEvents<I extends KurtSchemaInner>(
async *transformStructuredDataFromRawEvents<I extends KurtSchemaInner>(
schema: KurtSchema<I>,
rawEvents: AsyncIterable<{ bytes: string }>
): AsyncIterable<KurtStreamEvent<KurtSchemaResult<I>>> {
throw new Error("Not implemented because tests here don't use it")
let text = ""
for await (const { bytes } of rawEvents) {
text += bytes
yield { chunk: bytes }
}
yield { finished: true, text, data: schema.parse(JSON.parse(text)) }
}

transformWithOptionalToolsFromRawEvents<I extends KurtSchemaInnerMap>(
Expand Down
29 changes: 25 additions & 4 deletions packages/kurt-cache/src/KurtCache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import type {
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"
import { createHash, type Hash } from "node:crypto"
import { stringify as stringifyYAML, parse as parseYAML } from "yaml"
import { zodToJsonSchema } from "zod-to-json-schema"
import { zodToJsonSchema, type JsonSchema7ObjectType } from "zod-to-json-schema"

type CacheData = {
messages: KurtMessage[]
Expand All @@ -23,7 +23,7 @@ type CacheData = {
[key: string]: {
name: string
description: string
parameters: KurtSchema<KurtSchemaInner>
parameters: JsonSchema7ObjectType
}
}
forceTool?: string
Expand Down Expand Up @@ -100,7 +100,15 @@ export class KurtCache<A extends KurtAdapter>
}

generateRawEvents(
options: Omit<CacheData, "response">
options: Omit<CacheData, "response" | "tools"> & {
tools: {
[key: string]: {
name: string
description: string
parameters: KurtSchema<KurtSchemaInner>
}
}
}
): AsyncIterable<AdapterRawEvent<A>> {
// Hash the incoming options to determine the cache key.
const digest = createHash("sha256")
Expand All @@ -120,7 +128,20 @@ export class KurtCache<A extends KurtAdapter>
const adapter = this.lazyAdapter()
return new ResponseEventsShouldCache(
cacheFilePath,
options,
{
...options,
tools: Object.fromEntries(
Object.entries(options.tools).map(([name, tool]) => [
name,
{
...tool,
parameters: zodToJsonSchema(
tool.parameters
) as JsonSchema7ObjectType,
},
])
),
},
adapter.generateRawEvents({
messages: adapter.transformToRawMessages(options.messages),
sampling: options.sampling,
Expand Down
3 changes: 3 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 65a209d

Please sign in to comment.