diff --git a/packages/kurt-open-ai/spec/generateNaturalLanguage.spec.ts b/packages/kurt-open-ai/spec/generateNaturalLanguage.spec.ts index d0cb4b5..d1bde5f 100644 --- a/packages/kurt-open-ai/spec/generateNaturalLanguage.spec.ts +++ b/packages/kurt-open-ai/spec/generateNaturalLanguage.spec.ts @@ -48,4 +48,23 @@ describe("KurtOpenAI generateNaturalLanguage", () => { } ) }) + + test("describes a base64-encoded image", async () => { + const result = await snapshotAndMock((kurt) => + kurt.generateNaturalLanguage({ + prompt: "Describe this emoji, in two words.", + extraMessages: [ + { + role: "user", + imageData: { + mimeType: "image/png", + base64Data: + "iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII=", + }, + }, + ], + }) + ) + 
expect(result.text).toEqual("Heart eyes") + }) }) diff --git a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_describes_a_base64-encoded_image.yaml b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_describes_a_base64-encoded_image.yaml new file mode 100644 index 0000000..93c043a --- /dev/null +++ b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_describes_a_base64-encoded_image.yaml @@ -0,0 +1,67 @@ +step1Request: + stream: true + stream_options: + include_usage: true + model: gpt-4o-2024-05-13 + max_tokens: 4096 + temperature: 0.5 + top_p: 0.95 + messages: + - role: user + content: + - type: text + text: Describe this emoji, in two words. + - type: image_url + image_url: + url: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOc
QjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII= +step2RawChunks: + - choices: + - index: 0 + delta: + role: assistant + content: "" + refusal: null + logprobs: null + finish_reason: null + system_fingerprint: fp_5796ac6771 + usage: null + - choices: + - index: 0 + delta: + content: Heart + logprobs: null + finish_reason: null + system_fingerprint: fp_5796ac6771 + usage: null + - choices: + - index: 0 + delta: + content: " eyes" + logprobs: null + finish_reason: null + system_fingerprint: fp_5796ac6771 + usage: null + - choices: + - index: 0 + delta: {} + logprobs: null + finish_reason: stop + system_fingerprint: fp_5796ac6771 + usage: null + - choices: [] + system_fingerprint: fp_5796ac6771 + usage: + prompt_tokens: 270 + completion_tokens: 2 + total_tokens: 272 + completion_tokens_details: + reasoning_tokens: 0 +step3KurtEvents: + - chunk: Heart + - chunk: " eyes" + - finished: true + text: Heart eyes + metadata: + totalInputTokens: 270 + totalOutputTokens: 2 + systemFingerprint: fp_5796ac6771 diff --git a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_says_hello.yaml b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_says_hello.yaml index 53d3f85..589b171 100644 --- a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_says_hello.yaml +++ b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_says_hello.yaml @@ -8,7 +8,9 @@ step1Request: top_p: 0.95 messages: - role: user - content: Say hello! + content: + - type: text + text: Say hello! 
step2RawChunks: - choices: - index: 0 diff --git a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_throws_a_limit_error.yaml b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_throws_a_limit_error.yaml index fbe4122..e141fe4 100644 --- a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_throws_a_limit_error.yaml +++ b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_throws_a_limit_error.yaml @@ -8,7 +8,9 @@ step1Request: top_p: 0.95 messages: - role: user - content: Compose a haiku about content length limitations. + content: + - type: text + text: Compose a haiku about content length limitations. step2RawChunks: - choices: - index: 0 diff --git a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_writes_a_haiku_with_high_temperature.yaml b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_writes_a_haiku_with_high_temperature.yaml index e6f35b4..b65cadb 100644 --- a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_writes_a_haiku_with_high_temperature.yaml +++ b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateNaturalLanguage_writes_a_haiku_with_high_temperature.yaml @@ -8,7 +8,9 @@ step1Request: top_p: 1 messages: - role: user - content: Compose a haiku about a mountain stream at night. + content: + - type: text + text: Compose a haiku about a mountain stream at night. step2RawChunks: - choices: - index: 0 diff --git a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateStructuredData_says_hello.yaml b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateStructuredData_says_hello.yaml index 3f40e9e..9bd7f8c 100644 --- a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateStructuredData_says_hello.yaml +++ b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateStructuredData_says_hello.yaml @@ -8,7 +8,9 @@ step1Request: top_p: 0.95 messages: - role: user - content: Say hello! 
+ content: + - type: text + text: Say hello! tools: - type: function function: diff --git a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateStructuredData_throws_a_validate_error_from_an_impossible_schema.yaml b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateStructuredData_throws_a_validate_error_from_an_impossible_schema.yaml index 2e50746..35d25cc 100644 --- a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateStructuredData_throws_a_validate_error_from_an_impossible_schema.yaml +++ b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateStructuredData_throws_a_validate_error_from_an_impossible_schema.yaml @@ -8,7 +8,9 @@ step1Request: top_p: 0.95 messages: - role: user - content: Say hello! + content: + - type: text + text: Say hello! tools: - type: function function: diff --git a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateWithOptionalTools_calculator_(after_parallel_tool_calls).yaml b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateWithOptionalTools_calculator_(after_parallel_tool_calls).yaml index a0bae42..42f9067 100644 --- a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateWithOptionalTools_calculator_(after_parallel_tool_calls).yaml +++ b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateWithOptionalTools_calculator_(after_parallel_tool_calls).yaml @@ -8,11 +8,13 @@ step1Request: top_p: 0.95 messages: - role: user - content: |- - Calculate each of the following: - 1. 8026256882 divided by 3402398 - 2. 1185835515 divided by 348263 - 3. 90135094495 minus 89944954350 + content: + - type: text + text: |- + Calculate each of the following: + 1. 8026256882 divided by 3402398 + 2. 1185835515 divided by 348263 + 3. 
90135094495 minus 89944954350 - role: assistant tool_calls: - id: call_1 diff --git a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateWithOptionalTools_calculator_(after_tool_call).yaml b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateWithOptionalTools_calculator_(after_tool_call).yaml index f63f1b3..fe102d6 100644 --- a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateWithOptionalTools_calculator_(after_tool_call).yaml +++ b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateWithOptionalTools_calculator_(after_tool_call).yaml @@ -8,7 +8,9 @@ step1Request: top_p: 0.95 messages: - role: user - content: What's 9876356 divided by 30487, rounded to the nearest integer? + content: + - type: text + text: What's 9876356 divided by 30487, rounded to the nearest integer? - role: assistant tool_calls: - id: call_1 diff --git a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateWithOptionalTools_calculator_(with_parallel_tool_calls).yaml b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateWithOptionalTools_calculator_(with_parallel_tool_calls).yaml index ee90a14..bc5b1f1 100644 --- a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateWithOptionalTools_calculator_(with_parallel_tool_calls).yaml +++ b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateWithOptionalTools_calculator_(with_parallel_tool_calls).yaml @@ -8,11 +8,13 @@ step1Request: top_p: 0.95 messages: - role: user - content: |- - Calculate each of the following: - 1. 8026256882 divided by 3402398 - 2. 1185835515 divided by 348263 - 3. 90135094495 minus 89944954350 + content: + - type: text + text: |- + Calculate each of the following: + 1. 8026256882 divided by 3402398 + 2. 1185835515 divided by 348263 + 3. 
90135094495 minus 89944954350 tools: - type: function function: diff --git a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateWithOptionalTools_calculator_(with_tool_call).yaml b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateWithOptionalTools_calculator_(with_tool_call).yaml index 329c78d..1f2215b 100644 --- a/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateWithOptionalTools_calculator_(with_tool_call).yaml +++ b/packages/kurt-open-ai/spec/snapshots/KurtOpenAI_generateWithOptionalTools_calculator_(with_tool_call).yaml @@ -8,7 +8,9 @@ step1Request: top_p: 0.95 messages: - role: user - content: What's 9876356 divided by 30487, rounded to the nearest integer? + content: + - type: text + text: What's 9876356 divided by 30487, rounded to the nearest integer? tools: - type: function function: diff --git a/packages/kurt-open-ai/src/KurtOpenAI.ts b/packages/kurt-open-ai/src/KurtOpenAI.ts index 04a1340..5b493c7 100644 --- a/packages/kurt-open-ai/src/KurtOpenAI.ts +++ b/packages/kurt-open-ai/src/KurtOpenAI.ts @@ -159,12 +159,30 @@ const openAIRoleMapping = { function toOpenAIMessages(messages: KurtMessage[]): OpenAIMessage[] { const openAIMessages: OpenAIMessage[] = [] + const addMessage = (message: OpenAIMessage) => { + // If we have two messages with the `user` role, we must combine them + // into a multi-part message (a constraint which presumably made sense + // to some OpenAI engineer at some time, but is now totally inscrutable). 
+ const lastMessage = openAIMessages[openAIMessages.length - 1] + if (lastMessage && lastMessage.role === "user" && message.role === "user") { + const lastContent = lastMessage.content + const nextContent = message.content + for (const part of nextContent) lastContent.push(part) + } else { + openAIMessages.push(message) + } + } for (const [messageIndex, message] of messages.entries()) { - const { text, toolCall } = message + const { text, toolCall, imageData } = message if (text) { const role = openAIRoleMapping[message.role] - openAIMessages.push({ role, content: text }) + + if (role === "user") { + addMessage({ role, content: [{ type: "text", text }] }) + } else { + addMessage({ role, content: text }) + } } else if (toolCall) { const { name, args, result } = toolCall @@ -178,7 +196,7 @@ function toOpenAIMessages(messages: KurtMessage[]): OpenAIMessage[] { // We generate a simple sequential id here to satisfy OpenAI's API. const id = `call_${messageIndex}` - openAIMessages.push({ + addMessage({ role: "assistant", tool_calls: [ { @@ -188,13 +206,26 @@ function toOpenAIMessages(messages: KurtMessage[]): OpenAIMessage[] { }, ], }) - openAIMessages.push({ + addMessage({ role: "tool", tool_call_id: id, content: JSON.stringify(result), }) + } else if (imageData && message.role === "user") { + const { mimeType, base64Data } = imageData + + // OpenAI only supports the following MIME types, according to these docs: + // https://platform.openai.com/docs/guides/vision + if (!mimeType.match(/^image\/(jpe?g|png|webp|gif)$/)) + throw new Error(`Unsupported image MIME type: ${mimeType}`) // TODO: Use a subclass of KurtError + + const url = `data:${mimeType};base64,${base64Data}` + addMessage({ + role: "user", // only supported for the user role + content: [{ type: "image_url", image_url: { url } }], + }) } else { - throw new Error(`Invalid KurtMessage: ${JSON.stringify(message)}`) + throw new Error(`Unsupported KurtMessage: ${JSON.stringify(message)}`) // TODO: Use a subclass of 
KurtError } } diff --git a/packages/kurt-open-ai/src/OpenAI.types.ts b/packages/kurt-open-ai/src/OpenAI.types.ts index 34751a1..1fb3c9e 100644 --- a/packages/kurt-open-ai/src/OpenAI.types.ts +++ b/packages/kurt-open-ai/src/OpenAI.types.ts @@ -1,7 +1,11 @@ import type { OpenAI as RealOpenAI } from "openai" import type { ChatCompletionCreateParamsStreaming, - ChatCompletionMessageParam, + ChatCompletionSystemMessageParam, + ChatCompletionUserMessageParam, + ChatCompletionAssistantMessageParam, + ChatCompletionToolMessageParam, + ChatCompletionContentPart, FunctionParameters, ChatCompletionTool, ChatCompletionChunk, @@ -11,7 +15,12 @@ import type { export type OpenAI = RealOpenAI export type OpenAIRequest = ChatCompletionCreateParamsStreaming -export type OpenAIMessage = ChatCompletionMessageParam +export type OpenAIMessage = + | ChatCompletionSystemMessageParam + | (ChatCompletionUserMessageParam & { content: ChatCompletionContentPart[] }) + | ChatCompletionAssistantMessageParam + | ChatCompletionToolMessageParam + export type OpenAISchema = FunctionParameters export type OpenAITool = ChatCompletionTool export type OpenAIResponse = Promise> diff --git a/packages/kurt-vertex-ai/spec/generateNaturalLanguage.spec.ts b/packages/kurt-vertex-ai/spec/generateNaturalLanguage.spec.ts index ef4d391..eaa6fd8 100644 --- a/packages/kurt-vertex-ai/spec/generateNaturalLanguage.spec.ts +++ b/packages/kurt-vertex-ai/spec/generateNaturalLanguage.spec.ts @@ -49,4 +49,23 @@ describe("KurtVertexAI generateNaturalLanguage", () => { } ) }) + + test("describes a base64-encoded image", async () => { + const result = await snapshotAndMock((kurt) => + kurt.generateNaturalLanguage({ + prompt: "Describe this emoji, in two words.", + extraMessages: [ + { + role: "user", + imageData: { + mimeType: "image/png", + base64Data: + 
"iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII=", + }, + }, + ], + }) + ) + expect(result.text).toEqual("Lovestruck smile \n") + }) }) diff --git a/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateNaturalLanguage_describes_a_base64-encoded_image.yaml b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateNaturalLanguage_describes_a_base64-encoded_image.yaml new file mode 100644 index 0000000..ab469d2 --- /dev/null +++ b/packages/kurt-vertex-ai/spec/snapshots/KurtVertexAI_generateNaturalLanguage_describes_a_base64-encoded_image.yaml @@ -0,0 +1,47 @@ +step1Request: + generationConfig: + maxOutputTokens: 4096 + temperature: 0.5 + topP: 0.95 + contents: + - role: user + parts: + - text: Describe this emoji, in two words. 
+ - role: user + parts: + - inlineData: + mimeType: image/png + data: iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII= +step2RawChunks: + - candidates: + - content: + role: model + parts: + - text: Loves + index: 0 + usageMetadata: + promptTokenCount: 266 + candidatesTokenCount: 1 + totalTokenCount: 267 + - candidates: + - content: + role: model + parts: + - text: | + truck smile + finishReason: STOP + index: 0 + usageMetadata: + promptTokenCount: 266 + candidatesTokenCount: 5 + totalTokenCount: 271 +step3KurtEvents: + - chunk: Loves + - chunk: | + truck smile + - finished: true + text: | + Lovestruck smile + metadata: + totalInputTokens: 266 + totalOutputTokens: 5 diff --git a/packages/kurt-vertex-ai/src/KurtVertexAI.ts 
b/packages/kurt-vertex-ai/src/KurtVertexAI.ts index ba502fe..54ec727 100644 --- a/packages/kurt-vertex-ai/src/KurtVertexAI.ts +++ b/packages/kurt-vertex-ai/src/KurtVertexAI.ts @@ -150,7 +150,7 @@ function toVertexAIMessages(messages: KurtMessage[]): VertexAIMessage[] { const vertexAIMessages: VertexAIMessage[] = [] for (const message of messages) { - const { role, text, toolCall } = message + const { role, text, toolCall, imageData } = message if (text) { vertexAIMessages.push({ role, parts: [{ text }] }) } else if (toolCall) { @@ -159,6 +159,10 @@ function toVertexAIMessages(messages: KurtMessage[]): VertexAIMessage[] { const functionResponse = { name, response: result } vertexAIMessages.push({ role, parts: [{ functionCall }] }) vertexAIMessages.push({ role, parts: [{ functionResponse }] }) + } else if (imageData) { + const { mimeType, base64Data } = imageData + const inlineData = { mimeType, data: base64Data } + vertexAIMessages.push({ role, parts: [{ inlineData }] }) } else { throw new Error(`Invalid KurtMessage: ${JSON.stringify(message)}`) }