Skip to content

Commit

Permalink
feat: add support for imageData messages in KurtOpenAI and `KurtVertexAI`
Browse files Browse the repository at this point in the history

Now both of these adapters support sending messages with `imageData`.
  • Loading branch information
jemc committed Sep 27, 2024
1 parent a8a2222 commit 6dd80ac
Show file tree
Hide file tree
Showing 16 changed files with 239 additions and 25 deletions.
19 changes: 19 additions & 0 deletions packages/kurt-open-ai/spec/generateNaturalLanguage.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,23 @@ describe("KurtOpenAI generateNaturalLanguage", () => {
}
)
})

test("describes a base64-encoded image", async () => {
const result = await snapshotAndMock((kurt) =>
kurt.generateNaturalLanguage({
prompt: "Describe this emoji, in two words.",
extraMessages: [
{
role: "user",
imageData: {
mimeType: "image/png",
base64Data:
"iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII=",
},
},
],
})
)
expect(result.text).toEqual("Heart eyes")
})
})
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
step1Request:
stream: true
stream_options:
include_usage: true
model: gpt-4o-2024-05-13
max_tokens: 4096
temperature: 0.5
top_p: 0.95
messages:
- role: user
content:
- type: text
text: Describe this emoji, in two words.
- type: image_url
image_url:
url: 
step2RawChunks:
- choices:
- index: 0
delta:
role: assistant
content: ""
refusal: null
logprobs: null
finish_reason: null
system_fingerprint: fp_5796ac6771
usage: null
- choices:
- index: 0
delta:
content: Heart
logprobs: null
finish_reason: null
system_fingerprint: fp_5796ac6771
usage: null
- choices:
- index: 0
delta:
content: " eyes"
logprobs: null
finish_reason: null
system_fingerprint: fp_5796ac6771
usage: null
- choices:
- index: 0
delta: {}
logprobs: null
finish_reason: stop
system_fingerprint: fp_5796ac6771
usage: null
- choices: []
system_fingerprint: fp_5796ac6771
usage:
prompt_tokens: 270
completion_tokens: 2
total_tokens: 272
completion_tokens_details:
reasoning_tokens: 0
step3KurtEvents:
- chunk: Heart
- chunk: " eyes"
- finished: true
text: Heart eyes
metadata:
totalInputTokens: 270
totalOutputTokens: 2
systemFingerprint: fp_5796ac6771
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ step1Request:
top_p: 0.95
messages:
- role: user
content: Say hello!
content:
- type: text
text: Say hello!
step2RawChunks:
- choices:
- index: 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ step1Request:
top_p: 0.95
messages:
- role: user
content: Compose a haiku about content length limitations.
content:
- type: text
text: Compose a haiku about content length limitations.
step2RawChunks:
- choices:
- index: 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ step1Request:
top_p: 1
messages:
- role: user
content: Compose a haiku about a mountain stream at night.
content:
- type: text
text: Compose a haiku about a mountain stream at night.
step2RawChunks:
- choices:
- index: 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ step1Request:
top_p: 0.95
messages:
- role: user
content: Say hello!
content:
- type: text
text: Say hello!
tools:
- type: function
function:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ step1Request:
top_p: 0.95
messages:
- role: user
content: Say hello!
content:
- type: text
text: Say hello!
tools:
- type: function
function:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ step1Request:
top_p: 0.95
messages:
- role: user
content: |-
Calculate each of the following:
1. 8026256882 divided by 3402398
2. 1185835515 divided by 348263
3. 90135094495 minus 89944954350
content:
- type: text
text: |-
Calculate each of the following:
1. 8026256882 divided by 3402398
2. 1185835515 divided by 348263
3. 90135094495 minus 89944954350
- role: assistant
tool_calls:
- id: call_1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ step1Request:
top_p: 0.95
messages:
- role: user
content: What's 9876356 divided by 30487, rounded to the nearest integer?
content:
- type: text
text: What's 9876356 divided by 30487, rounded to the nearest integer?
- role: assistant
tool_calls:
- id: call_1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ step1Request:
top_p: 0.95
messages:
- role: user
content: |-
Calculate each of the following:
1. 8026256882 divided by 3402398
2. 1185835515 divided by 348263
3. 90135094495 minus 89944954350
content:
- type: text
text: |-
Calculate each of the following:
1. 8026256882 divided by 3402398
2. 1185835515 divided by 348263
3. 90135094495 minus 89944954350
tools:
- type: function
function:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ step1Request:
top_p: 0.95
messages:
- role: user
content: What's 9876356 divided by 30487, rounded to the nearest integer?
content:
- type: text
text: What's 9876356 divided by 30487, rounded to the nearest integer?
tools:
- type: function
function:
Expand Down
41 changes: 36 additions & 5 deletions packages/kurt-open-ai/src/KurtOpenAI.ts
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,30 @@ const openAIRoleMapping = {

function toOpenAIMessages(messages: KurtMessage[]): OpenAIMessage[] {
const openAIMessages: OpenAIMessage[] = []
/**
 * Appends a message to `openAIMessages`.
 *
 * When both the most recently added message and the incoming message have
 * the `user` role, no new entry is added: the incoming content parts are
 * appended to the previous message's content, producing a single multi-part
 * user message. Any other combination is pushed as a separate message.
 */
const addMessage = (message: OpenAIMessage) => {
  const previous = openAIMessages[openAIMessages.length - 1]

  // Anything other than "user followed by user" is added as its own message.
  if (!previous || previous.role !== "user" || message.role !== "user") {
    openAIMessages.push(message)
    return
  }

  // Two consecutive user messages: fold the new parts into the previous one.
  previous.content.push(...message.content)
}

for (const [messageIndex, message] of messages.entries()) {
const { text, toolCall } = message
const { text, toolCall, imageData } = message
if (text) {
const role = openAIRoleMapping[message.role]
openAIMessages.push({ role, content: text })

if (role === "user") {
addMessage({ role, content: [{ type: "text", text }] })
} else {
addMessage({ role, content: text })
}
} else if (toolCall) {
const { name, args, result } = toolCall

Expand All @@ -178,7 +196,7 @@ function toOpenAIMessages(messages: KurtMessage[]): OpenAIMessage[] {
// We generate a simple sequential id here to satisfy OpenAI's API.
const id = `call_${messageIndex}`

openAIMessages.push({
addMessage({
role: "assistant",
tool_calls: [
{
Expand All @@ -188,13 +206,26 @@ function toOpenAIMessages(messages: KurtMessage[]): OpenAIMessage[] {
},
],
})
openAIMessages.push({
addMessage({
role: "tool",
tool_call_id: id,
content: JSON.stringify(result),
})
} else if (imageData && message.role === "user") {
const { mimeType, base64Data } = imageData

// OpenAI only supports PNG, JPEG, WEBP and GIF images, according to these docs:
// https://platform.openai.com/docs/guides/vision
//
// The registered MIME type for JPEG images is `image/jpeg`, so that spelling
// must be accepted; the non-standard `image/jpg` spelling is still allowed
// for backward compatibility with existing callers.
if (!/^image\/(jpe?g|png|webp|gif)$/.test(mimeType))
  throw new Error(`Unsupported image MIME type: ${mimeType}`) // TODO: Use a subclass of KurtError

// The image is sent inline, as a base64 data URL in an `image_url` content part.
const url = `data:${mimeType};base64,${base64Data}`
addMessage({
  role: "user", // only supported for the user role
  content: [{ type: "image_url", image_url: { url } }],
})
} else {
throw new Error(`Invalid KurtMessage: ${JSON.stringify(message)}`)
throw new Error(`Unsupported KurtMessage: ${JSON.stringify(message)}`) // TODO: Use a subclass of KurtError
}
}

Expand Down
13 changes: 11 additions & 2 deletions packages/kurt-open-ai/src/OpenAI.types.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import type { OpenAI as RealOpenAI } from "openai"
import type {
ChatCompletionCreateParamsStreaming,
ChatCompletionMessageParam,
ChatCompletionSystemMessageParam,
ChatCompletionUserMessageParam,
ChatCompletionAssistantMessageParam,
ChatCompletionToolMessageParam,
ChatCompletionContentPart,
FunctionParameters,
ChatCompletionTool,
ChatCompletionChunk,
Expand All @@ -11,7 +15,12 @@ import type {
export type OpenAI = RealOpenAI

export type OpenAIRequest = ChatCompletionCreateParamsStreaming
export type OpenAIMessage = ChatCompletionMessageParam
/**
 * The message shapes this package builds for OpenAI chat completion requests.
 *
 * This is a union of the system, user, assistant and tool message param
 * types, where the user variant is narrowed so that its `content` is always
 * an array of content parts. That lets code append further parts (such as
 * text or image parts) to an existing user message.
 */
export type OpenAIMessage =
| ChatCompletionSystemMessageParam
| (ChatCompletionUserMessageParam & { content: ChatCompletionContentPart[] })
| ChatCompletionAssistantMessageParam
| ChatCompletionToolMessageParam

export type OpenAISchema = FunctionParameters
export type OpenAITool = ChatCompletionTool
export type OpenAIResponse = Promise<AsyncIterable<OpenAIResponseChunk>>
Expand Down
19 changes: 19 additions & 0 deletions packages/kurt-vertex-ai/spec/generateNaturalLanguage.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,23 @@ describe("KurtVertexAI generateNaturalLanguage", () => {
}
)
})

test("describes a base64-encoded image", async () => {
const result = await snapshotAndMock((kurt) =>
kurt.generateNaturalLanguage({
prompt: "Describe this emoji, in two words.",
extraMessages: [
{
role: "user",
imageData: {
mimeType: "image/png",
base64Data:
"iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII=",
},
},
],
})
)
expect(result.text).toEqual("Lovestruck smile \n")
})
})
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
step1Request:
generationConfig:
maxOutputTokens: 4096
temperature: 0.5
topP: 0.95
contents:
- role: user
parts:
- text: Describe this emoji, in two words.
- role: user
parts:
- inlineData:
mimeType: image/png
data: iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII=
step2RawChunks:
- candidates:
- content:
role: model
parts:
- text: Loves
index: 0
usageMetadata:
promptTokenCount: 266
candidatesTokenCount: 1
totalTokenCount: 267
- candidates:
- content:
role: model
parts:
- text: |
truck smile
finishReason: STOP
index: 0
usageMetadata:
promptTokenCount: 266
candidatesTokenCount: 5
totalTokenCount: 271
step3KurtEvents:
- chunk: Loves
- chunk: |
truck smile
- finished: true
text: |
Lovestruck smile
metadata:
totalInputTokens: 266
totalOutputTokens: 5
6 changes: 5 additions & 1 deletion packages/kurt-vertex-ai/src/KurtVertexAI.ts
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ function toVertexAIMessages(messages: KurtMessage[]): VertexAIMessage[] {
const vertexAIMessages: VertexAIMessage[] = []

for (const message of messages) {
const { role, text, toolCall } = message
const { role, text, toolCall, imageData } = message
if (text) {
vertexAIMessages.push({ role, parts: [{ text }] })
} else if (toolCall) {
Expand All @@ -159,6 +159,10 @@ function toVertexAIMessages(messages: KurtMessage[]): VertexAIMessage[] {
const functionResponse = { name, response: result }
vertexAIMessages.push({ role, parts: [{ functionCall }] })
vertexAIMessages.push({ role, parts: [{ functionResponse }] })
} else if (imageData) {
const { mimeType, base64Data } = imageData
const inlineData = { mimeType, data: base64Data }
vertexAIMessages.push({ role, parts: [{ inlineData }] })
} else {
throw new Error(`Invalid KurtMessage: ${JSON.stringify(message)}`)
}
Expand Down

0 comments on commit 6dd80ac

Please sign in to comment.