diff --git a/genkit-tools/common/src/types/document.ts b/genkit-tools/common/src/types/document.ts new file mode 100644 index 000000000..bee366379 --- /dev/null +++ b/genkit-tools/common/src/types/document.ts @@ -0,0 +1,50 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import z from 'zod'; + +// +// IMPORTANT: Keep this file in sync with genkit/ai/src/document.ts! +// + +const EmptyPartSchema = z.object({ + text: z.never().optional(), + media: z.never().optional(), +}); + +export const TextPartSchema = EmptyPartSchema.extend({ + /** The text of the document. */ + text: z.string(), +}); +export type TextPart = z.infer<typeof TextPartSchema>; + +export const MediaPartSchema = EmptyPartSchema.extend({ + media: z.object({ + /** The media content type. Inferred from data uri if not provided. */ + contentType: z.string().optional(), + /** A `data:` or `https:` uri containing the media content. 
*/ + url: z.string(), + }), +}); +export type MediaPart = z.infer<typeof MediaPartSchema>; + +export const PartSchema = z.union([TextPartSchema, MediaPartSchema]); +export type Part = z.infer<typeof PartSchema>; + +export const DocumentDataSchema = z.object({ + content: z.array(PartSchema), + metadata: z.record(z.string(), z.any()).optional(), +}); +export type DocumentData = z.infer<typeof DocumentDataSchema>; diff --git a/genkit-tools/common/src/types/model.ts b/genkit-tools/common/src/types/model.ts index a9797c71c..6d083509b 100644 --- a/genkit-tools/common/src/types/model.ts +++ b/genkit-tools/common/src/types/model.ts @@ -13,37 +13,39 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +import { z } from 'zod'; +import { DocumentDataSchema } from './document.js'; // // IMPORTANT: Keep this file in sync with genkit/ai/src/model.ts! // -import { z } from 'zod'; -export const TextPartSchema = z.object({ - /** The text of the message. */ - text: z.string(), +const EmptyPartSchema = z.object({ + text: z.never().optional(), media: z.never().optional(), toolRequest: z.never().optional(), toolResponse: z.never().optional(), + data: z.unknown().optional(), + metadata: z.record(z.unknown()).optional(), +}); + +export const TextPartSchema = EmptyPartSchema.extend({ + /** The text of the message. */ + text: z.string(), }); export type TextPart = z.infer<typeof TextPartSchema>; -export const MediaPartSchema = z.object({ - text: z.never().optional(), +export const MediaPartSchema = EmptyPartSchema.extend({ media: z.object({ /** The media content type. Inferred from data uri if not provided. */ contentType: z.string().optional(), /** A `data:` or `https:` uri containing the media content. 
*/ url: z.string(), }), - toolRequest: z.never().optional(), - toolResponse: z.never().optional(), }); export type MediaPart = z.infer<typeof MediaPartSchema>; -export const ToolRequestPartSchema = z.object({ - text: z.never().optional(), - media: z.never().optional(), +export const ToolRequestPartSchema = EmptyPartSchema.extend({ /** A request for a tool to be executed, usually provided by a model. */ toolRequest: z.object({ /** The call id or reference for a specific request. */ @@ -53,14 +55,10 @@ export const ToolRequestPartSchema = z.object({ /** The input parameters for the tool, usually a JSON object. */ input: z.unknown().optional(), }), - toolResponse: z.never().optional(), }); export type ToolRequestPart = z.infer<typeof ToolRequestPartSchema>; -export const ToolResponsePartSchema = z.object({ - text: z.never().optional(), - media: z.never().optional(), - toolRequest: z.never().optional(), +export const ToolResponsePartSchema = EmptyPartSchema.extend({ /** A provided response to a tool call. */ toolResponse: z.object({ /** The call id or reference for a specific request. */ @@ -73,11 +71,18 @@ export const ToolResponsePartSchema = z.object({ }); export type ToolResponsePart = z.infer<typeof ToolResponsePartSchema>; +export const DataPartSchema = EmptyPartSchema.extend({ + data: z.unknown(), +}); + +export type DataPart = z.infer<typeof DataPartSchema>; + export const PartSchema = z.union([ TextPartSchema, MediaPartSchema, ToolRequestPartSchema, ToolResponsePartSchema, + DataPartSchema, ]); export type Part = z.infer<typeof PartSchema>; @@ -90,8 +95,36 @@ export const MessageSchema = z.object({ }); export type MessageData = z.infer<typeof MessageSchema>; +const OutputFormatSchema = z.enum(['json', 'text', 'media']); + +export const ModelInfoSchema = z.object({ + /** Acceptable names for this model (e.g. different versions). */ + versions: z.array(z.string()).optional(), + /** Friendly label for this model (e.g. "Google AI - Gemini Pro") */ + label: z.string().optional(), + /** Supported model capabilities. */ + supports: z + .object({ + /** Model can process historical messages passed with a prompt. 
*/ + multiturn: z.boolean().optional(), + /** Model can process media as part of the prompt (multimodal input). */ + media: z.boolean().optional(), + /** Model can perform tool calls. */ + tools: z.boolean().optional(), + /** Model can accept messages with role "system". */ + systemRole: z.boolean().optional(), + /** Model can output this type of data. */ + output: z.array(OutputFormatSchema).optional(), + /** Model can natively support document-based context grounding. */ + context: z.boolean().optional(), + }) + .optional(), +}); +export type ModelInfo = z.infer<typeof ModelInfoSchema>; + export const ToolDefinitionSchema = z.object({ name: z.string(), + description: z.string(), inputSchema: z .record(z.any()) .describe('Valid JSON Schema representing the input of the tool.'), @@ -113,7 +146,7 @@ export const GenerationCommonConfigSchema = z.object({ }); const OutputConfigSchema = z.object({ - format: z.enum(['json', 'text']).optional(), + format: OutputFormatSchema.optional(), schema: z.record(z.any()).optional(), }); export type OutputConfig = z.infer<typeof OutputConfigSchema>; @@ -123,14 +156,24 @@ export const GenerateRequestSchema = z.object({ config: z.any().optional(), tools: z.array(ToolDefinitionSchema).optional(), output: OutputConfigSchema.optional(), + context: z.array(DocumentDataSchema).optional(), candidates: z.number().optional(), }); -export type GenerateRequest = z.infer<typeof GenerateRequestSchema>; + +export interface GenerateRequest< + CustomOptionsSchema extends z.ZodTypeAny = z.ZodTypeAny, +> extends z.infer<typeof GenerateRequestSchema> { + config?: z.infer<CustomOptionsSchema>; +} export const GenerationUsageSchema = z.object({ inputTokens: z.number().optional(), outputTokens: z.number().optional(), totalTokens: z.number().optional(), + inputCharacters: z.number().optional(), + outputCharacters: z.number().optional(), + inputImages: z.number().optional(), + outputImages: z.number().optional(), custom: z.record(z.number()).optional(), }); export type GenerationUsage = z.infer<typeof GenerationUsageSchema>; @@ -145,10 +188,30 @@ export const CandidateSchema = z.object({ }); export type 
CandidateData = z.infer<typeof CandidateSchema>; +export const CandidateErrorSchema = z.object({ + index: z.number(), + code: z.enum(['blocked', 'other', 'unknown']), + message: z.string().optional(), +}); +export type CandidateError = z.infer<typeof CandidateErrorSchema>; + export const GenerateResponseSchema = z.object({ candidates: z.array(CandidateSchema), + latencyMs: z.number().optional(), usage: GenerationUsageSchema.optional(), - request: GenerateRequestSchema.optional(), custom: z.unknown(), + request: GenerateRequestSchema.optional(), }); export type GenerateResponseData = z.infer<typeof GenerateResponseSchema>; + +export const GenerateResponseChunkSchema = z.object({ + /** The index of the candidate this chunk belongs to. */ + index: z.number(), + /** The chunk of content to stream right now. */ + content: z.array(PartSchema), + /** Model-specific extra information attached to this chunk. */ + custom: z.unknown().optional(), +}); +export type GenerateResponseChunkData = z.infer< + typeof GenerateResponseChunkSchema +>; diff --git a/genkit-tools/genkit-schema.json b/genkit-tools/genkit-schema.json index bbefab4dd..1eccf269c 100644 --- a/genkit-tools/genkit-schema.json +++ b/genkit-tools/genkit-schema.json @@ -260,6 +260,30 @@ ], "additionalProperties": false }, + "CandidateError": { + "type": "object", + "properties": { + "index": { + "type": "number" + }, + "code": { + "type": "string", + "enum": [ + "blocked", + "other", + "unknown" + ] + }, + "message": { + "type": "string" + } + }, + "required": [ + "index", + "code" + ], + "additionalProperties": false + }, "Candidate": { "type": "object", "properties": { @@ -294,6 +318,29 @@ ], "additionalProperties": false }, + "DataPart": { + "type": "object", + "properties": { + "text": { + "not": {} + }, + "media": { + "not": {} + }, + "toolRequest": { + "not": {} + }, + "toolResponse": { + "not": {} + }, + "data": {}, + "metadata": { + "type": "object", + "additionalProperties": {} + } + }, + "additionalProperties": false + }, "GenerateRequest": { "type": "object", "properties": { @@ -317,7 
+364,8 @@ "type": "string", "enum": [ "json", - "text" + "text", + "media" ] }, "schema": { @@ -327,6 +375,71 @@ }, "additionalProperties": false }, + "context": { + "type": "array", + "items": { + "type": "object", + "properties": { + "content": { + "type": "array", + "items": { + "anyOf": [ + { + "type": "object", + "properties": { + "text": { + "type": "string" + }, + "media": { + "not": {} + } + }, + "required": [ + "text" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "text": { + "not": {} + }, + "media": { + "type": "object", + "properties": { + "contentType": { + "type": "string" + }, + "url": { + "type": "string" + } + }, + "required": [ + "url" + ], + "additionalProperties": false + } + }, + "required": [ + "media" + ], + "additionalProperties": false + } + ] + } + }, + "metadata": { + "type": "object", + "additionalProperties": {} + } + }, + "required": [ + "content" + ], + "additionalProperties": false + } + }, "candidates": { "type": "number" } @@ -336,6 +449,26 @@ ], "additionalProperties": false }, + "GenerateResponseChunk": { + "type": "object", + "properties": { + "index": { + "type": "number" + }, + "content": { + "type": "array", + "items": { + "$ref": "#/$defs/Part" + } + }, + "custom": {} + }, + "required": [ + "index", + "content" + ], + "additionalProperties": false + }, "GenerateResponse": { "type": "object", "properties": { @@ -345,13 +478,16 @@ "$ref": "#/$defs/Candidate" } }, + "latencyMs": { + "type": "number" + }, "usage": { "$ref": "#/$defs/GenerationUsage" }, + "custom": {}, "request": { "$ref": "#/$defs/GenerateRequest" - }, - "custom": {} + } }, "required": [ "candidates" @@ -397,6 +533,18 @@ "totalTokens": { "type": "number" }, + "inputCharacters": { + "type": "number" + }, + "outputCharacters": { + "type": "number" + }, + "inputImages": { + "type": "number" + }, + "outputImages": { + "type": "number" + }, "custom": { "type": "object", "additionalProperties": { @@ -410,7 +558,7 @@ "type": 
"object", "properties": { "text": { - "not": {} + "$ref": "#/$defs/DataPart/properties/text" }, "media": { "type": "object", @@ -428,10 +576,14 @@ "additionalProperties": false }, "toolRequest": { - "not": {} + "$ref": "#/$defs/DataPart/properties/toolRequest" }, "toolResponse": { - "not": {} + "$ref": "#/$defs/DataPart/properties/toolResponse" + }, + "data": {}, + "metadata": { + "$ref": "#/$defs/DataPart/properties/metadata" } }, "required": [ @@ -458,6 +610,48 @@ ], "additionalProperties": false }, + "ModelInfo": { + "type": "object", + "properties": { + "versions": { + "type": "array", + "items": { + "type": "string" + } + }, + "label": { + "type": "string" + }, + "supports": { + "type": "object", + "properties": { + "multiturn": { + "type": "boolean" + }, + "media": { + "type": "boolean" + }, + "tools": { + "type": "boolean" + }, + "systemRole": { + "type": "boolean" + }, + "output": { + "type": "array", + "items": { + "$ref": "#/$defs/GenerateRequest/properties/output/properties/format" + } + }, + "context": { + "type": "boolean" + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + }, "Part": { "anyOf": [ { @@ -471,6 +665,9 @@ }, { "$ref": "#/$defs/ToolResponsePart" + }, + { + "$ref": "#/$defs/DataPart" } ] }, @@ -490,13 +687,19 @@ "type": "string" }, "media": { - "not": {} + "$ref": "#/$defs/DataPart/properties/media" }, "toolRequest": { - "not": {} + "$ref": "#/$defs/DataPart/properties/toolRequest" }, "toolResponse": { - "not": {} + "$ref": "#/$defs/DataPart/properties/toolResponse" + }, + "data": { + "$ref": "#/$defs/MediaPart/properties/data" + }, + "metadata": { + "$ref": "#/$defs/DataPart/properties/metadata" } }, "required": [ @@ -510,6 +713,9 @@ "name": { "type": "string" }, + "description": { + "type": "string" + }, "inputSchema": { "type": "object", "additionalProperties": {}, @@ -523,6 +729,7 @@ }, "required": [ "name", + "description", "inputSchema" ], "additionalProperties": false @@ -531,10 +738,10 @@ "type": 
"object", "properties": { "text": { - "not": {} + "$ref": "#/$defs/DataPart/properties/text" }, "media": { - "not": {} + "$ref": "#/$defs/DataPart/properties/media" }, "toolRequest": { "type": "object", @@ -553,7 +760,13 @@ "additionalProperties": false }, "toolResponse": { - "not": {} + "$ref": "#/$defs/DataPart/properties/toolResponse" + }, + "data": { + "$ref": "#/$defs/MediaPart/properties/data" + }, + "metadata": { + "$ref": "#/$defs/DataPart/properties/metadata" } }, "required": [ @@ -565,13 +778,13 @@ "type": "object", "properties": { "text": { - "not": {} + "$ref": "#/$defs/DataPart/properties/text" }, "media": { - "not": {} + "$ref": "#/$defs/DataPart/properties/media" }, "toolRequest": { - "not": {} + "$ref": "#/$defs/DataPart/properties/toolRequest" }, "toolResponse": { "type": "object", @@ -588,6 +801,12 @@ "name" ], "additionalProperties": false + }, + "data": { + "$ref": "#/$defs/MediaPart/properties/data" + }, + "metadata": { + "$ref": "#/$defs/DataPart/properties/metadata" } }, "required": [