From fc87ecef584a8213566aeef3962edb655056a93b Mon Sep 17 00:00:00 2001 From: Narendranath Gogineni Date: Mon, 27 Jan 2025 21:46:02 +0530 Subject: [PATCH 1/6] fix deepinfra streaming responses --- src/utils.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.ts b/src/utils.ts index 3ec89a6de..556547e27 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -40,7 +40,7 @@ export const getStreamModeSplitPattern = ( } if (proxyProvider === DEEPINFRA) { - splitPattern = '\r\n\r\n'; + splitPattern = '\n'; } if (proxyProvider === SAMBANOVA) { From 6c7f28de4d4dacf3a1f224af1d3418daaf0b4beb Mon Sep 17 00:00:00 2001 From: Narendranath Gogineni Date: Tue, 28 Jan 2025 13:45:44 +0530 Subject: [PATCH 2/6] transform vertex logprobs --- .../google-vertex-ai/chatComplete.ts | 20 ++++- .../transformGenerationConfig.ts | 6 ++ src/providers/google-vertex-ai/types.ts | 82 ++++++++++++------- src/providers/google-vertex-ai/utils.ts | 43 +++++++++- src/providers/types.ts | 11 +++ 5 files changed, 128 insertions(+), 34 deletions(-) diff --git a/src/providers/google-vertex-ai/chatComplete.ts b/src/providers/google-vertex-ai/chatComplete.ts index b172a9f29..27ee6f8f2 100644 --- a/src/providers/google-vertex-ai/chatComplete.ts +++ b/src/providers/google-vertex-ai/chatComplete.ts @@ -26,6 +26,7 @@ import { import { ChatCompletionResponse, ErrorResponse, + Logprobs, ProviderConfig, } from '../types'; import { @@ -40,7 +41,11 @@ import type { VertexLLamaChatCompleteResponse, GoogleSearchRetrievalTool, } from './types'; -import { getMimeType, recursivelyDeleteUnsupportedParameters } from './utils'; +import { + getMimeType, + recursivelyDeleteUnsupportedParameters, + transformVertexLogprobs, +} from './utils'; export const buildGoogleSearchRetrievalTool = (tool: Tool) => { const googleSearchRetrievalTool: GoogleSearchRetrievalTool = { @@ -247,6 +252,14 @@ export const VertexGoogleChatCompleteConfig: ProviderConfig = { param: 'generationConfig', transform: (params: Params) => 
transformGenerationConfig(params), }, + logprobs: { + param: 'generationConfig', + transform: (params: Params) => transformGenerationConfig(params), + }, + top_logprobs: { + param: 'generationConfig', + transform: (params: Params) => transformGenerationConfig(params), + }, // https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/configure-safety-attributes // Example payload to be included in the request that sets the safety settings: // "safety_settings": [ @@ -682,10 +695,15 @@ export const GoogleChatCompleteResponseTransform: ( }), }; } + let logprobsContent: Logprobs[] = transformVertexLogprobs(generation); + return { message: message, index: index, finish_reason: generation.finishReason, + logprobs: { + content: logprobsContent, + }, ...(!strictOpenAiCompliance && { safetyRatings: generation.safetyRatings, }), diff --git a/src/providers/google-vertex-ai/transformGenerationConfig.ts b/src/providers/google-vertex-ai/transformGenerationConfig.ts index 654f66e65..cd1aa231c 100644 --- a/src/providers/google-vertex-ai/transformGenerationConfig.ts +++ b/src/providers/google-vertex-ai/transformGenerationConfig.ts @@ -26,6 +26,12 @@ export function transformGenerationConfig(params: Params) { if (params?.response_format?.type === 'json_object') { generationConfig['responseMimeType'] = 'application/json'; } + if (params['logprobs']) { + generationConfig['responseLogprobs'] = params['logprobs']; + } + if (params['top_logprobs']) { + generationConfig['logprobs'] = params['top_logprobs']; // range 1-5, openai supports 1-20 + } if (params?.response_format?.type === 'json_schema') { generationConfig['responseMimeType'] = 'application/json'; recursivelyDeleteUnsupportedParameters( diff --git a/src/providers/google-vertex-ai/types.ts b/src/providers/google-vertex-ai/types.ts index 6a57451b7..743bea8e0 100644 --- a/src/providers/google-vertex-ai/types.ts +++ b/src/providers/google-vertex-ai/types.ts @@ -14,40 +14,60 @@ export interface GoogleGenerateFunctionCall { 
args: Record; } -export interface GoogleGenerateContentResponse { - candidates: { - content: { - parts: { - text?: string; - thought?: string; // for models like gemini-2.0-flash-thinking-exp refer: https://ai.google.dev/gemini-api/docs/thinking-mode#streaming_model_thinking - functionCall?: GoogleGenerateFunctionCall; - }[]; - }; - finishReason: string; - index: 0; - safetyRatings: { - category: string; - probability: string; +export interface GoogleResponseCandidate { + content: { + parts: { + text?: string; + thought?: string; // for models like gemini-2.0-flash-thinking-exp refer: https://ai.google.dev/gemini-api/docs/thinking-mode#streaming_model_thinking + functionCall?: GoogleGenerateFunctionCall; }[]; - groundingMetadata?: { - webSearchQueries?: string[]; - searchEntryPoint?: { - renderedContent: string; - }; - groundingSupports?: Array<{ - segment: { - startIndex: number; - endIndex: number; - text: string; - }; - groundingChunkIndices: number[]; - confidenceScores: number[]; - }>; - retrievalMetadata?: { - webDynamicRetrievalScore: number; + }; + logprobsResult?: { + topCandidates: [ + { + candidates: [ + { + token: string; + logProbability: number; + }, + ]; + }, + ]; + chosenCandidates: [ + { + token: string; + logProbability: number; + }, + ]; + }; + finishReason: string; + index: 0; + safetyRatings: { + category: string; + probability: string; + }[]; + groundingMetadata?: { + webSearchQueries?: string[]; + searchEntryPoint?: { + renderedContent: string; + }; + groundingSupports?: Array<{ + segment: { + startIndex: number; + endIndex: number; + text: string; }; + groundingChunkIndices: number[]; + confidenceScores: number[]; + }>; + retrievalMetadata?: { + webDynamicRetrievalScore: number; }; - }[]; + }; +} + +export interface GoogleGenerateContentResponse { + candidates: GoogleResponseCandidate[]; promptFeedback: { safetyRatings: { category: string; diff --git a/src/providers/google-vertex-ai/utils.ts b/src/providers/google-vertex-ai/utils.ts index 
55dc83715..af09cc5fd 100644 --- a/src/providers/google-vertex-ai/utils.ts +++ b/src/providers/google-vertex-ai/utils.ts @@ -1,7 +1,7 @@ -import { GoogleErrorResponse } from './types'; +import { GoogleErrorResponse, GoogleResponseCandidate } from './types'; import { generateErrorResponse } from '../utils'; import { fileExtensionMimeTypeMap, GOOGLE_VERTEX_AI } from '../../globals'; -import { ErrorResponse } from '../types'; +import { ErrorResponse, Logprobs } from '../types'; /** * Encodes an object as a Base64 URL-encoded string. @@ -220,3 +220,42 @@ export const recursivelyDeleteUnsupportedParameters = (obj: any) => { } } }; + +export const transformVertexLogprobs = ( + generation: GoogleResponseCandidate +) => { + let logprobsContent: Logprobs[] = []; + if (generation.logprobsResult?.chosenCandidates) { + generation.logprobsResult.chosenCandidates.forEach((candidate) => { + let bytes = []; + for (const char of candidate.token) { + bytes.push(char.charCodeAt(0)); + } + logprobsContent.push({ + token: candidate.token, + logprob: candidate.logProbability, + bytes: bytes, + }); + }); + } + if (generation.logprobsResult?.topCandidates) { + generation.logprobsResult.topCandidates.forEach( + (topCandidatesForIndex, index) => { + let topLogprobs = []; + for (const candidate of topCandidatesForIndex.candidates) { + let bytes = []; + for (const char of candidate.token) { + bytes.push(char.charCodeAt(0)); + } + topLogprobs.push({ + token: candidate.token, + logprob: candidate.logProbability, + bytes: bytes, + }); + } + logprobsContent[index].top_logprobs = topLogprobs; + } + ); + } + return logprobsContent; +}; diff --git a/src/providers/types.ts b/src/providers/types.ts index 7bcacbfc6..3afb2ad61 100644 --- a/src/providers/types.ts +++ b/src/providers/types.ts @@ -158,6 +158,17 @@ export interface ChatChoice { logprobs?: object | null; } +export interface Logprobs { + token: string; + logprob: number; + bytes: number[]; + top_logprobs?: { + token: string; + logprob: number; 
+ bytes: number[]; + }[]; +} + /** * The structure of a completion response for the 'chatComplete' function. * @interface From bf5bdb356a8211a87585567d7e0dc6d6e1e4e2de Mon Sep 17 00:00:00 2001 From: Narendranath Gogineni Date: Tue, 28 Jan 2025 14:12:19 +0530 Subject: [PATCH 3/6] support logprobs for google gemini as well --- src/providers/google/chatComplete.ts | 105 +++++++++++++++++++-------- 1 file changed, 74 insertions(+), 31 deletions(-) diff --git a/src/providers/google/chatComplete.ts b/src/providers/google/chatComplete.ts index bf03b647c..3c8807e4c 100644 --- a/src/providers/google/chatComplete.ts +++ b/src/providers/google/chatComplete.ts @@ -13,10 +13,12 @@ import { derefer, getMimeType, recursivelyDeleteUnsupportedParameters, + transformVertexLogprobs, } from '../google-vertex-ai/utils'; import { ChatCompletionResponse, ErrorResponse, + Logprobs, ProviderConfig, } from '../types'; import { @@ -47,6 +49,12 @@ const transformGenerationConfig = (params: Params) => { if (params?.response_format?.type === 'json_object') { generationConfig['responseMimeType'] = 'application/json'; } + if (params['logprobs']) { + generationConfig['responseLogprobs'] = params['logprobs']; + } + if (params['top_logprobs']) { + generationConfig['logprobs'] = params['top_logprobs']; // range 1-5, openai supports 1-20 + } if (params?.response_format?.type === 'json_schema') { generationConfig['responseMimeType'] = 'application/json'; recursivelyDeleteUnsupportedParameters( @@ -331,6 +339,14 @@ export const GoogleChatCompleteConfig: ProviderConfig = { param: 'generationConfig', transform: (params: Params) => transformGenerationConfig(params), }, + logprobs: { + param: 'generationConfig', + transform: (params: Params) => transformGenerationConfig(params), + }, + top_logprobs: { + param: 'generationConfig', + transform: (params: Params) => transformGenerationConfig(params), + }, tools: { param: 'tools', default: '', @@ -397,40 +413,60 @@ interface GoogleGenerateFunctionCall { args: 
Record; } -interface GoogleGenerateContentResponse { - candidates: { - content: { - parts: { - text?: string; - thought?: string; // for models like gemini-2.0-flash-thinking-exp refer: https://ai.google.dev/gemini-api/docs/thinking-mode#streaming_model_thinking - functionCall?: GoogleGenerateFunctionCall; - }[]; - }; - finishReason: string; - index: 0; - safetyRatings: { - category: string; - probability: string; +interface GoogleResponseCandidate { + content: { + parts: { + text?: string; + thought?: string; // for models like gemini-2.0-flash-thinking-exp refer: https://ai.google.dev/gemini-api/docs/thinking-mode#streaming_model_thinking + functionCall?: GoogleGenerateFunctionCall; }[]; - groundingMetadata?: { - webSearchQueries?: string[]; - searchEntryPoint?: { - renderedContent: string; - }; - groundingSupports?: Array<{ - segment: { - startIndex: number; - endIndex: number; - text: string; - }; - groundingChunkIndices: number[]; - confidenceScores: number[]; - }>; - retrievalMetadata?: { - webDynamicRetrievalScore: number; + }; + logprobsResult?: { + topCandidates: [ + { + candidates: [ + { + token: string; + logProbability: number; + }, + ]; + }, + ]; + chosenCandidates: [ + { + token: string; + logProbability: number; + }, + ]; + }; + finishReason: string; + index: 0; + safetyRatings: { + category: string; + probability: string; + }[]; + groundingMetadata?: { + webSearchQueries?: string[]; + searchEntryPoint?: { + renderedContent: string; + }; + groundingSupports?: Array<{ + segment: { + startIndex: number; + endIndex: number; + text: string; }; + groundingChunkIndices: number[]; + confidenceScores: number[]; + }>; + retrievalMetadata?: { + webDynamicRetrievalScore: number; }; - }[]; + }; +} + +interface GoogleGenerateContentResponse { + candidates: GoogleResponseCandidate[]; promptFeedback: { safetyRatings: { category: string; @@ -528,8 +564,15 @@ export const GoogleChatCompleteResponseTransform: ( }), }; } + let logprobsContent: Logprobs[] = []; + if 
(generation.logprobsResult) { + logprobsContent = transformVertexLogprobs(generation); + } return { message: message, + logprobs: { + content: logprobsContent, + }, index: generation.index ?? idx, finish_reason: generation.finishReason, ...(!strictOpenAiCompliance && generation.groundingMetadata From b909a36052356473161c019bbaf2317518fbb0b0 Mon Sep 17 00:00:00 2001 From: Narendranath Gogineni Date: Tue, 28 Jan 2025 15:01:51 +0530 Subject: [PATCH 4/6] fix usage object for deepseek --- src/providers/deepseek/chatComplete.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/providers/deepseek/chatComplete.ts b/src/providers/deepseek/chatComplete.ts index 9dc46ef8f..f14b02a7d 100644 --- a/src/providers/deepseek/chatComplete.ts +++ b/src/providers/deepseek/chatComplete.ts @@ -106,6 +106,11 @@ interface DeepSeekStreamChunk { object: string; created: number; model: string; + usage?: { + prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + }; choices: { delta: { role?: string | null; @@ -161,6 +166,7 @@ export const DeepSeekChatCompleteResponseTransform: ( export const DeepSeekChatCompleteStreamChunkTransform: ( response: string ) => string = (responseChunk) => { + console.log('responseChunk', responseChunk); let chunk = responseChunk.trim(); chunk = chunk.replace(/^data: /, ''); chunk = chunk.trim(); @@ -182,6 +188,7 @@ export const DeepSeekChatCompleteStreamChunkTransform: ( finish_reason: parsedChunk.choices[0].finish_reason, }, ], + usage: parsedChunk.usage, })}` + '\n\n' ); }; From 411c2fc68c475f86dda1ecc9c93e0d39fbb6e51b Mon Sep 17 00:00:00 2001 From: Narendranath Gogineni Date: Tue, 28 Jan 2025 15:57:30 +0530 Subject: [PATCH 5/6] handle null for logprobs --- src/providers/google-vertex-ai/chatComplete.ts | 13 +++++++++---- src/providers/google-vertex-ai/utils.ts | 1 + src/providers/google/chatComplete.ts | 14 ++++++++------ 3 files changed, 18 insertions(+), 10 deletions(-) diff --git 
a/src/providers/google-vertex-ai/chatComplete.ts b/src/providers/google-vertex-ai/chatComplete.ts index 27ee6f8f2..2f4f34f81 100644 --- a/src/providers/google-vertex-ai/chatComplete.ts +++ b/src/providers/google-vertex-ai/chatComplete.ts @@ -695,15 +695,20 @@ export const GoogleChatCompleteResponseTransform: ( }), }; } - let logprobsContent: Logprobs[] = transformVertexLogprobs(generation); + const logprobsContent: Logprobs[] | null = + transformVertexLogprobs(generation); + let logprobs; + if (logprobsContent) { + logprobs = { + content: logprobsContent, + }; + } return { message: message, index: index, finish_reason: generation.finishReason, - logprobs: { - content: logprobsContent, - }, + logprobs, ...(!strictOpenAiCompliance && { safetyRatings: generation.safetyRatings, }), diff --git a/src/providers/google-vertex-ai/utils.ts b/src/providers/google-vertex-ai/utils.ts index af09cc5fd..b36efb831 100644 --- a/src/providers/google-vertex-ai/utils.ts +++ b/src/providers/google-vertex-ai/utils.ts @@ -225,6 +225,7 @@ export const transformVertexLogprobs = ( generation: GoogleResponseCandidate ) => { let logprobsContent: Logprobs[] = []; + if (!generation.logprobsResult) return null; if (generation.logprobsResult?.chosenCandidates) { generation.logprobsResult.chosenCandidates.forEach((candidate) => { let bytes = []; diff --git a/src/providers/google/chatComplete.ts b/src/providers/google/chatComplete.ts index 3c8807e4c..24ec8975c 100644 --- a/src/providers/google/chatComplete.ts +++ b/src/providers/google/chatComplete.ts @@ -564,15 +564,17 @@ export const GoogleChatCompleteResponseTransform: ( }), }; } - let logprobsContent: Logprobs[] = []; - if (generation.logprobsResult) { - logprobsContent = transformVertexLogprobs(generation); + const logprobsContent: Logprobs[] | null = + transformVertexLogprobs(generation); + let logprobs; + if (logprobsContent) { + logprobs = { + content: logprobsContent, + }; } return { message: message, - logprobs: { - content: 
logprobsContent, - }, + logprobs, index: generation.index ?? idx, finish_reason: generation.finishReason, ...(!strictOpenAiCompliance && generation.groundingMetadata From dc82ddc9f56938206136c95bbc94f61f10a2f890 Mon Sep 17 00:00:00 2001 From: Narendranath Gogineni Date: Tue, 28 Jan 2025 17:18:56 +0530 Subject: [PATCH 6/6] remove debug logs --- src/providers/deepseek/chatComplete.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/providers/deepseek/chatComplete.ts b/src/providers/deepseek/chatComplete.ts index f14b02a7d..30903921c 100644 --- a/src/providers/deepseek/chatComplete.ts +++ b/src/providers/deepseek/chatComplete.ts @@ -166,7 +166,6 @@ export const DeepSeekChatCompleteResponseTransform: ( export const DeepSeekChatCompleteStreamChunkTransform: ( response: string ) => string = (responseChunk) => { - console.log('responseChunk', responseChunk); let chunk = responseChunk.trim(); chunk = chunk.replace(/^data: /, ''); chunk = chunk.trim();