Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: logprops for vertex and deepinfra streaming response #896

Merged
merged 6 commits into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/providers/deepseek/chatComplete.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ interface DeepSeekStreamChunk {
object: string;
created: number;
model: string;
usage?: {
prompt_tokens: number;
completion_tokens: number;
total_tokens: number;
};
choices: {
delta: {
role?: string | null;
Expand Down Expand Up @@ -161,6 +166,7 @@ export const DeepSeekChatCompleteResponseTransform: (
export const DeepSeekChatCompleteStreamChunkTransform: (
response: string
) => string = (responseChunk) => {
console.log('responseChunk', responseChunk);
narengogi marked this conversation as resolved.
Show resolved Hide resolved
let chunk = responseChunk.trim();
chunk = chunk.replace(/^data: /, '');
chunk = chunk.trim();
Expand All @@ -182,6 +188,7 @@ export const DeepSeekChatCompleteStreamChunkTransform: (
finish_reason: parsedChunk.choices[0].finish_reason,
},
],
usage: parsedChunk.usage,
})}` + '\n\n'
);
};
25 changes: 24 additions & 1 deletion src/providers/google-vertex-ai/chatComplete.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import {
import {
ChatCompletionResponse,
ErrorResponse,
Logprobs,
ProviderConfig,
} from '../types';
import {
Expand All @@ -40,7 +41,11 @@ import type {
VertexLLamaChatCompleteResponse,
GoogleSearchRetrievalTool,
} from './types';
import { getMimeType, recursivelyDeleteUnsupportedParameters } from './utils';
import {
getMimeType,
recursivelyDeleteUnsupportedParameters,
transformVertexLogprobs,
} from './utils';

export const buildGoogleSearchRetrievalTool = (tool: Tool) => {
const googleSearchRetrievalTool: GoogleSearchRetrievalTool = {
Expand Down Expand Up @@ -247,6 +252,14 @@ export const VertexGoogleChatCompleteConfig: ProviderConfig = {
param: 'generationConfig',
transform: (params: Params) => transformGenerationConfig(params),
},
logprobs: {
param: 'generationConfig',
transform: (params: Params) => transformGenerationConfig(params),
},
top_logprobs: {
param: 'generationConfig',
transform: (params: Params) => transformGenerationConfig(params),
},
// https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/configure-safety-attributes
// Example payload to be included in the request that sets the safety settings:
// "safety_settings": [
Expand Down Expand Up @@ -682,10 +695,20 @@ export const GoogleChatCompleteResponseTransform: (
}),
};
}
const logprobsContent: Logprobs[] | null =
transformVertexLogprobs(generation);
let logprobs;
if (logprobsContent) {
logprobs = {
content: logprobsContent,
};
}

return {
message: message,
index: index,
finish_reason: generation.finishReason,
logprobs,
...(!strictOpenAiCompliance && {
safetyRatings: generation.safetyRatings,
}),
Expand Down
6 changes: 6 additions & 0 deletions src/providers/google-vertex-ai/transformGenerationConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ export function transformGenerationConfig(params: Params) {
if (params?.response_format?.type === 'json_object') {
generationConfig['responseMimeType'] = 'application/json';
}
if (params['logprobs']) {
generationConfig['responseLogprobs'] = params['logprobs'];
}
if (params['top_logprobs']) {
generationConfig['logprobs'] = params['top_logprobs']; // range 1-5, openai supports 1-20
}
if (params?.response_format?.type === 'json_schema') {
generationConfig['responseMimeType'] = 'application/json';
recursivelyDeleteUnsupportedParameters(
Expand Down
82 changes: 51 additions & 31 deletions src/providers/google-vertex-ai/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,40 +14,60 @@ export interface GoogleGenerateFunctionCall {
args: Record<string, any>;
}

export interface GoogleGenerateContentResponse {
candidates: {
content: {
parts: {
text?: string;
thought?: string; // for models like gemini-2.0-flash-thinking-exp refer: https://ai.google.dev/gemini-api/docs/thinking-mode#streaming_model_thinking
functionCall?: GoogleGenerateFunctionCall;
}[];
};
finishReason: string;
index: 0;
safetyRatings: {
category: string;
probability: string;
export interface GoogleResponseCandidate {
content: {
parts: {
text?: string;
thought?: string; // for models like gemini-2.0-flash-thinking-exp refer: https://ai.google.dev/gemini-api/docs/thinking-mode#streaming_model_thinking
functionCall?: GoogleGenerateFunctionCall;
}[];
groundingMetadata?: {
webSearchQueries?: string[];
searchEntryPoint?: {
renderedContent: string;
};
groundingSupports?: Array<{
segment: {
startIndex: number;
endIndex: number;
text: string;
};
groundingChunkIndices: number[];
confidenceScores: number[];
}>;
retrievalMetadata?: {
webDynamicRetrievalScore: number;
};
logprobsResult?: {
topCandidates: [
{
candidates: [
{
token: string;
logProbability: number;
},
];
},
];
chosenCandidates: [
{
token: string;
logProbability: number;
},
];
};
finishReason: string;
index: 0;
safetyRatings: {
category: string;
probability: string;
}[];
groundingMetadata?: {
webSearchQueries?: string[];
searchEntryPoint?: {
renderedContent: string;
};
groundingSupports?: Array<{
segment: {
startIndex: number;
endIndex: number;
text: string;
};
groundingChunkIndices: number[];
confidenceScores: number[];
}>;
retrievalMetadata?: {
webDynamicRetrievalScore: number;
};
}[];
};
}

export interface GoogleGenerateContentResponse {
candidates: GoogleResponseCandidate[];
promptFeedback: {
safetyRatings: {
category: string;
Expand Down
44 changes: 42 additions & 2 deletions src/providers/google-vertex-ai/utils.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { GoogleErrorResponse } from './types';
import { GoogleErrorResponse, GoogleResponseCandidate } from './types';
import { generateErrorResponse } from '../utils';
import { fileExtensionMimeTypeMap, GOOGLE_VERTEX_AI } from '../../globals';
import { ErrorResponse } from '../types';
import { ErrorResponse, Logprobs } from '../types';

/**
* Encodes an object as a Base64 URL-encoded string.
Expand Down Expand Up @@ -220,3 +220,43 @@ export const recursivelyDeleteUnsupportedParameters = (obj: any) => {
}
}
};

/**
 * Converts a Vertex AI `logprobsResult` into the OpenAI-style
 * `logprobs.content` array (token, logprob, bytes, top_logprobs).
 *
 * @param generation - A single Vertex AI response candidate.
 * @returns The OpenAI-shaped logprobs content, or `null` when the
 *   candidate carries no `logprobsResult`.
 */
export const transformVertexLogprobs = (
  generation: GoogleResponseCandidate
) => {
  if (!generation.logprobsResult) return null;

  // OpenAI's `bytes` field is the UTF-8 byte sequence of the token.
  // (`charCodeAt` would yield UTF-16 code units, which is wrong for
  // any non-ASCII token.)
  const utf8Bytes = (token: string): number[] =>
    Array.from(new TextEncoder().encode(token));

  const logprobsContent: Logprobs[] = [];

  if (generation.logprobsResult.chosenCandidates) {
    for (const candidate of generation.logprobsResult.chosenCandidates) {
      logprobsContent.push({
        token: candidate.token,
        logprob: candidate.logProbability,
        bytes: utf8Bytes(candidate.token),
      });
    }
  }

  if (generation.logprobsResult.topCandidates) {
    generation.logprobsResult.topCandidates.forEach(
      (topCandidatesForIndex, index) => {
        // Guard: Vertex may return topCandidates without (or longer than)
        // chosenCandidates; skip positions with no chosen-token entry
        // instead of throwing on an undefined element.
        if (!logprobsContent[index]) return;
        logprobsContent[index].top_logprobs = topCandidatesForIndex.candidates.map(
          (candidate) => ({
            token: candidate.token,
            logprob: candidate.logProbability,
            bytes: utf8Bytes(candidate.token),
          })
        );
      }
    );
  }

  return logprobsContent;
};
107 changes: 76 additions & 31 deletions src/providers/google/chatComplete.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ import {
derefer,
getMimeType,
recursivelyDeleteUnsupportedParameters,
transformVertexLogprobs,
} from '../google-vertex-ai/utils';
import {
ChatCompletionResponse,
ErrorResponse,
Logprobs,
ProviderConfig,
} from '../types';
import {
Expand Down Expand Up @@ -47,6 +49,12 @@ const transformGenerationConfig = (params: Params) => {
if (params?.response_format?.type === 'json_object') {
generationConfig['responseMimeType'] = 'application/json';
}
if (params['logprobs']) {
generationConfig['responseLogprobs'] = params['logprobs'];
}
if (params['top_logprobs']) {
generationConfig['logprobs'] = params['top_logprobs']; // range 1-5, openai supports 1-20
}
if (params?.response_format?.type === 'json_schema') {
generationConfig['responseMimeType'] = 'application/json';
recursivelyDeleteUnsupportedParameters(
Expand Down Expand Up @@ -331,6 +339,14 @@ export const GoogleChatCompleteConfig: ProviderConfig = {
param: 'generationConfig',
transform: (params: Params) => transformGenerationConfig(params),
},
logprobs: {
param: 'generationConfig',
transform: (params: Params) => transformGenerationConfig(params),
},
top_logprobs: {
param: 'generationConfig',
transform: (params: Params) => transformGenerationConfig(params),
},
tools: {
param: 'tools',
default: '',
Expand Down Expand Up @@ -397,40 +413,60 @@ interface GoogleGenerateFunctionCall {
args: Record<string, any>;
}

interface GoogleGenerateContentResponse {
candidates: {
content: {
parts: {
text?: string;
thought?: string; // for models like gemini-2.0-flash-thinking-exp refer: https://ai.google.dev/gemini-api/docs/thinking-mode#streaming_model_thinking
functionCall?: GoogleGenerateFunctionCall;
}[];
};
finishReason: string;
index: 0;
safetyRatings: {
category: string;
probability: string;
interface GoogleResponseCandidate {
content: {
parts: {
text?: string;
thought?: string; // for models like gemini-2.0-flash-thinking-exp refer: https://ai.google.dev/gemini-api/docs/thinking-mode#streaming_model_thinking
functionCall?: GoogleGenerateFunctionCall;
}[];
groundingMetadata?: {
webSearchQueries?: string[];
searchEntryPoint?: {
renderedContent: string;
};
groundingSupports?: Array<{
segment: {
startIndex: number;
endIndex: number;
text: string;
};
groundingChunkIndices: number[];
confidenceScores: number[];
}>;
retrievalMetadata?: {
webDynamicRetrievalScore: number;
};
logprobsResult?: {
topCandidates: [
{
candidates: [
{
token: string;
logProbability: number;
},
];
},
];
chosenCandidates: [
{
token: string;
logProbability: number;
},
];
};
finishReason: string;
index: 0;
safetyRatings: {
category: string;
probability: string;
}[];
groundingMetadata?: {
webSearchQueries?: string[];
searchEntryPoint?: {
renderedContent: string;
};
groundingSupports?: Array<{
segment: {
startIndex: number;
endIndex: number;
text: string;
};
groundingChunkIndices: number[];
confidenceScores: number[];
}>;
retrievalMetadata?: {
webDynamicRetrievalScore: number;
};
}[];
};
}

interface GoogleGenerateContentResponse {
candidates: GoogleResponseCandidate[];
promptFeedback: {
safetyRatings: {
category: string;
Expand Down Expand Up @@ -528,8 +564,17 @@ export const GoogleChatCompleteResponseTransform: (
}),
};
}
const logprobsContent: Logprobs[] | null =
transformVertexLogprobs(generation);
let logprobs;
if (logprobsContent) {
logprobs = {
content: logprobsContent,
};
}
return {
message: message,
logprobs,
index: generation.index ?? idx,
finish_reason: generation.finishReason,
...(!strictOpenAiCompliance && generation.groundingMetadata
Expand Down
Loading
Loading