From fc87ecef584a8213566aeef3962edb655056a93b Mon Sep 17 00:00:00 2001 From: Narendranath Gogineni Date: Mon, 27 Jan 2025 21:46:02 +0530 Subject: [PATCH 1/6] fix deepinfra streaming responses --- src/utils.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.ts b/src/utils.ts index 3ec89a6de..556547e27 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -40,7 +40,7 @@ export const getStreamModeSplitPattern = ( } if (proxyProvider === DEEPINFRA) { - splitPattern = '\r\n\r\n'; + splitPattern = '\n'; } if (proxyProvider === SAMBANOVA) { From 6c7f28de4d4dacf3a1f224af1d3418daaf0b4beb Mon Sep 17 00:00:00 2001 From: Narendranath Gogineni Date: Tue, 28 Jan 2025 13:45:44 +0530 Subject: [PATCH 2/6] transform vertex logprobs --- .../google-vertex-ai/chatComplete.ts | 20 ++++- .../transformGenerationConfig.ts | 6 ++ src/providers/google-vertex-ai/types.ts | 82 ++++++++++++------- src/providers/google-vertex-ai/utils.ts | 43 +++++++++- src/providers/types.ts | 11 +++ 5 files changed, 128 insertions(+), 34 deletions(-) diff --git a/src/providers/google-vertex-ai/chatComplete.ts b/src/providers/google-vertex-ai/chatComplete.ts index b172a9f29..27ee6f8f2 100644 --- a/src/providers/google-vertex-ai/chatComplete.ts +++ b/src/providers/google-vertex-ai/chatComplete.ts @@ -26,6 +26,7 @@ import { import { ChatCompletionResponse, ErrorResponse, + Logprobs, ProviderConfig, } from '../types'; import { @@ -40,7 +41,11 @@ import type { VertexLLamaChatCompleteResponse, GoogleSearchRetrievalTool, } from './types'; -import { getMimeType, recursivelyDeleteUnsupportedParameters } from './utils'; +import { + getMimeType, + recursivelyDeleteUnsupportedParameters, + transformVertexLogprobs, +} from './utils'; export const buildGoogleSearchRetrievalTool = (tool: Tool) => { const googleSearchRetrievalTool: GoogleSearchRetrievalTool = { @@ -247,6 +252,14 @@ export const VertexGoogleChatCompleteConfig: ProviderConfig = { param: 'generationConfig', transform: (params: Params) => 
transformGenerationConfig(params), }, + logprobs: { + param: 'generationConfig', + transform: (params: Params) => transformGenerationConfig(params), + }, + top_logprobs: { + param: 'generationConfig', + transform: (params: Params) => transformGenerationConfig(params), + }, // https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/configure-safety-attributes // Example payload to be included in the request that sets the safety settings: // "safety_settings": [ @@ -682,10 +695,15 @@ export const GoogleChatCompleteResponseTransform: ( }), }; } + let logprobsContent: Logprobs[] = transformVertexLogprobs(generation); + return { message: message, index: index, finish_reason: generation.finishReason, + logprobs: { + content: logprobsContent, + }, ...(!strictOpenAiCompliance && { safetyRatings: generation.safetyRatings, }), diff --git a/src/providers/google-vertex-ai/transformGenerationConfig.ts b/src/providers/google-vertex-ai/transformGenerationConfig.ts index 654f66e65..cd1aa231c 100644 --- a/src/providers/google-vertex-ai/transformGenerationConfig.ts +++ b/src/providers/google-vertex-ai/transformGenerationConfig.ts @@ -26,6 +26,12 @@ export function transformGenerationConfig(params: Params) { if (params?.response_format?.type === 'json_object') { generationConfig['responseMimeType'] = 'application/json'; } + if (params['logprobs']) { + generationConfig['responseLogprobs'] = params['logprobs']; + } + if (params['top_logprobs']) { + generationConfig['logprobs'] = params['top_logprobs']; // range 1-5, openai supports 1-20 + } if (params?.response_format?.type === 'json_schema') { generationConfig['responseMimeType'] = 'application/json'; recursivelyDeleteUnsupportedParameters( diff --git a/src/providers/google-vertex-ai/types.ts b/src/providers/google-vertex-ai/types.ts index 6a57451b7..743bea8e0 100644 --- a/src/providers/google-vertex-ai/types.ts +++ b/src/providers/google-vertex-ai/types.ts @@ -14,40 +14,60 @@ export interface GoogleGenerateFunctionCall { 
args: Record; } -export interface GoogleGenerateContentResponse { - candidates: { - content: { - parts: { - text?: string; - thought?: string; // for models like gemini-2.0-flash-thinking-exp refer: https://ai.google.dev/gemini-api/docs/thinking-mode#streaming_model_thinking - functionCall?: GoogleGenerateFunctionCall; - }[]; - }; - finishReason: string; - index: 0; - safetyRatings: { - category: string; - probability: string; +export interface GoogleResponseCandidate { + content: { + parts: { + text?: string; + thought?: string; // for models like gemini-2.0-flash-thinking-exp refer: https://ai.google.dev/gemini-api/docs/thinking-mode#streaming_model_thinking + functionCall?: GoogleGenerateFunctionCall; }[]; - groundingMetadata?: { - webSearchQueries?: string[]; - searchEntryPoint?: { - renderedContent: string; - }; - groundingSupports?: Array<{ - segment: { - startIndex: number; - endIndex: number; - text: string; - }; - groundingChunkIndices: number[]; - confidenceScores: number[]; - }>; - retrievalMetadata?: { - webDynamicRetrievalScore: number; + }; + logprobsResult?: { + topCandidates: [ + { + candidates: [ + { + token: string; + logProbability: number; + }, + ]; + }, + ]; + chosenCandidates: [ + { + token: string; + logProbability: number; + }, + ]; + }; + finishReason: string; + index: 0; + safetyRatings: { + category: string; + probability: string; + }[]; + groundingMetadata?: { + webSearchQueries?: string[]; + searchEntryPoint?: { + renderedContent: string; + }; + groundingSupports?: Array<{ + segment: { + startIndex: number; + endIndex: number; + text: string; }; + groundingChunkIndices: number[]; + confidenceScores: number[]; + }>; + retrievalMetadata?: { + webDynamicRetrievalScore: number; }; - }[]; + }; +} + +export interface GoogleGenerateContentResponse { + candidates: GoogleResponseCandidate[]; promptFeedback: { safetyRatings: { category: string; diff --git a/src/providers/google-vertex-ai/utils.ts b/src/providers/google-vertex-ai/utils.ts index 
55dc83715..af09cc5fd 100644 --- a/src/providers/google-vertex-ai/utils.ts +++ b/src/providers/google-vertex-ai/utils.ts @@ -1,7 +1,7 @@ -import { GoogleErrorResponse } from './types'; +import { GoogleErrorResponse, GoogleResponseCandidate } from './types'; import { generateErrorResponse } from '../utils'; import { fileExtensionMimeTypeMap, GOOGLE_VERTEX_AI } from '../../globals'; -import { ErrorResponse } from '../types'; +import { ErrorResponse, Logprobs } from '../types'; /** * Encodes an object as a Base64 URL-encoded string. @@ -220,3 +220,42 @@ export const recursivelyDeleteUnsupportedParameters = (obj: any) => { } } }; + +export const transformVertexLogprobs = ( + generation: GoogleResponseCandidate +) => { + let logprobsContent: Logprobs[] = []; + if (generation.logprobsResult?.chosenCandidates) { + generation.logprobsResult.chosenCandidates.forEach((candidate) => { + let bytes = []; + for (const char of candidate.token) { + bytes.push(char.charCodeAt(0)); + } + logprobsContent.push({ + token: candidate.token, + logprob: candidate.logProbability, + bytes: bytes, + }); + }); + } + if (generation.logprobsResult?.topCandidates) { + generation.logprobsResult.topCandidates.forEach( + (topCandidatesForIndex, index) => { + let topLogprobs = []; + for (const candidate of topCandidatesForIndex.candidates) { + let bytes = []; + for (const char of candidate.token) { + bytes.push(char.charCodeAt(0)); + } + topLogprobs.push({ + token: candidate.token, + logprob: candidate.logProbability, + bytes: bytes, + }); + } + logprobsContent[index].top_logprobs = topLogprobs; + } + ); + } + return logprobsContent; +}; diff --git a/src/providers/types.ts b/src/providers/types.ts index 7bcacbfc6..3afb2ad61 100644 --- a/src/providers/types.ts +++ b/src/providers/types.ts @@ -158,6 +158,17 @@ export interface ChatChoice { logprobs?: object | null; } +export interface Logprobs { + token: string; + logprob: number; + bytes: number[]; + top_logprobs?: { + token: string; + logprob: number; 
+ bytes: number[]; + }[]; +} + /** * The structure of a completion response for the 'chatComplete' function. * @interface From bf5bdb356a8211a87585567d7e0dc6d6e1e4e2de Mon Sep 17 00:00:00 2001 From: Narendranath Gogineni Date: Tue, 28 Jan 2025 14:12:19 +0530 Subject: [PATCH 3/6] support logprobs for google gemini as well --- src/providers/google/chatComplete.ts | 105 +++++++++++++++++++-------- 1 file changed, 74 insertions(+), 31 deletions(-) diff --git a/src/providers/google/chatComplete.ts b/src/providers/google/chatComplete.ts index bf03b647c..3c8807e4c 100644 --- a/src/providers/google/chatComplete.ts +++ b/src/providers/google/chatComplete.ts @@ -13,10 +13,12 @@ import { derefer, getMimeType, recursivelyDeleteUnsupportedParameters, + transformVertexLogprobs, } from '../google-vertex-ai/utils'; import { ChatCompletionResponse, ErrorResponse, + Logprobs, ProviderConfig, } from '../types'; import { @@ -47,6 +49,12 @@ const transformGenerationConfig = (params: Params) => { if (params?.response_format?.type === 'json_object') { generationConfig['responseMimeType'] = 'application/json'; } + if (params['logprobs']) { + generationConfig['responseLogprobs'] = params['logprobs']; + } + if (params['top_logprobs']) { + generationConfig['logprobs'] = params['top_logprobs']; // range 1-5, openai supports 1-20 + } if (params?.response_format?.type === 'json_schema') { generationConfig['responseMimeType'] = 'application/json'; recursivelyDeleteUnsupportedParameters( @@ -331,6 +339,14 @@ export const GoogleChatCompleteConfig: ProviderConfig = { param: 'generationConfig', transform: (params: Params) => transformGenerationConfig(params), }, + logprobs: { + param: 'generationConfig', + transform: (params: Params) => transformGenerationConfig(params), + }, + top_logprobs: { + param: 'generationConfig', + transform: (params: Params) => transformGenerationConfig(params), + }, tools: { param: 'tools', default: '', @@ -397,40 +413,60 @@ interface GoogleGenerateFunctionCall { args: 
Record; } -interface GoogleGenerateContentResponse { - candidates: { - content: { - parts: { - text?: string; - thought?: string; // for models like gemini-2.0-flash-thinking-exp refer: https://ai.google.dev/gemini-api/docs/thinking-mode#streaming_model_thinking - functionCall?: GoogleGenerateFunctionCall; - }[]; - }; - finishReason: string; - index: 0; - safetyRatings: { - category: string; - probability: string; +interface GoogleResponseCandidate { + content: { + parts: { + text?: string; + thought?: string; // for models like gemini-2.0-flash-thinking-exp refer: https://ai.google.dev/gemini-api/docs/thinking-mode#streaming_model_thinking + functionCall?: GoogleGenerateFunctionCall; }[]; - groundingMetadata?: { - webSearchQueries?: string[]; - searchEntryPoint?: { - renderedContent: string; - }; - groundingSupports?: Array<{ - segment: { - startIndex: number; - endIndex: number; - text: string; - }; - groundingChunkIndices: number[]; - confidenceScores: number[]; - }>; - retrievalMetadata?: { - webDynamicRetrievalScore: number; + }; + logprobsResult?: { + topCandidates: [ + { + candidates: [ + { + token: string; + logProbability: number; + }, + ]; + }, + ]; + chosenCandidates: [ + { + token: string; + logProbability: number; + }, + ]; + }; + finishReason: string; + index: 0; + safetyRatings: { + category: string; + probability: string; + }[]; + groundingMetadata?: { + webSearchQueries?: string[]; + searchEntryPoint?: { + renderedContent: string; + }; + groundingSupports?: Array<{ + segment: { + startIndex: number; + endIndex: number; + text: string; }; + groundingChunkIndices: number[]; + confidenceScores: number[]; + }>; + retrievalMetadata?: { + webDynamicRetrievalScore: number; }; - }[]; + }; +} + +interface GoogleGenerateContentResponse { + candidates: GoogleResponseCandidate[]; promptFeedback: { safetyRatings: { category: string; @@ -528,8 +564,15 @@ export const GoogleChatCompleteResponseTransform: ( }), }; } + let logprobsContent: Logprobs[] = []; + if 
(generation.logprobsResult) { + logprobsContent = transformVertexLogprobs(generation); + } return { message: message, + logprobs: { + content: logprobsContent, + }, index: generation.index ?? idx, finish_reason: generation.finishReason, ...(!strictOpenAiCompliance && generation.groundingMetadata From b909a36052356473161c019bbaf2317518fbb0b0 Mon Sep 17 00:00:00 2001 From: Narendranath Gogineni Date: Tue, 28 Jan 2025 15:01:51 +0530 Subject: [PATCH 4/6] fix usage object for deepseek --- src/providers/deepseek/chatComplete.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/providers/deepseek/chatComplete.ts b/src/providers/deepseek/chatComplete.ts index 9dc46ef8f..f14b02a7d 100644 --- a/src/providers/deepseek/chatComplete.ts +++ b/src/providers/deepseek/chatComplete.ts @@ -106,6 +106,11 @@ interface DeepSeekStreamChunk { object: string; created: number; model: string; + usage?: { + prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + }; choices: { delta: { role?: string | null; @@ -161,6 +166,7 @@ export const DeepSeekChatCompleteResponseTransform: ( export const DeepSeekChatCompleteStreamChunkTransform: ( response: string ) => string = (responseChunk) => { + console.log('responseChunk', responseChunk); let chunk = responseChunk.trim(); chunk = chunk.replace(/^data: /, ''); chunk = chunk.trim(); @@ -182,6 +188,7 @@ export const DeepSeekChatCompleteStreamChunkTransform: ( finish_reason: parsedChunk.choices[0].finish_reason, }, ], + usage: parsedChunk.usage, })}` + '\n\n' ); }; From 411c2fc68c475f86dda1ecc9c93e0d39fbb6e51b Mon Sep 17 00:00:00 2001 From: Narendranath Gogineni Date: Tue, 28 Jan 2025 15:57:30 +0530 Subject: [PATCH 5/6] handle null for logprobs --- src/providers/google-vertex-ai/chatComplete.ts | 13 +++++++++---- src/providers/google-vertex-ai/utils.ts | 1 + src/providers/google/chatComplete.ts | 14 ++++++++------ 3 files changed, 18 insertions(+), 10 deletions(-) diff --git 
a/src/providers/google-vertex-ai/chatComplete.ts b/src/providers/google-vertex-ai/chatComplete.ts index 27ee6f8f2..2f4f34f81 100644 --- a/src/providers/google-vertex-ai/chatComplete.ts +++ b/src/providers/google-vertex-ai/chatComplete.ts @@ -695,15 +695,20 @@ export const GoogleChatCompleteResponseTransform: ( }), }; } - let logprobsContent: Logprobs[] = transformVertexLogprobs(generation); + const logprobsContent: Logprobs[] | null = + transformVertexLogprobs(generation); + let logprobs; + if (logprobsContent) { + logprobs = { + content: logprobsContent, + }; + } return { message: message, index: index, finish_reason: generation.finishReason, - logprobs: { - content: logprobsContent, - }, + logprobs, ...(!strictOpenAiCompliance && { safetyRatings: generation.safetyRatings, }), diff --git a/src/providers/google-vertex-ai/utils.ts b/src/providers/google-vertex-ai/utils.ts index af09cc5fd..b36efb831 100644 --- a/src/providers/google-vertex-ai/utils.ts +++ b/src/providers/google-vertex-ai/utils.ts @@ -225,6 +225,7 @@ export const transformVertexLogprobs = ( generation: GoogleResponseCandidate ) => { let logprobsContent: Logprobs[] = []; + if (!generation.logprobsResult) return null; if (generation.logprobsResult?.chosenCandidates) { generation.logprobsResult.chosenCandidates.forEach((candidate) => { let bytes = []; diff --git a/src/providers/google/chatComplete.ts b/src/providers/google/chatComplete.ts index 3c8807e4c..24ec8975c 100644 --- a/src/providers/google/chatComplete.ts +++ b/src/providers/google/chatComplete.ts @@ -564,15 +564,17 @@ export const GoogleChatCompleteResponseTransform: ( }), }; } - let logprobsContent: Logprobs[] = []; - if (generation.logprobsResult) { - logprobsContent = transformVertexLogprobs(generation); + const logprobsContent: Logprobs[] | null = + transformVertexLogprobs(generation); + let logprobs; + if (logprobsContent) { + logprobs = { + content: logprobsContent, + }; } return { message: message, - logprobs: { - content: 
logprobsContent, - }, + logprobs, index: generation.index ?? idx, finish_reason: generation.finishReason, ...(!strictOpenAiCompliance && generation.groundingMetadata From dc82ddc9f56938206136c95bbc94f61f10a2f890 Mon Sep 17 00:00:00 2001 From: Narendranath Gogineni Date: Tue, 28 Jan 2025 17:18:56 +0530 Subject: [PATCH 6/6] remove debug logs --- src/providers/deepseek/chatComplete.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/providers/deepseek/chatComplete.ts b/src/providers/deepseek/chatComplete.ts index f14b02a7d..30903921c 100644 --- a/src/providers/deepseek/chatComplete.ts +++ b/src/providers/deepseek/chatComplete.ts @@ -166,7 +166,6 @@ export const DeepSeekChatCompleteResponseTransform: ( export const DeepSeekChatCompleteStreamChunkTransform: ( response: string ) => string = (responseChunk) => { - console.log('responseChunk', responseChunk); let chunk = responseChunk.trim(); chunk = chunk.replace(/^data: /, ''); chunk = chunk.trim();