refactor: reduce Vertex AI code duplication

Signed-off-by: Alexander Alemayhu <alexander@alemayhu.com>
2anki · Dec 27, 2024 · 4fd38f8 · 4fd38f8
1 parent 1e3f2a0
commit 4fd38f8
Show file tree

Hide file tree

Showing 5 changed files with 77 additions and 93 deletions.
diff --git a/src/infrastracture/adapters/fileConversion/constants.ts b/src/infrastracture/adapters/fileConversion/constants.ts
@@ -1,20 +1,35 @@
-import { HarmBlockThreshold, HarmCategory } from '@google-cloud/vertexai';
+import {
+  HarmBlockThreshold,
+  HarmCategory,
+  SafetySetting,
+} from '@google-cloud/vertexai';
 
-export const SAFETY_SETTINGS = [
+export const SAFETY_SETTINGS: SafetySetting[] = [ // Vertex AI safety settings; every category listed here is set to BLOCK_NONE.
   {
-    category: HarmCategory.HARM_CATEGORY_HATE_SPEECH,
+    category: HarmCategory.HARM_CATEGORY_HARASSMENT,
     threshold: HarmBlockThreshold.BLOCK_NONE,
   },
   {
-    category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
+    category: HarmCategory.HARM_CATEGORY_HATE_SPEECH,
     threshold: HarmBlockThreshold.BLOCK_NONE,
   },
   {
     category: HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
     threshold: HarmBlockThreshold.BLOCK_NONE,
   },
   {
-    category: HarmCategory.HARM_CATEGORY_HARASSMENT,
+    category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
     threshold: HarmBlockThreshold.BLOCK_NONE,
   },
 ];
+
+export const VERTEX_AI_CONFIG = { // Shared Vertex AI project, location, model and generation settings.
+  project: 'notion-to-anki',
+  location: 'europe-west3',
+  model: 'gemini-1.5-pro-002',
+  generationConfig: { // Passed unchanged to getGenerativeModel by setupVertexAI.
+    maxOutputTokens: 8192,
+    temperature: 1,
+    topP: 0.95,
+  },
+};
diff --git a/src/infrastracture/adapters/fileConversion/contentGenerationUtils.ts b/src/infrastracture/adapters/fileConversion/contentGenerationUtils.ts
@@ -0,0 +1,26 @@
+import { GenerateContentRequest } from '@google-cloud/vertexai';
+import { setupVertexAI } from './vertexAIUtils';
+
+export async function generateContent(req: GenerateContentRequest): Promise<string> { // Streams a response for req and returns the concatenated text parts.
+  const generativeModel = setupVertexAI(); // A model instance is created on every call.
+  let content = '';
+
+  try {
+    const streamingResp = await generativeModel.generateContentStream(req);
+    for await (const item of streamingResp.stream) {
+      if ( // Skip chunks that lack candidates, content or parts.
+        item.candidates &&
+        item.candidates[0].content &&
+        item.candidates[0].content.parts
+      ) {
+        content += item.candidates[0].content.parts // Only the first candidate's parts are used.
+          .map((part) => part.text)
+          .join('');
+      }
+    }
+  } catch (error) { // Errors are logged, not rethrown; text accumulated so far is still returned.
+    console.error('Error generating content stream:', error);
+  }
+
+  return content; // Empty string when nothing was accumulated.
+} 
diff --git a/src/infrastracture/adapters/fileConversion/convertImageToHTML.ts b/src/infrastracture/adapters/fileConversion/convertImageToHTML.ts
@@ -1,5 +1,5 @@
-import { VertexAI } from '@google-cloud/vertexai';
-import { SAFETY_SETTINGS } from './constants';
+import { GenerateContentRequest } from '@google-cloud/vertexai';
+import { generateContent } from './contentGenerationUtils';
 
 /**
  * Google VertexAI is returning Markdown:
@@ -16,38 +16,22 @@ export function removeFirstAndLastLine(content: string): string {
 export const convertImageToHTML = async (
   imageData: string
 ): Promise<string> => {
-  const vertexAI = new VertexAI({
-    project: 'notion-to-anki',
-    location: 'europe-west3',
-  });
-  const model = 'gemini-1.5-pro-002';
-
-  const generativeModel = vertexAI.preview.getGenerativeModel({
-    model: model,
-    generationConfig: {
-      maxOutputTokens: 8192,
-      temperature: 1,
-      topP: 0.95,
-    },
-    safetySettings: SAFETY_SETTINGS,
-  });
-
   const text1 = {
     text: `Convert the text in this image to the following format for (every question is their own ul):
 
         <ul class=\"toggle\">
-          <li>
-           <details>
-            <summary>
+          <li>
+           <details>
+            <summary>
                 n) question
-            </summary>
+            </summary>
         <p>A) ..., </p>
         <p>B)... </p>
-        etc. 
+        etc. 
         <p>and finally Answer: D</p>
-           </details>
-          </li>
-          </ul>
+           </details>
+          </li>
+          </ul>
 
         —
         - Extra rules: n=is the number for the question, question=the question text
@@ -62,28 +46,10 @@ export const convertImageToHTML = async (
     },
   };
 
-  const req = {
+  const req: GenerateContentRequest = {
     contents: [{ role: 'user', parts: [text1, image1] }],
   };
 
-  let htmlContent = '';
-  try {
-    const streamingResp = await generativeModel.generateContentStream(req);
-    for await (const item of streamingResp.stream) {
-      if (
-        item.candidates &&
-        item.candidates[0].content &&
-        item.candidates[0].content.parts
-      ) {
-        htmlContent += item.candidates[0].content.parts
-          .map((part) => part.text)
-          .join('');
-      }
-    }
-  } catch (error) {
-    console.error('Error generating content stream:', error);
-  }
-  htmlContent = removeFirstAndLastLine(htmlContent);
-
-  return htmlContent;
+  const htmlContent = await generateContent(req);
+  return removeFirstAndLastLine(htmlContent);
 };
diff --git a/src/infrastracture/adapters/fileConversion/convertPDFToHTML.ts b/src/infrastracture/adapters/fileConversion/convertPDFToHTML.ts
@@ -1,25 +1,9 @@
 import path from 'path';
 import fs from 'fs';
-
-import { GenerateContentRequest, VertexAI } from '@google-cloud/vertexai';
-import { SAFETY_SETTINGS } from './constants';
+import { GenerateContentRequest } from '@google-cloud/vertexai';
+import { generateContent } from './contentGenerationUtils';
 
 export const convertPDFToHTML = async (pdf: string): Promise<string> => {
-  const vertexAI = new VertexAI({
-    project: 'notion-to-anki',
-    location: 'europe-west3',
-  });
-  const model = 'gemini-1.5-pro-002';
-  const generativeModel = vertexAI.preview.getGenerativeModel({
-    model: model,
-    generationConfig: {
-      maxOutputTokens: 8192,
-      temperature: 1,
-      topP: 0.95,
-    },
-    safetySettings: SAFETY_SETTINGS,
-  });
-
   const document1 = {
     inlineData: {
       mimeType: 'application/pdf',
@@ -43,27 +27,5 @@ export const convertPDFToHTML = async (pdf: string): Promise<string> => {
     contents: [{ role: 'user', parts: [document1, text1] }],
   };
 
-  let htmlContent = '';
-  try {
-    const streamingResp = await generativeModel.generateContentStream(req);
-    for await (const item of streamingResp.stream) {
-      if (
-        item.candidates &&
-        item.candidates[0].content &&
-        item.candidates[0].content.parts
-      ) {
-        htmlContent += item.candidates[0].content.parts
-          .map((part) => part.text)
-          .join('');
-      }
-    }
-  } catch (error) {
-    console.error('Error generating content stream:', error);
-
-    // const workSpace = process.cwd();
-    // const outputPath = path.join(workSpace, 'output.html');
-    // fs.writeFileSync(outputPath, htmlContent);
-    // console.log(outputPath);
-  }
-  return htmlContent;
+  return generateContent(req);
 };
diff --git a/src/infrastracture/adapters/fileConversion/vertexAIUtils.ts b/src/infrastracture/adapters/fileConversion/vertexAIUtils.ts
@@ -0,0 +1,15 @@
+import { GenerativeModel, VertexAI } from '@google-cloud/vertexai';
+import { SAFETY_SETTINGS, VERTEX_AI_CONFIG } from './constants';
+
+export function setupVertexAI(): GenerativeModel { // Builds a GenerativeModel from VERTEX_AI_CONFIG and SAFETY_SETTINGS.
+  const vertexAI = new VertexAI({
+    project: VERTEX_AI_CONFIG.project,
+    location: VERTEX_AI_CONFIG.location,
+  });
+
+  return vertexAI.getGenerativeModel({ // NOTE(review): the code removed by this commit called vertexAI.preview.getGenerativeModel; confirm dropping .preview is intended.
+    model: VERTEX_AI_CONFIG.model,
+    generationConfig: VERTEX_AI_CONFIG.generationConfig,
+    safetySettings: SAFETY_SETTINGS,
+  });
+}