Skip to content

Commit

Permalink
refactor: reduce vertex duplication
Browse files Browse the repository at this point in the history
Signed-off-by: Alexander Alemayhu <alexander@alemayhu.com>
  • Loading branch information
aalemayhu committed Dec 27, 2024
1 parent 1e3f2a0 commit 4fd38f8
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 93 deletions.
25 changes: 20 additions & 5 deletions src/infrastracture/adapters/fileConversion/constants.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,35 @@
import { HarmBlockThreshold, HarmCategory } from '@google-cloud/vertexai';
import {
HarmBlockThreshold,
HarmCategory,
SafetySetting,
} from '@google-cloud/vertexai';

export const SAFETY_SETTINGS = [
export const SAFETY_SETTINGS: SafetySetting[] = [
{
category: HarmCategory.HARM_CATEGORY_HATE_SPEECH,
category: HarmCategory.HARM_CATEGORY_HARASSMENT,
threshold: HarmBlockThreshold.BLOCK_NONE,
},
{
category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
category: HarmCategory.HARM_CATEGORY_HATE_SPEECH,
threshold: HarmBlockThreshold.BLOCK_NONE,
},
{
category: HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
threshold: HarmBlockThreshold.BLOCK_NONE,
},
{
category: HarmCategory.HARM_CATEGORY_HARASSMENT,
category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
threshold: HarmBlockThreshold.BLOCK_NONE,
},
];

/** Sampling parameters passed along as `generationConfig`. */
const GENERATION_CONFIG = {
  maxOutputTokens: 8192,
  temperature: 1,
  topP: 0.95,
};

/**
 * Single source of truth for the Vertex AI settings used by the file
 * conversion adapters: project, location, model name and generation config.
 */
export const VERTEX_AI_CONFIG = {
  project: 'notion-to-anki',
  location: 'europe-west3',
  model: 'gemini-1.5-pro-002',
  generationConfig: GENERATION_CONFIG,
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import { GenerateContentRequest } from '@google-cloud/vertexai';
import { setupVertexAI } from './vertexAIUtils';

/**
 * Sends `req` to the generative model and returns the streamed text
 * concatenated in arrival order.
 *
 * Best-effort: if requesting or reading the stream fails, the error is logged
 * and whatever text was collected up to that point (possibly an empty string)
 * is returned instead of rejecting.
 *
 * @param req - Request forwarded unchanged to `generateContentStream`.
 * @returns The joined text of the first candidate's parts from every chunk.
 */
export async function generateContent(
  req: GenerateContentRequest
): Promise<string> {
  const generativeModel = setupVertexAI();
  let content = '';

  try {
    const streamingResp = await generativeModel.generateContentStream(req);
    for await (const item of streamingResp.stream) {
      // Optional chaining also covers an empty `candidates` array: there
      // `candidates[0]` is undefined, and reading `.content` from it used to
      // throw and abort the remainder of the stream.
      const parts = item.candidates?.[0]?.content?.parts;
      if (parts) {
        // Parts without text contribute nothing to the output.
        content += parts.map((part) => part.text ?? '').join('');
      }
    }
  } catch (error) {
    console.error('Error generating content stream:', error);
  }

  return content;
}

Check failure on line 26 in src/infrastracture/adapters/fileConversion/contentGenerationUtils.ts

View workflow job for this annotation

GitHub Actions / build (20.18.0)

Replace `·` with `⏎`
60 changes: 13 additions & 47 deletions src/infrastracture/adapters/fileConversion/convertImageToHTML.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { VertexAI } from '@google-cloud/vertexai';
import { SAFETY_SETTINGS } from './constants';
import { GenerateContentRequest } from '@google-cloud/vertexai';
import { generateContent } from './contentGenerationUtils';

/**
* Google VertexAI is returning Markdown:
Expand All @@ -16,38 +16,22 @@ export function removeFirstAndLastLine(content: string): string {
export const convertImageToHTML = async (
imageData: string
): Promise<string> => {
const vertexAI = new VertexAI({
project: 'notion-to-anki',
location: 'europe-west3',
});
const model = 'gemini-1.5-pro-002';

const generativeModel = vertexAI.preview.getGenerativeModel({
model: model,
generationConfig: {
maxOutputTokens: 8192,
temperature: 1,
topP: 0.95,
},
safetySettings: SAFETY_SETTINGS,
});

const text1 = {
text: `Convert the text in this image to the following format for (every question is their own ul):
<ul class=\"toggle\">
  <li>
   <details>
    <summary>
<li>
<details>
<summary>
n) question
    </summary>
</summary>
<p>A) ..., </p>
<p>B)... </p>
etc. 
etc.
<p>and finally Answer: D</p>
   </details>
  </li>
  </ul>
</details>
</li>
</ul>
- Extra rules: n=is the number for the question, question=the question text
Expand All @@ -62,28 +46,10 @@ export const convertImageToHTML = async (
},
};

const req = {
const req: GenerateContentRequest = {
contents: [{ role: 'user', parts: [text1, image1] }],
};

let htmlContent = '';
try {
const streamingResp = await generativeModel.generateContentStream(req);
for await (const item of streamingResp.stream) {
if (
item.candidates &&
item.candidates[0].content &&
item.candidates[0].content.parts
) {
htmlContent += item.candidates[0].content.parts
.map((part) => part.text)
.join('');
}
}
} catch (error) {
console.error('Error generating content stream:', error);
}
htmlContent = removeFirstAndLastLine(htmlContent);

return htmlContent;
const htmlContent = await generateContent(req);
return removeFirstAndLastLine(htmlContent);
};
44 changes: 3 additions & 41 deletions src/infrastracture/adapters/fileConversion/convertPDFToHTML.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,9 @@
import path from 'path';
import fs from 'fs';

import { GenerateContentRequest, VertexAI } from '@google-cloud/vertexai';
import { SAFETY_SETTINGS } from './constants';
import { GenerateContentRequest } from '@google-cloud/vertexai';
import { generateContent } from './contentGenerationUtils';

export const convertPDFToHTML = async (pdf: string): Promise<string> => {

Check failure on line 6 in src/infrastracture/adapters/fileConversion/convertPDFToHTML.ts

View workflow job for this annotation

GitHub Actions / build (20.18.0)

Async arrow function has no 'await' expression
const vertexAI = new VertexAI({
project: 'notion-to-anki',
location: 'europe-west3',
});
const model = 'gemini-1.5-pro-002';
const generativeModel = vertexAI.preview.getGenerativeModel({
model: model,
generationConfig: {
maxOutputTokens: 8192,
temperature: 1,
topP: 0.95,
},
safetySettings: SAFETY_SETTINGS,
});

const document1 = {
inlineData: {
mimeType: 'application/pdf',
Expand All @@ -43,27 +27,5 @@ export const convertPDFToHTML = async (pdf: string): Promise<string> => {
contents: [{ role: 'user', parts: [document1, text1] }],
};

let htmlContent = '';
try {
const streamingResp = await generativeModel.generateContentStream(req);
for await (const item of streamingResp.stream) {
if (
item.candidates &&
item.candidates[0].content &&
item.candidates[0].content.parts
) {
htmlContent += item.candidates[0].content.parts
.map((part) => part.text)
.join('');
}
}
} catch (error) {
console.error('Error generating content stream:', error);

// const workSpace = process.cwd();
// const outputPath = path.join(workSpace, 'output.html');
// fs.writeFileSync(outputPath, htmlContent);
// console.log(outputPath);
}
return htmlContent;
return generateContent(req);
};
15 changes: 15 additions & 0 deletions src/infrastracture/adapters/fileConversion/vertexAIUtils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import { GenerativeModel, VertexAI } from '@google-cloud/vertexai';
import { SAFETY_SETTINGS, VERTEX_AI_CONFIG } from './constants';

/**
 * Creates a Vertex AI client from `VERTEX_AI_CONFIG` and returns a generative
 * model configured with the shared generation config and `SAFETY_SETTINGS`.
 *
 * A new client and model are constructed on every call.
 *
 * @returns The configured generative model.
 */
export function setupVertexAI(): GenerativeModel {
  const { project, location, model, generationConfig } = VERTEX_AI_CONFIG;
  const client = new VertexAI({ project, location });

  return client.getGenerativeModel({
    model,
    generationConfig,
    safetySettings: SAFETY_SETTINGS,
  });
}

0 comments on commit 4fd38f8

Please sign in to comment.