diff --git a/lib/utils/embeddings/embed.ts b/lib/utils/embeddings/embed.ts
index b9894219..698347d9 100644
--- a/lib/utils/embeddings/embed.ts
+++ b/lib/utils/embeddings/embed.ts
@@ -2,9 +2,10 @@ import {OpenAIEmbeddings} from 'langchain/embeddings/openai'
 import {RecursiveCharacterTextSplitter} from 'langchain/text_splitter'
 import {WeaviateStore} from 'langchain/vectorstores/weaviate'
 import {cloneRepo} from './cloneRepo'
 import {type WeaviateConfig} from './db'
 import deleteRepo from './delete'
 import {checkRepoExists} from './exists'
+import {AISummary} from './summary'
 
 export default async function addRepo(
   weaviateConfig: WeaviateConfig,
@@ -42,16 +43,17 @@ export default async function addRepo(
     process.env.GITHUB_ACCESS_TOKEN || ''
   )
 
-  const docs = repo.map(doc => {
+  const docs = await Promise.all(repo.map(async doc => {
     return {
       ...doc,
       metadata: {
         ...doc.metadata,
         userId: weaviateConfig.userId,
+        summary: await AISummary(doc.pageContent),
         ext: doc.metadata.source.split('.')[1] || ''
       }
     }
-  })
+  }))
 
   const embeddings = new OpenAIEmbeddings({
     openAIApiKey: process.env.OPENAI_API_KEY
diff --git a/lib/utils/embeddings/query.ts b/lib/utils/embeddings/query.ts
index 0343ceda..f5a9cbec 100644
--- a/lib/utils/embeddings/query.ts
+++ b/lib/utils/embeddings/query.ts
@@ -5,6 +5,7 @@ const keys = [
   'source',
   'text',
   'ext',
+  'summary',
   'repository',
   'branch',
   'userId',
diff --git a/lib/utils/embeddings/setup/schema.ts b/lib/utils/embeddings/setup/schema.ts
index 92d9128c..d9b03e80 100644
--- a/lib/utils/embeddings/setup/schema.ts
+++ b/lib/utils/embeddings/setup/schema.ts
@@ -37,6 +37,13 @@
       name: 'repository',
       tokenization: 'word'
     },
+    {
+      dataType: ['text'],
+      indexFilterable: true,
+      indexSearchable: true,
+      name: 'summary',
+      tokenization: 'word'
+    },
     {
       dataType: ['text'],
indexFilterable: true, diff --git a/lib/utils/embeddings/summary.ts b/lib/utils/embeddings/summary.ts new file mode 100644 index 00000000..7b915ec8 --- /dev/null +++ b/lib/utils/embeddings/summary.ts @@ -0,0 +1,19 @@ +import {LLMChain} from 'langchain/chains' +import {OpenAI} from 'langchain/llms/openai' +import {PromptTemplate} from 'langchain/prompts' + +const prompt = PromptTemplate.fromTemplate( + `You are AI that make desc of code snippet. Many keywords, straight to point, 1 line. + + Code:#!/bin/sh\ngroovyc src/*.groovy\ngroovy src/Main.groovy --cp src/ + AI:Compiles and runs a Groovy program using the source files in the \"src\" directory. + + Code:{code} + AI:` +) + +export async function AISummary(code: string, modelName: string = 'gpt-3.5-turbo', temperature: number = 0) { + const model = new OpenAI({temperature, modelName}) + const codeChain = new LLMChain({llm: model, prompt}) + return await codeChain.call({code}) +}