Skip to content

Commit

Permalink
Arihan/mar 31 summarize and upsert code snippets from reposplitter (#57)
Browse files Browse the repository at this point in the history
* Add summary function for code snippets
  • Loading branch information
arihanv authored Dec 13, 2023
1 parent 7e4bc04 commit 21e16f2
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 4 deletions.
10 changes: 6 additions & 4 deletions lib/utils/embeddings/embed.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@ import {OpenAIEmbeddings} from 'langchain/embeddings/openai'
import {RecursiveCharacterTextSplitter} from 'langchain/text_splitter'
import {WeaviateStore} from 'langchain/vectorstores/weaviate'
import {cloneRepo} from './cloneRepo'
import {type WeaviateConfig} from './db'
import deleteRepo from './delete'
import {checkRepoExists} from './exists'
import {type WeaviateConfig} from './db'
import { AISummary } from './summary'

export default async function addRepo(
weaviateConfig: WeaviateConfig,
Expand Down Expand Up @@ -40,16 +41,17 @@ export default async function addRepo(
process.env.GITHUB_ACCESS_TOKEN || ''
)

const docs = repo.map(doc => {
const docs = await Promise.all(repo.map(async doc => {
return {
...doc,
metadata: {
...doc.metadata,
userId: weaviateConfig.userId,
summary: await AISummary(doc.pageContent),
ext: doc.metadata.source.split('.')[1] || ''
}
}
})
}))

const embeddings = new OpenAIEmbeddings({
openAIApiKey: process.env.OPENAI_API_KEY
Expand All @@ -67,4 +69,4 @@ export default async function addRepo(
console.error(e)
return
}
}
}
1 change: 1 addition & 0 deletions lib/utils/embeddings/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ const keys = [
'source',
'text',
'ext',
'summary',
'repository',
'branch',
'userId',
Expand Down
7 changes: 7 additions & 0 deletions lib/utils/embeddings/setup/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ export const schema = {
name: 'repository',
tokenization: 'word'
},
{
dataType: ['text'],
indexFilterable: true,
indexSearchable: true,
name: 'summary',
tokenization: 'word'
},
{
dataType: ['text'],
indexFilterable: true,
Expand Down
19 changes: 19 additions & 0 deletions lib/utils/embeddings/summary.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import {LLMChain} from 'langchain/chains'
import {OpenAI} from 'langchain/llms/openai'
import {PromptTemplate} from 'langchain/prompts'

// One-shot prompt used to summarize a code snippet: an instruction line, a single
// worked example (a shell script paired with its one-line description), and then
// the `{code}` template variable followed by an open `AI:` turn for the model to
// complete. The `\n` and `\"` escapes sit inside a template literal, so they become
// real newlines and plain double quotes in the prompt text.
const prompt = PromptTemplate.fromTemplate(
`You are AI that make desc of code snippet. Many keywords, straight to point, 1 line.
Code:#!/bin/sh\ngroovyc src/*.groovy\ngroovy src/Main.groovy --cp src/
AI:Compiles and runs a Groovy program using the source files in the \"src\" directory.
Code:{code}
AI:`
)

/**
 * Asks an OpenAI model for a one-line description of a code snippet, using the
 * module-level `prompt` template.
 *
 * @param code - Source text of the snippet; substituted for `{code}` in the prompt.
 * @param modelName - Model identifier passed to the OpenAI LLM wrapper.
 * @param temperature - Sampling temperature passed to the OpenAI LLM wrapper.
 * @returns The generated summary with surrounding whitespace trimmed, or an empty
 *   string if the chain result carries no string under `text`.
 */
export async function AISummary(
  code: string,
  modelName: string = 'gpt-3.5-turbo',
  temperature: number = 0
): Promise<string> {
  const model = new OpenAI({temperature, modelName})
  const codeChain = new LLMChain({llm: model, prompt})
  // `call` resolves to a values object keyed by the chain's output key (`text` by
  // default for LLMChain), not to a string. Callers store the summary as a plain
  // text field, so unwrap it here instead of handing back the whole object.
  const result = await codeChain.call({code})
  const text: unknown = result?.text
  return typeof text === 'string' ? text.trim() : ''
}

0 comments on commit 21e16f2

Please sign in to comment.