RubricLab · tedspare · Dec 8, 2023 · Nov 29, 2023 · Nov 30, 2023 · Nov 30, 2023
diff --git a/app/api/webhook/github/route.ts b/app/api/webhook/github/route.ts
@@ -167,60 +167,24 @@ export const POST = async (req: Request) => {
 	if (comment && !comment.body.toLowerCase().includes('maige'))
 		return new Response('Irrelevant comment', {status: 202})
 
-	if (issue?.pull_request) {
-		const {
-			pull_request: {diff_url: diffUrl},
-			node_id: pullId
-		} = issue
-
-		// Get GitHub app instance access token
-		const app = new App({
-			appId: process.env.GITHUB_APP_ID || '',
-			privateKey: process.env.GITHUB_PRIVATE_KEY || ''
-		})
-
-		const octokit = await app.getInstallationOctokit(instanceId)
-
-		const response = await fetch(diffUrl)
-
-		if (!response.ok) return new Response('Could not fetch diff', {status: 503})
-
-		const data = await response.text()
-
-		await reviewer({
-			octokit: octokit,
-			input: `Instruction: ${comment?.body}\n\nPR Diff:\n${data}`,
-			pullId
-		})
-
-		return new Response('Reviewed PR', {status: 200})
-	}
-
 	if (
 		!(
 			(action === 'opened' && payload?.issue) ||
-			(action === 'created' && payload?.comment)
+			(action === 'created' && payload?.comment) ||
+			(action === 'opened' && payload?.pull_request) ||
+			(action === 'synchronize' && payload?.pull_request)
 		)
 	)
 		return new Response('Webhook received', {status: 202})
 
 	const {
-		issue: {
-			node_id: issueId,
-			title,
-			number: issueNumber,
-			body,
-			labels: existingLabels
-		},
 		repository: {
 			node_id: repoId,
 			name,
 			owner: {login: owner}
 		}
 	} = payload
 
-	const existingLabelNames = existingLabels?.map((l: Label) => l.name)
-
 	const customer = await prisma.customer.findUnique({
 		where: {
 			name: owner || undefined
@@ -289,6 +253,60 @@ export const POST = async (req: Request) => {
 
 	await incrementUsage(prisma, owner)
 
+	// Review a newly opened PR, or one that received new commits
+	if ((action === 'opened' || action === 'synchronize') && payload.pull_request) {
+		const diffUrl = payload.pull_request.diff_url
+
+		const res = await fetch(diffUrl)
+		if (!res.ok) return new Response('Could not fetch diff', {status: 503})
+		const diff = await res.text()
+
+		// No comment exists here; the reviewer parses this input as a raw diff
+		await reviewer({
+			customerId,
+			octokit,
+			input: diff,
+			pullNumber: payload.number,
+			repoFullName: `${owner}/${name}`,
+			commitId: payload.pull_request.head.sha
+		})
+
+		return new Response('Reviewed PR', {status: 200})
+	}
+
+	// Issue payload that belongs to a pull request: reply using the PR diff
+	if (issue?.pull_request) {
+		const diffResponse = await fetch(issue.pull_request.diff_url)
+
+		if (!diffResponse.ok) {
+			return new Response('Could not fetch diff', {status: 503})
+		}
+
+		const prDiff = await diffResponse.text()
+
+		await reviewer({
+			customerId,
+			octokit,
+			input: `Instruction: ${comment?.body}\n\nPR Diff:\n${prDiff}`,
+			pullId: issue.node_id,
+			repoFullName: `${owner}/${name}`
+		})
+
+		return new Response('Replied to PR comment', {status: 200})
+	}
+
+	const {
+		issue: {
+			node_id: issueId,
+			title,
+			number: issueNumber,
+			body,
+			labels: existingLabels
+		}
+	} = payload
+
+	const existingLabelNames = existingLabels?.map((l: Label) => l.name)
+
 	/**
 	 * Repo commands
 	 */

diff --git a/bun.lockb b/bun.lockb
diff --git a/lib/agents/reviewer.tsx b/lib/agents/reviewer.tsx
@@ -1,7 +1,10 @@
 import {initializeAgentExecutorWithOptions} from 'langchain/agents'
 import {ChatOpenAI} from 'langchain/chat_models/openai'
 import {SerpAPI} from 'langchain/tools'
+import parse, {Change, Chunk} from 'parse-diff'
 import env from '~/env.mjs'
+import {codeComment} from '~/tools/codeComment'
+import {codebaseSearch} from '~/tools/codeSearch'
 import {prComment} from '~/tools/prComment'
 import {isDev} from '~/utils'
 
@@ -12,36 +15,110 @@ const model = new ChatOpenAI({
 })
 
 export async function reviewer({
+	customerId,
 	input,
 	octokit,
-	pullId
+	pullId,
+	repoFullName,
+	pullNumber,
+	commitId
 }: {
+	customerId: string
 	input: string
 	octokit: any
-	pullId: string
+	pullId?: string
+	repoFullName?: string
+	pullNumber?: number
+	commitId?: string
 }) {
-	const tools = [new SerpAPI(), prComment({octokit, pullId})]
-
-	const prefix = `
-	You are senior engineer reviewing a Pull Request in GitHub made by a junior engineer.
-		You MUST leave a comment on the PR according to the user's instructions using the prComment function.
-    Format your answer beautifully using markdown suitable for GitHub.
-    DO NOT use any emojis or non-Ascii characters.
-    {agent_scratchpad}
-	`.replaceAll('\n', ' ')
-
-	const executor = await initializeAgentExecutorWithOptions(tools, model, {
-		agentType: 'openai-functions',
-		returnIntermediateSteps: isDev,
-		handleParsingErrors: true,
-		verbose: false,
-		agentArgs: {
-			prefix
+	/**
+	 * Comment on a PR
+	 */
+	if (pullId) {
+		const tools = [new SerpAPI(), codebaseSearch({customerId, repoFullName})]
+
+		const prefix = `
+		You are a 1000x senior engineer summarizing a pull request on GitHub.
+		Provide a high-level summary (maximum 5 sentences) of the diff.
+		If you write too much, the author will get overwhelmed.
+		Limit prose.
+
+		{agent_scratchpad}
+		`.replaceAll('\n', ' ')
+
+		const executor = await initializeAgentExecutorWithOptions(tools, model, {
+			agentType: 'openai-functions',
+			returnIntermediateSteps: isDev,
+			handleParsingErrors: true,
+			verbose: true,
+			agentArgs: {
+				prefix
+			}
+		})
+
+		const result = await executor.call({input})
+
+		// Forcefully call the prComment tool
+		await prComment({octokit, pullId}).func({
+			comment: result.output
+		})
+
+		return
+	} else {
+		/**
+		 * New or updated PR
+		 */
+		const prefix = `
+		You are a 1000x senior engineer reviewing a pull request on GitHub.
+		Only comment on modified code.
+		Only flag the top few issues: bad patterns, clear mistakes, or potential breaking changes.
+		If it looks like new code is unused, try searching for it.
+		Think step by step.
+		Limit prose. If you write too much, the author will get overwhelmed.
+
+		{agent_scratchpad}
+		`.replaceAll('\n', ' ')
+
+		// Re-render the diff with line numbers so comments can target exact lines
+		const files = parse(input)
+		let diff = ''
+
+		for (const file of files) {
+			// Added files report `from` as /dev/null, so prefer the new path
+			const path = file.to && file.to !== '/dev/null' ? file.to : file.from
+			let changes = `File Path: ${path}\n\n`
+			file.chunks.forEach((chunk: Chunk) => {
+				chunk.changes.forEach((change: Change) => {
+					const ln = change.type === 'normal' ? change.ln2 : change.ln
+					changes += `${ln} ${change.content.replaceAll('\t', ' ')}\n`
+				})
+				changes += '='.repeat(10) + '\n'
+			})
+			diff += changes + '='.repeat(20) + '\n\n'
 		}
-	})
 
-	const result = await executor.call({input})
-	const {output} = result
+		const tools = [
+			codebaseSearch({customerId, repoFullName}),
+			codeComment({
+				octokit,
+				repoFullName,
+				pullNumber,
+				commitId
+			})
+		]
+
+		const executor = await initializeAgentExecutorWithOptions(tools, model, {
+			agentType: 'openai-functions',
+			returnIntermediateSteps: isDev,
+			handleParsingErrors: true,
+			verbose: true,
+			agentArgs: {
+				prefix
+			}
+		})
+
+		await executor.call({input: diff})
 
-	return output
+		return
+	}
 }
diff --git a/lib/constants.ts b/lib/constants.ts
@@ -14,3 +14,7 @@ export const TIERS = {
 		priceId: process.env.STRIPE_BASE_PRICE_ID || ''
 	}
 }
+
+export const COPY = {
+	FOOTER: `By [Maige](https://maige.app). How's my driving?`
+}
diff --git a/lib/tests/weaviate.test.ts b/lib/tests/weaviate.test.ts
@@ -7,23 +7,26 @@ test.skip('Bun test runner - Weaviate', () => {
 	expect(Bun.version).toInclude('1.0')
 })
 
-// To test this, pass the following env vars to the process:
+// If Bun is unable to access env vars, pass these to the test runner:
 // WEAVIATE_HOST, WEAVIATE_SCHEME, OPENAI_API_KEY
-// since Bun seems to not be able to access env vars
-test.skip(
+test(
 	'Embed repo',
 	async () => {
-		const customerId = Math.random().toString(36).substring(7)
-		const repoUrl = 'https://github.com/RubricLab/shot'
-		const branch = 'main'
+		const customerId = 'clot5gx6a0000uvdovvfi1x9q'
+		const repoUrl = 'https://github.com/RubricLab/maige'
+		const branch = 'staging'
+		const query = 'codeSearch' // arbitrary - should change over time
 
 		const vectorDB = new Weaviate(customerId)
+		const docs = await vectorDB.embedRepo(repoUrl, branch)
 
-		const docs = await vectorDB.embedRepo(repoUrl, branch, true)
+		expect(docs?.length).toBeGreaterThan(0)
 
-		console.log(`Loaded ${docs?.length} docs`)
+		const search = await vectorDB.searchCode(query, repoUrl, 1, undefined, branch)
 
-		expect(docs.length).toBeGreaterThan(0)
+		expect(search?.length).toBeGreaterThan(0)
+
+		if (search?.length > 0) expect(search[0].text).toInclude('search')
 	},
 	15 * 1000
 )
diff --git a/lib/tools/codeComment.ts b/lib/tools/codeComment.ts
@@ -0,0 +1,58 @@
+import {DynamicStructuredTool} from 'langchain/tools'
+import {z} from 'zod'
+import {COPY} from '~/constants'
+
+/**
+ * Build a tool that posts a review comment on a specific line of a PR.
+ *
+ * @param octokit - Client whose `request` method sends the GitHub API call
+ * @param repoFullName - Repository in `owner/name` form, used in the URL
+ * @param pullNumber - Number of the pull request to comment on
+ * @param commitId - SHA sent as `commit_id` with the comment
+ * @returns A structured tool taking `comment`, `line`, `side` and `path`
+ */
+export function codeComment({
+	octokit,
+	repoFullName,
+	pullNumber,
+	commitId
+}: {
+	octokit: any
+	repoFullName: string
+	pullNumber: number
+	commitId: string
+}) {
+	return new DynamicStructuredTool({
+		description: 'Adds a comment to code in a PR',
+		func: async ({comment, line, side, path}) => {
+			try {
+				const res = await octokit.request(
+					`POST /repos/${repoFullName}/pulls/${pullNumber}/comments`,
+					{
+						body: `${comment}\n\n${COPY.FOOTER}`,
+						commit_id: commitId,
+						path,
+						line,
+						side,
+						headers: {'X-GitHub-Api-Version': '2022-11-28'}
+					}
+				)
+				return JSON.stringify(res)
+			} catch (error: unknown) {
+				// Report the failure as tool output instead of throwing, so a
+				// rejected comment (e.g. a line outside the diff) reaches the agent
+				const message = error instanceof Error ? error.message : String(error)
+				return `Failed to add comment: ${message}`
+			}
+		},
+		name: 'codeComment',
+		schema: z.object({
+			comment: z.string().describe('The comment to add'),
+			line: z.number().describe('The line number'),
+			side: z
+				.enum(['LEFT', 'RIGHT'])
+				.describe('If Deletion then LEFT, else RIGHT'),
+			path: z.string().describe('The path to the file')
+		})
+	})
+}
diff --git a/lib/tools/codeSearch.ts b/lib/tools/codeSearch.ts
@@ -15,9 +15,9 @@ export function codebaseSearch({
 	return new DynamicStructuredTool({
 		description:
 			'Search the codebase by query. Uses vector similarity; format queries to make use of this.',
-		func: async ({query}) => {
+		func: async ({query, filePath}) => {
 			const db = new Weaviate(customerId)
-			const docs = await db.searchCode(query, repoFullName)
+			const docs = await db.searchCode(query, repoFullName, 3, filePath)
 
 			if (!docs?.length) return 'No results found'
 
@@ -30,7 +30,8 @@ export function codebaseSearch({
 		},
 		name: 'searchCode',
 		schema: z.object({
-			query: z.string().describe('The query to search')
+			query: z.string().describe('The query to search'),
+			filePath: z.string().optional().describe('The file path to search')
 		})
 	})
 }