Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main'
Browse files Browse the repository at this point in the history
  • Loading branch information
bentwnghk committed Oct 29, 2024
2 parents fd38132 + b4e3f60 commit d2bbe8b
Show file tree
Hide file tree
Showing 23 changed files with 851 additions and 64 deletions.
19 changes: 11 additions & 8 deletions src/database/server/models/chunk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { and, desc, isNull } from 'drizzle-orm/expressions';
import { chunk } from 'lodash-es';

import { serverDB } from '@/database/server';
import { ChunkMetadata, FileChunk, SemanticSearchChunk } from '@/types/chunk';
import { ChunkMetadata, FileChunk } from '@/types/chunk';

import {
NewChunkItem,
Expand Down Expand Up @@ -148,6 +148,8 @@ export class ChunkModel {

const data = await serverDB
.select({
fileId: fileChunks.fileId,
fileName: files.name,
id: chunks.id,
index: chunks.index,
metadata: chunks.metadata,
Expand All @@ -158,16 +160,15 @@ export class ChunkModel {
.from(chunks)
.leftJoin(embeddings, eq(chunks.id, embeddings.chunkId))
.leftJoin(fileChunks, eq(chunks.id, fileChunks.chunkId))
.leftJoin(files, eq(fileChunks.fileId, files.id))
.where(fileIds ? inArray(fileChunks.fileId, fileIds) : undefined)
.orderBy((t) => desc(t.similarity))
.limit(30);

return data.map(
(item): SemanticSearchChunk => ({
...item,
metadata: item.metadata as ChunkMetadata,
}),
);
return data.map((item) => ({
...item,
metadata: item.metadata as ChunkMetadata,
}));
}

async semanticSearchForChat({
Expand All @@ -187,7 +188,7 @@ export class ChunkModel {
const result = await serverDB
.select({
fileId: files.id,
filename: files.name,
fileName: files.name,
id: chunks.id,
index: chunks.index,
metadata: chunks.metadata,
Expand All @@ -205,6 +206,8 @@ export class ChunkModel {

return result.map((item) => {
return {
fileId: item.fileId,
fileName: item.fileName,
id: item.id,
index: item.index,
similarity: item.similarity,
Expand Down
2 changes: 1 addition & 1 deletion src/features/ChatInput/useSend.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ import { useCallback, useMemo } from 'react';

import { useChatStore } from '@/store/chat';
import { chatSelectors } from '@/store/chat/selectors';
import { SendMessageParams } from '@/store/chat/slices/aiChat/action';
import { fileChatSelectors, useFileStore } from '@/store/file';
import { SendMessageParams } from '@/types/message';

export type UseSendMessageParams = Pick<
SendMessageParams,
Expand Down
1 change: 1 addition & 0 deletions src/libs/langchain/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export const LANGCHAIN_SUPPORT_TEXT_LIST = [

'sh',
'patch',
'log',
// js
'js',
'jsx',
Expand Down
15 changes: 15 additions & 0 deletions src/prompts/files/file.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import { ChatFileItem } from '@/types/message';

/**
 * Renders a single uploaded file as a `<file>` tag.
 *
 * Every field is interpolated verbatim into an attribute; no escaping is applied.
 */
const filePrompt = (item: ChatFileItem) => {
  const { fileType, id, name, size, url } = item;

  // attribute order is part of the emitted prompt text
  const attributes = [
    `id="${id}"`,
    `name="${name}"`,
    `type="${fileType}"`,
    `size="${size}"`,
    `url="${url}"`,
  ].join(' ');

  return `<file ${attributes}></file>`;
};

/**
 * Builds the `<files>` prompt section listing every uploaded file.
 *
 * @param fileList - files to describe, one `<file>` tag per entry
 * @returns the section text, or an empty string when the list is empty
 */
export const filePrompts = (fileList: ChatFileItem[]) => {
  if (!fileList.length) return '';

  const lines = [
    '<files>',
    '<files_docstring>here are user upload files you can refer to</files_docstring>',
  ];
  for (const file of fileList) {
    lines.push(filePrompt(file));
  }
  lines.push('</files>');

  return lines.join('\n');
};
14 changes: 14 additions & 0 deletions src/prompts/files/image.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import { ChatImageItem } from '@/types/message';

/**
 * Renders a single uploaded image as an `<image>` tag.
 *
 * The image's `alt` text is emitted as the `name` attribute; values are not escaped.
 */
const imagePrompt = (item: ChatImageItem) => {
  const { alt, url } = item;

  return `<image name="${alt}" url="${url}"></image>`;
};

/**
 * Builds the `<images>` prompt section listing every uploaded image.
 *
 * @param imageList - images to describe, one `<image>` tag per entry
 * @returns the section text, or an empty string when the list is empty
 */
export const imagesPrompts = (imageList: ChatImageItem[]) => {
  if (!imageList.length) return '';

  const lines = [
    '<images>',
    '<images_docstring>here are user upload images you can refer to</images_docstring>',
  ];
  for (const image of imageList) {
    lines.push(imagePrompt(image));
  }
  lines.push('</images>');

  return lines.join('\n');
};
117 changes: 117 additions & 0 deletions src/prompts/files/index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import { describe, expect, it } from 'vitest';

import { ChatFileItem, ChatImageItem } from '@/types/message';

import { filesPrompts } from './index';

// Unit tests for `filesPrompts`: checks the exact prompt text produced for
// image-only, file-only, combined, empty, and multi-item inputs.
describe('filesPrompts', () => {
// Create the sample data shared by the tests
const mockImage: ChatImageItem = {
id: 'img-1',
alt: 'test image',
url: 'https://example.com/image.jpg',
};

const mockFile: ChatFileItem = {
id: 'file-1',
name: 'test.pdf',
fileType: 'application/pdf',
size: 1024,
url: 'https://example.com/test.pdf',
};

// fileList is explicitly undefined here, so only the <images> section is expected
it('should generate prompt with only images', () => {
const result = filesPrompts({
imageList: [mockImage],
fileList: undefined,
});

expect(result).toEqual(
`<files_info>
<images>
<images_docstring>here are user upload images you can refer to</images_docstring>
<image name="test image" url="https://example.com/image.jpg"></image>
</images>
</files_info>`,
);
});

// imageList is empty here, so only the <files> section is expected
it('should generate prompt with only files', () => {
const result = filesPrompts({
imageList: [],
fileList: [mockFile],
});

expect(result).toEqual(
`<files_info>
<files>
<files_docstring>here are user upload files you can refer to</files_docstring>
<file id="file-1" name="test.pdf" type="application/pdf" size="1024" url="https://example.com/test.pdf"></file>
</files>
</files_info>`,
);
});

// with both lists populated, the <images> section comes before the <files> section
it('should generate prompt with both images and files', () => {
const result = filesPrompts({
imageList: [mockImage],
fileList: [mockFile],
});

expect(result).toEqual(
`<files_info>
<images>
<images_docstring>here are user upload images you can refer to</images_docstring>
<image name="test image" url="https://example.com/image.jpg"></image>
</images>
<files>
<files_docstring>here are user upload files you can refer to</files_docstring>
<file id="file-1" name="test.pdf" type="application/pdf" size="1024" url="https://example.com/test.pdf"></file>
</files>
</files_info>`,
);
});

// two empty lists must produce an empty string rather than an empty wrapper
it('should generate prompt with empty lists', () => {
const result = filesPrompts({
imageList: [],
fileList: [],
});

expect(result).toEqual('');
});

// loose checks only: each second item appears and each tag occurs at least twice
it('should handle multiple images and files', () => {
const images: ChatImageItem[] = [
mockImage,
{
id: 'img-2',
alt: 'second image',
url: 'https://example.com/image2.jpg',
},
];

const files: ChatFileItem[] = [
mockFile,
{
id: 'file-2',
name: 'document.docx',
fileType: 'application/docx',
size: 2048,
url: 'https://example.com/document.docx',
},
];

const result = filesPrompts({
imageList: images,
fileList: files,
});

expect(result).toContain('second image');
expect(result).toContain('document.docx');
expect(result).toMatch(/<image.*?>.*<image.*?>/s); // Check for multiple image tags
expect(result).toMatch(/<file.*?>.*<file.*?>/s); // Check for multiple file tags
});
});
21 changes: 21 additions & 0 deletions src/prompts/files/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import { ChatFileItem, ChatImageItem } from '@/types/message';

import { filePrompts } from './file';
import { imagesPrompts } from './image';

/**
 * Builds the `<files_info>` prompt block describing the images and files a user uploaded.
 *
 * The images section is emitted before the files section. A section whose list is
 * empty (or, for files, missing) is left out entirely, so the wrapper never
 * contains blank lines.
 *
 * @param params.imageList - uploaded images to describe
 * @param params.fileList - uploaded files to describe; may be omitted
 * @returns the prompt text, or an empty string when there is nothing to describe
 */
export const filesPrompts = ({
  imageList,
  fileList,
}: {
  fileList?: ChatFileItem[];
  imageList: ChatImageItem[];
}) => {
  const files = fileList ?? [];
  if (imageList.length === 0 && files.length === 0) return '';

  // Keep only sections that rendered something: interpolating an empty section
  // into a fixed template would leave a stray blank line inside the wrapper.
  const sections = [imagesPrompts(imageList), filePrompts(files)].filter(
    (section) => section.length > 0,
  );

  return ['<files_info>', ...sections, '</files_info>'].join('\n').trim();
};
26 changes: 26 additions & 0 deletions src/prompts/knowledgeBaseQA/__snapshots__/index.test.ts.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html

exports[`knowledgeBaseQAPrompts > should generate prompt with all parameters 1`] = `
"<knowledge_base_qa_info>
You are also a helpful assistant good answering questions related to Test Knowledge. And you'll be provided with a question and several passages that might be relevant. And currently your task is to provide answer based on the question and passages.
<knowledge_base_anwser_instruction>
- Note that passages might not be relevant to the question, please only use the passages that are relevant.
- if there is no relevant passage, please answer using your knowledge.
- Answer should use the same original language as the question and follow markdown syntax.
</knowledge_base_anwser_instruction>
<knowledge_bases>
<knowledge_bases_docstring>here are the knowledge base scope we retrieve chunks from:</knowledge_bases_docstring>
<knowledge id="kb1" name="Test Knowledge" type="file" fileType="txt" >Test description</knowledge>
</knowledge_bases>
<retrieved_chunks>
<retrieved_chunks_docstring>here are retrived chunks you can refer to:</retrieved_chunks_docstring>
<chunk fileId="file1" fileName="test.txt" similarity="0.8" pageNumber="1" >This is a test chunk</chunk>
</retrieved_chunks>
<user_query>
<user_query_docstring>to make result better, we may rewrite user's question.If there is a rewrite query, it will be wrapper with \`rewrite_query\` tag.</user_query_docstring>
<raw_query>What is the test about?</raw_query>
<rewrite_query>Could you explain the content of the test?</rewrite_query>
<user_query>
</knowledge_base_qa_info>"
`;
15 changes: 15 additions & 0 deletions src/prompts/knowledgeBaseQA/chunk.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import { ChatSemanticSearchChunk } from '@/types/chunk';

/**
 * Renders one retrieved chunk as a `<chunk>` tag wrapping the chunk text.
 *
 * The `pageNumber` attribute is emitted only when the value is truthy. The
 * spacing around that optional attribute is part of the emitted text and is
 * kept exactly as is.
 */
const chunkPrompt = (item: ChatSemanticSearchChunk) => {
  const { fileId, fileName, pageNumber, similarity, text } = item;

  // padded with a space on both sides when present, empty otherwise
  const pageAttribute = pageNumber ? ` pageNumber="${pageNumber}" ` : '';

  return `<chunk fileId="${fileId}" fileName="${fileName}" similarity="${similarity}" ${pageAttribute}>${text}</chunk>`;
};

/**
 * Builds the `<retrieved_chunks>` prompt section from semantic-search results.
 *
 * @param fileList - retrieved chunks to list, one `<chunk>` tag per entry
 * @returns the section text, or an empty string when there are no chunks
 */
export const chunkPrompts = (fileList: ChatSemanticSearchChunk[]) => {
  if (!fileList.length) return '';

  const lines = [
    '<retrieved_chunks>',
    '<retrieved_chunks_docstring>here are retrived chunks you can refer to:</retrieved_chunks_docstring>',
  ];
  for (const chunk of fileList) {
    lines.push(chunkPrompt(chunk));
  }
  lines.push('</retrieved_chunks>');

  return lines.join('\n');
};
Loading

0 comments on commit d2bbe8b

Please sign in to comment.