Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Refactor get_question_context_for_brain endpoint #1872

Merged
merged 1 commit into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions backend/modules/brain/controller/brain_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,12 +220,14 @@ async def set_brain_as_default(


@brain_router.post(
"/brains/{brain_id}/question_context",
"/brains/{brain_id}/documents",
dependencies=[Depends(AuthBearer()), Depends(has_brain_authorization())],
tags=["Brain"],
)
async def get_question_context_for_brain(brain_id: UUID, request: BrainQuestionRequest):
async def get_question_context_for_brain(
brain_id: UUID, question: BrainQuestionRequest
):
# TODO: Move this endpoint to AnswerGenerator service
"""Retrieve the question context from a specific brain."""
context = get_question_context_from_brain(brain_id, request.question)
return {"context": context}
context = get_question_context_from_brain(brain_id, question.question)
return {"docs": context}
43 changes: 30 additions & 13 deletions backend/repository/brain/get_question_context_from_brain.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
from uuid import UUID

from attr import dataclass
from logger import get_logger
from models.settings import get_embeddings, get_supabase_client
from vectorstore.supabase import CustomSupabaseVectorStore

logger = get_logger(__name__)


@dataclass
class DocumentAnswer:
    """One source file matched by a brain similarity search.

    Instances are built from the metadata of the documents returned by the
    vector store, one per distinct ``file_sha1``.
    """

    # Required fields, copied from the matching document's metadata.
    file_name: str
    file_sha1: str
    file_size: int
    # Optional fields; each keeps its default when the caller omits it.
    file_url: str = ""
    file_id: str = ""  # filled from metadata["id"] by the visible caller
    file_similarity: float = 0.0  # filled from metadata["similarity"]


def get_question_context_from_brain(brain_id: UUID, question: str) -> str:
# TODO: Move to AnswerGenerator service
supabase_client = get_supabase_client()
Expand All @@ -18,16 +29,22 @@ def get_question_context_from_brain(brain_id: UUID, question: str) -> str:
table_name="vectors",
brain_id=str(brain_id),
)
documents = vector_store.similarity_search(question)
## I can't pass more than 2500 tokens as the return value in my array, so I need to remove the docs once I reach 2000 tokens. A token equals 1.5 characters, so 2000 tokens is 3000 characters.
tokens = 0
for doc in documents:
tokens += len(doc.page_content) * 1.5
if tokens > 3000:
documents.remove(doc)
logger.info("documents", documents)
logger.info("tokens", tokens)
logger.info("🔥🔥🔥🔥🔥🔥")

# aggregate all the documents into one string
return "\n".join([doc.page_content for doc in documents])
documents = vector_store.similarity_search(question, k=20, threshold=0.8)

## Create a list of DocumentAnswer objects from the documents, keeping only one entry per file_sha1
answers = []
file_sha1s = []
for document in documents:
if document.metadata["file_sha1"] not in file_sha1s:
file_sha1s.append(document.metadata["file_sha1"])
answers.append(
DocumentAnswer(
file_name=document.metadata["file_name"],
file_sha1=document.metadata["file_sha1"],
file_size=document.metadata["file_size"],
file_id=document.metadata["id"],
file_similarity=document.metadata["similarity"],
)
)

return answers
6 changes: 5 additions & 1 deletion backend/vectorstore/supabase.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,11 @@ def similarity_search(
match_result = [
(
Document(
metadata=search.get("metadata", {}), # type: ignore
metadata={
**search.get("metadata", {}),
"id": search.get("id", ""),
"similarity": search.get("similarity", 0.0),
},
page_content=search.get("content", ""),
),
search.get("similarity", 0.0),
Expand Down
11 changes: 11 additions & 0 deletions frontend/lib/api/brain/brain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,14 @@ export const updateBrainSecrets = async (
): Promise<void> => {
await axiosInstance.put(`/brains/${brainId}/secrets-values`, secrets);
};

/**
 * Ask the backend which documents of a brain relate to a question.
 *
 * Sends a POST to `/brains/{brainId}/documents` with `{ question }` as the
 * request body and returns the `docs` field of the response payload.
 *
 * NOTE(review): the backend handler appears to return document objects under
 * `docs`, so `string[]` may be too narrow — confirm against the API.
 */
export const getDocsFromQuestion = async (
  brainId: string,
  question: string,
  axiosInstance: AxiosInstance
): Promise<string[]> => {
  const response = await axiosInstance.post<Record<"docs", string[]>>(
    `/brains/${brainId}/documents`,
    { question }
  );

  return response.data.docs;
};

3 changes: 3 additions & 0 deletions frontend/lib/api/brain/useBrainApi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
getBrains,
getBrainUsers,
getDefaultBrain,
getDocsFromQuestion,
getPublicBrains,
setAsDefaultBrain,
Subscription,
Expand Down Expand Up @@ -48,6 +49,8 @@ export const useBrainApi = () => {
updateBrain: async (brainId: string, brain: UpdateBrainInput) =>
updateBrain(brainId, brain, axiosInstance),
getPublicBrains: async () => getPublicBrains(axiosInstance),
getDocsFromQuestion: async (brainId: string, question: string) =>
getDocsFromQuestion(brainId, question, axiosInstance),
updateBrainSecrets: async (
brainId: string,
secrets: Record<string, string>
Expand Down