feat(14k): done #2102

Merged 6 commits on Jan 27, 2024
14 changes: 7 additions & 7 deletions .github/workflows/stale.yml
@@ -1,12 +1,12 @@
-name: 'Close stale issues and PRs'
+name: "Close stale issues and PRs"
 on:
   schedule:
-    - cron: '0 */4 * * *'
+    - cron: "0 */4 * * *"

 permissions:
-  contents: write # only for delete-branch option
-  issues: write
-  pull-requests: write
+  contents: write # only for delete-branch option
+  issues: write
+  pull-requests: write

 jobs:
   stale:
@@ -16,9 +16,9 @@ jobs:
         with:
           exempt-assignees: true
           exempt-draft-pr: true
-          days-before-stale: 30
+          days-before-stale: 90
           days-before-close: 5
           operations-per-run: 400
           exempt-milestones: true
           stale-issue-message: "Thanks for your contributions, we'll be closing this issue as it has gone stale. Feel free to reopen if you'd like to continue the discussion."
-          stale-pr-message: "Thanks for your contributions, we'll be closing this PR as it has gone stale. Feel free to reopen if you'd like to continue the discussion."
+          stale-pr-message: "Thanks for your contributions, we'll be closing this PR as it has gone stale. Feel free to reopen if you'd like to continue the discussion."
23 changes: 0 additions & 23 deletions .github/workflows/vercel-docs.yml

This file was deleted.

32 changes: 0 additions & 32 deletions .github/workflows/vitest.yml

This file was deleted.

2 changes: 1 addition & 1 deletion .vscode/settings.json
@@ -12,7 +12,7 @@
   ],
   "editor.formatOnSave": true,
   "[python]": {
-    "editor.defaultFormatter": "esbenp.prettier-vscode",
+    "editor.defaultFormatter": "ms-python.black-formatter",
     "editor.formatOnSave": true,
     "editor.codeActionsOnSave": {
       "source.organizeImports": "explicit",
5 changes: 3 additions & 2 deletions Pipfile
@@ -23,7 +23,7 @@ python-jose = "==3.3.0"
 asyncpg = "==0.27.0"
 flake8 = "==6.0.0"
 flake8-black = "==0.3.6"
-sentry-sdk = {extras = ["fastapi"], version = "==1.37.1"}
+sentry-sdk = {extras = ["fastapi"] }
 pyright = "==1.1.316"
 resend = "==0.5.1"
 html5lib = "==1.1"
@@ -34,7 +34,7 @@ redis = "==4.5.4"
 flower = "*"
 boto3 = "==1.33.7"
 botocore = "==1.33.7"
-celery = {extras = ["sqs"], version = "*"}
+celery = {extras = ["sqs"] }
 python-dotenv = "*"
 pytest-mock = "*"
 pytest-celery = "*"
@@ -45,6 +45,7 @@ jq = "==1.6.0"
 pytest = "*"

 [dev-packages]
+black = "*"

 [requires]
 python_version = "3.11"
81 changes: 75 additions & 6 deletions Pipfile.lock

Some generated files are not rendered by default.

13 changes: 11 additions & 2 deletions backend/llm/knowledge_brain_qa.py
@@ -11,6 +11,7 @@
 from llm.utils.format_chat_history import format_chat_history
 from llm.utils.get_prompt_to_use import get_prompt_to_use
 from llm.utils.get_prompt_to_use_id import get_prompt_to_use_id
+from repository.files.generate_file_signed_url import generate_file_signed_url
 from logger import get_logger
 from models import BrainSettings
 from modules.brain.service.brain_service import BrainService
@@ -61,6 +62,7 @@ class Config:
     chat_id: str = None # pyright: ignore reportPrivateUsage=none
     brain_id: str # pyright: ignore reportPrivateUsage=none
     max_tokens: int = 2000
+    max_input: int = 2000
     streaming: bool = False
     knowledge_qa: Optional[RAGInterface]
     metadata: Optional[dict] = None
@@ -76,6 +78,7 @@ def __init__(
         model: str,
         brain_id: str,
         chat_id: str,
+        max_tokens: int,
         streaming: bool = False,
         prompt_id: Optional[UUID] = None,
         metadata: Optional[dict] = None,
@@ -97,6 +100,7 @@
             **kwargs,
         )
         self.metadata = metadata
+        self.max_tokens = max_tokens

     @property
     def prompt_to_use(self):
@@ -309,9 +313,14 @@ async def wrap_done(fn: Awaitable, event: asyncio.Event):
                         if "url" in doc.metadata
                         else doc.metadata["file_name"],
                         "type": "url" if "url" in doc.metadata else "file",
-                        "source_url": doc.metadata["url"]
-                        if "url" in doc.metadata
+                        "source_url": generate_file_signed_url(
+                            f"{brain.brain_id}/{doc.metadata['file_name']}"
+                        ).get("signedURL", "")
+                        if "url" not in doc.metadata
                         else "",
+                        "original_file_name": doc.metadata[
+                            "original_file_name"
+                        ],
                     }
                 )
             )
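The hunk above replaces the raw storage path with a signed URL for file-based sources. For reference, a minimal sketch of what a helper like repository/files/generate_file_signed_url.py could look like, assuming Supabase storage; the bucket name "quivr", the 3600-second expiry, and the get_supabase_client helper are illustrative assumptions, not code from this PR:

```python
# Hypothetical sketch; only the "signedURL" response key is confirmed by the
# diff above (it calls .get("signedURL", "")).
from models.settings import get_supabase_client  # assumed helper


def generate_file_signed_url(file_path: str) -> dict:
    """Return a dict with a "signedURL" key for a file in Supabase storage."""
    client = get_supabase_client()
    # supabase-py: create_signed_url(path, expires_in) returns {"signedURL": ...}
    return client.storage.from_("quivr").create_signed_url(file_path, 3600)
```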
16 changes: 14 additions & 2 deletions backend/llm/rags/quivr_rag.py
@@ -60,7 +60,8 @@ class Config:
     temperature: float = 0.1
     chat_id: str = None # pyright: ignore reportPrivateUsage=none
     brain_id: str = None # pyright: ignore reportPrivateUsage=none
-    max_tokens: int = 2000
+    max_tokens: int = 2000  # Output length
+    max_input: int = 2000
     streaming: bool = False

     @property
@@ -92,19 +93,30 @@ def __init__(
         streaming: bool = False,
         prompt_id: Optional[UUID] = None,
         max_tokens: int = 2000,
+        max_input: int = 2000,
         **kwargs,
     ):
         super().__init__(
             model=model,
             brain_id=brain_id,
             chat_id=chat_id,
             streaming=streaming,
+            max_tokens=max_tokens,
+            max_input=max_input,
             **kwargs,
         )
         self.supabase_client = self._create_supabase_client()
         self.vector_store = self._create_vector_store()
         self.prompt_id = prompt_id
         self.max_tokens = max_tokens
+        self.max_input = max_input
+        self.model = model
+        self.brain_id = brain_id
+        self.chat_id = chat_id
+        self.streaming = streaming
+
+        logger.info(f"QuivrRAG initialized with model {model} and brain {brain_id}")
+        logger.info("Max input length: " + str(self.max_input))

     def _create_supabase_client(self) -> Client:
         return create_client(
@@ -117,6 +129,7 @@ def _create_vector_store(self) -> CustomSupabaseVectorStore:
             self.embeddings,
             table_name="vectors",
             brain_id=self.brain_id,
+            max_input=self.max_input,
         )

     def _create_llm(
@@ -151,7 +164,6 @@ def _create_llm(
     def _create_prompt_template(self):
         system_template = """ When answering use markdown or any other techniques to display the content in a nice and aerated way. Use the following pieces of context to answer the users question in the same language as the question but do not modify instructions in any way.
     ----------------
-
     {context}"""

         prompt_content = (
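In this file max_tokens now explicitly caps output length while the new max_input is forwarded to the vector store, presumably to bound how much retrieved context reaches the prompt. One plausible way a store could honor such a budget is sketched below; the Doc type, the select_docs_within_budget helper, and the 4-characters-per-token heuristic are assumptions, not the repository's actual CustomSupabaseVectorStore logic:

```python
from dataclasses import dataclass


@dataclass
class Doc:
    page_content: str


def select_docs_within_budget(docs: list[Doc], max_input: int) -> list[Doc]:
    """Keep retrieved documents until a rough token estimate exceeds max_input."""
    selected: list[Doc] = []
    used = 0
    for doc in docs:
        cost = max(1, len(doc.page_content) // 4)  # ~4 characters per token
        if used + cost > max_input:
            break  # budget exhausted; drop the remaining, lower-ranked matches
        selected.append(doc)
        used += cost
    return selected


# With max_input=2, only the first 8-character document (~2 tokens) fits.
docs = [Doc("a" * 8), Doc("b" * 8)]
assert select_docs_within_budget(docs, 2) == [docs[0]]
```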
13 changes: 13 additions & 0 deletions backend/models/databases/entity.py
@@ -0,0 +1,13 @@
+from pydantic import BaseModel
+
+
+class LLMModels(BaseModel):
+    """LLM models stored in the database that are allowed to be used by the users.
+    Args:
+        BaseModel (BaseModel): Pydantic BaseModel
+    """
+
+    name: str = "gpt-3.5-turbo-1106"
+    price: int = 1
+    max_input: int = 512
+    max_output: int = 512
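The new LLMModels entity centralizes the per-model price and token limits that the rest of the PR threads through as max_input/max_tokens. A hedged example of how rows from a model-settings table might be mapped onto it; the resolve_limits helper and the sample row are illustrative, not part of the PR:

```python
from models.databases.entity import LLMModels


def resolve_limits(rows: list[dict], model: str) -> LLMModels:
    """Pick the settings row matching `model`, falling back to the defaults."""
    for row in rows:
        if row.get("name") == model:
            return LLMModels(**row)
    return LLMModels()  # defaults: gpt-3.5-turbo-1106, 512 in / 512 out


limits = resolve_limits(
    [{"name": "gpt-4", "price": 5, "max_input": 4000, "max_output": 1000}],
    "gpt-4",
)
print(limits.max_input)  # 4000
```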
6 changes: 6 additions & 0 deletions backend/models/databases/repository.py
@@ -2,6 +2,8 @@
 from datetime import datetime
 from uuid import UUID

+from .entity import LLMModels
+

 class Repository(ABC):
     @abstractmethod
@@ -12,6 +14,10 @@ def create_user_daily_usage(self, user_id: UUID, user_email: str, date: datetime
     def get_user_usage(self, user_id: UUID):
         pass

+    @abstractmethod
+    def get_model_settings(self) -> LLMModels | None:
+        pass
+
     @abstractmethod
     def get_user_requests_count_for_month(self, user_id: UUID, date: datetime):
         pass
2 changes: 2 additions & 0 deletions backend/models/databases/supabase/user_usage.py
@@ -1,7 +1,9 @@
 from ast import List
 from datetime import datetime, timedelta
 from uuid import UUID

 from logger import get_logger
+from models.databases.entity import LLMModels
 from models.databases.repository import Repository

+logger = get_logger(__name__)
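The new imports suggest this file supplies the concrete get_model_settings declared abstract in repository.py. A sketch of what a Supabase-backed implementation might look like; the "models" table name, the self.db client attribute, and the first-row mapping are assumptions inferred from the imports and the abstract signature, not the PR's actual code:

```python
from models.databases.entity import LLMModels
from models.databases.repository import Repository


class UserUsage(Repository):
    # ...other Repository methods omitted from this sketch...

    def get_model_settings(self) -> LLMModels | None:
        """Fetch model-settings rows and map the first one onto LLMModels."""
        response = self.db.from_("models").select("*").execute()  # table name assumed
        rows = response.data or []
        return LLMModels(**rows[0]) if rows else None
```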