Skip to content

Commit

Permalink
Merge pull request #10 from artefactory/feature/frontend
Browse files Browse the repository at this point in the history
Add frontend
  • Loading branch information
baptiste-pasquier authored Mar 22, 2024
2 parents ecf636b + 9d62205 commit 25133b2
Show file tree
Hide file tree
Showing 33 changed files with 1,132 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ dist/
downloads/
eggs/
.eggs/
lib/
# lib/
lib64/
parts/
sdist/
Expand Down
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,8 @@ ingest_rag_2:
ingest_rag_3:
poetry run python -m backend.rag_3.ingest

serve:
serve_backend:
poetry run python -m app.server

serve_frontend:
poetry run python -m streamlit run frontend/front.py
13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
- [RAG Option 1](#rag-option-1)
- [RAG Option 2](#rag-option-2)
- [RAG Option 3](#rag-option-3)
- [Frontend](#frontend)
- [Installation](#installation)
- [Usage](#usage)
- [Development](#development)
Expand Down Expand Up @@ -116,6 +117,10 @@ Backend:

In option 3, the vectorstore must be populated with text documents (text content or summary) as in option 2. However, the docstore can be populated with either text or image documents.

### Frontend

The Streamlit demo comes from [skaff-rag-accelerator](https://github.com/artefactory/skaff-rag-accelerator/). Please read the [documentation](https://artefactory.github.io/skaff-rag-accelerator/) for more details.

## Installation

To set up the project, ensure you have a Python version between 3.10 and 3.11. Then install the dependencies using Poetry:
Expand Down Expand Up @@ -162,7 +167,7 @@ To use the RAG Multimodal Demo, follow these steps:
2. Start the backend server locally:

```bash
make serve
make serve_backend
```

This command will launch the backend server, allowing you to access the FastAPI documentation and playground interfaces:
Expand All @@ -172,6 +177,12 @@ This command will launch the backend server, allowing you to access the FastAPI
- RAG Option 2 playground interface: <http://0.0.0.0:8000/rag-2/playground/>
- RAG Option 3 playground interface: <http://0.0.0.0:8000/rag-3/playground/>

3. Launch the Streamlit frontend interface:

```bash
make serve_frontend
```

## Development

To set up a development environment and install pre-commit hooks, run the following commands:
Expand Down
73 changes: 73 additions & 0 deletions backend/api_plugins/lib/user_management.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from datetime import datetime, timedelta, timezone

import argon2
from jose import jwt
from pydantic import BaseModel

from backend import ALGORITHM, SECRET_KEY
from backend.database import Database


class UnsecureUser(BaseModel):
    """Credentials as received from the client, password still in clear.

    Transient object only: convert to `User` (hashed password) via
    `User.from_unsecure_user` before any storage.
    """

    # Originally annotated `str = None` / `bytes = None`; `| None` makes the
    # optionality explicit and type-correct for pydantic.
    email: str | None = None
    password: bytes | None = None


class User(BaseModel):
    """A stored user: email plus its Argon2 password hash."""

    # Originally annotated `str = None`; `| None` makes the optionality
    # explicit and type-correct for pydantic.
    email: str | None = None
    hashed_password: str | None = None

    @classmethod
    def from_unsecure_user(cls, unsecure_user: UnsecureUser) -> "User":
        """Hash the plaintext password and build a storable User."""
        # NOTE(review): argon2.hash_password is the legacy low-level API;
        # argon2.PasswordHasher is the recommended interface — consider migrating.
        hashed_password = argon2.hash_password(unsecure_user.password).decode("utf-8")
        return cls(email=unsecure_user.email, hashed_password=hashed_password)


def create_user(user: User) -> None:
    """Insert a new user row (email + hashed password) into the users table."""
    query = "INSERT INTO users (email, password) VALUES (?, ?)"
    params = (user.email, user.hashed_password)
    with Database() as db:
        db.execute(query, params)


def user_exists(email: str) -> bool:
    """Return True when a user row with this email is present."""
    with Database() as db:
        row = db.fetchone("SELECT 1 FROM users WHERE email = ?", (email,))
    return bool(row)


def get_user(email: str) -> User | None:
    """Fetch a user by email.

    Args:
        email: Email address to look up.

    Returns:
        The matching User, or None when no row exists.
    """
    with Database() as connection:
        # Select explicit columns: the original `SELECT *` with positional
        # indexing silently depended on the physical column order of the table.
        user_row = connection.fetchone(
            "SELECT email, password FROM users WHERE email = ?", (email,)
        )
    if user_row:
        return User(email=user_row[0], hashed_password=user_row[1])
    return None


def delete_user(email: str) -> None:
    """Remove the user row with the given email, if one exists."""
    query = "DELETE FROM users WHERE email = ?"
    with Database() as db:
        db.execute(query, (email,))


def authenticate_user(username: str, password: str) -> bool | User:
    """Validate a user's credentials against the stored Argon2 hash.

    Args:
        username: Email identifying the user.
        password: Plaintext password. (Annotated `str`: the body calls
            `password.encode("utf-8")`, so the previous `bytes` annotation
            was incorrect.)

    Returns:
        The matching User on success, False otherwise.
    """
    user = get_user(username)
    if not user:
        return False

    try:
        # argon2's legacy verify_password raises a VerificationError on a
        # wrong password rather than returning False, so it must be caught
        # to preserve the "return False on bad credentials" contract.
        if argon2.verify_password(
            user.hashed_password.encode("utf-8"), password.encode("utf-8")
        ):
            return user
    except argon2.exceptions.VerificationError:
        return False

    return False


def create_access_token(*, data: dict, expires_delta: timedelta | None = None) -> str:
    """Create a signed JWT carrying `data` plus an expiry claim.

    Args:
        data: Claims to embed in the token (copied, not mutated).
        expires_delta: Token lifetime; defaults to 60 minutes when omitted
            (or falsy, matching the original truthiness check).

    Returns:
        The encoded JWT as a string.
    """
    to_encode = data.copy()
    # datetime.utcnow() is deprecated and returns a naive datetime; use an
    # aware UTC timestamp instead.
    expire = datetime.now(timezone.utc) + (expires_delta or timedelta(minutes=60))
    to_encode.update({"exp": expire})
    return jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
39 changes: 36 additions & 3 deletions backend/rag_1/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_core.output_parsers.string import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_core.runnables.base import RunnableSequence
from langchain_core.runnables.base import RunnableSequence, RunnableSerializable
from omegaconf.dictconfig import DictConfig
from pydantic import BaseModel

from backend.rag_components.chain_links.rag_with_history import (
construct_rag_with_history,
)
from backend.utils.image import resize_base64_image
from backend.utils.llm import get_vision_llm
from backend.utils.retriever import get_retriever
Expand Down Expand Up @@ -83,7 +87,19 @@ def img_prompt_func(data_dict: dict) -> list[BaseMessage]:
return [HumanMessage(content=messages)]


def get_chain(config: DictConfig) -> RunnableSequence:
class Question(BaseModel):
    """Input payload: the user's question as a single string.

    NOTE(review): the chain is typed with input_type=str, not this model —
    confirm which shape the API layer actually sends.
    """

    question: str


class Response(BaseModel):
    """Output payload: the chain's answer string (used as output_type below)."""

    response: str


def get_base_chain(config: DictConfig) -> RunnableSequence:
"""Constructs a RAG pipeline that retrieves image and text data from documents.
The pipeline consists of the following steps:
Expand Down Expand Up @@ -112,5 +128,22 @@ def get_chain(config: DictConfig) -> RunnableSequence:
| model
| StrOutputParser()
)
typed_chain = chain.with_types(input_type=str, output_type=Response)

return typed_chain

return chain

def get_chain(config: DictConfig) -> RunnableSerializable:
    """Return the RAG pipeline dictated by the configuration.

    When `config.rag.enable_chat_memory` is set, the base chain is wrapped
    via `construct_rag_with_history`; otherwise the base chain is returned
    as-is.

    Args:
        config (DictConfig): Configuration object.

    Returns:
        RunnableSerializable: RAG pipeline.
    """
    chain = get_base_chain(config)
    if not config.rag.enable_chat_memory:
        return chain
    return construct_rag_with_history(chain, config)
11 changes: 11 additions & 0 deletions backend/rag_1/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,14 @@ def validate_size(cls, value: list[float]) -> list[float]:
return value


@dataclass(config=ConfigDict(extra="forbid"))
class RagConfig:
"""Configuration for RAG."""

database_url: str
enable_chat_memory: bool


@dataclass(config=ConfigDict(extra="forbid"))
class Config:
"""Configuration for the RAG Option 1."""
Expand All @@ -64,13 +72,16 @@ class Config:

path: PathConfig

text_llm: HydraObject
vision_llm: HydraObject
embedding: HydraObject
vectorstore: HydraObject
retriever: HydraObject

ingest: IngestConfig

rag: RagConfig


def validate_config(config: DictConfig) -> Config:
"""Validate the configuration.
Expand Down
13 changes: 13 additions & 0 deletions backend/rag_1/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@ path:
export_extracted: "${.docs}/extracted/${..name}"
database: "database/${..name}"

text_llm:
_target_: langchain_openai.AzureChatOpenAI
azure_endpoint: ${oc.env:TEXT_OPENAI_ENDPOINT}
openai_api_key: ${oc.env:TEXT_OPENAI_API_KEY}
openai_api_version: "2024-02-15-preview"
deployment_name: "gpt4-turbo"
temperature: 0.0
max_tokens: 1024

vision_llm:
_target_: langchain_openai.AzureChatOpenAI
azure_endpoint: ${oc.env:VISION_OPENAI_ENDPOINT}
Expand Down Expand Up @@ -51,3 +60,7 @@ ingest:
table_min_size: [0.0, 0.0]

export_extracted: True

rag:
database_url: ${oc.env:DATABASE_URL}
enable_chat_memory: ${oc.decode:${oc.env:ENABLE_AUTHENTICATION}}
42 changes: 39 additions & 3 deletions backend/rag_2/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,35 @@
from langchain_core.output_parsers.string import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables.base import RunnableSequence
from langchain_core.runnables.base import (
RunnableSequence,
RunnableSerializable,
)
from omegaconf.dictconfig import DictConfig
from pydantic import BaseModel

from backend.rag_components.chain_links.rag_with_history import (
construct_rag_with_history,
)
from backend.utils.llm import get_text_llm
from backend.utils.retriever import get_retriever

from . import prompts


def get_chain(config: DictConfig) -> RunnableSequence:
class Question(BaseModel):
    """Input payload: the user's question as a single string.

    NOTE(review): the chain is typed with input_type=str, not this model —
    confirm which shape the API layer actually sends.
    """

    question: str


class Response(BaseModel):
    """Output payload: the chain's answer string (used as output_type below)."""

    response: str


def get_base_chain(config: DictConfig) -> RunnableSequence:
"""Constructs a RAG pipeline that retrieves text data from documents.
The pipeline consists of the following steps:
Expand Down Expand Up @@ -43,5 +62,22 @@ def get_chain(config: DictConfig) -> RunnableSequence:
| model
| StrOutputParser()
)
typed_chain = chain.with_types(input_type=str, output_type=Response)

return typed_chain

return chain

def get_chain(config: DictConfig) -> RunnableSerializable:
    """Return the RAG pipeline dictated by the configuration.

    When `config.rag.enable_chat_memory` is set, the base chain is wrapped
    via `construct_rag_with_history`; otherwise the base chain is returned
    as-is.

    Args:
        config (DictConfig): Configuration object.

    Returns:
        RunnableSerializable: RAG pipeline.
    """
    chain = get_base_chain(config)
    if not config.rag.enable_chat_memory:
        return chain
    return construct_rag_with_history(chain, config)
10 changes: 10 additions & 0 deletions backend/rag_2/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,14 @@ def validate_size(cls, value: list[float]) -> list[float]:
return value


@dataclass(config=ConfigDict(extra="forbid"))
class RagConfig:
"""Configuration for RAG."""

database_url: str
enable_chat_memory: bool


@dataclass(config=ConfigDict(extra="forbid"))
class Config:
"""Configuration for the RAG Option 2."""
Expand All @@ -147,6 +155,8 @@ class Config:

ingest: IngestConfig

rag: RagConfig


def validate_config(config: DictConfig) -> Config:
"""Validate the configuration.
Expand Down
4 changes: 4 additions & 0 deletions backend/rag_2/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,7 @@ ingest:
image: "summary"

export_extracted: True

rag:
database_url: ${oc.env:DATABASE_URL}
enable_chat_memory: ${oc.decode:${oc.env:ENABLE_AUTHENTICATION}}
Loading

0 comments on commit 25133b2

Please sign in to comment.