Skip to content

Commit

Permalink
Merge pull request #10 from artefactory/feature/frontend
Browse files Browse the repository at this point in the history
Add frontend
  • Loading branch information
baptiste-pasquier authored Mar 22, 2024
2 parents ecf636b + 9d62205 commit 25133b2
Show file tree
Hide file tree
Showing 33 changed files with 1,132 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ dist/
downloads/
eggs/
.eggs/
lib/
# lib/
lib64/
parts/
sdist/
Expand Down
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,8 @@ ingest_rag_2:
ingest_rag_3:
poetry run python -m backend.rag_3.ingest

serve:
serve_backend:
poetry run python -m app.server

serve_frontend:
poetry run python -m streamlit run frontend/front.py
13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
- [RAG Option 1](#rag-option-1)
- [RAG Option 2](#rag-option-2)
- [RAG Option 3](#rag-option-3)
- [Frontend](#frontend)
- [Installation](#installation)
- [Usage](#usage)
- [Development](#development)
Expand Down Expand Up @@ -116,6 +117,10 @@ Backend:

In option 3, the vectorstore must be populated with text documents (text content or summary) as in option 2. However, the docstore can be populated with either text or image documents.

### Frontend

The Streamlit demo comes from [skaff-rag-accelerator](https://github.com/artefactory/skaff-rag-accelerator/). Please read the [documentation](https://artefactory.github.io/skaff-rag-accelerator/) for more details.

## Installation

To set up the project, ensure you have a Python version between 3.10 and 3.11. Then install the dependencies using Poetry:
Expand Down Expand Up @@ -162,7 +167,7 @@ To use the RAG Multimodal Demo, follow these steps:
2. Start the backend server locally:

```bash
make serve
make serve_backend
```

This command will launch the backend server, allowing you to access the FastAPI documentation and playground interfaces:
Expand All @@ -172,6 +177,12 @@ This command will launch the backend server, allowing you to access the FastAPI
- RAG Option 2 playground interface: <http://0.0.0.0:8000/rag-2/playground/>
- RAG Option 3 playground interface: <http://0.0.0.0:8000/rag-3/playground/>

3. Launch the Streamlit frontend interface:

```bash
make serve_frontend
```

## Development

To set up a development environment and install pre-commit hooks, run the following commands:
Expand Down
73 changes: 73 additions & 0 deletions backend/api_plugins/lib/user_management.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from datetime import datetime, timedelta, timezone

import argon2
from jose import jwt
from pydantic import BaseModel

from backend import ALGORITHM, SECRET_KEY
from backend.database import Database


class UnsecureUser(BaseModel):
    """Credentials as received from the client, password still in clear.

    Transient object only: convert to `User` (hashed password) via
    `User.from_unsecure_user` before any storage.
    """

    # Originally annotated `str = None` / `bytes = None`; `| None` makes the
    # optionality explicit and type-correct for pydantic.
    email: str | None = None
    password: bytes | None = None


class User(BaseModel):
    """A stored user: email plus its Argon2 password hash."""

    # Originally annotated `str = None`; `| None` makes the optionality
    # explicit and type-correct for pydantic.
    email: str | None = None
    hashed_password: str | None = None

    @classmethod
    def from_unsecure_user(cls, unsecure_user: UnsecureUser) -> "User":
        """Hash the plaintext password and build a storable User."""
        # NOTE(review): argon2.hash_password is the legacy low-level API;
        # argon2.PasswordHasher is the recommended interface — consider migrating.
        hashed_password = argon2.hash_password(unsecure_user.password).decode("utf-8")
        return cls(email=unsecure_user.email, hashed_password=hashed_password)


def create_user(user: User) -> None:
    """Insert a new user row (email + hashed password) into the users table."""
    query = "INSERT INTO users (email, password) VALUES (?, ?)"
    params = (user.email, user.hashed_password)
    with Database() as db:
        db.execute(query, params)


def user_exists(email: str) -> bool:
    """Return True when a user row with this email is present."""
    with Database() as db:
        row = db.fetchone("SELECT 1 FROM users WHERE email = ?", (email,))
    return bool(row)


def get_user(email: str) -> User | None:
    """Fetch a user by email.

    Args:
        email: Email address to look up.

    Returns:
        The matching User, or None when no row exists.
    """
    with Database() as connection:
        # Select explicit columns: the original `SELECT *` with positional
        # indexing silently depended on the physical column order of the table.
        user_row = connection.fetchone(
            "SELECT email, password FROM users WHERE email = ?", (email,)
        )
    if user_row:
        return User(email=user_row[0], hashed_password=user_row[1])
    return None


def delete_user(email: str) -> None:
    """Remove the user row with the given email, if one exists."""
    query = "DELETE FROM users WHERE email = ?"
    with Database() as db:
        db.execute(query, (email,))


def authenticate_user(username: str, password: str) -> bool | User:
    """Validate a user's credentials against the stored Argon2 hash.

    Args:
        username: Email identifying the user.
        password: Plaintext password. (Annotated `str`: the body calls
            `password.encode("utf-8")`, so the previous `bytes` annotation
            was incorrect.)

    Returns:
        The matching User on success, False otherwise.
    """
    user = get_user(username)
    if not user:
        return False

    try:
        # argon2's legacy verify_password raises a VerificationError on a
        # wrong password rather than returning False, so it must be caught
        # to preserve the "return False on bad credentials" contract.
        if argon2.verify_password(
            user.hashed_password.encode("utf-8"), password.encode("utf-8")
        ):
            return user
    except argon2.exceptions.VerificationError:
        return False

    return False


def create_access_token(*, data: dict, expires_delta: timedelta | None = None) -> str:
    """Create a signed JWT carrying `data` plus an expiry claim.

    Args:
        data: Claims to embed in the token (copied, not mutated).
        expires_delta: Token lifetime; defaults to 60 minutes when omitted
            (or falsy, matching the original truthiness check).

    Returns:
        The encoded JWT as a string.
    """
    to_encode = data.copy()
    # datetime.utcnow() is deprecated and returns a naive datetime; use an
    # aware UTC timestamp instead.
    expire = datetime.now(timezone.utc) + (expires_delta or timedelta(minutes=60))
    to_encode.update({"exp": expire})
    return jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
39 changes: 36 additions & 3 deletions backend/rag_1/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_core.output_parsers.string import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_core.runnables.base import RunnableSequence
from langchain_core.runnables.base import RunnableSequence, RunnableSerializable
from omegaconf.dictconfig import DictConfig
from pydantic import BaseModel

from backend.rag_components.chain_links.rag_with_history import (
construct_rag_with_history,
)
from backend.utils.image import resize_base64_image
from backend.utils.llm import get_vision_llm
from backend.utils.retriever import get_retriever
Expand Down Expand Up @@ -83,7 +87,19 @@ def img_prompt_func(data_dict: dict) -> list[BaseMessage]:
return [HumanMessage(content=messages)]


def get_chain(config: DictConfig) -> RunnableSequence:
class Question(BaseModel):
    """Input payload: the user's question as a single string.

    NOTE(review): the chain is typed with input_type=str, not this model —
    confirm which shape the API layer actually sends.
    """

    question: str


class Response(BaseModel):
    """Output payload: the chain's answer string (used as output_type below)."""

    response: str


def get_base_chain(config: DictConfig) -> RunnableSequence:
"""Constructs a RAG pipeline that retrieves image and text data from documents.
The pipeline consists of the following steps:
Expand Down Expand Up @@ -112,5 +128,22 @@ def get_chain(config: DictConfig) -> RunnableSequence:
| model
| StrOutputParser()
)
typed_chain = chain.with_types(input_type=str, output_type=Response)

return typed_chain

return chain

def get_chain(config: DictConfig) -> RunnableSerializable:
    """Return the RAG pipeline dictated by the configuration.

    When `config.rag.enable_chat_memory` is set, the base chain is wrapped
    via `construct_rag_with_history`; otherwise the base chain is returned
    as-is.

    Args:
        config (DictConfig): Configuration object.

    Returns:
        RunnableSerializable: RAG pipeline.
    """
    chain = get_base_chain(config)
    if not config.rag.enable_chat_memory:
        return chain
    return construct_rag_with_history(chain, config)
11 changes: 11 additions & 0 deletions backend/rag_1/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,14 @@ def validate_size(cls, value: list[float]) -> list[float]:
return value


@dataclass(config=ConfigDict(extra="forbid"))
class RagConfig:
"""Configuration for RAG."""

database_url: str
enable_chat_memory: bool


@dataclass(config=ConfigDict(extra="forbid"))
class Config:
"""Configuration for the RAG Option 1."""
Expand All @@ -64,13 +72,16 @@ class Config:

path: PathConfig

text_llm: HydraObject
vision_llm: HydraObject
embedding: HydraObject
vectorstore: HydraObject
retriever: HydraObject

ingest: IngestConfig

rag: RagConfig


def validate_config(config: DictConfig) -> Config:
"""Validate the configuration.
Expand Down
13 changes: 13 additions & 0 deletions backend/rag_1/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@ path:
export_extracted: "${.docs}/extracted/${..name}"
database: "database/${..name}"

text_llm:
_target_: langchain_openai.AzureChatOpenAI
azure_endpoint: ${oc.env:TEXT_OPENAI_ENDPOINT}
openai_api_key: ${oc.env:TEXT_OPENAI_API_KEY}
openai_api_version: "2024-02-15-preview"
deployment_name: "gpt4-turbo"
temperature: 0.0
max_tokens: 1024

vision_llm:
_target_: langchain_openai.AzureChatOpenAI
azure_endpoint: ${oc.env:VISION_OPENAI_ENDPOINT}
Expand Down Expand Up @@ -51,3 +60,7 @@ ingest:
table_min_size: [0.0, 0.0]

export_extracted: True

rag:
database_url: ${oc.env:DATABASE_URL}
enable_chat_memory: ${oc.decode:${oc.env:ENABLE_AUTHENTICATION}}
42 changes: 39 additions & 3 deletions backend/rag_2/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,35 @@
from langchain_core.output_parsers.string import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables.base import RunnableSequence
from langchain_core.runnables.base import (
RunnableSequence,
RunnableSerializable,
)
from omegaconf.dictconfig import DictConfig
from pydantic import BaseModel

from backend.rag_components.chain_links.rag_with_history import (
construct_rag_with_history,
)
from backend.utils.llm import get_text_llm
from backend.utils.retriever import get_retriever

from . import prompts


def get_chain(config: DictConfig) -> RunnableSequence:
class Question(BaseModel):
    """Input payload: the user's question as a single string.

    NOTE(review): the chain is typed with input_type=str, not this model —
    confirm which shape the API layer actually sends.
    """

    question: str


class Response(BaseModel):
    """Output payload: the chain's answer string (used as output_type below)."""

    response: str


def get_base_chain(config: DictConfig) -> RunnableSequence:
"""Constructs a RAG pipeline that retrieves text data from documents.
The pipeline consists of the following steps:
Expand Down Expand Up @@ -43,5 +62,22 @@ def get_chain(config: DictConfig) -> RunnableSequence:
| model
| StrOutputParser()
)
typed_chain = chain.with_types(input_type=str, output_type=Response)

return typed_chain

return chain

def get_chain(config: DictConfig) -> RunnableSerializable:
    """Return the RAG pipeline dictated by the configuration.

    When `config.rag.enable_chat_memory` is set, the base chain is wrapped
    via `construct_rag_with_history`; otherwise the base chain is returned
    as-is.

    Args:
        config (DictConfig): Configuration object.

    Returns:
        RunnableSerializable: RAG pipeline.
    """
    chain = get_base_chain(config)
    if not config.rag.enable_chat_memory:
        return chain
    return construct_rag_with_history(chain, config)
10 changes: 10 additions & 0 deletions backend/rag_2/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,14 @@ def validate_size(cls, value: list[float]) -> list[float]:
return value


@dataclass(config=ConfigDict(extra="forbid"))
class RagConfig:
"""Configuration for RAG."""

database_url: str
enable_chat_memory: bool


@dataclass(config=ConfigDict(extra="forbid"))
class Config:
"""Configuration for the RAG Option 2."""
Expand All @@ -147,6 +155,8 @@ class Config:

ingest: IngestConfig

rag: RagConfig


def validate_config(config: DictConfig) -> Config:
"""Validate the configuration.
Expand Down
4 changes: 4 additions & 0 deletions backend/rag_2/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,7 @@ ingest:
image: "summary"

export_extracted: True

rag:
database_url: ${oc.env:DATABASE_URL}
enable_chat_memory: ${oc.decode:${oc.env:ENABLE_AUTHENTICATION}}
Loading

0 comments on commit 25133b2

Please sign in to comment.