Skip to content

Commit

Permalink
♻️ refactor rag_components
Browse files Browse the repository at this point in the history
  • Loading branch information
baptiste-pasquier committed Mar 25, 2024
1 parent 754e333 commit 77db4e8
Show file tree
Hide file tree
Showing 21 changed files with 45 additions and 38 deletions.
4 changes: 2 additions & 2 deletions backend/rag_1/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
from backend.rag_components.chain_links.retrieve_and_format_multimodal_docs import (
fetch_docs_chain,
)
from backend.utils.llm import get_vision_llm
from backend.utils.retriever import get_retriever
from backend.rag_components.llm import get_vision_llm
from backend.rag_components.retriever import get_retriever

from . import prompts

Expand Down
4 changes: 2 additions & 2 deletions backend/rag_1/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
from unstructured.partition.pdf import partition_pdf

from backend.rag_1.config import validate_config
from backend.utils.unstructured import (
from backend.rag_components.unstructured import (
load_chunking_func,
select_images,
select_tables,
select_texts,
)
from backend.utils.vectorstore import get_vectorstore
from backend.rag_components.vectorstore import get_vectorstore

logger = logging.getLogger(__name__)

Expand Down
8 changes: 4 additions & 4 deletions backend/rag_1/notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -73,16 +73,16 @@
"\n",
"from backend.rag_1.chain import get_chain\n",
"from backend.rag_1.config import validate_config\n",
"from backend.utils.elements import convert_documents_to_elements\n",
"from backend.utils.retriever import get_retriever\n",
"from backend.utils.unstructured import (\n",
"from backend.rag_components.elements import convert_documents_to_elements\n",
"from backend.rag_components.retriever import get_retriever\n",
"from backend.rag_components.unstructured import (\n",
" load_chunking_func,\n",
" select_images,\n",
" select_tables,\n",
" select_texts,\n",
")\n",
"from backend.rag_components.vectorstore import get_vectorstore\n",
"from backend.utils.utils import format_time_delta\n",
"from backend.utils.vectorstore import get_vectorstore\n",
"\n",
"logging.basicConfig(format=\"[%(asctime)s] - %(name)s - %(levelname)s - %(message)s\")\n",
"logging.getLogger(\"backend\").setLevel(logging.INFO)\n",
Expand Down
4 changes: 2 additions & 2 deletions backend/rag_2/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
from backend.rag_components.chain_links.retrieve_and_format_text_docs import (
fetch_docs_chain,
)
from backend.utils.llm import get_text_llm
from backend.utils.retriever import get_retriever
from backend.rag_components.llm import get_text_llm
from backend.rag_components.retriever import get_retriever

from . import prompts

Expand Down
2 changes: 1 addition & 1 deletion backend/rag_2/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ store:
root_path: "${..path.database}/multi_vector_retriever_metadata/"

retriever:
_target_: backend.utils.multi_vector.ThresholdedMultiVectorRetriever
_target_: backend.rag_components.multi_vector.ThresholdedMultiVectorRetriever
vectorstore: ${..vectorstore}
byte_store: ${..store}
id_key: "doc_id"
Expand Down
12 changes: 6 additions & 6 deletions backend/rag_2/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@

from backend.rag_2 import prompts
from backend.rag_2.config import validate_config
from backend.utils.elements import Image, Table, Text
from backend.utils.ingest import add_elements_to_multivector_retriever
from backend.utils.llm import get_text_llm, get_vision_llm
from backend.utils.retriever import get_retriever
from backend.utils.summarization import (
from backend.rag_components.elements import Image, Table, Text
from backend.rag_components.ingest import add_elements_to_multivector_retriever
from backend.rag_components.llm import get_text_llm, get_vision_llm
from backend.rag_components.retriever import get_retriever
from backend.rag_components.summarization import (
generate_image_summaries,
generate_text_summaries,
)
from backend.utils.unstructured import (
from backend.rag_components.unstructured import (
load_chunking_func,
select_images,
select_tables,
Expand Down
8 changes: 4 additions & 4 deletions backend/rag_2/notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,10 @@
" apply_summarize_table,\n",
" apply_summarize_text,\n",
")\n",
"from backend.utils.elements import convert_documents_to_elements\n",
"from backend.utils.ingest import add_elements_to_multivector_retriever\n",
"from backend.utils.retriever import get_retriever\n",
"from backend.utils.unstructured import (\n",
"from backend.rag_components.elements import convert_documents_to_elements\n",
"from backend.rag_components.ingest import add_elements_to_multivector_retriever\n",
"from backend.rag_components.retriever import get_retriever\n",
"from backend.rag_components.unstructured import (\n",
" load_chunking_func,\n",
" select_images,\n",
" select_tables,\n",
Expand Down
4 changes: 2 additions & 2 deletions backend/rag_3/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
from backend.rag_components.chain_links.retrieve_and_format_multimodal_docs import (
fetch_docs_chain,
)
from backend.utils.llm import get_vision_llm
from backend.utils.retriever import get_retriever
from backend.rag_components.llm import get_vision_llm
from backend.rag_components.retriever import get_retriever

from . import prompts

Expand Down
2 changes: 1 addition & 1 deletion backend/rag_3/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ store:
root_path: "${..path.database}/multi_vector_retriever_metadata/"

retriever:
_target_: backend.utils.multi_vector.ThresholdedMultiVectorRetriever
_target_: backend.rag_components.multi_vector.ThresholdedMultiVectorRetriever
vectorstore: ${..vectorstore}
byte_store: ${..store}
id_key: "doc_id"
Expand Down
12 changes: 6 additions & 6 deletions backend/rag_3/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@

from backend.rag_3 import prompts
from backend.rag_3.config import validate_config
from backend.utils.elements import Image, Table, Text
from backend.utils.ingest import add_elements_to_multivector_retriever
from backend.utils.llm import get_text_llm, get_vision_llm
from backend.utils.retriever import get_retriever
from backend.utils.summarization import (
from backend.rag_components.elements import Image, Table, Text
from backend.rag_components.ingest import add_elements_to_multivector_retriever
from backend.rag_components.llm import get_text_llm, get_vision_llm
from backend.rag_components.retriever import get_retriever
from backend.rag_components.summarization import (
generate_image_summaries,
generate_text_summaries,
)
from backend.utils.unstructured import (
from backend.rag_components.unstructured import (
load_chunking_func,
select_images,
select_tables,
Expand Down
8 changes: 4 additions & 4 deletions backend/rag_3/notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,10 @@
" apply_summarize_table,\n",
" apply_summarize_text,\n",
")\n",
"from backend.utils.elements import convert_documents_to_elements\n",
"from backend.utils.ingest import add_elements_to_multivector_retriever\n",
"from backend.utils.retriever import get_retriever\n",
"from backend.utils.unstructured import (\n",
"from backend.rag_components.elements import convert_documents_to_elements\n",
"from backend.rag_components.ingest import add_elements_to_multivector_retriever\n",
"from backend.rag_components.retriever import get_retriever\n",
"from backend.rag_components.unstructured import (\n",
" load_chunking_func,\n",
" select_images,\n",
" select_tables,\n",
Expand Down
2 changes: 1 addition & 1 deletion backend/rag_components/chain_links/rag_with_history.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from backend.rag_components.chain_links.condense_question import condense_question
from backend.rag_components.chat_message_history import get_chat_message_history
from backend.utils.llm import get_text_llm
from backend.rag_components.llm import get_text_llm


class QuestionWithHistory(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from langchain_core.documents import Document
from pydantic import BaseModel, PrivateAttr, validator

from .image import local_image_to_base64
from backend.utils.image import local_image_to_base64


class Element(BaseModel):
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from omegaconf.dictconfig import DictConfig
from unstructured.documents.coordinates import RelativeCoordinateSystem

from backend.utils.elements import Image, Table, TableImage, TableText, Text
from backend.rag_components.elements import Image, Table, TableImage, TableText, Text


def get_element_size(element: unstructured_elements.Element) -> tuple[float, float]:
Expand Down
File renamed without changes.
9 changes: 8 additions & 1 deletion tests/backend/utils/test_elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,14 @@
from pytest import FixtureRequest
from pytest_lazy_fixtures import lf

from backend.utils.elements import Element, Image, Table, TableImage, TableText, Text
from backend.rag_components.elements import (
Element,
Image,
Table,
TableImage,
TableText,
Text,
)

# ----------------------------------- Text ----------------------------------- #

Expand Down

0 comments on commit 77db4e8

Please sign in to comment.