diff --git a/backend/rag_1/chain.py b/backend/rag_1/chain.py index a0c5103..9afe492 100644 --- a/backend/rag_1/chain.py +++ b/backend/rag_1/chain.py @@ -13,8 +13,8 @@ from backend.rag_components.chain_links.retrieve_and_format_multimodal_docs import ( fetch_docs_chain, ) -from backend.utils.llm import get_vision_llm -from backend.utils.retriever import get_retriever +from backend.rag_components.llm import get_vision_llm +from backend.rag_components.retriever import get_retriever from . import prompts diff --git a/backend/rag_1/ingest.py b/backend/rag_1/ingest.py index ac33adf..a09d5f1 100644 --- a/backend/rag_1/ingest.py +++ b/backend/rag_1/ingest.py @@ -10,13 +10,13 @@ from unstructured.partition.pdf import partition_pdf from backend.rag_1.config import validate_config -from backend.utils.unstructured import ( +from backend.rag_components.unstructured import ( load_chunking_func, select_images, select_tables, select_texts, ) -from backend.utils.vectorstore import get_vectorstore +from backend.rag_components.vectorstore import get_vectorstore logger = logging.getLogger(__name__) diff --git a/backend/rag_1/notebook.ipynb b/backend/rag_1/notebook.ipynb index d440e6c..abced02 100644 --- a/backend/rag_1/notebook.ipynb +++ b/backend/rag_1/notebook.ipynb @@ -73,16 +73,16 @@ "\n", "from backend.rag_1.chain import get_chain\n", "from backend.rag_1.config import validate_config\n", - "from backend.utils.elements import convert_documents_to_elements\n", - "from backend.utils.retriever import get_retriever\n", - "from backend.utils.unstructured import (\n", + "from backend.rag_components.elements import convert_documents_to_elements\n", + "from backend.rag_components.retriever import get_retriever\n", + "from backend.rag_components.unstructured import (\n", " load_chunking_func,\n", " select_images,\n", " select_tables,\n", " select_texts,\n", ")\n", + "from backend.rag_components.vectorstore import get_vectorstore\n", "from backend.utils.utils import format_time_delta\n", - "from backend.utils.vectorstore import get_vectorstore\n", "\n", "logging.basicConfig(format=\"[%(asctime)s] - %(name)s - %(levelname)s - %(message)s\")\n", "logging.getLogger(\"backend\").setLevel(logging.INFO)\n", diff --git a/backend/rag_2/chain.py b/backend/rag_2/chain.py index 3eb7b6e..4eef926 100644 --- a/backend/rag_2/chain.py +++ b/backend/rag_2/chain.py @@ -16,8 +16,8 @@ from backend.rag_components.chain_links.retrieve_and_format_text_docs import ( fetch_docs_chain, ) -from backend.utils.llm import get_text_llm -from backend.utils.retriever import get_retriever +from backend.rag_components.llm import get_text_llm +from backend.rag_components.retriever import get_retriever from . import prompts diff --git a/backend/rag_2/config.yaml b/backend/rag_2/config.yaml index 7e23156..0482eb5 100644 --- a/backend/rag_2/config.yaml +++ b/backend/rag_2/config.yaml @@ -41,7 +41,7 @@ store: root_path: "${..path.database}/multi_vector_retriever_metadata/" retriever: - _target_: backend.utils.multi_vector.ThresholdedMultiVectorRetriever + _target_: backend.rag_components.multi_vector.ThresholdedMultiVectorRetriever vectorstore: ${..vectorstore} byte_store: ${..store} id_key: "doc_id" diff --git a/backend/rag_2/ingest.py b/backend/rag_2/ingest.py index 564802c..42893ff 100644 --- a/backend/rag_2/ingest.py +++ b/backend/rag_2/ingest.py @@ -12,15 +12,15 @@ from backend.rag_2 import prompts from backend.rag_2.config import validate_config -from backend.utils.elements import Image, Table, Text -from backend.utils.ingest import add_elements_to_multivector_retriever -from backend.utils.llm import get_text_llm, get_vision_llm -from backend.utils.retriever import get_retriever -from backend.utils.summarization import ( +from backend.rag_components.elements import Image, Table, Text +from backend.rag_components.ingest import add_elements_to_multivector_retriever +from backend.rag_components.llm import get_text_llm, get_vision_llm +from backend.rag_components.retriever import get_retriever +from backend.rag_components.summarization import ( generate_image_summaries, generate_text_summaries, ) -from backend.utils.unstructured import ( +from backend.rag_components.unstructured import ( load_chunking_func, select_images, select_tables, diff --git a/backend/rag_2/notebook.ipynb b/backend/rag_2/notebook.ipynb index 688ef5b..49a67f4 100644 --- a/backend/rag_2/notebook.ipynb +++ b/backend/rag_2/notebook.ipynb @@ -79,10 +79,10 @@ " apply_summarize_table,\n", " apply_summarize_text,\n", ")\n", - "from backend.utils.elements import convert_documents_to_elements\n", - "from backend.utils.ingest import add_elements_to_multivector_retriever\n", - "from backend.utils.retriever import get_retriever\n", - "from backend.utils.unstructured import (\n", + "from backend.rag_components.elements import convert_documents_to_elements\n", + "from backend.rag_components.ingest import add_elements_to_multivector_retriever\n", + "from backend.rag_components.retriever import get_retriever\n", + "from backend.rag_components.unstructured import (\n", " load_chunking_func,\n", " select_images,\n", " select_tables,\n", diff --git a/backend/rag_3/chain.py b/backend/rag_3/chain.py index 58ed3ad..f09f1e4 100644 --- a/backend/rag_3/chain.py +++ b/backend/rag_3/chain.py @@ -16,8 +16,8 @@ from backend.rag_components.chain_links.retrieve_and_format_multimodal_docs import ( fetch_docs_chain, ) -from backend.utils.llm import get_vision_llm -from backend.utils.retriever import get_retriever +from backend.rag_components.llm import get_vision_llm +from backend.rag_components.retriever import get_retriever from . import prompts diff --git a/backend/rag_3/config.yaml b/backend/rag_3/config.yaml index b55e472..aebc473 100644 --- a/backend/rag_3/config.yaml +++ b/backend/rag_3/config.yaml @@ -41,7 +41,7 @@ store: root_path: "${..path.database}/multi_vector_retriever_metadata/" retriever: - _target_: backend.utils.multi_vector.ThresholdedMultiVectorRetriever + _target_: backend.rag_components.multi_vector.ThresholdedMultiVectorRetriever vectorstore: ${..vectorstore} byte_store: ${..store} id_key: "doc_id" diff --git a/backend/rag_3/ingest.py b/backend/rag_3/ingest.py index b654a09..24d5d1f 100644 --- a/backend/rag_3/ingest.py +++ b/backend/rag_3/ingest.py @@ -12,15 +12,15 @@ from backend.rag_3 import prompts from backend.rag_3.config import validate_config -from backend.utils.elements import Image, Table, Text -from backend.utils.ingest import add_elements_to_multivector_retriever -from backend.utils.llm import get_text_llm, get_vision_llm -from backend.utils.retriever import get_retriever -from backend.utils.summarization import ( +from backend.rag_components.elements import Image, Table, Text +from backend.rag_components.ingest import add_elements_to_multivector_retriever +from backend.rag_components.llm import get_text_llm, get_vision_llm +from backend.rag_components.retriever import get_retriever +from backend.rag_components.summarization import ( generate_image_summaries, generate_text_summaries, ) -from backend.utils.unstructured import ( +from backend.rag_components.unstructured import ( load_chunking_func, select_images, select_tables, diff --git a/backend/rag_3/notebook.ipynb b/backend/rag_3/notebook.ipynb index 51b8ccb..aece0ed 100644 --- a/backend/rag_3/notebook.ipynb +++ b/backend/rag_3/notebook.ipynb @@ -79,10 +79,10 @@ " apply_summarize_table,\n", " apply_summarize_text,\n", ")\n", - "from backend.utils.elements import convert_documents_to_elements\n", - "from backend.utils.ingest import add_elements_to_multivector_retriever\n", - "from backend.utils.retriever import get_retriever\n", - "from backend.utils.unstructured import (\n", + "from backend.rag_components.elements import convert_documents_to_elements\n", + "from backend.rag_components.ingest import add_elements_to_multivector_retriever\n", + "from backend.rag_components.retriever import get_retriever\n", + "from backend.rag_components.unstructured import (\n", " load_chunking_func,\n", " select_images,\n", " select_tables,\n", diff --git a/backend/rag_components/chain_links/rag_with_history.py b/backend/rag_components/chain_links/rag_with_history.py index 9688386..53861a9 100644 --- a/backend/rag_components/chain_links/rag_with_history.py +++ b/backend/rag_components/chain_links/rag_with_history.py @@ -7,7 +7,7 @@ from backend.rag_components.chain_links.condense_question import condense_question from backend.rag_components.chat_message_history import get_chat_message_history -from backend.utils.llm import get_text_llm +from backend.rag_components.llm import get_text_llm class QuestionWithHistory(BaseModel): diff --git a/backend/utils/elements.py b/backend/rag_components/elements.py similarity index 99% rename from backend/utils/elements.py rename to backend/rag_components/elements.py index 930414b..ce314e3 100644 --- a/backend/utils/elements.py +++ b/backend/rag_components/elements.py @@ -10,7 +10,7 @@ from langchain_core.documents import Document from pydantic import BaseModel, PrivateAttr, validator -from .image import local_image_to_base64 +from backend.utils.image import local_image_to_base64 class Element(BaseModel): diff --git a/backend/utils/ingest.py b/backend/rag_components/ingest.py similarity index 100% rename from backend/utils/ingest.py rename to backend/rag_components/ingest.py diff --git a/backend/utils/llm.py b/backend/rag_components/llm.py similarity index 100% rename from backend/utils/llm.py rename to backend/rag_components/llm.py diff --git a/backend/utils/multi_vector.py b/backend/rag_components/multi_vector.py similarity index 100% rename from backend/utils/multi_vector.py rename to backend/rag_components/multi_vector.py diff --git a/backend/utils/retriever.py b/backend/rag_components/retriever.py similarity index 100% rename from backend/utils/retriever.py rename to backend/rag_components/retriever.py diff --git a/backend/utils/summarization.py b/backend/rag_components/summarization.py similarity index 100% rename from backend/utils/summarization.py rename to backend/rag_components/summarization.py diff --git a/backend/utils/unstructured.py b/backend/rag_components/unstructured.py similarity index 98% rename from backend/utils/unstructured.py rename to backend/rag_components/unstructured.py index 2562a42..8359fea 100644 --- a/backend/utils/unstructured.py +++ b/backend/rag_components/unstructured.py @@ -8,7 +8,7 @@ from omegaconf.dictconfig import DictConfig from unstructured.documents.coordinates import RelativeCoordinateSystem -from backend.utils.elements import Image, Table, TableImage, TableText, Text +from backend.rag_components.elements import Image, Table, TableImage, TableText, Text def get_element_size(element: unstructured_elements.Element) -> tuple[float, float]: diff --git a/backend/utils/vectorstore.py b/backend/rag_components/vectorstore.py similarity index 100% rename from backend/utils/vectorstore.py rename to backend/rag_components/vectorstore.py diff --git a/tests/backend/utils/test_elements.py b/tests/backend/utils/test_elements.py index 83a2b20..b1ba5f2 100644 --- a/tests/backend/utils/test_elements.py +++ b/tests/backend/utils/test_elements.py @@ -4,7 +4,14 @@ from pytest import FixtureRequest from pytest_lazy_fixtures import lf -from backend.utils.elements import Element, Image, Table, TableImage, TableText, Text +from backend.rag_components.elements import ( + Element, + Image, + Table, + TableImage, + TableText, + Text, +) # ----------------------------------- Text ----------------------------------- #