Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: refactor source.py #351

Merged
merged 17 commits into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions docs/how-to/document_search/search_documents.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
3. Do the search

This guide will walk you through all those steps and explain the details. Let's start with a minimalistic example to get the main idea:

```python
import asyncio
from pathlib import Path
Expand All @@ -14,7 +15,8 @@ from ragbits.core.embeddings.litellm import LiteLLMEmbeddings
from ragbits.core.vector_stores.in_memory import InMemoryVectorStore
from ragbits.document_search import DocumentSearch
from ragbits.document_search.documents.document import DocumentMeta
from ragbits.document_search.documents.sources import GCSSource
from ragbits.document_search.documents.sources.gcs_source import GCSSource


async def main() -> None:
# Load documents (there are multiple possible sources)
Expand Down Expand Up @@ -47,10 +49,12 @@ if __name__ == "__main__":
Before doing any search, we need some documents to build our knowledge base from. Ragbits offers a handy class `Document` that stores all the information needed for document loading.
Objects of this class are usually instantiated using the `DocumentMeta` helper class, which supports loading files from your local storage, GCS, or HuggingFace.
You can easily add support for your custom sources by extending the `Source` class and implementing the abstract methods:

```python
from pathlib import Path

from ragbits.document_search.documents.sources import Source
from ragbits.document_search.documents.sources.sources import Source


class CustomSource(Source):
@property
Expand Down
2 changes: 1 addition & 1 deletion docs/quickstart/quickstart2_rag.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ We first need to direct Ragbits to the location of the documents to load them. T

```python
from pathlib import Path
from ragbits.document_search.documents.sources import LocalFileSource
from ragbits.document_search.documents.sources.local_file_source import LocalFileSource

# Path to the directory with markdown files to ingest
documents_path = Path(__file__).parent / "pb-source/en"
Expand Down
2 changes: 1 addition & 1 deletion examples/document-search/multimodal.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from ragbits.core.vector_stores.in_memory import InMemoryVectorStore
from ragbits.document_search import DocumentSearch
from ragbits.document_search.documents.document import DocumentMeta, DocumentType
from ragbits.document_search.documents.sources import LocalFileSource
from ragbits.document_search.documents.sources.local_file_source import LocalFileSource
from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter
from ragbits.document_search.ingestion.providers.dummy import DummyImageProvider

Expand Down
2 changes: 1 addition & 1 deletion examples/evaluation/document-search/basic/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
},
},
"source": {
"type": "ragbits.document_search.documents.sources:HuggingFaceSource",
"type": "ragbits.document_search.documents.hugging_face_source:HuggingFaceSource",
"config": {
"path": "micpst/hf-docs",
"split": "train[:5]",
Expand Down
2 changes: 1 addition & 1 deletion examples/evaluation/document-search/basic/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
},
},
"source": {
"type": "ragbits.document_search.documents.sources:HuggingFaceSource",
"type": "ragbits.document_search.documents.hugging_face_source:HuggingFaceSource",
"config": {
"path": "micpst/hf-docs",
"split": "train[:5]",
Expand Down
2 changes: 1 addition & 1 deletion packages/ragbits-cli/tests/unit/test_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from rich.table import Column, Table

from ragbits.cli.state import OutputType, _get_nested_field, print_output, print_output_table
from ragbits.document_search.documents.sources import LocalFileSource
from ragbits.document_search.documents.sources.local_file_source import LocalFileSource


class InnerTestModel(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ragbits.core.vector_stores.qdrant import QdrantVectorStore
from ragbits.document_search import DocumentSearch
from ragbits.document_search.documents.document import DocumentMeta
from ragbits.document_search.documents.sources import LocalFileSource
from ragbits.document_search.documents.sources.local_file_source import LocalFileSource


@pytest.mark.parametrize(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from ragbits.core.vector_stores.in_memory import InMemoryVectorStore
from ragbits.document_search.documents.document import DocumentMeta, DocumentType
from ragbits.document_search.documents.element import Element
from ragbits.document_search.documents.sources import LocalFileSource
from ragbits.document_search.documents.sources.local_file_source import LocalFileSource


class AnimalElement(Element):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
from ragbits.core.vector_stores.base import VectorStoreOptions
from ragbits.document_search.documents.document import Document, DocumentMeta
from ragbits.document_search.documents.element import Element, ImageElement
from ragbits.document_search.documents.source_resolver import SourceResolver
from ragbits.document_search.documents.sources import Source
from ragbits.document_search.documents.sources.source_resolver import SourceResolver
from ragbits.document_search.documents.sources.sources import Source
from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter
from ragbits.document_search.ingestion.processor_strategies import (
ProcessingExecutionStrategy,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@

from pydantic import BaseModel

from ragbits.document_search.documents.sources import LocalFileSource, Source, SourceDiscriminator
from ragbits.document_search.documents.sources.local_file_source import LocalFileSource
from ragbits.document_search.documents.sources.sources import Source, SourceDiscriminator


class DocumentType(str, Enum):
Expand Down
Loading
Loading