Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove Execution Wrapper #905

Merged
merged 3 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ repos:

- id: black
name: black
entry: bash -c 'cd py && poetry run black $(git diff --cached --name-only --diff-filter=ACM | grep ".py$" | xargs -I {} echo ../{})'
entry: bash -c 'cd py && poetry run black .'
language: system
types: [python]
pass_filenames: false
28 changes: 21 additions & 7 deletions py/cli/commands/ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,15 @@


@cli.command()
@click.argument("file_paths", nargs=-1, required=True, type=click.Path(exists=True))
@click.option("--document-ids", multiple=True, help="Document IDs for ingestion")
@click.option("--metadatas", type=JSON, help="Metadatas for ingestion as a JSON string")
@click.argument(
"file_paths", nargs=-1, required=True, type=click.Path(exists=True)
)
@click.option(
"--document-ids", multiple=True, help="Document IDs for ingestion"
)
@click.option(
"--metadatas", type=JSON, help="Metadatas for ingestion as a JSON string"
)
@click.option(
"--versions",
multiple=True,
Expand All @@ -27,18 +33,24 @@ def ingest_files(client, file_paths, document_ids, metadatas, versions):
document_ids = list(document_ids) if document_ids else None
versions = list(versions) if versions else None

response = client.ingest_files(file_paths, metadatas, document_ids, versions)
response = client.ingest_files(
file_paths, metadatas, document_ids, versions
)
click.echo(json.dumps(response, indent=2))


@cli.command()
@click.argument("file-paths", nargs=-1, required=True, type=click.Path(exists=True))
@click.argument(
"file-paths", nargs=-1, required=True, type=click.Path(exists=True)
)
@click.option(
"--document-ids",
required=True,
help="Document IDs to update (comma-separated)",
)
@click.option("--metadatas", type=JSON, help="Metadatas for updating as a JSON string")
@click.option(
"--metadatas", type=JSON, help="Metadatas for updating as a JSON string"
)
@click.pass_obj
def update_files(client, file_paths, document_ids, metadatas):
"""Update existing files in R2R."""
Expand Down Expand Up @@ -77,7 +89,9 @@ def ingest_files_from_urls(client, urls):

try:
response = client.ingest_files([temp_file_path])
click.echo(f"File '{filename}' ingested successfully. Response: {response}")
click.echo(
f"File '{filename}' ingested successfully. Response: {response}"
)
ingested_files.append(filename)
finally:
os.unlink(temp_file_path)
Expand Down
28 changes: 21 additions & 7 deletions py/cli/commands/retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@


@cli.command()
@click.option("--query", prompt="Enter your search query", help="The search query")
@click.option(
"--query", prompt="Enter your search query", help="The search query"
)
# VectorSearchSettings
@click.option(
"--use-vector-search",
Expand All @@ -18,21 +20,27 @@
type=JSON,
help="Filters to apply to the vector search as a JSON",
)
@click.option("--search-limit", default=None, help="Number of search results to return")
@click.option(
"--search-limit", default=None, help="Number of search results to return"
)
@click.option("--do-hybrid-search", is_flag=True, help="Perform hybrid search")
@click.option(
"--selected-group-ids", type=JSON, help="Group IDs to search for as a JSON"
)
# KGSearchSettings
@click.option("--use-kg-search", is_flag=True, help="Use knowledge graph search")
@click.option(
"--use-kg-search", is_flag=True, help="Use knowledge graph search"
)
@click.option("--kg-search-type", default=None, help="Local or Global")
@click.option("--kg-search-level", default=None, help="Level of KG search")
@click.option(
"--kg-search-generation-config",
type=JSON,
help="KG search generation config",
)
@click.option("--entity-types", type=JSON, help="Entity types to search for as a JSON")
@click.option(
"--entity-types", type=JSON, help="Entity types to search for as a JSON"
)
@click.option(
"--relationships", type=JSON, help="Relationships to search for as a JSON"
)
Expand Down Expand Up @@ -111,21 +119,27 @@ def search(client, query, **kwargs):
"--use-vector-search", is_flag=True, default=True, help="Use vector search"
)
@click.option("--filters", type=JSON, help="Search filters as JSON")
@click.option("--search-limit", default=10, help="Number of search results to return")
@click.option(
"--search-limit", default=10, help="Number of search results to return"
)
@click.option("--do-hybrid-search", is_flag=True, help="Perform hybrid search")
@click.option(
"--selected-group-ids", type=JSON, help="Group IDs to search for as a JSON"
)
# KG Search Settings
@click.option("--use-kg-search", is_flag=True, help="Use knowledge graph search")
@click.option(
"--use-kg-search", is_flag=True, help="Use knowledge graph search"
)
@click.option("--kg-search-type", default="global", help="Local or Global")
@click.option(
"--kg-search-level",
default=None,
help="Level of cluster to use for Global KG search",
)
@click.option("--kg-search-model", default=None, help="Model for KG agent")
@click.option("--entity-types", type=JSON, help="Entity types to search for as a JSON")
@click.option(
"--entity-types", type=JSON, help="Entity types to search for as a JSON"
)
@click.option(
"--relationships", type=JSON, help="Relationships to search for as a JSON"
)
Expand Down
8 changes: 6 additions & 2 deletions py/core/base/providers/kg.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ class KGProvider(ABC):

def __init__(self, config: KGConfig) -> None:
if not isinstance(config, KGConfig):
raise ValueError("KGProvider must be initialized with a `KGConfig`.")
raise ValueError(
"KGProvider must be initialized with a `KGConfig`."
)
logger.info(f"Initializing KG provider with config: {config}")
self.config = config
self.validate_config()
Expand Down Expand Up @@ -116,7 +118,9 @@ def structured_query(
param_map = {}

@abstractmethod
def vector_query(self, query, **kwargs: Any) -> Tuple[list[Entity], list[float]]:
def vector_query(
self, query, **kwargs: Any
) -> Tuple[list[Entity], list[float]]:
"""Abstract method to query the graph store with a vector store query."""

# TODO - Type this method.
Expand Down
2 changes: 1 addition & 1 deletion py/core/configs/unstructured.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ provider = "unstructured_api"
provider = "unstructured"
method = "recursive"
chunk_size = 512
chunk_overlap = 50
chunk_overlap = 50
5 changes: 4 additions & 1 deletion py/core/main/app_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ def r2r_app(
raise ValueError(f"Invalid config name: {config_name}")
config = R2RConfig.from_toml(R2RBuilder.CONFIG_OPTIONS[config_name])

if config.embedding.provider == "openai" and "OPENAI_API_KEY" not in os.environ:
if (
config.embedding.provider == "openai"
and "OPENAI_API_KEY" not in os.environ
):
raise ValueError(
"Must set OPENAI_API_KEY in order to initialize OpenAIEmbeddingProvider."
)
Expand Down
Loading
Loading