diff --git a/crewai_tools/tools/valyu_tool/README.md b/crewai_tools/tools/valyu_tool/README.md new file mode 100644 index 00000000..04c4b2a4 --- /dev/null +++ b/crewai_tools/tools/valyu_tool/README.md @@ -0,0 +1,87 @@ +# ValyuContextTool Documentation + +## Description + +This tool enables semantic search across both proprietary and web content using the `https://valyu.network/` API. It allows users to search through programmatically licensed proprietary data from the [Valyu Exchange](https://exchange.valyu.network/) and web content, returning relevant results based on the provided query. + +## Installation + +Install the required packages using: + +```shell +uv add crewai[tools] valyu +``` + +## Example Usage + +### Basic Usage with CrewAI + +```python +from crewai_tools import ValyuContextTool + +# Initialize the tool +tool = ValyuContextTool(api_key="your_api_key") # or use the environment variable VALYU_API_KEY +``` + +## Steps to Get Started + +Follow these steps to use the ValyuContextTool: + +1. **Package Installation**: Install the `crewai[tools]` and `valyu` packages in your Python environment. +2. **API Key**: Get your API key by signing up at `https://exchange.valyu.network/`. +3. 
**Environment Setup**: Store your API key in an environment variable named `VALYU_API_KEY`. + +## Advanced Configuration + +You can customize the parameters for the `ValyuContextTool`: + +- `query`: The search term or phrase +- `search_type`: Type of search ("proprietary", "web", or "both") +- `data_sources`: Specific indexes to query from +- `max_num_results`: Maximum number of results to return +- `query_rewrite`, `similarity_threshold`: Whether to rewrite the query (default: `False`) and the similarity threshold to apply (default: 0.4) +- `max_price`: Maximum price in dollars per 1,000 results (PCM) + +Example: + +```python +from crewai import Agent, Task, Crew +from crewai_tools import ValyuContextTool + +# Initialize the tool +valyu_tool = ValyuContextTool(api_key="your_api_key") + +# Define the agent +research_agent = Agent( + role="Research Analyst", + goal="Find detailed information using Valyu's proprietary and web sources", + backstory="An expert researcher specializing in comprehensive data analysis", + tools=[valyu_tool], + verbose=True +) + +# Define the task +search_task = Task( + expected_output="Detailed analysis of quantum computing advancements", + description="Research recent breakthroughs in quantum computing", + agent=research_agent +) + +# Create and run the crew +crew = Crew( + agents=[research_agent], + tasks=[search_task] +) + +result = crew.kickoff() +print(result) + +# Direct tool usage example +response = valyu_tool._run( + query="quantum computing breakthroughs 2024", + search_type="both", + max_num_results=10, + query_rewrite=False, + max_price=100 +) +``` diff --git a/crewai_tools/tools/valyu_tool/valyu_context_tool.py b/crewai_tools/tools/valyu_tool/valyu_context_tool.py new file mode 100644 index 00000000..177a20bd --- /dev/null +++ b/crewai_tools/tools/valyu_tool/valyu_context_tool.py @@ -0,0 +1,135 @@ +from typing import Any, Optional, List, Literal +from crewai.tools import BaseTool +from pydantic import PrivateAttr, Field + +try: + from valyu import Valyu + + VALYU_INSTALLED = True +except ImportError: + Valyu = Any + VALYU_INSTALLED = False +
class ValyuContextTool(BaseTool):
    """CrewAI tool that runs searches through a Valyu client.

    The search options declared as fields below are per-instance defaults.
    An option passed explicitly to ``_run`` takes precedence for that call;
    an option left unset falls back to the instance value.
    """

    name: str = "Valyu Search Tool"
    # This text is what an agent reads to decide how to call the tool, so it
    # must list the parameters `_run` actually accepts.
    description: str = (
        "Search programmatically licensed proprietary data and the web. "
        "Parameters:\n"
        "- search_type: 'both' searches all sources, 'proprietary' for proprietary data not found on the web only, 'web' for web data only\n"
        "- max_price: Maximum price threshold. The number of dollars per 1000 results (PCM). (a number between 10 and 100 recommended)\n"
        "- max_num_results: Maximum number of results to return\n"
        "- query_rewrite: Whether to rewrite the query\n"
        "- similarity_threshold: Similarity threshold for the query rewrite\n"
        "- data_sources: List of specific data sources to search from (only include valid sources)\n"
        "\nOnly add a data source if you have been instructed to do so and are sure it is a valid source."
    )
    _client: Any = PrivateAttr()
    search_type: Optional[str] = Field(default="both")
    max_price: Optional[int] = Field(default=None)
    max_num_results: Optional[int] = Field(default=None)
    query_rewrite: Optional[bool] = Field(default=False)
    similarity_threshold: Optional[float] = Field(default=0.4)
    data_sources: Optional[List[str]] = Field(default=None)

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        """Create the tool and its Valyu client.

        Args:
            api_key: Valyu API key. When omitted or empty, the client is
                constructed without arguments.
            **kwargs: Forwarded to ``BaseTool``; this is also how the search
                defaults declared on the class (``search_type``,
                ``max_price``, ...) are set.

        Raises:
            ImportError: If the ``valyu`` package is missing and the user
                declines the interactive install prompt.
            subprocess.CalledProcessError: If ``uv add valyu`` fails.
        """
        # The base initializer already populates every declared field from
        # kwargs, so the values must not be copied over again afterwards
        # (that would replace the validated values with the raw ones).
        super().__init__(**kwargs)
        try:
            from valyu import Valyu
        except ImportError:
            import click

            if not click.confirm(
                "You are missing the 'valyu' package. Would you like to install it?"
            ):
                raise ImportError(
                    "The 'valyu' package is required to use the ValyuContextTool. "
                    "Please install it with: uv add valyu"
                )
            import subprocess

            subprocess.run(["uv", "add", "valyu"], check=True)
            from valyu import Valyu
        self._client = Valyu(api_key=api_key) if api_key else Valyu()

    @staticmethod
    def _pick(call_value: Any, fallback: Any) -> Any:
        """Return ``call_value`` if the caller supplied one, else ``fallback``.

        A value counts as "not supplied" when it is ``None`` or when it is the
        ``Field(...)`` placeholder used as a default in the ``_run`` signature.
        Falsy values such as ``0``, ``0.0`` and ``False`` are real values and
        are kept.
        """
        from pydantic.fields import FieldInfo

        if call_value is None or isinstance(call_value, FieldInfo):
            return fallback
        return call_value

    # The ``Field(...)`` defaults are kept so the signature stays unchanged.
    # NOTE(review): crewAI may derive the tool's argument schema from this
    # signature - confirm before simplifying them to plain ``None``.
    def _run(
        self,
        query: str = Field(),
        search_type: Optional[Literal["both", "proprietary", "web"]] = Field(
            default=None
        ),
        data_sources: Optional[List[str]] = Field(default=None),
        max_num_results: Optional[int] = Field(default=None),
        query_rewrite: Optional[bool] = Field(default=None),
        similarity_threshold: Optional[float] = Field(default=None),
        max_price: Optional[int] = Field(default=None),
    ) -> Any:
        """Execute a search query using the Valyu client.

        Every option other than ``query`` falls back to the value configured
        on the tool instance when it is not passed for this call.

        Args:
            query: The search query to execute.
            search_type: 'both' searches all sources, 'proprietary' for
                proprietary data only, 'web' for web data only.
            data_sources: Specific data sources to search from. Only sent
                when the resolved list is non-empty.
            max_num_results: Maximum number of results to return.
            query_rewrite: Whether to rewrite the query.
            similarity_threshold: Similarity threshold for the query rewrite.
            max_price: Maximum price threshold per 1000 results (PCM).

        Returns:
            dict: A dictionary containing:
                - success (bool): Whether the query was successful
                - results (List[dict]): On success, one dict per result with
                  the keys ``title``, ``url``, ``content``, ``source`` and
                  ``price``
                - error (str): Error message if unsuccessful
        """
        query = self._pick(query, None)
        if query is None:
            # Without this guard the Field() placeholder itself would be
            # sent to the client as the query.
            return {"success": False, "error": "A search query is required."}

        # Same keys as before are always sent; only the way each value is
        # resolved changed (per-call value first, instance default second).
        params = {
            "query": query,
            "search_type": self._pick(search_type, self.search_type),
            "max_num_results": self._pick(max_num_results, self.max_num_results),
            "query_rewrite": self._pick(query_rewrite, self.query_rewrite),
            "similarity_threshold": self._pick(
                similarity_threshold, self.similarity_threshold
            ),
            "max_price": self._pick(max_price, self.max_price),
        }

        sources = self._pick(data_sources, self.data_sources)
        if sources:
            params["data_sources"] = sources

        response = self._client.context(**params)

        if not response.success:
            return {"success": False, "error": response.error}

        results = [
            {
                "title": result.title,
                "url": result.url,
                "content": result.content,
                "source": result.source,
                "price": result.price,
            }
            for result in response.results
        ]
        return {"success": True, "results": results}
"valyu", marker = "extra == 'valyu'", specifier = ">=1.0.6" }, { name = "weaviate-client", marker = "extra == 'weaviate-client'", specifier = ">=4.10.2" }, ] @@ -4788,6 +4792,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/78/36828a4d857b25896f9774c875714ba4e9b3bc8a92d2debe3f4df3a83d4f/validators-0.34.0-py3-none-any.whl", hash = "sha256:c804b476e3e6d3786fa07a30073a4ef694e617805eb1946ceee3fe5a9b8b1321", size = 43536 }, ] +[[package]] +name = "valyu" +version = "1.0.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "requests" }, + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b9/d0/fb1d78519e7543431a41c0600b0a13489a7c4d7070ff6ddbb2030edb6cec/valyu-1.0.6.tar.gz", hash = "sha256:6064240c6c936fc257aa98dd2649c3e472298d8ce6b24981aec10ab9e0d365ab", size = 7650 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bf/a7/12c625552aa58fcbb645f96f1b69f1b66262334e6c7fb9c2c2b371887030/valyu-1.0.6-py3-none-any.whl", hash = "sha256:8065072962deb10667d00ea918cca108ade4846fd7a6b3f689a6162de4c1dbfd", size = 7421 }, +] + [[package]] name = "watchfiles" version = "1.0.4"