From d9ee469eac160bf2c5396260eed0a1cfede9ed1b Mon Sep 17 00:00:00 2001
From: italojohnny
Date: Thu, 7 Nov 2024 17:47:01 -0300
Subject: [PATCH] raw

---
 .../firecrawl/firecrawl_crawl_api.py  | 117 +++++++++---------
 .../firecrawl/firecrawl_scrape_api.py |  98 ++++++++-------
 2 files changed, 109 insertions(+), 106 deletions(-)

diff --git a/src/backend/base/langflow/components/firecrawl/firecrawl_crawl_api.py b/src/backend/base/langflow/components/firecrawl/firecrawl_crawl_api.py
index f1a25a89aad7..73f7edfae84b 100644
--- a/src/backend/base/langflow/components/firecrawl/firecrawl_crawl_api.py
+++ b/src/backend/base/langflow/components/firecrawl/firecrawl_crawl_api.py
@@ -1,81 +1,80 @@
 import uuid
 
-from langflow.custom import CustomComponent
+from langflow.custom import Component
+from langflow.io import (
+    DictInput,
+    IntInput,
+    Output,
+    SecretStrInput,
+    StrInput,
+)
 from langflow.schema import Data
 
 
-class FirecrawlCrawlApi(CustomComponent):
+class FirecrawlCrawlApi(Component):
     display_name: str = "FirecrawlCrawlApi"
     description: str = "Firecrawl Crawl API."
     name = "FirecrawlCrawlApi"
 
     output_types: list[str] = ["Document"]
     documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/crawl"
-    field_config = {
-        "api_key": {
-            "display_name": "API Key",
-            "field_type": "str",
-            "required": True,
-            "password": True,
-            "info": "The API key to use Firecrawl API.",
-        },
-        "url": {
-            "display_name": "URL",
-            "field_type": "str",
-            "required": True,
-            "info": "The base URL to start crawling from.",
-        },
-        "timeout": {
-            "display_name": "Timeout",
-            "field_type": "int",
-            "info": "The timeout in milliseconds.",
-        },
-        "crawlerOptions": {
-            "display_name": "Crawler Options",
-            "info": "Options for the crawler behavior.",
-        },
-        "pageOptions": {
-            "display_name": "Page Options",
-            "info": "The page options to send with the request.",
-        },
-        "idempotency_key": {
-            "display_name": "Idempotency Key",
-            "field_type": "str",
-            "info": "Optional idempotency key to ensure unique requests.",
-        },
-    }
 
-    def build(
-        self,
-        api_key: str,
-        url: str,
-        timeout: int = 30000,
-        crawlerOptions: Data | None = None,  # noqa: N803
-        pageOptions: Data | None = None,  # noqa: N803
-        idempotency_key: str | None = None,
-    ) -> Data:
+    inputs = [
+        SecretStrInput(
+            name="api_key",
+            display_name="API Key",
+            required=True,
+            password=True,
+            info="The API key to use Firecrawl API.",
+        ),
+        StrInput(
+            name="url",
+            display_name="URL",
+            required=True,
+            info="The base URL to start crawling from.",
+        ),
+        IntInput(
+            name="timeout",
+            display_name="Timeout",
+            info="Timeout in milliseconds for the request.",
+        ),
+        StrInput(
+            name="idempotency_key",
+            display_name="Idempotency Key",
+            info="Optional idempotency key to ensure unique requests.",
+        ),
+        DictInput(
+            name="crawlerOptions",
+            display_name="Crawler Options",
+            info="The crawler options to send with the request.",
+        ),
+        DictInput(
+            name="pageOptions",
+            display_name="Page Options",
+            info="The page options to send with the request.",
+        ),
+    ]
+
+    outputs = [
+        Output(display_name="Data", name="data", method="crawl"),
+    ]
+
+    def crawl(self) -> Data:
         try:
             from firecrawl.firecrawl import FirecrawlApp
         except ImportError as e:
             msg = "Could not import firecrawl integration package. Please install it with `pip install firecrawl-py`."
             raise ImportError(msg) from e
 
-        crawler_options_dict = crawlerOptions.__dict__["data"]["text"] if crawlerOptions else {}
+        crawler_options_dict = self.crawlerOptions or {}
 
-        page_options_dict = pageOptions.__dict__["data"]["text"] if pageOptions else {}
+        page_options_dict = self.pageOptions or {}
 
-        if not idempotency_key:
-            idempotency_key = str(uuid.uuid4())
+        if not self.idempotency_key:
+            self.idempotency_key = str(uuid.uuid4())
 
-        app = FirecrawlApp(api_key=api_key)
+        app = FirecrawlApp(api_key=self.api_key)
         crawl_result = app.crawl_url(
-            url,
-            params={
-                "crawlerOptions": crawler_options_dict,
-                "pageOptions": page_options_dict,
-            },
-            wait_until_done=True,
-            poll_interval=int(timeout / 1000),
-            idempotency_key=idempotency_key,
+            self.url,
+            params={
+                "crawlerOptions": crawler_options_dict,
+                "pageOptions": page_options_dict,
+            },
+            idempotency_key=self.idempotency_key,
         )
         return Data(data={"results": crawl_result})
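
For reference, a minimal, hypothetical sketch of the direct firecrawl-py call the crawl component above wraps. It is not part of the patch; the API key, URL, and option values are placeholders, and the crawlerOptions/pageOptions shape simply mirrors what the pre-migration component sent.

import uuid

from firecrawl.firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-...")  # placeholder API key
crawl_result = app.crawl_url(
    "https://example.com",  # placeholder start URL
    params={
        "crawlerOptions": {"limit": 5},  # assumed example option
        "pageOptions": {"onlyMainContent": True},  # assumed example option
    },
    idempotency_key=str(uuid.uuid4()),  # same fallback the component generates
)
print(crawl_result)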
diff --git a/src/backend/base/langflow/components/firecrawl/firecrawl_scrape_api.py b/src/backend/base/langflow/components/firecrawl/firecrawl_scrape_api.py
index 05ecb3cef396..f9f3545f59bc 100644
--- a/src/backend/base/langflow/components/firecrawl/firecrawl_scrape_api.py
+++ b/src/backend/base/langflow/components/firecrawl/firecrawl_scrape_api.py
@@ -1,69 +1,73 @@
-from langflow.custom import CustomComponent
+from langflow.custom import Component
+from langflow.io import (
+    DictInput,
+    IntInput,
+    Output,
+    SecretStrInput,
+    StrInput,
+)
 from langflow.schema import Data
 
 
-class FirecrawlScrapeApi(CustomComponent):
+class FirecrawlScrapeApi(Component):
     display_name: str = "FirecrawlScrapeApi"
     description: str = "Firecrawl Scrape API."
     name = "FirecrawlScrapeApi"
 
     output_types: list[str] = ["Document"]
     documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/scrape"
-    field_config = {
-        "api_key": {
-            "display_name": "API Key",
-            "field_type": "str",
-            "required": True,
-            "password": True,
-            "info": "The API key to use Firecrawl API.",
-        },
-        "url": {
-            "display_name": "URL",
-            "field_type": "str",
-            "required": True,
-            "info": "The URL to scrape.",
-        },
-        "timeout": {
-            "display_name": "Timeout",
-            "info": "Timeout in milliseconds for the request.",
-            "field_type": "int",
-            "default_value": 10000,
-        },
-        "pageOptions": {
-            "display_name": "Page Options",
-            "info": "The page options to send with the request.",
-        },
-        "extractorOptions": {
-            "display_name": "Extractor Options",
-            "info": "The extractor options to send with the request.",
-        },
-    }
 
-    def build(
-        self,
-        api_key: str,
-        url: str,
-        timeout: int = 10000,
-        pageOptions: Data | None = None,  # noqa: N803
-        extractorOptions: Data | None = None,  # noqa: N803
-    ) -> Data:
+    inputs = [
+        SecretStrInput(
+            name="api_key",
+            display_name="API Key",
+            required=True,
+            password=True,
+            info="The API key to use Firecrawl API.",
+        ),
+        StrInput(
+            name="url",
+            display_name="URL",
+            required=True,
+            info="The URL to scrape.",
+        ),
+        IntInput(
+            name="timeout",
+            display_name="Timeout",
+            info="Timeout in milliseconds for the request.",
+            value=10000,
+        ),
+        DictInput(
+            name="pageOptions",
+            display_name="Page Options",
+            info="The page options to send with the request.",
+        ),
+        DictInput(
+            name="extractorOptions",
+            display_name="Extractor Options",
+            info="The extractor options to send with the request.",
+        ),
+    ]
+
+    outputs = [
+        Output(display_name="Data", name="data", method="crawl"),
+    ]
+
+    def crawl(self) -> Data:
         try:
             from firecrawl.firecrawl import FirecrawlApp
         except ImportError as e:
             msg = "Could not import firecrawl integration package. Please install it with `pip install firecrawl-py`."
             raise ImportError(msg) from e
 
-        extractor_options_dict = extractorOptions.__dict__["data"]["text"] if extractorOptions else {}
-        page_options_dict = pageOptions.__dict__["data"]["text"] if pageOptions else {}
+        extractor_options_dict = self.extractorOptions or {}
+        page_options_dict = self.pageOptions or {}
 
-        app = FirecrawlApp(api_key=api_key)
+        app = FirecrawlApp(api_key=self.api_key)
         results = app.scrape_url(
-            url,
+            self.url,
             {
-                "timeout": str(timeout),
+                "timeout": self.timeout,
                 "extractorOptions": extractor_options_dict,
                 "pageOptions": page_options_dict,
             },
         )
         return Data(data=results)
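
For reference, a minimal, hypothetical sketch of the direct firecrawl-py call behind the scrape component, assuming the same request shape the component sends. The API key, URL, and option values are placeholders rather than recommended settings.

from firecrawl.firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-...")  # placeholder API key
results = app.scrape_url(
    "https://example.com",  # placeholder URL
    {
        "timeout": 10000,  # milliseconds, matching the component's default
        "extractorOptions": {"mode": "markdown"},  # assumed example option
        "pageOptions": {"onlyMainContent": True},  # assumed example option
    },
)
print(results)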