Commit d9ee469 (1 parent: b9fbce6)

Showing 2 changed files with 109 additions and 106 deletions.
src/backend/base/langflow/components/firecrawl/firecrawl_crawl_api.py (117 changes: 58 additions & 59 deletions)

@@ -1,81 +1,80 @@
 import uuid
 
-from langflow.custom import CustomComponent
+from langflow.custom import Component
+from langflow.io import (
+    DictInput,
+    IntInput,
+    Output,
+    SecretStrInput,
+    StrInput,
+)
 from langflow.schema import Data
 
 
-class FirecrawlCrawlApi(CustomComponent):
+class FirecrawlCrawlApi(Component):
     display_name: str = "FirecrawlCrawlApi"
     description: str = "Firecrawl Crawl API."
+    name = "FirecrawlCrawlApi"
 
-    output_types: list[str] = ["Document"]
     documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/crawl"
-    field_config = {
-        "api_key": {
-            "display_name": "API Key",
-            "field_type": "str",
-            "required": True,
-            "password": True,
-            "info": "The API key to use Firecrawl API.",
-        },
-        "url": {
-            "display_name": "URL",
-            "field_type": "str",
-            "required": True,
-            "info": "The base URL to start crawling from.",
-        },
-        "timeout": {
-            "display_name": "Timeout",
-            "field_type": "int",
-            "info": "The timeout in milliseconds.",
-        },
-        "crawlerOptions": {
-            "display_name": "Crawler Options",
-            "info": "Options for the crawler behavior.",
-        },
-        "pageOptions": {
-            "display_name": "Page Options",
-            "info": "The page options to send with the request.",
-        },
-        "idempotency_key": {
-            "display_name": "Idempotency Key",
-            "field_type": "str",
-            "info": "Optional idempotency key to ensure unique requests.",
-        },
-    }
 
-    def build(
-        self,
-        api_key: str,
-        url: str,
-        timeout: int = 30000,
-        crawlerOptions: Data | None = None,  # noqa: N803
-        pageOptions: Data | None = None,  # noqa: N803
-        idempotency_key: str | None = None,
-    ) -> Data:
+    inputs = [
+        SecretStrInput(
+            name="api_key",
+            display_name="API Key",
+            required=True,
+            password=True,
+            info="The API key to use Firecrawl API.",
+        ),
+        StrInput(
+            name="url",
+            display_name="URL",
+            required=True,
+            info="The URL to scrape.",
+        ),
+        IntInput(
+            name="timeout",
+            display_name="Timeout",
+            info="Timeout in milliseconds for the request.",
+        ),
+        StrInput(
+            name="idempotency_key",
+            display_name="Idempotency Key",
+            info="Optional idempotency key to ensure unique requests.",
+        ),
+        DictInput(
+            name="crawlerOptions",
+            display_name="Crawler Options",
+            info="The crawler options to send with the request.",
+        ),
+        DictInput(
+            name="pageOptions",
+            display_name="Page Options",
+            info="The page options to send with the request.",
+        ),
+    ]
+
+    outputs = [
+        Output(display_name="Data", name="data", method="crawl"),
+    ]
+
+    def crawl(self) -> Data:
         try:
             from firecrawl.firecrawl import FirecrawlApp
         except ImportError as e:
             msg = "Could not import firecrawl integration package. Please install it with `pip install firecrawl-py`."
             raise ImportError(msg) from e
-        crawler_options_dict = crawlerOptions.__dict__["data"]["text"] if crawlerOptions else {}
+        crawler_options_dict = self.crawlerOptions.__dict__["data"]["text"] if self.crawlerOptions else {}
 
-        page_options_dict = pageOptions.__dict__["data"]["text"] if pageOptions else {}
+        page_options_dict = self.pageOptions.__dict__["data"]["text"] if self.pageOptions else {}
 
-        if not idempotency_key:
-            idempotency_key = str(uuid.uuid4())
+        if not self.idempotency_key:
+            self.idempotency_key = str(uuid.uuid4())
 
-        app = FirecrawlApp(api_key=api_key)
+        app = FirecrawlApp(api_key=self.api_key)
         crawl_result = app.crawl_url(
-            url,
-            params={
-                "crawlerOptions": crawler_options_dict,
-                "pageOptions": page_options_dict,
-            },
-            wait_until_done=True,
-            poll_interval=int(timeout / 1000),
-            idempotency_key=idempotency_key,
+            self.url,
+            params={},
+            idempotency_key=self.idempotency_key,
        )
 
         return Data(data={"results": crawl_result})

Note: GitHub Actions / Ruff Style Check (3.12) reports F841 (local variable assigned but never used) on lines 66 and 68 of the new file, i.e. the `crawler_options_dict` and `page_options_dict` assignments in `crawl()`.
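The F841 findings come from `crawler_options_dict` and `page_options_dict` being computed in `crawl()` but never used now that the `crawl_url` call sends `params={}`. A minimal sketch of one way to wire them back through, reusing the keyword names from the removed `build()` call; this is an assumption about a possible follow-up, not part of this commit, and it presumes the installed firecrawl-py version still accepts a `params` dict with these keys:

```python
# Sketch only (not part of this commit): pass the option dicts through so they
# are actually used, mirroring the keywords the removed build() method sent.
crawl_result = app.crawl_url(
    self.url,
    params={
        "crawlerOptions": crawler_options_dict,
        "pageOptions": page_options_dict,
    },
    idempotency_key=self.idempotency_key,
)
```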
src/backend/base/langflow/components/firecrawl/firecrawl_scrape_api.py (98 changes: 51 additions & 47 deletions)

@@ -1,69 +1,73 @@
-from langflow.custom import CustomComponent
+from langflow.custom import Component
+from langflow.io import (
+    DictInput,
+    IntInput,
+    Output,
+    SecretStrInput,
+    StrInput,
+)
 from langflow.schema import Data
 
 
-class FirecrawlScrapeApi(CustomComponent):
+class FirecrawlScrapeApi(Component):
     display_name: str = "FirecrawlScrapeApi"
     description: str = "Firecrawl Scrape API."
+    name = "FirecrawlScrapeApi"
 
-    # name = "FirecrawlScrapeApi"
-    output_types: list[str] = ["Document"]
     documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/scrape"
-    field_config = {
-        "api_key": {
-            "display_name": "API Key",
-            "field_type": "str",
-            "required": True,
-            "password": True,
-            "info": "The API key to use Firecrawl API.",
-        },
-        "url": {
-            "display_name": "URL",
-            "field_type": "str",
-            "required": True,
-            "info": "The URL to scrape.",
-        },
-        "timeout": {
-            "display_name": "Timeout",
-            "info": "Timeout in milliseconds for the request.",
-            "field_type": "int",
-            "default_value": 10000,
-        },
-        "pageOptions": {
-            "display_name": "Page Options",
-            "info": "The page options to send with the request.",
-        },
-        "extractorOptions": {
-            "display_name": "Extractor Options",
-            "info": "The extractor options to send with the request.",
-        },
-    }
 
-    def build(
-        self,
-        api_key: str,
-        url: str,
-        timeout: int = 10000,
-        pageOptions: Data | None = None,  # noqa: N803
-        extractorOptions: Data | None = None,  # noqa: N803
-    ) -> Data:
+    inputs = [
+        SecretStrInput(
+            name="api_key",
+            display_name="API Key",
+            required=True,
+            password=True,
+            info="The API key to use Firecrawl API.",
+        ),
+        StrInput(
+            name="url",
+            display_name="URL",
+            required=True,
+            info="The URL to scrape.",
+        ),
+        IntInput(
+            name="timeout",
+            display_name="Timeout",
+            info="Timeout in milliseconds for the request.",
+        ),
+        DictInput(
+            name="pageOptions",
+            display_name="Page Options",
+            info="The page options to send with the request.",
+        ),
+        DictInput(
+            name="extractorOptions",
+            display_name="Extractor Options",
+            info="The extractor options to send with the request.",
+        ),
+    ]
+
+    outputs = [
+        Output(display_name="Data", name="data", method="crawl"),
+    ]
+
+    def crawl(self) -> list[Data]:
         try:
             from firecrawl.firecrawl import FirecrawlApp
         except ImportError as e:
             msg = "Could not import firecrawl integration package. Please install it with `pip install firecrawl-py`."
             raise ImportError(msg) from e
-        extractor_options_dict = extractorOptions.__dict__["data"]["text"] if extractorOptions else {}
-
-        page_options_dict = pageOptions.__dict__["data"]["text"] if pageOptions else {}
+        extractor_options_dict = self.extractorOptions.__dict__["data"]["text"] if self.extractorOptions else {}
+        page_options_dict = self.pageOptions.__dict__["data"]["text"] if self.pageOptions else {}
 
-        app = FirecrawlApp(api_key=api_key)
+        app = FirecrawlApp(api_key=self.api_key)
         results = app.scrape_url(
-            url,
+            self.url,
             {
-                "timeout": str(timeout),
+                "timeout": self.timeout,
                 "extractorOptions": extractor_options_dict,
                 "pageOptions": page_options_dict,
             },
         )
 
         return Data(data=results)
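For orientation, a minimal sketch of the firecrawl-py call that `FirecrawlScrapeApi.crawl()` now wraps, using placeholder values (the API key and URL are hypothetical) and the same payload shape as the component code above; it mirrors the component's call rather than documenting the firecrawl-py API itself:

```python
# Sketch only: direct use of the client call the component wraps.
# The API key and URL below are placeholders, not real values.
from firecrawl.firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-your-key")
results = app.scrape_url(
    "https://example.com",
    {
        "timeout": 10000,        # milliseconds, as in the component's IntInput
        "extractorOptions": {},  # same keys the component forwards
        "pageOptions": {},
    },
)
print(results)
```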