-
Notifications: You must be signed in to change notification settings.
Fork 5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent ab71e2f · commit f61ff7d
Showing 2 changed files with 111 additions and 112 deletions.
There are no files selected for viewing
121 changes: 60 additions & 61 deletions — src/backend/base/langflow/components/firecrawl/firecrawl_crawl_api.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,81 +1,80 @@ | ||
import uuid | ||
|
||
from langflow.custom import CustomComponent | ||
from langflow.custom import Component | ||
from langflow.io import ( | ||
DataInput, | ||
IntInput, | ||
Output, | ||
SecretStrInput, | ||
StrInput, | ||
) | ||
from langflow.schema import Data | ||
|
||
|
||
class FirecrawlCrawlApi(CustomComponent): | ||
class FirecrawlCrawlApi(Component): | ||
display_name: str = "FirecrawlCrawlApi" | ||
description: str = "Firecrawl Crawl API." | ||
name = "FirecrawlCrawlApi" | ||
|
||
output_types: list[str] = ["Document"] | ||
documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/crawl" | ||
field_config = { | ||
"api_key": { | ||
"display_name": "API Key", | ||
"field_type": "str", | ||
"required": True, | ||
"password": True, | ||
"info": "The API key to use Firecrawl API.", | ||
}, | ||
"url": { | ||
"display_name": "URL", | ||
"field_type": "str", | ||
"required": True, | ||
"info": "The base URL to start crawling from.", | ||
}, | ||
"timeout": { | ||
"display_name": "Timeout", | ||
"field_type": "int", | ||
"info": "The timeout in milliseconds.", | ||
}, | ||
"crawlerOptions": { | ||
"display_name": "Crawler Options", | ||
"info": "Options for the crawler behavior.", | ||
}, | ||
"pageOptions": { | ||
"display_name": "Page Options", | ||
"info": "The page options to send with the request.", | ||
}, | ||
"idempotency_key": { | ||
"display_name": "Idempotency Key", | ||
"field_type": "str", | ||
"info": "Optional idempotency key to ensure unique requests.", | ||
}, | ||
} | ||
documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/crawl-post" | ||
|
||
def build( | ||
self, | ||
api_key: str, | ||
url: str, | ||
timeout: int = 30000, | ||
crawlerOptions: Data | None = None, # noqa: N803 | ||
pageOptions: Data | None = None, # noqa: N803 | ||
idempotency_key: str | None = None, | ||
) -> Data: | ||
inputs = [ | ||
SecretStrInput( | ||
name="api_key", | ||
display_name="API Key", | ||
required=True, | ||
password=True, | ||
info="The API key to use Firecrawl API.", | ||
), | ||
StrInput( | ||
name="url", | ||
display_name="URL", | ||
required=True, | ||
info="The URL to scrape.", | ||
), | ||
IntInput( | ||
name="timeout", | ||
display_name="Timeout", | ||
info="Timeout in milliseconds for the request.", | ||
), | ||
StrInput( | ||
name="idempotency_key", | ||
display_name="Idempotency Key", | ||
info="Optional idempotency key to ensure unique requests.", | ||
), | ||
DataInput( | ||
name="crawlerOptions", | ||
display_name="Crawler Options", | ||
info="The crawler options to send with the request.", | ||
), | ||
DataInput( | ||
name="scrapeOptions", | ||
display_name="Scrape Options", | ||
info="The page options to send with the request.", | ||
), | ||
] | ||
|
||
outputs = [ | ||
Output(display_name="Data", name="data", method="crawl"), | ||
] | ||
idempotency_key: str | None = None | ||
|
||
def crawl(self) -> Data: | ||
try: | ||
from firecrawl.firecrawl import FirecrawlApp | ||
except ImportError as e: | ||
msg = "Could not import firecrawl integration package. Please install it with `pip install firecrawl-py`." | ||
raise ImportError(msg) from e | ||
crawler_options_dict = crawlerOptions.__dict__["data"]["text"] if crawlerOptions else {} | ||
|
||
page_options_dict = pageOptions.__dict__["data"]["text"] if pageOptions else {} | ||
|
||
if not idempotency_key: | ||
idempotency_key = str(uuid.uuid4()) | ||
params = self.crawlerOptions.__dict__["data"] if self.crawlerOptions else {} | ||
scrape_options_dict = self.scrapeOptions.__dict__["data"] if self.scrapeOptions else {} | ||
if scrape_options_dict: | ||
params["scrapeOptions"] = scrape_options_dict | ||
|
||
app = FirecrawlApp(api_key=api_key) | ||
crawl_result = app.crawl_url( | ||
url, | ||
params={ | ||
"crawlerOptions": crawler_options_dict, | ||
"pageOptions": page_options_dict, | ||
}, | ||
wait_until_done=True, | ||
poll_interval=int(timeout / 1000), | ||
idempotency_key=idempotency_key, | ||
) | ||
if not self.idempotency_key: | ||
self.idempotency_key = str(uuid.uuid4()) | ||
|
||
app = FirecrawlApp(api_key=self.api_key) | ||
crawl_result = app.crawl_url(self.url, params=params, idempotency_key=self.idempotency_key) | ||
return Data(data={"results": crawl_result}) |
102 changes: 51 additions & 51 deletions — src/backend/base/langflow/components/firecrawl/firecrawl_scrape_api.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,69 +1,69 @@ | ||
from langflow.custom import CustomComponent | ||
from langflow.custom import Component | ||
from langflow.io import ( | ||
DataInput, | ||
IntInput, | ||
Output, | ||
SecretStrInput, | ||
StrInput, | ||
) | ||
from langflow.schema import Data | ||
|
||
|
||
class FirecrawlScrapeApi(CustomComponent): | ||
class FirecrawlScrapeApi(Component): | ||
display_name: str = "FirecrawlScrapeApi" | ||
description: str = "Firecrawl Scrape API." | ||
name = "FirecrawlScrapeApi" | ||
|
||
output_types: list[str] = ["Document"] | ||
documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/scrape" | ||
field_config = { | ||
"api_key": { | ||
"display_name": "API Key", | ||
"field_type": "str", | ||
"required": True, | ||
"password": True, | ||
"info": "The API key to use Firecrawl API.", | ||
}, | ||
"url": { | ||
"display_name": "URL", | ||
"field_type": "str", | ||
"required": True, | ||
"info": "The URL to scrape.", | ||
}, | ||
"timeout": { | ||
"display_name": "Timeout", | ||
"info": "Timeout in milliseconds for the request.", | ||
"field_type": "int", | ||
"default_value": 10000, | ||
}, | ||
"pageOptions": { | ||
"display_name": "Page Options", | ||
"info": "The page options to send with the request.", | ||
}, | ||
"extractorOptions": { | ||
"display_name": "Extractor Options", | ||
"info": "The extractor options to send with the request.", | ||
}, | ||
} | ||
|
||
def build( | ||
self, | ||
api_key: str, | ||
url: str, | ||
timeout: int = 10000, | ||
pageOptions: Data | None = None, # noqa: N803 | ||
extractorOptions: Data | None = None, # noqa: N803 | ||
) -> Data: | ||
inputs = [ | ||
SecretStrInput( | ||
name="api_key", | ||
display_name="API Key", | ||
required=True, | ||
password=True, | ||
info="The API key to use Firecrawl API.", | ||
), | ||
StrInput( | ||
name="url", | ||
display_name="URL", | ||
required=True, | ||
info="The URL to scrape.", | ||
), | ||
IntInput( | ||
name="timeout", | ||
display_name="Timeout", | ||
info="Timeout in milliseconds for the request.", | ||
), | ||
DataInput( | ||
name="scrapeOptions", | ||
display_name="Scrape Options", | ||
info="The page options to send with the request.", | ||
), | ||
DataInput( # https://docs.firecrawl.dev/features/extract | ||
name="extractorOptions", | ||
display_name="Extractor Options", | ||
info="The extractor options to send with the request.", | ||
), | ||
] | ||
|
||
outputs = [ | ||
Output(display_name="Data", name="data", method="crawl"), | ||
] | ||
|
||
def crawl(self) -> list[Data]: | ||
try: | ||
from firecrawl.firecrawl import FirecrawlApp | ||
except ImportError as e: | ||
msg = "Could not import firecrawl integration package. Please install it with `pip install firecrawl-py`." | ||
raise ImportError(msg) from e | ||
extractor_options_dict = extractorOptions.__dict__["data"]["text"] if extractorOptions else {} | ||
|
||
page_options_dict = pageOptions.__dict__["data"]["text"] if pageOptions else {} | ||
|
||
app = FirecrawlApp(api_key=api_key) | ||
results = app.scrape_url( | ||
url, | ||
{ | ||
"timeout": str(timeout), | ||
"extractorOptions": extractor_options_dict, | ||
"pageOptions": page_options_dict, | ||
}, | ||
) | ||
params = self.scrapeOptions.__dict__["data"] if self.scrapeOptions else {} | ||
extractor_options_dict = self.extractorOptions.__dict__["data"] if self.extractorOptions else {} | ||
if extractor_options_dict: | ||
params["extract"] = extractor_options_dict | ||
|
||
app = FirecrawlApp(api_key=self.api_key) | ||
results = app.scrape_url(self.url, params=params) | ||
return Data(data=results) |