Skip to content

Commit

Permalink
raw
Browse files (browse the repository at this point in the history)
  • Loading branch information
italojohnny committed Nov 11, 2024
1 parent b9fbce6 commit d9ee469
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 106 deletions.
117 changes: 58 additions & 59 deletions src/backend/base/langflow/components/firecrawl/firecrawl_crawl_api.py
Original file line number Diff line number Diff line change
@@ -1,81 +1,80 @@
import uuid

from langflow.custom import CustomComponent
from langflow.custom import Component
from langflow.io import (
DictInput,
IntInput,
Output,
SecretStrInput,
StrInput,
)
from langflow.schema import Data


class FirecrawlCrawlApi(CustomComponent):
class FirecrawlCrawlApi(Component):
display_name: str = "FirecrawlCrawlApi"
description: str = "Firecrawl Crawl API."
name = "FirecrawlCrawlApi"

output_types: list[str] = ["Document"]
documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/crawl"
field_config = {
"api_key": {
"display_name": "API Key",
"field_type": "str",
"required": True,
"password": True,
"info": "The API key to use Firecrawl API.",
},
"url": {
"display_name": "URL",
"field_type": "str",
"required": True,
"info": "The base URL to start crawling from.",
},
"timeout": {
"display_name": "Timeout",
"field_type": "int",
"info": "The timeout in milliseconds.",
},
"crawlerOptions": {
"display_name": "Crawler Options",
"info": "Options for the crawler behavior.",
},
"pageOptions": {
"display_name": "Page Options",
"info": "The page options to send with the request.",
},
"idempotency_key": {
"display_name": "Idempotency Key",
"field_type": "str",
"info": "Optional idempotency key to ensure unique requests.",
},
}

def build(
self,
api_key: str,
url: str,
timeout: int = 30000,
crawlerOptions: Data | None = None, # noqa: N803
pageOptions: Data | None = None, # noqa: N803
idempotency_key: str | None = None,
) -> Data:
inputs = [
SecretStrInput(
name="api_key",
display_name="API Key",
required=True,
password=True,
info="The API key to use Firecrawl API.",
),
StrInput(
name="url",
display_name="URL",
required=True,
info="The URL to scrape.",
),
IntInput(
name="timeout",
display_name="Timeout",
info="Timeout in milliseconds for the request.",
),
StrInput(
name="idempotency_key",
display_name="Idempotency Key",
info="Optional idempotency key to ensure unique requests.",
),
DictInput(
name="crawlerOptions",
display_name="Crawler Options",
info="The crawler options to send with the request.",
),
DictInput(
name="pageOptions",
display_name="Page Options",
info="The page options to send with the request.",
),
]

outputs = [
Output(display_name="Data", name="data", method="crawl"),
]

def crawl(self) -> Data:
try:
from firecrawl.firecrawl import FirecrawlApp
except ImportError as e:
msg = "Could not import firecrawl integration package. Please install it with `pip install firecrawl-py`."
raise ImportError(msg) from e
crawler_options_dict = crawlerOptions.__dict__["data"]["text"] if crawlerOptions else {}
crawler_options_dict = self.crawlerOptions.__dict__["data"]["text"] if self.crawlerOptions else {}

Check failure on line 66 in src/backend/base/langflow/components/firecrawl/firecrawl_crawl_api.py

View workflow job for this annotation

GitHub Actions / Ruff Style Check (3.12)

Ruff (F841)

src/backend/base/langflow/components/firecrawl/firecrawl_crawl_api.py:66:9: F841 Local variable `crawler_options_dict` is assigned to but never used

page_options_dict = pageOptions.__dict__["data"]["text"] if pageOptions else {}
page_options_dict = self.pageOptions.__dict__["data"]["text"] if self.pageOptions else {}

Check failure on line 68 in src/backend/base/langflow/components/firecrawl/firecrawl_crawl_api.py

View workflow job for this annotation

GitHub Actions / Ruff Style Check (3.12)

Ruff (F841)

src/backend/base/langflow/components/firecrawl/firecrawl_crawl_api.py:68:9: F841 Local variable `page_options_dict` is assigned to but never used

if not idempotency_key:
idempotency_key = str(uuid.uuid4())
if not self.idempotency_key:
self.idempotency_key = str(uuid.uuid4())

Check failure on line 71 in src/backend/base/langflow/components/firecrawl/firecrawl_crawl_api.py

View workflow job for this annotation

GitHub Actions / Ruff Style Check (3.12)

Ruff (F821)

src/backend/base/langflow/components/firecrawl/firecrawl_crawl_api.py:71:40: F821 Undefined name `uuid`

app = FirecrawlApp(api_key=api_key)
app = FirecrawlApp(api_key=self.api_key)
crawl_result = app.crawl_url(
url,
params={
"crawlerOptions": crawler_options_dict,
"pageOptions": page_options_dict,
},
wait_until_done=True,
poll_interval=int(timeout / 1000),
idempotency_key=idempotency_key,
self.url,
params={},
idempotency_key=self.idempotency_key,
)

return Data(data={"results": crawl_result})
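98 changes: 51 additions & 47 deletions (second changed file; path not shown on this page, presumably the FirecrawlScrapeApi component module)
Original file line number Diff line number Diff line change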
@@ -1,69 +1,73 @@
from langflow.custom import CustomComponent
from langflow.custom import Component
from langflow.io import (
DictInput,
IntInput,
Output,
SecretStrInput,
StrInput,
)
from langflow.schema import Data


class FirecrawlScrapeApi(CustomComponent):
class FirecrawlScrapeApi(Component):
display_name: str = "FirecrawlScrapeApi"
description: str = "Firecrawl Scrape API."
name = "FirecrawlScrapeApi"

# name = "FirecrawlScrapeApi"
output_types: list[str] = ["Document"]
documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/scrape"
field_config = {
"api_key": {
"display_name": "API Key",
"field_type": "str",
"required": True,
"password": True,
"info": "The API key to use Firecrawl API.",
},
"url": {
"display_name": "URL",
"field_type": "str",
"required": True,
"info": "The URL to scrape.",
},
"timeout": {
"display_name": "Timeout",
"info": "Timeout in milliseconds for the request.",
"field_type": "int",
"default_value": 10000,
},
"pageOptions": {
"display_name": "Page Options",
"info": "The page options to send with the request.",
},
"extractorOptions": {
"display_name": "Extractor Options",
"info": "The extractor options to send with the request.",
},
}

def build(
self,
api_key: str,
url: str,
timeout: int = 10000,
pageOptions: Data | None = None, # noqa: N803
extractorOptions: Data | None = None, # noqa: N803
) -> Data:
inputs = [
SecretStrInput(
name="api_key",
display_name="API Key",
required=True,
password=True,
info="The API key to use Firecrawl API.",
),
StrInput(
name="url",
display_name="URL",
required=True,
info="The URL to scrape.",
),
IntInput(
name="timeout",
display_name="Timeout",
info="Timeout in milliseconds for the request.",
),
DictInput(
name="pageOptions",
display_name="Page Options",
info="The page options to send with the request.",
),
DictInput(
name="extractorOptions",
display_name="Extractor Options",
info="The extractor options to send with the request.",
),
]

outputs = [
Output(display_name="Data", name="data", method="crawl"),
]

def crawl(self) -> list[Data]:
try:
from firecrawl.firecrawl import FirecrawlApp
except ImportError as e:
msg = "Could not import firecrawl integration package. Please install it with `pip install firecrawl-py`."
raise ImportError(msg) from e
extractor_options_dict = extractorOptions.__dict__["data"]["text"] if extractorOptions else {}

page_options_dict = pageOptions.__dict__["data"]["text"] if pageOptions else {}
extractor_options_dict = self.extractorOptions.__dict__["data"]["text"] if self.extractorOptions else {}
page_options_dict = self.pageOptions.__dict__["data"]["text"] if self.pageOptions else {}

app = FirecrawlApp(api_key=api_key)
app = FirecrawlApp(api_key=self.api_key)
results = app.scrape_url(
url,
self.url,
{
"timeout": str(timeout),
"timeout": self.timeout,
"extractorOptions": extractor_options_dict,
"pageOptions": page_options_dict,
},
)

return Data(data=results)

0 comments on commit d9ee469

Please sign in to comment.