From 08907ebbc2cb47cfc3151946764656a7f4ce99c6 Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Thu, 2 Jan 2025 22:08:57 -0800 Subject: [PATCH 01/20] allow llmClient to be optionally passed in (#352) (#364) * allow llmClient to be optionally passed in (#352) * feat: allow llmClient to be optionally passed in * update: add ollama client example from pr: #349 * update: README and changeset * lint --------- Co-authored-by: Arihan Varanasi <63890951+arihanv@users.noreply.github.com> --- .changeset/spicy-singers-flow.md | 5 + README.md | 1 + examples/external_client.ts | 48 +++++ examples/external_clients/ollama.ts | 313 ++++++++++++++++++++++++++++ lib/index.ts | 11 +- lib/llm/LLMClient.ts | 2 +- package.json | 1 + types/stagehand.ts | 2 + 8 files changed, 378 insertions(+), 5 deletions(-) create mode 100644 .changeset/spicy-singers-flow.md create mode 100644 examples/external_client.ts create mode 100644 examples/external_clients/ollama.ts diff --git a/.changeset/spicy-singers-flow.md b/.changeset/spicy-singers-flow.md new file mode 100644 index 00000000..6c09c5cf --- /dev/null +++ b/.changeset/spicy-singers-flow.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": minor +--- + +exposed llmClient in stagehand constructor diff --git a/README.md b/README.md index 141bde86..b75fe2dc 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,7 @@ This constructor is used to create an instance of Stagehand. - `1`: SDK-level logging - `2`: LLM-client level logging (most granular) - `debugDom`: a `boolean` that draws bounding boxes around elements presented to the LLM during automation. + - `llmClient`: (optional) a custom `LLMClient` implementation. 
- **Returns:** diff --git a/examples/external_client.ts b/examples/external_client.ts new file mode 100644 index 00000000..03f21dc7 --- /dev/null +++ b/examples/external_client.ts @@ -0,0 +1,48 @@ +import { type ConstructorParams, type LogLine, Stagehand } from "../lib"; +import { z } from "zod"; +import { OllamaClient } from "./external_clients/ollama"; + +const StagehandConfig: ConstructorParams = { + env: "BROWSERBASE", + apiKey: process.env.BROWSERBASE_API_KEY, + projectId: process.env.BROWSERBASE_PROJECT_ID, + verbose: 1, + llmClient: new OllamaClient( + (message: LogLine) => + console.log(`[stagehand::${message.category}] ${message.message}`), + false, + undefined, + "llama3.2", + ), + debugDom: true, +}; + +async function example() { + const stagehand = new Stagehand(StagehandConfig); + + await stagehand.init(); + await stagehand.page.goto("https://news.ycombinator.com"); + + const headlines = await stagehand.page.extract({ + instruction: "Extract only 3 stories from the Hacker News homepage.", + schema: z.object({ + stories: z + .array( + z.object({ + title: z.string(), + url: z.string(), + points: z.number(), + }), + ) + .length(3), + }), + }); + + console.log(headlines); + + await stagehand.close(); +} + +(async () => { + await example(); +})(); diff --git a/examples/external_clients/ollama.ts b/examples/external_clients/ollama.ts new file mode 100644 index 00000000..a488ec0e --- /dev/null +++ b/examples/external_clients/ollama.ts @@ -0,0 +1,313 @@ +import OpenAI, { type ClientOptions } from "openai"; +import { zodResponseFormat } from "openai/helpers/zod"; +import type { LLMCache } from "../../lib/cache/LLMCache"; +import { validateZodSchema } from "../../lib/utils"; +import { + type ChatCompletionOptions, + type ChatMessage, + LLMClient, +} from "../../lib/llm/LLMClient"; +import type { LogLine } from "../../types/log"; +import type { AvailableModel } from "../../types/model"; +import type { + ChatCompletion, + ChatCompletionAssistantMessageParam, + 
ChatCompletionContentPartImage, + ChatCompletionContentPartText, + ChatCompletionCreateParamsNonStreaming, + ChatCompletionMessageParam, + ChatCompletionSystemMessageParam, + ChatCompletionUserMessageParam, +} from "openai/resources/chat"; + +export class OllamaClient extends LLMClient { + public type = "ollama" as const; + private client: OpenAI; + private cache: LLMCache | undefined; + public logger: (message: LogLine) => void; + private enableCaching: boolean; + public clientOptions: ClientOptions; + + constructor( + logger: (message: LogLine) => void, + enableCaching = false, + cache: LLMCache | undefined, + modelName: "llama3.2", + clientOptions?: ClientOptions, + ) { + super(modelName as AvailableModel); + this.client = new OpenAI({ + ...clientOptions, + baseURL: clientOptions?.baseURL || "http://localhost:11434/v1", + apiKey: "ollama", + }); + this.logger = logger; + this.cache = cache; + this.enableCaching = enableCaching; + this.modelName = modelName as AvailableModel; + } + + async createChatCompletion( + options: ChatCompletionOptions, + retries = 3, + ): Promise { + const { image, requestId, ...optionsWithoutImageAndRequestId } = options; + + // TODO: Implement vision support + if (image) { + throw new Error( + "Image provided. 
Vision is not currently supported for Ollama", + ); + } + + this.logger({ + category: "ollama", + message: "creating chat completion", + level: 1, + auxiliary: { + options: { + value: JSON.stringify({ + ...optionsWithoutImageAndRequestId, + requestId, + }), + type: "object", + }, + modelName: { + value: this.modelName, + type: "string", + }, + }, + }); + + const cacheOptions = { + model: this.modelName, + messages: options.messages, + temperature: options.temperature, + top_p: options.top_p, + frequency_penalty: options.frequency_penalty, + presence_penalty: options.presence_penalty, + image: image, + response_model: options.response_model, + }; + + if (options.image) { + const screenshotMessage: ChatMessage = { + role: "user", + content: [ + { + type: "image_url", + image_url: { + url: `data:image/jpeg;base64,${options.image.buffer.toString("base64")}`, + }, + }, + ...(options.image.description + ? [{ type: "text", text: options.image.description }] + : []), + ], + }; + + options.messages.push(screenshotMessage); + } + + if (this.enableCaching && this.cache) { + const cachedResponse = await this.cache.get( + cacheOptions, + options.requestId, + ); + + if (cachedResponse) { + this.logger({ + category: "llm_cache", + message: "LLM cache hit - returning cached response", + level: 1, + auxiliary: { + requestId: { + value: options.requestId, + type: "string", + }, + cachedResponse: { + value: JSON.stringify(cachedResponse), + type: "object", + }, + }, + }); + return cachedResponse; + } + + this.logger({ + category: "llm_cache", + message: "LLM cache miss - no cached response found", + level: 1, + auxiliary: { + requestId: { + value: options.requestId, + type: "string", + }, + }, + }); + } + + let responseFormat = undefined; + if (options.response_model) { + responseFormat = zodResponseFormat( + options.response_model.schema, + options.response_model.name, + ); + } + + /* eslint-disable */ + // Remove unsupported options + const { response_model, ...ollamaOptions } = { 
+ ...optionsWithoutImageAndRequestId, + model: this.modelName, + }; + + this.logger({ + category: "ollama", + message: "creating chat completion", + level: 1, + auxiliary: { + ollamaOptions: { + value: JSON.stringify(ollamaOptions), + type: "object", + }, + }, + }); + + const formattedMessages: ChatCompletionMessageParam[] = + options.messages.map((message) => { + if (Array.isArray(message.content)) { + const contentParts = message.content.map((content) => { + if ("image_url" in content) { + const imageContent: ChatCompletionContentPartImage = { + image_url: { + url: content.image_url.url, + }, + type: "image_url", + }; + return imageContent; + } else { + const textContent: ChatCompletionContentPartText = { + text: content.text, + type: "text", + }; + return textContent; + } + }); + + if (message.role === "system") { + const formattedMessage: ChatCompletionSystemMessageParam = { + ...message, + role: "system", + content: contentParts.filter( + (content): content is ChatCompletionContentPartText => + content.type === "text", + ), + }; + return formattedMessage; + } else if (message.role === "user") { + const formattedMessage: ChatCompletionUserMessageParam = { + ...message, + role: "user", + content: contentParts, + }; + return formattedMessage; + } else { + const formattedMessage: ChatCompletionAssistantMessageParam = { + ...message, + role: "assistant", + content: contentParts.filter( + (content): content is ChatCompletionContentPartText => + content.type === "text", + ), + }; + return formattedMessage; + } + } + + const formattedMessage: ChatCompletionUserMessageParam = { + role: "user", + content: message.content, + }; + + return formattedMessage; + }); + + const body: ChatCompletionCreateParamsNonStreaming = { + ...ollamaOptions, + model: this.modelName, + messages: formattedMessages, + response_format: responseFormat, + stream: false, + tools: options.tools?.filter((tool) => "function" in tool), // ensure only OpenAI compatibletools are used + }; + + const 
response = await this.client.chat.completions.create(body); + + this.logger({ + category: "ollama", + message: "response", + level: 1, + auxiliary: { + response: { + value: JSON.stringify(response), + type: "object", + }, + requestId: { + value: requestId, + type: "string", + }, + }, + }); + + if (options.response_model) { + const extractedData = response.choices[0].message.content; + const parsedData = JSON.parse(extractedData); + + if (!validateZodSchema(options.response_model.schema, parsedData)) { + if (retries > 0) { + return this.createChatCompletion(options, retries - 1); + } + + throw new Error("Invalid response schema"); + } + + if (this.enableCaching) { + this.cache.set( + cacheOptions, + { + ...parsedData, + }, + options.requestId, + ); + } + + return parsedData; + } + + if (this.enableCaching) { + this.logger({ + category: "llm_cache", + message: "caching response", + level: 1, + auxiliary: { + requestId: { + value: options.requestId, + type: "string", + }, + cacheOptions: { + value: JSON.stringify(cacheOptions), + type: "object", + }, + response: { + value: JSON.stringify(response), + type: "object", + }, + }, + }); + this.cache.set(cacheOptions, response, options.requestId); + } + + return response as T; + } +} diff --git a/lib/index.ts b/lib/index.ts index 130cb090..7e9b7901 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -335,6 +335,7 @@ export class Stagehand { verbose, debugDom, llmProvider, + llmClient, headless, logger, browserbaseSessionCreateParams, @@ -358,10 +359,12 @@ export class Stagehand { this.projectId = projectId ?? process.env.BROWSERBASE_PROJECT_ID; this.verbose = verbose ?? 0; this.debugDom = debugDom ?? false; - this.llmClient = this.llmProvider.getClient( - modelName ?? DEFAULT_MODEL_NAME, - modelClientOptions, - ); + this.llmClient = + llmClient || + this.llmProvider.getClient( + modelName ?? DEFAULT_MODEL_NAME, + modelClientOptions, + ); this.domSettleTimeoutMs = domSettleTimeoutMs ?? 30_000; this.headless = headless ?? 
false; this.browserbaseSessionCreateParams = browserbaseSessionCreateParams; diff --git a/lib/llm/LLMClient.ts b/lib/llm/LLMClient.ts index 254454ca..c93fab49 100644 --- a/lib/llm/LLMClient.ts +++ b/lib/llm/LLMClient.ts @@ -65,7 +65,7 @@ export interface ChatCompletionOptions { export type LLMResponse = AnthropicTransformedResponse | ChatCompletion; export abstract class LLMClient { - public type: "openai" | "anthropic"; + public type: "openai" | "anthropic" | string; public modelName: AvailableModel; public hasVision: boolean; public clientOptions: ClientOptions; diff --git a/package.json b/package.json index 690f7bd9..fde65cc9 100644 --- a/package.json +++ b/package.json @@ -9,6 +9,7 @@ "2048": "npm run build-dom-scripts && tsx examples/2048.ts", "example": "npm run build-dom-scripts && tsx examples/example.ts", "debug-url": "npm run build-dom-scripts && tsx examples/debugUrl.ts", + "external-client": "npm run build-dom-scripts && tsx examples/external_client.ts", "format": "prettier --write .", "prettier": "prettier --check .", "prettier:fix": "prettier --write .", diff --git a/types/stagehand.ts b/types/stagehand.ts index 90417ed5..dc035762 100644 --- a/types/stagehand.ts +++ b/types/stagehand.ts @@ -4,6 +4,7 @@ import { z } from "zod"; import { LLMProvider } from "../lib/llm/LLMProvider"; import { LogLine } from "./log"; import { AvailableModel, ClientOptions } from "./model"; +import { LLMClient } from "../lib/llm/LLMClient"; export interface ConstructorParams { env: "LOCAL" | "BROWSERBASE"; @@ -19,6 +20,7 @@ export interface ConstructorParams { enableCaching?: boolean; browserbaseSessionID?: string; modelName?: AvailableModel; + llmClient?: LLMClient; modelClientOptions?: ClientOptions; } From 0b73f7bdcdf97365307e27a57d92aeeb9a6db9de Mon Sep 17 00:00:00 2001 From: Sameel Date: Fri, 3 Jan 2025 12:01:39 -0500 Subject: [PATCH 02/20] filter eval triggers (#365) --- .github/workflows/ci.yml | 129 ++++++++++++++++++++++++--------------- 1 file changed, 81 
insertions(+), 48 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 278b7895..762ea5a6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,10 +4,14 @@ on: push: branches: - main + paths: + - "lib/**" + - "evals/**" pull_request: types: - opened - synchronize + - labeled env: EVAL_MODELS: "gpt-4o,gpt-4o-mini,claude-3-5-sonnet-latest" @@ -18,6 +22,31 @@ concurrency: cancel-in-progress: true jobs: + determine-evals: + runs-on: ubuntu-latest + outputs: + run-extract: ${{ steps.check-labels.outputs.run-extract }} + run-act: ${{ steps.check-labels.outputs.run-act }} + run-observe: ${{ steps.check-labels.outputs.run-observe }} + run-text-extract: ${{ steps.check-labels.outputs.run-text-extract }} + steps: + - id: check-labels + run: | + # Default to running all tests on main branch + if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then + echo "Running all tests for main branch" + echo "run-extract=true" >> $GITHUB_OUTPUT + echo "run-act=true" >> $GITHUB_OUTPUT + echo "run-observe=true" >> $GITHUB_OUTPUT + echo "run-text-extract=true" >> $GITHUB_OUTPUT + exit 0 + fi + + # Check for specific labels + echo "run-extract=${{ contains(github.event.pull_request.labels.*.name, 'extract') }}" >> $GITHUB_OUTPUT + echo "run-act=${{ contains(github.event.pull_request.labels.*.name, 'act') }}" >> $GITHUB_OUTPUT + echo "run-observe=${{ contains(github.event.pull_request.labels.*.name, 'observe') }}" >> $GITHUB_OUTPUT + echo "run-text-extract=${{ contains(github.event.pull_request.labels.*.name, 'text-extract') }}" >> $GITHUB_OUTPUT run-lint: runs-on: ubuntu-latest steps: @@ -81,10 +110,55 @@ jobs: - name: Run E2E Tests run: npm run e2e + run-combination-evals: + needs: [run-e2e-tests, determine-evals] + runs-on: ubuntu-latest + timeout-minutes: 40 + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }} + 
BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }} + BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }} + HEADLESS: true + EVAL_ENV: browserbase + + steps: + - name: Check out repository code + uses: actions/checkout@v4 + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Install dependencies + run: npm install --no-frozen-lockfile + + - name: Install Playwright browsers + run: npm exec playwright install --with-deps + + - name: Run Combination Evals + run: npm run evals category combination + + - name: Log Combination Evals Performance + run: | + experimentName=$(jq -r '.experimentName' eval-summary.json) + echo "View results at https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentName}" + if [ -f eval-summary.json ]; then + combination_score=$(jq '.categories.combination' eval-summary.json) + echo "Combination category score: $combination_score%" + exit 0 + else + echo "Eval summary not found for combination category. Failing CI." 
+ exit 1 + fi + run-act-evals: + needs: [run-e2e-tests, determine-evals, run-combination-evals] + if: needs.determine-evals.outputs.run-act == 'true' runs-on: ubuntu-latest timeout-minutes: 25 - needs: [run-text-extract-evals] env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} @@ -129,7 +203,8 @@ jobs: fi run-extract-evals: - needs: [run-lint, run-build, run-e2e-tests] + needs: [run-e2e-tests, determine-evals, run-combination-evals] + if: needs.determine-evals.outputs.run-extract == 'true' runs-on: ubuntu-latest timeout-minutes: 50 env: @@ -187,7 +262,8 @@ jobs: fi run-text-extract-evals: - needs: [run-extract-evals] + needs: [run-e2e-tests, determine-evals, run-combination-evals] + if: needs.determine-evals.outputs.run-text-extract == 'true' runs-on: ubuntu-latest timeout-minutes: 120 env: @@ -245,9 +321,10 @@ jobs: fi run-observe-evals: + needs: [run-e2e-tests, determine-evals, run-combination-evals] + if: needs.determine-evals.outputs.run-observe == 'true' runs-on: ubuntu-latest timeout-minutes: 25 - needs: [run-act-evals] env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} @@ -290,47 +367,3 @@ jobs: echo "Eval summary not found for observe category. Failing CI." 
exit 1 fi - - run-combination-evals: - runs-on: ubuntu-latest - timeout-minutes: 40 - needs: [run-observe-evals] - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }} - BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }} - BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }} - HEADLESS: true - EVAL_ENV: browserbase - - steps: - - name: Check out repository code - uses: actions/checkout@v4 - - - name: Set up Node.js - uses: actions/setup-node@v4 - with: - node-version: "20" - - - name: Install dependencies - run: npm install --no-frozen-lockfile - - - name: Install Playwright browsers - run: npm exec playwright install --with-deps - - - name: Run Combination Evals - run: npm run evals category combination - - - name: Log Combination Evals Performance - run: | - experimentName=$(jq -r '.experimentName' eval-summary.json) - echo "View results at https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentName}" - if [ -f eval-summary.json ]; then - combination_score=$(jq '.categories.combination' eval-summary.json) - echo "Combination category score: $combination_score%" - exit 0 - else - echo "Eval summary not found for combination category. Failing CI." 
- exit 1 - fi From 30236c0defb8d1fdbc2620330776ac35d5dc466d Mon Sep 17 00:00:00 2001 From: Navid Pour Date: Fri, 3 Jan 2025 13:00:45 -0800 Subject: [PATCH 03/20] fix completion verification + perform action on first xpath that can be found on the page (#346) --- lib/handlers/actHandler.ts | 56 ++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/lib/handlers/actHandler.ts b/lib/handlers/actHandler.ts index 44dd30c5..5eceb845 100644 --- a/lib/handlers/actHandler.ts +++ b/lib/handlers/actHandler.ts @@ -73,6 +73,10 @@ export class StagehandActHandler { llmClient: LLMClient; domSettleTimeoutMs?: number; }): Promise { + if (!completed) { + return false; + } + await this.stagehandPage._waitForSettledDom(domSettleTimeoutMs); // o1 is overkill for this task + this task uses a lot of tokens. So we switch it 4o @@ -1258,12 +1262,50 @@ export class StagehandActHandler { try { const initialUrl = this.stagehandPage.page.url(); - const locator = this.stagehandPage.page - .locator(`xpath=${xpaths[0]}`) - .first(); + + // Modified: Attempt to locate the first valid XPath before proceeding + let foundXpath: string | null = null; + let locator: Locator | null = null; + + for (const xp of xpaths) { + const candidate = this.stagehandPage.page + .locator(`xpath=${xp}`) + .first(); + try { + // Try a short wait to see if it's attached to the DOM + await candidate.waitFor({ state: "attached", timeout: 2000 }); + foundXpath = xp; + locator = candidate; + break; + } catch (e) { + this.logger({ + category: "action", + message: "XPath not yet located; moving on", + level: 1, + auxiliary: { + xpath: { + value: xp, + type: "string", + }, + error: { + value: e.message, + type: "string", + }, + }, + }); + // Continue to next XPath + } + } + + // If no XPath was valid, we cannot proceed + if (!foundXpath || !locator) { + throw new Error("None of the provided XPaths could be located."); + } + const originalUrl = this.stagehandPage.page.url(); const 
componentString = await this._getComponentString(locator); const responseArgs = [...args]; + if (variables) { responseArgs.forEach((arg, index) => { if (typeof arg === "string") { @@ -1271,10 +1313,11 @@ export class StagehandActHandler { } }); } + await this._performPlaywrightMethod( method, args, - xpaths[0], + foundXpath, domSettleTimeoutMs, ); @@ -1299,7 +1342,7 @@ export class StagehandActHandler { }, componentString, requestId, - xpaths: xpaths, + xpaths, newStepString, completed: response.completed, }) @@ -1335,6 +1378,7 @@ export class StagehandActHandler { llmClient, domSettleTimeoutMs, }).catch((error) => { + console.log("error verifying action completion", error); this.logger({ category: "action", message: @@ -1367,7 +1411,7 @@ export class StagehandActHandler { verifierUseVision, requestId, variables, - previousSelectors: [...previousSelectors, xpaths[0]], + previousSelectors: [...previousSelectors, foundXpath], skipActionCacheForThisStep: false, domSettleTimeoutMs, }); From ae301f1c1b554a92911a774297a0173369e43412 Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Fri, 3 Jan 2025 17:19:39 -0800 Subject: [PATCH 04/20] Use docs.browserbase.com instead of www (#375) --- .../tests/BrowserContext/addInitScript.test.ts | 2 +- evals/deterministic/tests/page/addInitScript.test.ts | 2 +- evals/deterministic/tests/page/bringToFront.test.ts | 2 +- evals/deterministic/tests/page/navigation.test.ts | 6 +++--- evals/deterministic/tests/page/reload.test.ts | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/evals/deterministic/tests/BrowserContext/addInitScript.test.ts b/evals/deterministic/tests/BrowserContext/addInitScript.test.ts index 036beae3..fd795151 100644 --- a/evals/deterministic/tests/BrowserContext/addInitScript.test.ts +++ b/evals/deterministic/tests/BrowserContext/addInitScript.test.ts @@ -28,7 +28,7 @@ test.describe("StagehandContext - addInitScript", () => { expect(resultA).toBe("Hello from context.initScript!"); const pageB = await 
context.newPage(); - await pageB.goto("https://www.browserbase.com"); + await pageB.goto("https://docs.browserbase.com"); const resultB = await pageB.evaluate(() => { const w = window as typeof window & { diff --git a/evals/deterministic/tests/page/addInitScript.test.ts b/evals/deterministic/tests/page/addInitScript.test.ts index 44966d3e..15cc5f4d 100644 --- a/evals/deterministic/tests/page/addInitScript.test.ts +++ b/evals/deterministic/tests/page/addInitScript.test.ts @@ -26,7 +26,7 @@ test.describe("StagehandPage - addInitScript", () => { }); expect(result).toBe("Hello from init script!"); - await page.goto("https://www.browserbase.com/"); + await page.goto("https://docs.browserbase.com/"); const resultAfterNavigation = await page.evaluate(() => { const w = window as typeof window & { __testInitScriptVar?: string; diff --git a/evals/deterministic/tests/page/bringToFront.test.ts b/evals/deterministic/tests/page/bringToFront.test.ts index 2f015dbd..7e58cd80 100644 --- a/evals/deterministic/tests/page/bringToFront.test.ts +++ b/evals/deterministic/tests/page/bringToFront.test.ts @@ -20,7 +20,7 @@ test.describe("StagehandPage - bringToFront", () => { await page1.bringToFront(); - await page1.goto("https://www.browserbase.com"); + await page1.goto("https://docs.browserbase.com"); const page1TitleAfter = await page1.title(); console.log("Page1 Title after:", page1TitleAfter); diff --git a/evals/deterministic/tests/page/navigation.test.ts b/evals/deterministic/tests/page/navigation.test.ts index 067fe526..dee32fe9 100644 --- a/evals/deterministic/tests/page/navigation.test.ts +++ b/evals/deterministic/tests/page/navigation.test.ts @@ -12,14 +12,14 @@ test.describe("StagehandPage - Navigation", () => { await page.goto("https://example.com"); expect(page.url()).toBe("https://example.com/"); - await page.goto("https://www.browserbase.com/"); - expect(page.url()).toBe("https://www.browserbase.com/"); + await page.goto("https://docs.browserbase.com/introduction"); + 
expect(page.url()).toBe("https://docs.browserbase.com/introduction"); await page.goBack(); expect(page.url()).toBe("https://example.com/"); await page.goForward(); - expect(page.url()).toBe("https://www.browserbase.com/"); + expect(page.url()).toBe("https://docs.browserbase.com/introduction"); await stagehand.close(); }); diff --git a/evals/deterministic/tests/page/reload.test.ts b/evals/deterministic/tests/page/reload.test.ts index 7e4d3ea1..b6beede4 100644 --- a/evals/deterministic/tests/page/reload.test.ts +++ b/evals/deterministic/tests/page/reload.test.ts @@ -8,7 +8,7 @@ test.describe("StagehandPage - Reload", () => { await stagehand.init(); const page = stagehand.page; - await page.goto("https://www.browserbase.com/"); + await page.goto("https://docs.browserbase.com/"); await page.evaluate(() => { const w = window as typeof window & { From 207244e3a46c4474d4d28db039eab131164790ca Mon Sep 17 00:00:00 2001 From: Sameel Date: Sun, 5 Jan 2025 00:29:34 -0500 Subject: [PATCH 05/20] support popups (#374) * more descriptive errors * ctx-level new page detection * improve nav performance * remove new tab log * remove unused imports * changeset * page listener returns StagehandPage * add popup example * clean imports * override on(popup) * add guiding comment * update err message * update tests & add act page handler back * fix non popup events * enhance on() tests * Update few-elephants-cough.md * err msg renaming --- .changeset/few-elephants-cough.md | 5 + .gitignore | 3 +- evals/deterministic/tests/page/on.test.ts | 134 ++++++++++++++++++++++ examples/popup.ts | 46 ++++++++ lib/StagehandPage.ts | 46 +++++++- lib/handlers/actHandler.ts | 4 +- package.json | 1 + types/page.ts | 17 ++- 8 files changed, 245 insertions(+), 11 deletions(-) create mode 100644 .changeset/few-elephants-cough.md create mode 100644 evals/deterministic/tests/page/on.test.ts create mode 100644 examples/popup.ts diff --git a/.changeset/few-elephants-cough.md b/.changeset/few-elephants-cough.md 
new file mode 100644 index 00000000..f81b59ab --- /dev/null +++ b/.changeset/few-elephants-cough.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": minor +--- + +Pass in a Stagehand Page object into the `on("popup")` listener to allow for multi-page handling. diff --git a/.gitignore b/.gitignore index c259e84d..e5ea06bb 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,5 @@ evals/public evals/playground.ts tmp/ eval-summary.json -pnpm-lock.yaml \ No newline at end of file +pnpm-lock.yaml +evals/deterministic/tests/BrowserContext/tmp-test.har diff --git a/evals/deterministic/tests/page/on.test.ts b/evals/deterministic/tests/page/on.test.ts new file mode 100644 index 00000000..ff3ae4e4 --- /dev/null +++ b/evals/deterministic/tests/page/on.test.ts @@ -0,0 +1,134 @@ +import { expect, test } from "@playwright/test"; +import { Stagehand } from "../../../../lib"; +import StagehandConfig from "../../stagehand.config"; + +test.describe("StagehandPage - page.on()", () => { + test("should click on the crewAI blog tab", async () => { + const stagehand = new Stagehand(StagehandConfig); + await stagehand.init(); + + const page = stagehand.page; + await page.goto( + "https://docs.browserbase.com/integrations/crew-ai/introduction", + ); + + let clickPromise: Promise; + + page.on("popup", async (newPage) => { + clickPromise = newPage.click( + "body > div.page-wrapper > div.navbar-2.w-nav > div.padding-global.top-bot > div > div.navigation-left > nav > a:nth-child(7)", + ); + }); + + await page.goto( + "https://docs.browserbase.com/integrations/crew-ai/introduction", + ); + + await page.click( + "#content-area > div.relative.mt-8.prose.prose-gray.dark\\:prose-invert > p:nth-child(2) > a", + ); + + await clickPromise; + + await stagehand.close(); + }); + + test("should close the new tab and navigate to it on the existing page", async () => { + const stagehand = new Stagehand(StagehandConfig); + await stagehand.init(); + + const page = stagehand.page; + await page.goto( + 
"https://docs.browserbase.com/integrations/crew-ai/introduction", + ); + + let navigatePromise: Promise; + + page.on("popup", async (newPage) => { + navigatePromise = Promise.allSettled([ + newPage.close(), + page.goto(newPage.url(), { waitUntil: "domcontentloaded" }), + ]); + }); + + // Click on the crewAI blog tab + await page.click( + "#content-area > div.relative.mt-8.prose.prose-gray.dark\\:prose-invert > p:nth-child(2) > a", + ); + + await navigatePromise; + + await page.click( + "body > div.page-wrapper > div.navbar-2.w-nav > div.padding-global.top-bot > div > div.navigation-left > nav > a:nth-child(3)", + ); + + await page.waitForLoadState("domcontentloaded"); + + const currentUrl = page.url(); + expect(currentUrl).toBe("https://www.crewai.com/open-source"); + + await stagehand.close(); + }); + + test("should handle console events", async () => { + const stagehand = new Stagehand(StagehandConfig); + await stagehand.init(); + + const page = stagehand.page; + await page.goto("https://example.com"); + + const messages: string[] = []; + page.on("console", (msg) => { + messages.push(msg.text()); + }); + + await page.evaluate(() => console.log("Test console log")); + + expect(messages).toContain("Test console log"); + + await stagehand.close(); + }); + + test("should handle dialog events", async () => { + const stagehand = new Stagehand(StagehandConfig); + await stagehand.init(); + + const page = stagehand.page; + await page.goto("https://example.com"); + + page.on("dialog", async (dialog) => { + expect(dialog.message()).toBe("Test alert"); + await dialog.dismiss(); + }); + + await page.evaluate(() => alert("Test alert")); + + await stagehand.close(); + }); + + test("should handle request and response events", async () => { + const stagehand = new Stagehand(StagehandConfig); + await stagehand.init(); + + const page = stagehand.page; + await page.goto("https://example.com"); + + const requests: string[] = []; + const responses: string[] = []; + + 
page.on("request", (request) => { + requests.push(request.url()); + }); + + page.on("response", (response) => { + responses.push(response.url()); + }); + + await page.goto("https://example.com"); + + expect(requests).toContain("https://example.com/"); + expect(responses).toContain("https://example.com/"); + + await stagehand.close(); + }); +}); diff --git a/examples/popup.ts b/examples/popup.ts new file mode 100644 index 00000000..c87c3866 --- /dev/null +++ b/examples/popup.ts @@ -0,0 +1,46 @@ +/** + * This file is meant to be used as a scratchpad for developing new evals. + * To create a Stagehand project with best practices and configuration, run: + * + * npx create-browser-app@latest my-browser-app + */ + +import { ObserveResult, Stagehand } from "../lib"; +import StagehandConfig from "./stagehand.config"; + +async function example() { + const stagehand = new Stagehand(StagehandConfig); + await stagehand.init(); + + const page = await stagehand.page; + + let observePromise: Promise; + + page.on("popup", async (newPage) => { + observePromise = newPage.observe({ + instruction: "return all the next possible actions from the page", + }); + }); + + await page.goto( + "https://docs.browserbase.com/integrations/crew-ai/introduction", + ); + + await page.click( + "#content-area > div.relative.mt-8.prose.prose-gray.dark\\:prose-invert > p:nth-child(2) > a", + ); + + await page.waitForTimeout(5000); + + if (observePromise) { + const observeResult = await observePromise; + + console.log("Observed", observeResult.length, "actions"); + } + + await stagehand.close(); +} + +(async () => { + await example(); +})(); diff --git a/lib/StagehandPage.ts b/lib/StagehandPage.ts index 120b6250..fbbf85b0 100644 --- a/lib/StagehandPage.ts +++ b/lib/StagehandPage.ts @@ -34,13 +34,24 @@ export class StagehandPage { ) { this.intPage = Object.assign(page, { act: () => { - throw new Error("act() is not implemented on the base page object"); + throw new Error( + "You seem to be calling `act` 
on a page in an uninitialized `Stagehand` object. Ensure you are running `await stagehand.init()` on the Stagehand object before referencing the `page` object.", + ); }, extract: () => { - throw new Error("extract() is not implemented on the base page object"); + throw new Error( + "You seem to be calling `extract` on a page in an uninitialized `Stagehand` object. Ensure you are running `await stagehand.init()` on the Stagehand object before referencing the `page` object.", + ); }, observe: () => { - throw new Error("observe() is not implemented on the base page object"); + throw new Error( + "You seem to be calling `observe` on a page in an uninitialized `Stagehand` object. Ensure you are running `await stagehand.init()` on the Stagehand object before referencing the `page` object.", + ); + }, + on: () => { + throw new Error( + "You seem to be referencing a page in an uninitialized `Stagehand` object. Ensure you are running `await stagehand.init()` on the Stagehand object before referencing the `page` object.", + ); }, }); this.stagehand = stagehand; @@ -105,9 +116,38 @@ export class StagehandPage { }; } + if (prop === "on") { + return (event: string, listener: (param: unknown) => void) => { + if (event === "popup") { + return this.context.on("page", async (page) => { + const newContext = await StagehandContext.init( + page.context(), + stagehand, + ); + const newStagehandPage = new StagehandPage( + page, + stagehand, + newContext, + this.llmClient, + ); + + await newStagehandPage.init(); + + listener(newStagehandPage.page); + }); + } + + return this.context.on( + event as keyof PlaywrightPage["on"], + listener, + ); + }; + } + return target[prop as keyof PlaywrightPage]; }, }); + await this._waitForSettledDom(); return this; } diff --git a/lib/handlers/actHandler.ts b/lib/handlers/actHandler.ts index 5eceb845..cb895eb4 100644 --- a/lib/handlers/actHandler.ts +++ b/lib/handlers/actHandler.ts @@ -8,10 +8,10 @@ import { ActionCache } from "../cache/ActionCache"; 
import { act, fillInVariables, verifyActCompletion } from "../inference"; import { LLMClient } from "../llm/LLMClient"; import { LLMProvider } from "../llm/LLMProvider"; +import { StagehandContext } from "../StagehandContext"; +import { StagehandPage } from "../StagehandPage"; import { generateId } from "../utils"; import { ScreenshotService } from "../vision"; -import { StagehandPage } from "../StagehandPage"; -import { StagehandContext } from "../StagehandContext"; export class StagehandActHandler { private readonly stagehandPage: StagehandPage; diff --git a/package.json b/package.json index fde65cc9..682f0a4a 100644 --- a/package.json +++ b/package.json @@ -6,6 +6,7 @@ "module": "./dist/index.js", "types": "./dist/index.d.ts", "scripts": { + "popup": "npm run build-dom-scripts && tsx examples/popup.ts", "2048": "npm run build-dom-scripts && tsx examples/2048.ts", "example": "npm run build-dom-scripts && tsx examples/example.ts", "debug-url": "npm run build-dom-scripts && tsx examples/debugUrl.ts", diff --git a/types/page.ts b/types/page.ts index f6b7570a..3e052f29 100644 --- a/types/page.ts +++ b/types/page.ts @@ -1,6 +1,9 @@ -import type { Page as PlaywrightPage } from "@playwright/test"; -import type { BrowserContext as PlaywrightContext } from "@playwright/test"; -import type { Browser as PlaywrightBrowser } from "@playwright/test"; +import type { + Browser as PlaywrightBrowser, + BrowserContext as PlaywrightContext, + Page as PlaywrightPage, +} from "@playwright/test"; +import type { z } from "zod"; import type { ActOptions, ActResult, @@ -9,13 +12,17 @@ import type { ObserveOptions, ObserveResult, } from "./stagehand"; -import type { z } from "zod"; -export interface Page extends PlaywrightPage { + +export interface Page extends Omit { act: (options: ActOptions) => Promise; extract: ( options: ExtractOptions, ) => Promise>; observe: (options?: ObserveOptions) => Promise; + + on: { + (event: "popup", listener: (page: Page) => unknown): Page; + } & 
PlaywrightPage["on"]; } // Empty type for now, but will be used in the future From 9bbb54fae536d9ce3cea1a927e685a5ea610f1d8 Mon Sep 17 00:00:00 2001 From: Sameel Date: Sun, 5 Jan 2025 07:32:17 -0500 Subject: [PATCH 06/20] load env on local e2e (#378) --- evals/deterministic/stagehand.config.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/evals/deterministic/stagehand.config.ts b/evals/deterministic/stagehand.config.ts index 94d4ecb7..52e9f455 100644 --- a/evals/deterministic/stagehand.config.ts +++ b/evals/deterministic/stagehand.config.ts @@ -1,4 +1,6 @@ import type { ConstructorParams, LogLine } from "../../lib"; +import dotenv from "dotenv"; +dotenv.config({ path: "../../.env" }); const StagehandConfig: ConstructorParams = { env: "LOCAL" /* Environment to run Stagehand in */, From db2ef5997664e81b1dfb5ca992392362f2d3bab1 Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Sun, 5 Jan 2025 21:38:33 -0500 Subject: [PATCH 07/20] Remove evals on main and make logs sync (#381) * Remove evals on main and make logs sync * update ci --- .changeset/shiny-scissors-hear.md | 5 +++++ .github/workflows/ci.yml | 6 ------ lib/index.ts | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) create mode 100644 .changeset/shiny-scissors-hear.md diff --git a/.changeset/shiny-scissors-hear.md b/.changeset/shiny-scissors-hear.md new file mode 100644 index 00000000..93821095 --- /dev/null +++ b/.changeset/shiny-scissors-hear.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": minor +--- + +make logs only sync diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 762ea5a6..b1254308 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,12 +1,6 @@ name: Evals on: - push: - branches: - - main - paths: - - "lib/**" - - "evals/**" pull_request: types: - opened diff --git a/lib/index.ts b/lib/index.ts index 7e9b7901..299489ca 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -321,7 +321,7 @@ export class Stagehand { private apiKey: string | 
undefined; private projectId: string | undefined; // We want external logger to accept async functions - private externalLogger?: (logLine: LogLine) => void | Promise<void>; + private externalLogger?: (logLine: LogLine) => void; private browserbaseSessionCreateParams?: Browserbase.Sessions.SessionCreateParams; public variables: { [key: string]: unknown }; private contextPath?: string; From a77efccfde3a3948013eda3a52935e8a21d45b3e Mon Sep 17 00:00:00 2001 From: Sameel Date: Mon, 6 Jan 2025 21:23:07 -0500 Subject: [PATCH 08/20] Refactor LLM Types (#383) * unify LLM tool type * replace anthropic types * add comment * changeset * fix ollama tool usage --- .changeset/calm-teachers-rescue.md | 5 ++ examples/external_clients/ollama.ts | 9 ++- lib/handlers/actHandler.ts | 5 +- lib/inference.ts | 1 + lib/llm/AnthropicClient.ts | 32 ++++------ lib/llm/LLMClient.ts | 44 +++++++++---- lib/llm/OpenAIClient.ts | 19 ++++-- lib/prompt.ts | 95 ++++++++++++++--------------- types/llm.ts | 6 ++ types/model.ts | 31 ---------- 10 files changed, 128 insertions(+), 119 deletions(-) create mode 100644 .changeset/calm-teachers-rescue.md create mode 100644 types/llm.ts diff --git a/.changeset/calm-teachers-rescue.md b/.changeset/calm-teachers-rescue.md new file mode 100644 index 00000000..c77cc7b7 --- /dev/null +++ b/.changeset/calm-teachers-rescue.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +Unified LLM input/output types for reduced dependence on OpenAI types diff --git a/examples/external_clients/ollama.ts b/examples/external_clients/ollama.ts index a488ec0e..5d5b68f7 100644 --- a/examples/external_clients/ollama.ts +++ b/examples/external_clients/ollama.ts @@ -239,7 +239,14 @@ export class OllamaClient extends LLMClient { messages: formattedMessages, response_format: responseFormat, stream: false, - tools: options.tools?.filter((tool) => "function" in tool), // ensure only OpenAI compatibletools are used + tools: options.tools?.map((tool) => ({ + function: { + name: 
tool.name, + description: tool.description, + parameters: tool.parameters, + }, + type: "function", + })), }; const response = await this.client.chat.completions.create(body); diff --git a/lib/handlers/actHandler.ts b/lib/handlers/actHandler.ts index cb895eb4..b4966639 100644 --- a/lib/handlers/actHandler.ts +++ b/lib/handlers/actHandler.ts @@ -1378,7 +1378,6 @@ export class StagehandActHandler { llmClient, domSettleTimeoutMs, }).catch((error) => { - console.log("error verifying action completion", error); this.logger({ category: "action", message: @@ -1389,6 +1388,10 @@ export class StagehandActHandler { value: error.message, type: "string", }, + trace: { + value: error.stack, + type: "string", + }, }, }); diff --git a/lib/inference.ts b/lib/inference.ts index 2aa23c5c..0bb08f14 100644 --- a/lib/inference.ts +++ b/lib/inference.ts @@ -174,6 +174,7 @@ export async function extract({ }) { type ExtractionResponse = z.infer; type MetadataResponse = z.infer; + // TODO: antipattern const isUsingAnthropic = llmClient.type === "anthropic"; const extractionResponse = await llmClient.createChatCompletion({ diff --git a/lib/llm/AnthropicClient.ts b/lib/llm/AnthropicClient.ts index 3ebe3faf..1675547d 100644 --- a/lib/llm/AnthropicClient.ts +++ b/lib/llm/AnthropicClient.ts @@ -7,13 +7,9 @@ import { } from "@anthropic-ai/sdk/resources"; import { zodToJsonSchema } from "zod-to-json-schema"; import { LogLine } from "../../types/log"; -import { - AnthropicJsonSchemaObject, - AnthropicTransformedResponse, - AvailableModel, -} from "../../types/model"; +import { AnthropicJsonSchemaObject, AvailableModel } from "../../types/model"; import { LLMCache } from "../cache/LLMCache"; -import { ChatCompletionOptions, LLMClient } from "./LLMClient"; +import { ChatCompletionOptions, LLMClient, LLMResponse } from "./LLMClient"; export class AnthropicClient extends LLMClient { public type = "anthropic" as const; @@ -39,7 +35,7 @@ export class AnthropicClient extends LLMClient { 
this.clientOptions = clientOptions; } - async createChatCompletion( + async createChatCompletion( options: ChatCompletionOptions & { retries?: number }, ): Promise { const optionsWithoutImage = { ...options }; @@ -185,17 +181,15 @@ export class AnthropicClient extends LLMClient { } let anthropicTools: Tool[] = options.tools?.map((tool) => { - if (tool.type === "function") { - return { - name: tool.function.name, - description: tool.function.description, - input_schema: { - type: "object", - properties: tool.function.parameters.properties, - required: tool.function.parameters.required, - }, - }; - } + return { + name: tool.name, + description: tool.description, + input_schema: { + type: "object", + properties: tool.parameters.properties, + required: tool.parameters.required, + }, + }; }); let toolDefinition: Tool | undefined; @@ -247,7 +241,7 @@ export class AnthropicClient extends LLMClient { }, }); - const transformedResponse: AnthropicTransformedResponse = { + const transformedResponse: LLMResponse = { id: response.id, object: "chat.completion", created: Date.now(), diff --git a/lib/llm/LLMClient.ts b/lib/llm/LLMClient.ts index c93fab49..815aee98 100644 --- a/lib/llm/LLMClient.ts +++ b/lib/llm/LLMClient.ts @@ -1,14 +1,6 @@ -import { - ChatCompletion, - ChatCompletionToolChoiceOption, -} from "openai/resources"; import { ZodType } from "zod"; -import { - AnthropicTransformedResponse, - AvailableModel, - ClientOptions, - ToolCall, -} from "../../types/model"; +import { LLMTool } from "../../types/llm"; +import { AvailableModel, ClientOptions } from "../../types/model"; export interface ChatMessage { role: "system" | "user" | "assistant"; @@ -56,13 +48,39 @@ export interface ChatCompletionOptions { name: string; schema: ZodType; }; - tools?: ToolCall[]; - tool_choice?: "auto" | ChatCompletionToolChoiceOption; + tools?: LLMTool[]; + tool_choice?: "auto" | "none" | "required"; maxTokens?: number; requestId: string; } -export type LLMResponse = 
AnthropicTransformedResponse | ChatCompletion; +export type LLMResponse = { + id: string; + object: string; + created: number; + model: string; + choices: { + index: number; + message: { + role: string; + content: string | null; + tool_calls: { + id: string; + type: string; + function: { + name: string; + arguments: string; + }; + }[]; + }; + finish_reason: string; + }[]; + usage: { + prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + }; +}; export abstract class LLMClient { public type: "openai" | "anthropic" | string; diff --git a/lib/llm/OpenAIClient.ts b/lib/llm/OpenAIClient.ts index ab27ad60..336974d5 100644 --- a/lib/llm/OpenAIClient.ts +++ b/lib/llm/OpenAIClient.ts @@ -1,7 +1,6 @@ import OpenAI, { ClientOptions } from "openai"; import { zodResponseFormat } from "openai/helpers/zod"; import { - ChatCompletion, ChatCompletionAssistantMessageParam, ChatCompletionContentPartImage, ChatCompletionContentPartText, @@ -15,7 +14,12 @@ import { LogLine } from "../../types/log"; import { AvailableModel } from "../../types/model"; import { LLMCache } from "../cache/LLMCache"; import { validateZodSchema } from "../utils"; -import { ChatCompletionOptions, ChatMessage, LLMClient } from "./LLMClient"; +import { + ChatCompletionOptions, + ChatMessage, + LLMClient, + LLMResponse, +} from "./LLMClient"; export class OpenAIClient extends LLMClient { public type = "openai" as const; @@ -41,7 +45,7 @@ export class OpenAIClient extends LLMClient { this.modelName = modelName; } - async createChatCompletion( + async createChatCompletion( optionsInitial: ChatCompletionOptions, retries: number = 3, ): Promise { @@ -319,7 +323,14 @@ export class OpenAIClient extends LLMClient { messages: formattedMessages, response_format: responseFormat, stream: false, - tools: options.tools?.filter((tool) => "function" in tool), // ensure only OpenAI tools are used + tools: options.tools?.map((tool) => ({ + function: { + name: tool.name, + description: tool.description, + 
parameters: tool.parameters, + }, + type: "function", + })), }; const response = await this.client.chat.completions.create(body); diff --git a/lib/prompt.ts b/lib/prompt.ts index 37217993..51224e65 100644 --- a/lib/prompt.ts +++ b/lib/prompt.ts @@ -1,4 +1,4 @@ -import OpenAI from "openai"; +import { LLMTool } from "../types/llm"; import { ChatMessage } from "./llm/LLMClient"; // act @@ -135,65 +135,60 @@ ${Object.keys(variables) }; } -export const actTools: Array = [ +export const actTools: LLMTool[] = [ { type: "function", - function: { - name: "doAction", - description: - "execute the next playwright step that directly accomplishes the goal", - parameters: { - type: "object", - required: ["method", "element", "args", "step", "completed"], - properties: { - method: { - type: "string", - description: "The playwright function to call.", - }, - element: { - type: "number", - description: "The element number to act on", - }, - args: { - type: "array", - description: "The required arguments", - items: { - type: "string", - description: "The argument to pass to the function", - }, - }, - step: { - type: "string", - description: - "human readable description of the step that is taken in the past tense. Please be very detailed.", - }, - why: { + name: "doAction", + description: + "execute the next playwright step that directly accomplishes the goal", + parameters: { + type: "object", + required: ["method", "element", "args", "step", "completed"], + properties: { + method: { + type: "string", + description: "The playwright function to call.", + }, + element: { + type: "number", + description: "The element number to act on", + }, + args: { + type: "array", + description: "The required arguments", + items: { type: "string", - description: - "why is this step taken? 
how does it advance the goal?", - }, - completed: { - type: "boolean", - description: - "true if the goal should be accomplished after this step", + description: "The argument to pass to the function", }, }, + step: { + type: "string", + description: + "human readable description of the step that is taken in the past tense. Please be very detailed.", + }, + why: { + type: "string", + description: "why is this step taken? how does it advance the goal?", + }, + completed: { + type: "boolean", + description: + "true if the goal should be accomplished after this step", + }, }, }, }, { type: "function", - function: { - name: "skipSection", - description: - "skips this area of the webpage because the current goal cannot be accomplished here", - parameters: { - type: "object", - properties: { - reason: { - type: "string", - description: "reason that no action is taken", - }, + name: "skipSection", + description: + "skips this area of the webpage because the current goal cannot be accomplished here", + parameters: { + type: "object", + properties: { + reason: { + type: "string", + description: "reason that no action is taken", }, }, }, diff --git a/types/llm.ts b/types/llm.ts new file mode 100644 index 00000000..f383b97e --- /dev/null +++ b/types/llm.ts @@ -0,0 +1,6 @@ +export interface LLMTool { + type: "function"; + name: string; + description: string; + parameters: Record; +} diff --git a/types/model.ts b/types/model.ts index 50f80364..f4406b79 100644 --- a/types/model.ts +++ b/types/model.ts @@ -1,6 +1,5 @@ import type { ClientOptions as AnthropicClientOptions } from "@anthropic-ai/sdk"; import type { ClientOptions as OpenAIClientOptions } from "openai"; -import { ChatCompletionTool as OpenAITool } from "openai/resources"; import { z } from "zod"; export const AvailableModelSchema = z.enum([ @@ -20,36 +19,6 @@ export type ModelProvider = "openai" | "anthropic"; export type ClientOptions = OpenAIClientOptions | AnthropicClientOptions; -export type ToolCall = OpenAITool; 
- -export type AnthropicTransformedResponse = { - id: string; - object: string; - created: number; - model: string; - choices: { - index: number; - message: { - role: string; - content: string | null; - tool_calls: { - id: string; - type: string; - function: { - name: string; - arguments: string; - }; - }[]; - }; - finish_reason: string; - }[]; - usage: { - prompt_tokens: number; - completion_tokens: number; - total_tokens: number; - }; -}; - export interface AnthropicJsonSchemaObject { definitions?: { MySchema?: { properties?: Record; required?: string[] }; From 75c0e20cde54951399753e0fa841df463e1271b8 Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Mon, 6 Jan 2025 21:24:09 -0500 Subject: [PATCH 09/20] update: LLMClient default inherits logger from Stagehand (#366) (#367) * update: LLMClient default inherits logger from Stagehand * update: add changeset * update: export LLMClient Co-authored-by: Arihan Varanasi <63890951+arihanv@users.noreply.github.com> --- .changeset/mean-swans-fix.md | 5 +++++ examples/external_client.ts | 12 ++++-------- examples/external_clients/ollama.ts | 18 ++++++++++++------ lib/index.ts | 6 ++++++ lib/llm/AnthropicClient.ts | 18 ++++++++++++------ lib/llm/LLMProvider.ts | 20 ++++++++++---------- lib/llm/OpenAIClient.ts | 18 ++++++++++++------ 7 files changed, 61 insertions(+), 36 deletions(-) create mode 100644 .changeset/mean-swans-fix.md diff --git a/.changeset/mean-swans-fix.md b/.changeset/mean-swans-fix.md new file mode 100644 index 00000000..e8f52844 --- /dev/null +++ b/.changeset/mean-swans-fix.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": minor +--- + +Logger in LLMClient is inherited by default from Stagehand. Named rather than positional arguments are used in implemented LLMClients. 
diff --git a/examples/external_client.ts b/examples/external_client.ts index 03f21dc7..de869ee9 100644 --- a/examples/external_client.ts +++ b/examples/external_client.ts @@ -1,4 +1,4 @@ -import { type ConstructorParams, type LogLine, Stagehand } from "../lib"; +import { type ConstructorParams, Stagehand } from "../lib"; import { z } from "zod"; import { OllamaClient } from "./external_clients/ollama"; @@ -7,13 +7,9 @@ const StagehandConfig: ConstructorParams = { apiKey: process.env.BROWSERBASE_API_KEY, projectId: process.env.BROWSERBASE_PROJECT_ID, verbose: 1, - llmClient: new OllamaClient( - (message: LogLine) => - console.log(`[stagehand::${message.category}] ${message.message}`), - false, - undefined, - "llama3.2", - ), + llmClient: new OllamaClient({ + modelName: "llama3.2", + }), debugDom: true, }; diff --git a/examples/external_clients/ollama.ts b/examples/external_clients/ollama.ts index 5d5b68f7..d884e7fb 100644 --- a/examples/external_clients/ollama.ts +++ b/examples/external_clients/ollama.ts @@ -28,13 +28,19 @@ export class OllamaClient extends LLMClient { private enableCaching: boolean; public clientOptions: ClientOptions; - constructor( - logger: (message: LogLine) => void, + constructor({ + logger, enableCaching = false, - cache: LLMCache | undefined, - modelName: "llama3.2", - clientOptions?: ClientOptions, - ) { + cache = undefined, + modelName = "llama3.2", + clientOptions, + }: { + logger?: (message: LogLine) => void; + enableCaching?: boolean; + cache?: LLMCache; + modelName?: string; + clientOptions?: ClientOptions; + }) { super(modelName as AvailableModel); this.client = new OpenAI({ ...clientOptions, diff --git a/lib/index.ts b/lib/index.ts index 299489ca..c898074c 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -365,6 +365,11 @@ export class Stagehand { modelName ?? DEFAULT_MODEL_NAME, modelClientOptions, ); + + if (!this.llmClient.logger) { + this.llmClient.logger = this.logger; + } + this.domSettleTimeoutMs = domSettleTimeoutMs ?? 
30_000; this.headless = headless ?? false; this.browserbaseSessionCreateParams = browserbaseSessionCreateParams; @@ -614,3 +619,4 @@ export * from "../types/model"; export * from "../types/playwright"; export * from "../types/stagehand"; export * from "../types/page"; +export { LLMClient } from "./llm/LLMClient"; diff --git a/lib/llm/AnthropicClient.ts b/lib/llm/AnthropicClient.ts index 1675547d..69f217d9 100644 --- a/lib/llm/AnthropicClient.ts +++ b/lib/llm/AnthropicClient.ts @@ -19,13 +19,19 @@ export class AnthropicClient extends LLMClient { private enableCaching: boolean; public clientOptions: ClientOptions; - constructor( - logger: (message: LogLine) => void, + constructor({ + logger, enableCaching = false, - cache: LLMCache | undefined, - modelName: AvailableModel, - clientOptions?: ClientOptions, - ) { + cache, + modelName, + clientOptions, + }: { + logger: (message: LogLine) => void; + enableCaching?: boolean; + cache?: LLMCache; + modelName: AvailableModel; + clientOptions?: ClientOptions; + }) { super(modelName); this.client = new Anthropic(clientOptions); this.logger = logger; diff --git a/lib/llm/LLMProvider.ts b/lib/llm/LLMProvider.ts index d6f62217..358eb1a8 100644 --- a/lib/llm/LLMProvider.ts +++ b/lib/llm/LLMProvider.ts @@ -61,21 +61,21 @@ export class LLMProvider { switch (provider) { case "openai": - return new OpenAIClient( - this.logger, - this.enableCaching, - this.cache, + return new OpenAIClient({ + logger: this.logger, + enableCaching: this.enableCaching, + cache: this.cache, modelName, clientOptions, - ); + }); case "anthropic": - return new AnthropicClient( - this.logger, - this.enableCaching, - this.cache, + return new AnthropicClient({ + logger: this.logger, + enableCaching: this.enableCaching, + cache: this.cache, modelName, clientOptions, - ); + }); default: throw new Error(`Unsupported provider: ${provider}`); } diff --git a/lib/llm/OpenAIClient.ts b/lib/llm/OpenAIClient.ts index 336974d5..2b96e284 100644 --- a/lib/llm/OpenAIClient.ts 
+++ b/lib/llm/OpenAIClient.ts @@ -29,13 +29,19 @@ export class OpenAIClient extends LLMClient { private enableCaching: boolean; public clientOptions: ClientOptions; - constructor( - logger: (message: LogLine) => void, + constructor({ + logger, enableCaching = false, - cache: LLMCache | undefined, - modelName: AvailableModel, - clientOptions?: ClientOptions, - ) { + cache, + modelName, + clientOptions, + }: { + logger: (message: LogLine) => void; + enableCaching?: boolean; + cache?: LLMCache; + modelName: AvailableModel; + clientOptions?: ClientOptions; + }) { super(modelName); this.clientOptions = clientOptions; this.client = new OpenAI(clientOptions); From 7ee584174dfba384dafe85379dcc91abe2c0b04c Mon Sep 17 00:00:00 2001 From: Sean McGuire <75873287+seanmcguire12@users.noreply.github.com> Date: Tue, 7 Jan 2025 15:15:48 -0400 Subject: [PATCH 10/20] Fix deterministic evals (#370) * disable caching to fix mem leak * separate configs for bb and regular e2e * dont require API keys for regular e2e tests * move bb tests to Browserbase dir * run regular e2e tests on external PRs * prettier * address comments * update package.json * Update evals/deterministic/e2e.playwright.config.ts Co-authored-by: Anirudh Kamath --------- Co-authored-by: Anirudh Kamath --- .github/workflows/ci.yml | 37 ++++++++++++++++++- ...ight.config.ts => bb.playwright.config.ts} | 2 +- evals/deterministic/e2e.playwright.config.ts | 33 +++++++++++++++++ evals/deterministic/stagehand.config.ts | 10 ++--- .../{page => browserbase}/contexts.test.ts | 0 .../{page => browserbase}/downloads.test.ts | 0 .../{page => browserbase}/uploads.test.ts | 0 package.json | 3 +- 8 files changed, 76 insertions(+), 9 deletions(-) rename evals/deterministic/{playwright.config.ts => bb.playwright.config.ts} (96%) create mode 100644 evals/deterministic/e2e.playwright.config.ts rename evals/deterministic/tests/{page => browserbase}/contexts.test.ts (100%) rename evals/deterministic/tests/{page => 
browserbase}/downloads.test.ts (100%) rename evals/deterministic/tests/{page => browserbase}/uploads.test.ts (100%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b1254308..d041ac6a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,6 +41,7 @@ jobs: echo "run-act=${{ contains(github.event.pull_request.labels.*.name, 'act') }}" >> $GITHUB_OUTPUT echo "run-observe=${{ contains(github.event.pull_request.labels.*.name, 'observe') }}" >> $GITHUB_OUTPUT echo "run-text-extract=${{ contains(github.event.pull_request.labels.*.name, 'text-extract') }}" >> $GITHUB_OUTPUT + run-lint: runs-on: ubuntu-latest steps: @@ -79,6 +80,38 @@ jobs: needs: [run-lint, run-build] runs-on: ubuntu-latest timeout-minutes: 50 + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + HEADLESS: true + + steps: + - name: Check out repository code + uses: actions/checkout@v4 + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Install dependencies + run: npm install --no-frozen-lockfile + + - name: Install Playwright browsers + run: npm exec playwright install --with-deps + + - name: Run E2E Tests (Deterministic Playwright) + run: npm run e2e + + run-e2e-bb-tests: + needs: [run-e2e-tests] + runs-on: ubuntu-latest + timeout-minutes: 50 + + if: > + github.event_name == 'push' || + (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository) + env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} @@ -101,8 +134,8 @@ jobs: - name: Install Playwright browsers run: npm exec playwright install --with-deps - - name: Run E2E Tests - run: npm run e2e + - name: Run E2E Tests (browserbase) + run: npm run e2e:bb run-combination-evals: needs: [run-e2e-tests, determine-evals] diff --git a/evals/deterministic/playwright.config.ts b/evals/deterministic/bb.playwright.config.ts 
similarity index 96% rename from evals/deterministic/playwright.config.ts rename to evals/deterministic/bb.playwright.config.ts index c6b9ef3c..cea358f5 100644 --- a/evals/deterministic/playwright.config.ts +++ b/evals/deterministic/bb.playwright.config.ts @@ -4,7 +4,7 @@ import { defineConfig, devices } from "@playwright/test"; * See https://playwright.dev/docs/test-configuration. */ export default defineConfig({ - testDir: "./tests", + testDir: "./tests/browserbase", /* Fail the build on CI if you accidentally left test.only in the source code. */ /* Run tests in files in parallel */ diff --git a/evals/deterministic/e2e.playwright.config.ts b/evals/deterministic/e2e.playwright.config.ts new file mode 100644 index 00000000..e35f3a12 --- /dev/null +++ b/evals/deterministic/e2e.playwright.config.ts @@ -0,0 +1,33 @@ +import { defineConfig, devices } from "@playwright/test"; + +/** + * See https://playwright.dev/docs/test-configuration. + */ +export default defineConfig({ + // Look in "tests" for test files... + testDir: "./tests", + // ...but ignore anything in "tests/browserbase" + testIgnore: ["**/browserbase/**"], + + /* Fail the build on CI if you accidentally left test.only in the source code. */ + /* Run tests in files in parallel */ + fullyParallel: true, + /* Reporter to use. See https://playwright.dev/docs/test-reporters */ + // reporter: "html", + reporter: "line", + retries: 2, + + /* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */ + use: { + /* Collect trace when retrying the failed test. 
See https://playwright.dev/docs/trace-viewer */ + trace: "on-first-retry", + }, + + /* Configure projects for major browsers */ + projects: [ + { + name: "chromium", + use: { ...devices["Desktop Chrome"] }, + }, + ], +}); diff --git a/evals/deterministic/stagehand.config.ts b/evals/deterministic/stagehand.config.ts index 52e9f455..b5478c0d 100644 --- a/evals/deterministic/stagehand.config.ts +++ b/evals/deterministic/stagehand.config.ts @@ -4,8 +4,8 @@ dotenv.config({ path: "../../.env" }); const StagehandConfig: ConstructorParams = { env: "LOCAL" /* Environment to run Stagehand in */, - apiKey: process.env.BROWSERBASE_API_KEY! /* API key for authentication */, - projectId: process.env.BROWSERBASE_PROJECT_ID! /* Project identifier */, + apiKey: process.env.BROWSERBASE_API_KEY /* API key for authentication */, + projectId: process.env.BROWSERBASE_PROJECT_ID /* Project identifier */, verbose: 1 /* Logging verbosity level (0=quiet, 1=normal, 2=verbose) */, debugDom: true /* Enable DOM debugging features */, headless: true /* Run browser in headless mode */, @@ -15,11 +15,11 @@ const StagehandConfig: ConstructorParams = { ) /* Custom logging function */, domSettleTimeoutMs: 30_000 /* Timeout for DOM to settle in milliseconds */, browserbaseSessionCreateParams: { - projectId: process.env.BROWSERBASE_PROJECT_ID!, + projectId: process.env.BROWSERBASE_PROJECT_ID, }, - enableCaching: true /* Enable caching functionality */, + enableCaching: false /* Enable caching functionality */, browserbaseSessionID: - undefined /* Session ID for resuming Browserbase sessions */, + undefined /* Session ID for resuming browserbase sessions */, modelName: "gpt-4o" /* Name of the model to use */, modelClientOptions: { apiKey: process.env.OPENAI_API_KEY, diff --git a/evals/deterministic/tests/page/contexts.test.ts b/evals/deterministic/tests/browserbase/contexts.test.ts similarity index 100% rename from evals/deterministic/tests/page/contexts.test.ts rename to 
evals/deterministic/tests/browserbase/contexts.test.ts diff --git a/evals/deterministic/tests/page/downloads.test.ts b/evals/deterministic/tests/browserbase/downloads.test.ts similarity index 100% rename from evals/deterministic/tests/page/downloads.test.ts rename to evals/deterministic/tests/browserbase/downloads.test.ts diff --git a/evals/deterministic/tests/page/uploads.test.ts b/evals/deterministic/tests/browserbase/uploads.test.ts similarity index 100% rename from evals/deterministic/tests/page/uploads.test.ts rename to evals/deterministic/tests/browserbase/uploads.test.ts diff --git a/package.json b/package.json index 682f0a4a..9bd4f4ab 100644 --- a/package.json +++ b/package.json @@ -17,7 +17,8 @@ "eslint": "eslint .", "cache:clear": "rm -rf .cache", "evals": "npm run build-dom-scripts && tsx evals/index.eval.ts", - "e2e": "npm run build-dom-scripts && cd evals/deterministic && npx playwright test", + "e2e": "npm run build-dom-scripts && cd evals/deterministic && npx playwright test --config=e2e.playwright.config.ts", + "e2e:bb": "npm run build-dom-scripts && cd evals/deterministic && npx playwright test --config=bb.playwright.config.ts", "build-dom-scripts": "tsx lib/dom/genDomScripts.ts", "build-types": "tsc --emitDeclarationOnly --outDir dist", "build-js": "tsup lib/index.ts --dts", From b1c657976847de86d82324030f90c2f6a1f3f976 Mon Sep 17 00:00:00 2001 From: Sean McGuire <75873287+seanmcguire12@users.noreply.github.com> Date: Tue, 7 Jan 2025 19:30:28 -0400 Subject: [PATCH 11/20] dont require LLMClient to use stagehand (#379) * dont require LLMClient to use stagehand * make sure default LLMClient still works if api key is available * changeset * set logger if not defined * rm API keys for e2e in CI * update error msg * test error occurs without API key or LLM client --- .changeset/polite-papayas-occur.md | 5 ++ .github/workflows/ci.yml | 2 - .../tests/Errors/apiKeyError.test.ts | 77 ++++++++++++++++++ lib/StagehandPage.ts | 80 +++++++++++-------- 
lib/index.ts | 23 +++--- 5 files changed, 141 insertions(+), 46 deletions(-) create mode 100644 .changeset/polite-papayas-occur.md create mode 100644 evals/deterministic/tests/Errors/apiKeyError.test.ts diff --git a/.changeset/polite-papayas-occur.md b/.changeset/polite-papayas-occur.md new file mode 100644 index 00000000..ab101e4a --- /dev/null +++ b/.changeset/polite-papayas-occur.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +dont require LLM Client to use non-ai stagehand functions diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d041ac6a..3642d4ee 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -81,8 +81,6 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 50 env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} HEADLESS: true steps: diff --git a/evals/deterministic/tests/Errors/apiKeyError.test.ts b/evals/deterministic/tests/Errors/apiKeyError.test.ts new file mode 100644 index 00000000..c181fe27 --- /dev/null +++ b/evals/deterministic/tests/Errors/apiKeyError.test.ts @@ -0,0 +1,77 @@ +import { test, expect } from "@playwright/test"; +import { Stagehand } from "../../../../lib"; +import StagehandConfig from "../../stagehand.config"; +import { z } from "zod"; + +test.describe("API key/LLMClient error", () => { + test("Should confirm that we get an error if we call extract without LLM API key or LLMClient", async () => { + const stagehand = new Stagehand(StagehandConfig); + await stagehand.init(); + await stagehand.page.goto("https://docs.browserbase.com/introduction"); + + let errorThrown: Error | null = null; + + try { + await stagehand.page.extract({ + instruction: + "From the introduction page, extract the explanation of what Browserbase is.", + schema: z.object({ + stars: z.string().describe("the explanation of what Browserbase is"), + }), + }); + } catch (error) { + errorThrown = error as Error; + } + + 
expect(errorThrown).toBeInstanceOf(Error); + expect(errorThrown?.message).toContain( + "No LLM API key or LLM Client configured", + ); + + await stagehand.close(); + }); + + test("Should confirm that we get an error if we call act without LLM API key or LLMClient", async () => { + const stagehand = new Stagehand(StagehandConfig); + await stagehand.init(); + await stagehand.page.goto("https://docs.browserbase.com/introduction"); + + let errorThrown: Error | null = null; + + try { + await stagehand.page.act({ + action: "Click on the 'Quickstart' section", + }); + } catch (error) { + errorThrown = error as Error; + } + + expect(errorThrown).toBeInstanceOf(Error); + expect(errorThrown?.message).toContain( + "No LLM API key or LLM Client configured", + ); + + await stagehand.close(); + }); + + test("Should confirm that we get an error if we call observe without LLM API key or LLMClient", async () => { + const stagehand = new Stagehand(StagehandConfig); + await stagehand.init(); + await stagehand.page.goto("https://docs.browserbase.com/introduction"); + + let errorThrown: Error | null = null; + + try { + await stagehand.page.observe(); + } catch (error) { + errorThrown = error as Error; + } + + expect(errorThrown).toBeInstanceOf(Error); + expect(errorThrown?.message).toContain( + "No LLM API key or LLM Client configured", + ); + + await stagehand.close(); + }); +}); diff --git a/lib/StagehandPage.ts b/lib/StagehandPage.ts index fbbf85b0..d7bcafe5 100644 --- a/lib/StagehandPage.ts +++ b/lib/StagehandPage.ts @@ -56,26 +56,28 @@ export class StagehandPage { }); this.stagehand = stagehand; this.intContext = context; - this.actHandler = new StagehandActHandler({ - verbose: this.stagehand.verbose, - llmProvider: this.stagehand.llmProvider, - enableCaching: this.stagehand.enableCaching, - logger: this.stagehand.logger, - stagehandPage: this, - stagehandContext: this.intContext, - llmClient: llmClient, - }); - this.extractHandler = new StagehandExtractHandler({ - stagehand: 
this.stagehand, - logger: this.stagehand.logger, - stagehandPage: this, - }); - this.observeHandler = new StagehandObserveHandler({ - stagehand: this.stagehand, - logger: this.stagehand.logger, - stagehandPage: this, - }); this.llmClient = llmClient; + if (this.llmClient) { + this.actHandler = new StagehandActHandler({ + verbose: this.stagehand.verbose, + llmProvider: this.stagehand.llmProvider, + enableCaching: this.stagehand.enableCaching, + logger: this.stagehand.logger, + stagehandPage: this, + stagehandContext: this.intContext, + llmClient: llmClient, + }); + this.extractHandler = new StagehandExtractHandler({ + stagehand: this.stagehand, + logger: this.stagehand.logger, + stagehandPage: this, + }); + this.observeHandler = new StagehandObserveHandler({ + stagehand: this.stagehand, + logger: this.stagehand.logger, + stagehandPage: this, + }); + } } async init(): Promise { @@ -98,22 +100,30 @@ export class StagehandPage { return result; }; - if (prop === "act") { - return async (options: ActOptions) => { - return this.act(options); - }; - } - - if (prop === "extract") { - return async (options: ExtractOptions) => { - return this.extract(options); - }; - } - - if (prop === "observe") { - return async (options: ObserveOptions) => { - return this.observe(options); - }; + if (this.llmClient) { + if (prop === "act") { + return async (options: ActOptions) => { + return this.act(options); + }; + } + if (prop === "extract") { + return async (options: ExtractOptions) => { + return this.extract(options); + }; + } + if (prop === "observe") { + return async (options: ObserveOptions) => { + return this.observe(options); + }; + } + } else { + if (prop === "act" || prop === "extract" || prop === "observe") { + return () => { + throw new Error( + "No LLM API key or LLM Client configured. 
An LLM API key or a custom LLM Client is required to use act, extract, or observe.", + ); + }; + } } if (prop === "on") { diff --git a/lib/index.ts b/lib/index.ts index c898074c..0d958f67 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -359,17 +359,22 @@ export class Stagehand { this.projectId = projectId ?? process.env.BROWSERBASE_PROJECT_ID; this.verbose = verbose ?? 0; this.debugDom = debugDom ?? false; - this.llmClient = - llmClient || - this.llmProvider.getClient( - modelName ?? DEFAULT_MODEL_NAME, - modelClientOptions, - ); - - if (!this.llmClient.logger) { + if (llmClient) { + this.llmClient = llmClient; + } else { + try { + // try to set a default LLM client + this.llmClient = this.llmProvider.getClient( + modelName ?? DEFAULT_MODEL_NAME, + modelClientOptions, + ); + } catch { + this.llmClient = undefined; + } + } + if (this.llmClient && !this.llmClient.logger) { this.llmClient.logger = this.logger; } - this.domSettleTimeoutMs = domSettleTimeoutMs ?? 30_000; this.headless = headless ?? false; this.browserbaseSessionCreateParams = browserbaseSessionCreateParams; From 30e7d091445004c71aec1748d3a7d75fb86d1f11 Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Wed, 8 Jan 2025 01:44:49 -0500 Subject: [PATCH 12/20] add branding to readme and link to docs (#371) * add branding to readme * logo * light/dark mode * better light/dark mode * logo * changeset * remove license link * link to mit license * add branding to readme * logo * light/dark mode * better light/dark mode * logo * changeset * remove license link * link to mit license * cleanup readme * env vars * more cleanup * more cleanup * don't fmt readme * move prereqs * available models * fix note * cleanup * axe more * axe even more * contributors * env vars * need to add contributing guide * contribution guide * contributions * installation * rm stagehand v. 
playwright * temp * move note * wording * lindsay edits * change interoperable --- .changeset/poor-eels-sin.md | 5 + .prettierignore | 1 + README.md | 579 ++++-------------------------------- 3 files changed, 69 insertions(+), 516 deletions(-) create mode 100644 .changeset/poor-eels-sin.md diff --git a/.changeset/poor-eels-sin.md b/.changeset/poor-eels-sin.md new file mode 100644 index 00000000..cc75ecb5 --- /dev/null +++ b/.changeset/poor-eels-sin.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +pretty readme :) diff --git a/.prettierignore b/.prettierignore index bd5535a6..39cb3f67 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1 +1,2 @@ pnpm-lock.yaml +README.md diff --git a/README.md b/README.md index b75fe2dc..7227ae08 100644 --- a/README.md +++ b/README.md @@ -1,570 +1,117 @@

- An AI web browsing framework focused on simplicity and extensibility. + An AI web browsing framework focused on simplicity and extensibility.
+ Read the Docs

- NPM - MIT License - Slack Community + + + + NPM + + + + + + MIT License + + + + + + Slack Community + +

--- -- [Intro](#intro) -- [Getting Started](#getting-started) -- [API Reference](#api-reference) - - [act()](#act) - - [extract()](#extract) - - [observe()](#observe) - - [close()](#close) -- [Model Support](#model-support) -- [How It Works](#how-it-works) -- [Stagehand vs Playwright](#stagehand-vs-playwright) -- [Prompting Tips](#prompting-tips) -- [Roadmap](#roadmap) -- [Contributing](#contributing) -- [Acknowledgements](#acknowledgements) -- [License](#license) +Stagehand is the easiest way to build browser automations. It is fully compatible with [Playwright](https://playwright.dev/), offering three simple AI APIs (`act`, `extract`, and `observe`) on top of the base Playwright `Page` class that provide the building blocks for web automation via natural language. It also makes Playwright more accessible to non-technical users and less vulnerable to minor changes in the UI/DOM. -> [!NOTE] -> `Stagehand` is currently available as an early release, and we're actively seeking feedback from the community. Please join our [Slack community](https://join.slack.com/t/stagehand-dev/shared_invite/zt-2tdncfgkk-fF8y5U0uJzR2y2_M9c9OJA) to stay updated on the latest developments and provide feedback. - -## Intro +Anything that can be done in a browser can be done with Stagehand. Consider: -Stagehand is the AI-powered successor to [Playwright](https://github.com/microsoft/playwright), offering three simple APIs (`act`, `extract`, and `observe`) that provide the building blocks for natural language driven web automation. +1. Go to Hacker News and extract the top stories of the day +1. Log into Amazon, search for AirPods, and buy the most relevant product +1. Go to ESPN, search for Steph Curry, and get stats for his last 10 games -The goal of Stagehand is to provide a lightweight, configurable framework, without overly complex abstractions, as well as modular support for different models and model providers. 
It's not going to order you a pizza, but it will help you reliably automate the web. +Stagehand makes it easier to write durable, performant browser automation code. When used with [Browserbase](https://browserbase.com/), it offers unparalleled debugging tools like session replay and step-by-step debugging. -Each Stagehand function takes in an atomic instruction, such as `act("click the login button")` or `extract("find the red shoes")`, generates the appropriate Playwright code to accomplish that instruction, and executes it. +> [!NOTE] +> `Stagehand` is currently available as an early release, and we're actively seeking feedback from the community. Please join our [Slack community](https://join.slack.com/t/stagehand-dev/shared_invite/zt-2tdncfgkk-fF8y5U0uJzR2y2_M9c9OJA) to stay updated on the latest developments and provide feedback. -Instructions should be atomic to increase reliability, and step planning should be handled by the higher level agent. You can use `observe()` to get a suggested list of actions that can be taken on the current page, and then use those to ground your step planning prompts. +## Documentation -Stagehand is [open source](#license) and maintained by the [Browserbase](https://browserbase.com) team. We believe that by enabling more developers to build reliable web automations, we'll expand the market of developers who benefit from our headless browser infrastructure. This is the framework that we wished we had while tinkering on our own applications, and we're excited to share it with you. +Visit [docs.stagehand.dev](https://docs.stagehand.dev) to view the full documentation. ## Getting Started -### 1. Install the Stagehand package +### Quickstart -We also install zod to power typed extraction +To create a new Stagehand project configured to our default settings, run: ```bash -npm install @browserbasehq/stagehand zod -``` - -### 2. 
Configure your model provider - -You'll need to provide your API Key for the model provider you'd like to use. The default model provider is OpenAI, but you can also use Anthropic or others. More information on supported models can be found in the [API Reference](#api-reference). - -Ensure that an OpenAI API Key or Anthropic API key is accessible in your local environment. - -``` -export OPENAI_API_KEY=sk-... -export ANTHROPIC_API_KEY=sk-... +npx create-browser-app --example quickstart ``` -### 3. Create a Stagehand Instance +Read our [Quickstart Guide](https://docs.stagehand.dev/get_started/quickstart) in the docs for more information. -If you plan to run the browser locally, you'll also need to install Playwright's browser dependencies. +You can also add Stagehand to an existing Typescript project by running: ```bash -npm exec playwright install -``` - -Then you can create a Stagehand instance like so: - -```javascript -import { Stagehand } from "@browserbasehq/stagehand"; -import { z } from "zod"; - -const stagehand = new Stagehand({ - env: "LOCAL", -}); +npm install @browserbasehq/stagehand zod +npx playwright install # if running locally ``` -If you plan to run the browser remotely, you'll need to set a Browserbase API Key and Project ID. +### Build and Run from Source ```bash -export BROWSERBASE_API_KEY=... -export BROWSERBASE_PROJECT_ID=... -``` - -```javascript -import { Stagehand } from "@browserbasehq/stagehand"; -import { z } from "zod"; - -const stagehand = new Stagehand({ - env: "BROWSERBASE", - enableCaching: true, -}); -``` - -### 4. 
Run your first automation - -```javascript -await stagehand.init(); -const page = stagehand.page; -await page.goto("https://github.com/browserbase/stagehand"); -await page.act({ action: "click on the contributors" }); -const contributor = await page.extract({ - instruction: "extract the top contributor", - schema: z.object({ - username: z.string(), - url: z.string(), - }), -}); -await stagehand.close(); -console.log(`Our favorite contributor is ${contributor.username}`); -``` - -This simple snippet will open a browser, navigate to the Stagehand repo, and log the top contributor. - -## API Reference - -### `Stagehand()` - -This constructor is used to create an instance of Stagehand. - -- **Arguments:** - - - `env`: `'LOCAL'` or `'BROWSERBASE'`. Defaults to `'BROWSERBASE'`. - - `modelName`: (optional) an `AvailableModel` string to specify the default model to use. - - `modelClientOptions`: (optional) configuration options for the model client. - - `enableCaching`: a `boolean` that enables caching of LLM responses. When set to `true`, the LLM requests will be cached on disk and reused for identical requests. Defaults to `false`. - - `headless`: a `boolean` that determines if the browser runs in headless mode. Defaults to `false`. When the env is set to `BROWSERBASE`, this will be ignored. - - `domSettleTimeoutMs`: an `integer` that specifies the timeout in milliseconds for waiting for the DOM to settle. Defaults to 30000 (30 seconds). - - `apiKey`: (optional) your Browserbase API key. Defaults to `BROWSERBASE_API_KEY` environment variable. - - `projectId`: (optional) your Browserbase project ID. Defaults to `BROWSERBASE_PROJECT_ID` environment variable. - - `browserbaseSessionCreateParams`: configuration options for creating new Browserbase sessions. - - `browserbaseSessionID`: ID of an existing live Browserbase session. Overrides `browserbaseSessionCreateParams`. - - `logger`: a function that handles log messages. Useful for custom logging implementations. 
- - `verbose`: an `integer` that enables several levels of logging during automation: - - `0`: limited to no logging - - `1`: SDK-level logging - - `2`: LLM-client level logging (most granular) - - `debugDom`: a `boolean` that draws bounding boxes around elements presented to the LLM during automation. - - `llmClient`: (optional) a custom `LLMClient` implementation. - -- **Returns:** - - - An instance of the `Stagehand` class configured with the specified options. - -- **Example:** - - ```javascript - // Basic usage - const stagehand = new Stagehand(); - - // Custom configuration - const stagehand = new Stagehand({ - env: "LOCAL", - verbose: 1, - headless: true, - enableCaching: true, - logger: (logLine) => { - console.log(`[${logLine.category}] ${logLine.message}`); - }, - }); - - // Resume existing Browserbase session - const stagehand = new Stagehand({ - env: "BROWSERBASE", - browserbaseSessionID: "existing-session-id", - }); - ``` - -### Methods - -#### `init()` - -`init()` asynchronously initializes the Stagehand instance. It should be called before any other methods. - -> [!WARNING] -> Passing parameters to `init()` is deprecated and will be removed in the next major version. Use the constructor options instead. - -- **Arguments:** - - - `modelName`: (**deprecated**, optional) an `AvailableModel` string to specify the model to use. This will be used for all other methods unless overridden. - - `modelClientOptions`: (**deprecated**, optional) configuration options for the model client - - `domSettleTimeoutMs`: (**deprecated**, optional) timeout in milliseconds for waiting for the DOM to settle - -- **Returns:** - - - A `Promise` that resolves to an object containing: - - `debugUrl`: a `string` representing the URL for live debugging. This is only available when using a Browserbase browser. - - `sessionUrl`: a `string` representing the session URL. This is only available when using a Browserbase browser. - - `sessionId`: a `string` representing the session ID. 
This is only available when using a Browserbase browser. - -- **Example:** - ```javascript - await stagehand.init(); - ``` - -#### `act()` - -`act()` allows Stagehand to interact with a web page. Provide an `action` like `"search for 'x'"`, or `"select the cheapest flight presented"` (small atomic goals perform the best). - -> [!WARNING] -> `act()` on the Stagehand instance is deprecated and will be removed in the next major version. Use `stagehand.page.act()` instead. - -- **Arguments:** - - - `action`: a `string` describing the action to perform - - `modelName`: (optional) an `AvailableModel` string to specify the model to use - - `modelClientOptions`: (optional) configuration options for the model client - - `useVision`: (optional) a `boolean` or `"fallback"` to determine if vision-based processing should be used. Defaults to `"fallback"` - - `variables`: (optional) a `Record` of variables to use in the action. Variables in the action string are referenced using `%variable_name%` - - `domSettleTimeoutMs`: (optional) timeout in milliseconds for waiting for the DOM to settle - -- **Returns:** - - - A `Promise` that resolves to an object containing: - - `success`: a `boolean` indicating if the action was completed successfully. - - `message`: a `string` providing details about the action's execution. - - `action`: a `string` describing the action performed. - -- **Example:** - - ```javascript - // Basic usage - await stagehand.page.act({ action: "click on add to cart" }); - - // Using variables - await stagehand.page.act({ - action: "enter %username% into the username field", - variables: { - username: "john.doe@example.com", - }, - }); - - // Multiple variables - await stagehand.page.act({ - action: "fill in the form with %username% and %password%", - variables: { - username: "john.doe", - password: "secretpass123", - }, - }); - ``` - -#### `extract()` - -`extract()` grabs structured text from the current page using [zod](https://github.com/colinhacks/zod). 
Given instructions and `schema`, you will receive structured data. Unlike some extraction libraries, stagehand can extract any information on a page, not just the main article contents. - -> [!WARNING] -> `extract()` on the Stagehand instance is deprecated and will be removed in the next major version. Use `stagehand.page.extract()` instead. - -- **Arguments:** - - - `instruction`: a `string` providing instructions for extraction - - `schema`: a `z.AnyZodObject` defining the structure of the data to extract - - `modelName`: (optional) an `AvailableModel` string to specify the model to use - - `modelClientOptions`: (optional) configuration options for the model client - - `domSettleTimeoutMs`: (optional) timeout in milliseconds for waiting for the DOM to settle - - `useTextExtract`: (optional) a `boolean` to determine if text-based extraction should be used. Defaults to `false` - -- **Returns:** - - - A `Promise` that resolves to the structured data as defined by the provided `schema`. - -- **Example:** - ```javascript - const price = await stagehand.page.extract({ - instruction: "extract the price of the item", - schema: z.object({ - price: z.number(), - }), - }); - ``` - -#### `observe()` - -> [!WARNING] -> `observe()` on the Stagehand instance is deprecated and will be removed in the next major version. Use `stagehand.page.observe()` instead. - -> [!NOTE] -> `observe()` currently only evaluates the first chunk in the page. - -`observe()` is used to get a list of actions that can be taken on the current page. It's useful for adding context to your planning step, or if you unsure of what page you're on. - -If you are looking for a specific element, you can also pass in an instruction to observe via: `observe({ instruction: "{your instruction}"})`. - -- **Arguments:** - - - `instruction`: (optional) a `string` providing instructions for the observation. Defaults to "Find actions that can be performed on this page." 
- - `modelName`: (optional) an `AvailableModel` string to specify the model to use - - `modelClientOptions`: (optional) configuration options for the model client - - `useVision`: (optional) a `boolean` to determine if vision-based processing should be used. Defaults to `false` - - `domSettleTimeoutMs`: (optional) timeout in milliseconds for waiting for the DOM to settle - -- **Returns:** - - - A `Promise` that resolves to an array of objects containing: - - `selector`: a `string` representing the element selector - - `description`: a `string` describing the possible action - -- **Example:** - ```javascript - const actions = await stagehand.page.observe(); - ``` - -#### `close()` - -`close()` is a cleanup method to remove the temporary files created by Stagehand. It's highly recommended that you call this when you're done with your automation. - -- **Example:** - ```javascript - await stagehand.close(); - ``` - -#### `page` and `context` - -`page` and `context` are instances of Playwright's `Page` and `BrowserContext` respectively. Use these methods to interact with the Playwright instance that Stagehand is using. Most commonly, you'll use `page.goto()` to navigate to a URL. - -- **Example:** - ```javascript - await stagehand.page.goto("https://github.com/browserbase/stagehand"); - ``` - -### `log()` - -`log()` is used to print a message to the browser console. These messages will be persisted in the Browserbase session logs, and can be used to debug sessions after they've completed. - -Make sure the log level is above the verbose level you set when initializing the Stagehand instance. - -- **Example:** - ```javascript - stagehand.log("Hello, world!"); - ``` - -## Model Support - -Stagehand leverages a generic LLM client architecture to support various language models from different providers. This design allows for flexibility, enabling the integration of new models with minimal changes to the core system. 
Different models work better for different tasks, so you can choose the model that best suits your needs. - -#### Currently Supported Models - -Stagehand currently supports the following models from OpenAI and Anthropic: - -- **OpenAI Models:** - - - `gpt-4o` - - `gpt-4o-mini` - - `gpt-4o-2024-08-06` - -- **Anthropic Models:** - - `claude-3-5-sonnet-latest` - - `claude-3-5-sonnet-20240620` - - `claude-3-5-sonnet-20241022` - -These models can be specified when initializing the `Stagehand` instance or when calling methods like `act()` and `extract()`. - -## How It Works - -The SDK has two major phases: - -1. Processing the DOM (including chunking - _see below_). -2. Taking LLM powered actions based on the current state of the DOM. - -### DOM processing - -Stagehand uses a combination of techniques to prepare the DOM. - -The DOM Processing steps look as follows: - -1. Via Playwright, inject a script into the DOM accessible by the SDK that can run processing. -2. Crawl the DOM and create a list of candidate elements. - - Candidate elements are either leaf elements (DOM elements that contain actual user facing substance), or are interactive elements. - - Interactive elements are determined by a combination of roles and HTML tags. -3. Candidate elements that are not active, visible, or at the top of the DOM are discarded. - - The LLM should only receive elements it can faithfully act on on behalf of the agent/user. -4. For each candidate element, an xPath is generated. this guarantees that if this element is picked by the LLM, we'll be able to reliably target it. -5. Return both the list of candidate elements, as well as the map of elements to xPath selectors across the browser back to the SDK, to be analyzed by the LLM. - -#### Chunking - -While LLMs will continue to increase context window length and reduce latency, giving any reasoning system less stuff to think about should make it more reliable. 
As a result, DOM processing is done in chunks in order to keep the context small per inference call. In order to chunk, the SDK considers a candidate element that starts in a section of the viewport to be a part of that chunk. In the future, padding will be added to ensure that an individual chunk does not lack relevant context. See this diagram for how it looks: - -![](./docs/media/chunks.png) - -### Vision - -The `act()` and `observe()` methods can take a `useVision` flag. If this is set to `true`, the LLM will be provided with a annotated screenshot of the current page to identify which elements to act on. This is useful for complex DOMs that the LLM has a hard time reasoning about, even after processing and chunking. By default, this flag is set to `"fallback"`, which means that if the LLM fails to successfully identify a single element, Stagehand will retry the attempt using vision. - -### LLM analysis - -Now we have a list of candidate elements and a way to select them. We can present those elements with additional context to the LLM for extraction or action. While untested on a large scale, presenting a "numbered list of elements" guides the model to not treat the context as a full DOM, but as a list of related but independent elements to operate on. - -In the case of action, we ask the LLM to write a playwright method in order to do the correct thing. In our limited testing, playwright syntax is much more effective than relying on built in javascript APIs, possibly due to tokenization. - -Lastly, we use the LLM to write future instructions to itself to help manage it's progress and goals when operating across chunks. - -### Stagehand vs Playwright - -Below is an example of how to extract a list of companies from the AI Grant website using both Stagehand and Playwright. - -![](./docs/media/stagehand-playwright.png) - -## Prompting Tips - -Prompting Stagehand is more literal and atomic than other higher level frameworks, including agentic frameworks. 
Here are some guidelines to help you craft effective prompts: - -### Do: - -- **Use specific and concise actions** - -```javascript -await stagehand.page.act({ action: "click the login button" }); - -const productInfo = await stagehand.page.extract({ - instruction: "find the red shoes", - schema: z.object({ - productName: z.string(), - price: z.number(), - }), -}); -``` - -- **Break down complex tasks into smaller, atomic steps** - -Instead of combining actions: - -```javascript -// Avoid this -await stagehand.page.act({ action: "log in and purchase the first item" }); -``` - -Split them into individual steps: - -```javascript -await stagehand.page.act({ action: "click the login button" }); -// ...additional steps to log in... -await stagehand.page.act({ action: "click on the first item" }); -await stagehand.page.act({ action: "click the purchase button" }); -``` - -- **Use `observe()` to get actionable suggestions from the current page** - -```javascript -const actions = await stagehand.page.observe(); -console.log("Possible actions:", actions); -``` - -### Don't: - -- **Use broad or ambiguous instructions** - -```javascript -// Too vague -await stagehand.page.act({ action: "find something interesting on the page" }); -``` - -- **Combine multiple actions into one instruction** - -```javascript -// Avoid combining actions -await stagehand.page.act({ action: "fill out the form and submit it" }); +git clone https://github.com/browserbase/stagehand.git +cd stagehand +npm install +npx playwright install +npm run example # run the blank script at ./examples/example.ts ``` -- **Expect Stagehand to perform high-level planning or reasoning** +Stagehand is best when you have an API key for an LLM provider and Browserbase credentials. 
To add these to your project, run: -```javascript -// Outside Stagehand's scope -await stagehand.page.act({ action: "book the cheapest flight available" }); +```bash +cp .env.example .env +nano .env # Edit the .env file to add API keys ``` -By following these guidelines, you'll increase the reliability and effectiveness of your web automations with Stagehand. Remember, Stagehand excels at executing precise, well-defined actions so keeping your instructions atomic will lead to the best outcomes. - -We leave the agentic behaviour to higher-level agentic systems which can use Stagehand as a tool. - -## Roadmap - -At a high level, we're focused on improving reliability, speed, and cost in that order of priority. - -You can see the roadmap [here](./ROADMAP.md). Looking to contribute? Read on! - ## Contributing > [!NOTE] -> We highly value contributions to Stagehand! For support or code review, please join our [Slack community](https://join.slack.com/t/stagehand-dev/shared_invite/zt-2tdncfgkk-fF8y5U0uJzR2y2_M9c9OJA). - -First, clone the repo - -```bash -git clone git@github.com:browserbase/stagehand.git -``` - -Then install dependencies - -```bash -npm install -``` - -Ensure you have the `.env` file as documented above in the Getting Started section. - -Then, run the example script `npm run example`. - -### Development tips - -A good development loop is: - -1. Try things in the example file -2. Use that to make changes to the SDK -3. Write evals that help validate your changes -4. Make sure you don't break existing evals! -5. Open a PR and get it reviewed by the team. - -### Running evals - -You'll need a Braintrust API key to run evals - -```.env -BRAINTRUST_API_KEY="" -``` - -After that, you can run all evals at once using `npm run evals` - -You can also run individual evals using `npm run evals -- your_eval_name`. - -### Adding new evals - -Running all evals can take some time. 
We have a convenience script `example.ts` where you can develop your new single eval before adding it to the set of all evals. - -You can run `npm run example` to execute and iterate on the eval you are currently developing. - -#### Adding a New Model - -To add a new model to Stagehand, follow these steps: - -1. **Define the Model**: Add the new model name to the `AvailableModel` type in the `LLMProvider.ts` file. This ensures that the model is recognized by the system. - -2. **Map the Model to a Provider**: Update the `modelToProviderMap` in the `LLMProvider` class to associate the new model with its corresponding provider. This mapping is crucial for determining which client to use. - -3. **Implement the Client**: If the new model requires a new client, implement a class that adheres to the `LLMClient` interface. This class should define all necessary methods, such as `createChatCompletion`. - -4. **Update the `getClient` Method**: Modify the `getClient` method in the `LLMProvider` class to return an instance of the new client when the new model is requested. - -### Building the SDK +> We highly value contributions to Stagehand! For questions or support, please join our [Slack community](https://join.slack.com/t/stagehand-dev/shared_invite/zt-2tdncfgkk-fF8y5U0uJzR2y2_M9c9OJA). -Stagehand uses [tsup](https://github.com/egoist/tsup) to build the SDK and vanilla [esbuild](https://esbuild.github.io/d) to build the scripts that run in the DOM. +At a high level, we're focused on improving reliability, speed, and cost in that order of priority. If you're interested in contributing, we strongly recommend reaching out to [Anirudh Kamath](https://x.com/kamathematic) or [Paul Klein](https://x.com/pk_iv) in our [Slack community](https://join.slack.com/t/stagehand-dev/shared_invite/zt-2tdncfgkk-fF8y5U0uJzR2y2_M9c9OJA) before starting to ensure that your contribution aligns with our goals. -1. run `npm run build` -2. 
run `npm pack` to get a tarball for distribution +For more information, please see our [Contributing Guide](https://docs.stagehand.dev/contributions/contributing). ## Acknowledgements This project heavily relies on [Playwright](https://playwright.dev/) as a resilient backbone to automate the web. It also would not be possible without the awesome techniques and discoveries made by [tarsier](https://github.com/reworkd/tarsier), and [fuji-web](https://github.com/normal-computing/fuji-web). -[Jeremy Press](https://x.com/jeremypress) wrote the original MVP of Stagehand and continues to be a major ally to the project. +We'd like to thank the following people for their contributions to Stagehand: +- [Jeremy Press](https://x.com/jeremypress) wrote the original MVP of Stagehand and continues to be an ally to the project. +- [Navid Pour](https://github.com/navidpour) is heavily responsible for the current architecture of Stagehand and the `act` API. +- [Sean McGuire](https://github.com/seanmcguire12) is a major contributor to the project and has been a great help with improving the `extract` API and getting evals to a high level. +- [Filip Michalsky](https://github.com/filip-michalsky) has been doing a lot of work on building out integrations like [Langchain](https://js.langchain.com/docs/integrations/tools/stagehand/) and [Claude MCP](https://github.com/browserbase/mcp-server-browserbase), generally improving the repository, and unblocking users. +- [Sameel Arif](https://github.com/sameelarif) is a major contributor to the project, especially around improving the developer experience. ## License Licensed under the MIT License. -Copyright 2024 Browserbase, Inc. +Copyright 2025 Browserbase, Inc. 
\ No newline at end of file From 5899ec2c4b73c636bfd8120ec3aac225af7dd949 Mon Sep 17 00:00:00 2001 From: Sameel Date: Tue, 7 Jan 2025 23:13:17 -0800 Subject: [PATCH 13/20] Pass LLM logger in createChatCompletion (#385) * migrate to new options syntax * fix * input logger in extract/observe * changeset * patch -> minor * import and override stagehandconfig --------- Co-authored-by: Anirudh Kamath --- .changeset/soft-snails-lick.md | 5 + examples/external_client.ts | 21 +-- examples/external_clients/ollama.ts | 46 +++--- lib/handlers/extractHandler.ts | 2 + lib/handlers/observeHandler.ts | 1 + lib/index.ts | 3 - lib/inference.ts | 241 ++++++++++++++-------------- lib/llm/AnthropicClient.ts | 40 ++--- lib/llm/LLMClient.ts | 10 +- lib/llm/OpenAIClient.ts | 56 +++---- lib/prompt.ts | 18 --- 11 files changed, 218 insertions(+), 225 deletions(-) create mode 100644 .changeset/soft-snails-lick.md diff --git a/.changeset/soft-snails-lick.md b/.changeset/soft-snails-lick.md new file mode 100644 index 00000000..3ab64ebc --- /dev/null +++ b/.changeset/soft-snails-lick.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": minor +--- + +Moved the LLMClient logger parameter to the createChatCompletion method options. 
diff --git a/examples/external_client.ts b/examples/external_client.ts index de869ee9..bc365213 100644 --- a/examples/external_client.ts +++ b/examples/external_client.ts @@ -1,20 +1,15 @@ -import { type ConstructorParams, Stagehand } from "../lib"; +import { Stagehand } from "../lib"; import { z } from "zod"; import { OllamaClient } from "./external_clients/ollama"; - -const StagehandConfig: ConstructorParams = { - env: "BROWSERBASE", - apiKey: process.env.BROWSERBASE_API_KEY, - projectId: process.env.BROWSERBASE_PROJECT_ID, - verbose: 1, - llmClient: new OllamaClient({ - modelName: "llama3.2", - }), - debugDom: true, -}; +import StagehandConfig from "./stagehand.config"; async function example() { - const stagehand = new Stagehand(StagehandConfig); + const stagehand = new Stagehand({ + ...StagehandConfig, + llmClient: new OllamaClient({ + modelName: "llama3.2", + }), + }); await stagehand.init(); await stagehand.page.goto("https://news.ycombinator.com"); diff --git a/examples/external_clients/ollama.ts b/examples/external_clients/ollama.ts index d884e7fb..cb918b68 100644 --- a/examples/external_clients/ollama.ts +++ b/examples/external_clients/ollama.ts @@ -1,14 +1,5 @@ import OpenAI, { type ClientOptions } from "openai"; import { zodResponseFormat } from "openai/helpers/zod"; -import type { LLMCache } from "../../lib/cache/LLMCache"; -import { validateZodSchema } from "../../lib/utils"; -import { - type ChatCompletionOptions, - type ChatMessage, - LLMClient, -} from "../../lib/llm/LLMClient"; -import type { LogLine } from "../../types/log"; -import type { AvailableModel } from "../../types/model"; import type { ChatCompletion, ChatCompletionAssistantMessageParam, @@ -19,23 +10,28 @@ import type { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam, } from "openai/resources/chat"; +import type { LLMCache } from "../../lib/cache/LLMCache"; +import { + type ChatMessage, + CreateChatCompletionOptions, + LLMClient, +} from "../../lib/llm/LLMClient"; 
+import { validateZodSchema } from "../../lib/utils"; +import type { AvailableModel } from "../../types/model"; export class OllamaClient extends LLMClient { public type = "ollama" as const; private client: OpenAI; private cache: LLMCache | undefined; - public logger: (message: LogLine) => void; private enableCaching: boolean; public clientOptions: ClientOptions; constructor({ - logger, enableCaching = false, cache = undefined, modelName = "llama3.2", clientOptions, }: { - logger?: (message: LogLine) => void; enableCaching?: boolean; cache?: LLMCache; modelName?: string; @@ -47,16 +43,16 @@ export class OllamaClient extends LLMClient { baseURL: clientOptions?.baseURL || "http://localhost:11434/v1", apiKey: "ollama", }); - this.logger = logger; this.cache = cache; this.enableCaching = enableCaching; this.modelName = modelName as AvailableModel; } - async createChatCompletion( - options: ChatCompletionOptions, + async createChatCompletion({ + options, retries = 3, - ): Promise { + logger, + }: CreateChatCompletionOptions): Promise { const { image, requestId, ...optionsWithoutImageAndRequestId } = options; // TODO: Implement vision support @@ -66,7 +62,7 @@ export class OllamaClient extends LLMClient { ); } - this.logger({ + logger({ category: "ollama", message: "creating chat completion", level: 1, @@ -122,7 +118,7 @@ export class OllamaClient extends LLMClient { ); if (cachedResponse) { - this.logger({ + logger({ category: "llm_cache", message: "LLM cache hit - returning cached response", level: 1, @@ -140,7 +136,7 @@ export class OllamaClient extends LLMClient { return cachedResponse; } - this.logger({ + logger({ category: "llm_cache", message: "LLM cache miss - no cached response found", level: 1, @@ -168,7 +164,7 @@ export class OllamaClient extends LLMClient { model: this.modelName, }; - this.logger({ + logger({ category: "ollama", message: "creating chat completion", level: 1, @@ -257,7 +253,7 @@ export class OllamaClient extends LLMClient { const response = 
await this.client.chat.completions.create(body); - this.logger({ + logger({ category: "ollama", message: "response", level: 1, @@ -279,7 +275,11 @@ export class OllamaClient extends LLMClient { if (!validateZodSchema(options.response_model.schema, parsedData)) { if (retries > 0) { - return this.createChatCompletion(options, retries - 1); + return this.createChatCompletion({ + options, + logger, + retries: retries - 1, + }); } throw new Error("Invalid response schema"); @@ -299,7 +299,7 @@ export class OllamaClient extends LLMClient { } if (this.enableCaching) { - this.logger({ + logger({ category: "llm_cache", message: "caching response", level: 1, diff --git a/lib/handlers/extractHandler.ts b/lib/handlers/extractHandler.ts index 143d599f..6f57082b 100644 --- a/lib/handlers/extractHandler.ts +++ b/lib/handlers/extractHandler.ts @@ -306,6 +306,7 @@ export class StagehandExtractHandler { chunksTotal: 1, llmClient, requestId, + logger: this.logger, }); const { @@ -434,6 +435,7 @@ export class StagehandExtractHandler { chunksTotal: chunks.length, requestId, isUsingTextExtract: false, + logger: this.logger, }); const { diff --git a/lib/handlers/observeHandler.ts b/lib/handlers/observeHandler.ts index 3e53f9c8..70d07435 100644 --- a/lib/handlers/observeHandler.ts +++ b/lib/handlers/observeHandler.ts @@ -120,6 +120,7 @@ export class StagehandObserveHandler { llmClient, image: annotatedScreenshot, requestId, + logger: this.logger, }); const elementsWithSelectors = observationResponse.elements.map( diff --git a/lib/index.ts b/lib/index.ts index 0d958f67..18997a91 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -372,9 +372,6 @@ export class Stagehand { this.llmClient = undefined; } } - if (this.llmClient && !this.llmClient.logger) { - this.llmClient.logger = this.logger; - } this.domSettleTimeoutMs = domSettleTimeoutMs ?? 30_000; this.headless = headless ?? 
false; this.browserbaseSessionCreateParams = browserbaseSessionCreateParams; diff --git a/lib/inference.ts b/lib/inference.ts index 0bb08f14..76047056 100644 --- a/lib/inference.ts +++ b/lib/inference.ts @@ -1,28 +1,27 @@ +import { z } from "zod"; +import { ActCommandParams, ActCommandResult } from "../types/act"; +import { VerifyActCompletionParams } from "../types/inference"; +import { LogLine } from "../types/log"; +import { + AnnotatedScreenshotText, + ChatMessage, + LLMClient, +} from "./llm/LLMClient"; import { actTools, buildActSystemPrompt, buildActUserPrompt, - buildAskSystemPrompt, buildExtractSystemPrompt, buildExtractUserPrompt, + buildMetadataPrompt, + buildMetadataSystemPrompt, buildObserveSystemPrompt, buildObserveUserMessage, - buildAskUserPrompt, - buildVerifyActCompletionSystemPrompt, - buildVerifyActCompletionUserPrompt, buildRefineSystemPrompt, buildRefineUserPrompt, - buildMetadataSystemPrompt, - buildMetadataPrompt, + buildVerifyActCompletionSystemPrompt, + buildVerifyActCompletionUserPrompt, } from "./prompt"; -import { z } from "zod"; -import { - AnnotatedScreenshotText, - ChatMessage, - LLMClient, -} from "./llm/LLMClient"; -import { VerifyActCompletionParams } from "../types/inference"; -import { ActCommandParams, ActCommandResult } from "../types/act"; export async function verifyActCompletion({ goal, @@ -40,25 +39,28 @@ export async function verifyActCompletion({ type VerificationResponse = z.infer; const response = await llmClient.createChatCompletion({ - messages: [ - buildVerifyActCompletionSystemPrompt(), - buildVerifyActCompletionUserPrompt(goal, steps, domElements), - ], - temperature: 0.1, - top_p: 1, - frequency_penalty: 0, - presence_penalty: 0, - image: screenshot - ? 
{ - buffer: screenshot, - description: "This is a screenshot of the whole visible page.", - } - : undefined, - response_model: { - name: "Verification", - schema: verificationSchema, + options: { + messages: [ + buildVerifyActCompletionSystemPrompt(), + buildVerifyActCompletionUserPrompt(goal, steps, domElements), + ], + temperature: 0.1, + top_p: 1, + frequency_penalty: 0, + presence_penalty: 0, + image: screenshot + ? { + buffer: screenshot, + description: "This is a screenshot of the whole visible page.", + } + : undefined, + response_model: { + name: "Verification", + schema: verificationSchema, + }, + requestId, }, - requestId, + logger, }); if (!response || typeof response !== "object") { @@ -109,17 +111,20 @@ export async function act({ ]; const response = await llmClient.createChatCompletion({ - messages, - temperature: 0.1, - top_p: 1, - frequency_penalty: 0, - presence_penalty: 0, - tool_choice: "auto" as const, - tools: actTools, - image: screenshot - ? { buffer: screenshot, description: AnnotatedScreenshotText } - : undefined, - requestId, + options: { + messages, + temperature: 0.1, + top_p: 1, + frequency_penalty: 0, + presence_penalty: 0, + tool_choice: "auto" as const, + tools: actTools, + image: screenshot + ? 
{ buffer: screenshot, description: AnnotatedScreenshotText } + : undefined, + requestId, + }, + logger, }); const toolCalls = response.choices[0].message.tool_calls; @@ -160,6 +165,7 @@ export async function extract({ chunksSeen, chunksTotal, requestId, + logger, isUsingTextExtract, }: { instruction: string; @@ -171,6 +177,7 @@ export async function extract({ chunksTotal: number; requestId: string; isUsingTextExtract?: boolean; + logger: (message: LogLine) => void; }) { type ExtractionResponse = z.infer; type MetadataResponse = z.infer; @@ -178,40 +185,46 @@ export async function extract({ const isUsingAnthropic = llmClient.type === "anthropic"; const extractionResponse = await llmClient.createChatCompletion({ - messages: [ - buildExtractSystemPrompt(isUsingAnthropic, isUsingTextExtract), - buildExtractUserPrompt(instruction, domElements, isUsingAnthropic), - ], - response_model: { - schema: schema, - name: "Extraction", - }, - temperature: 0.1, - top_p: 1, - frequency_penalty: 0, - presence_penalty: 0, - requestId, - }); - - const refinedResponse = - await llmClient.createChatCompletion({ + options: { messages: [ - buildRefineSystemPrompt(), - buildRefineUserPrompt( - instruction, - previouslyExtractedContent, - extractionResponse, - ), + buildExtractSystemPrompt(isUsingAnthropic, isUsingTextExtract), + buildExtractUserPrompt(instruction, domElements, isUsingAnthropic), ], response_model: { schema: schema, - name: "RefinedExtraction", + name: "Extraction", }, temperature: 0.1, top_p: 1, frequency_penalty: 0, presence_penalty: 0, requestId, + }, + logger, + }); + + const refinedResponse = + await llmClient.createChatCompletion({ + options: { + messages: [ + buildRefineSystemPrompt(), + buildRefineUserPrompt( + instruction, + previouslyExtractedContent, + extractionResponse, + ), + ], + response_model: { + schema: schema, + name: "RefinedExtraction", + }, + temperature: 0.1, + top_p: 1, + frequency_penalty: 0, + presence_penalty: 0, + requestId, + }, + logger, }); 
const metadataSchema = z.object({ @@ -229,24 +242,27 @@ export async function extract({ const metadataResponse = await llmClient.createChatCompletion({ - messages: [ - buildMetadataSystemPrompt(), - buildMetadataPrompt( - instruction, - refinedResponse, - chunksSeen, - chunksTotal, - ), - ], - response_model: { - name: "Metadata", - schema: metadataSchema, + options: { + messages: [ + buildMetadataSystemPrompt(), + buildMetadataPrompt( + instruction, + refinedResponse, + chunksSeen, + chunksTotal, + ), + ], + response_model: { + name: "Metadata", + schema: metadataSchema, + }, + temperature: 0.1, + top_p: 1, + frequency_penalty: 0, + presence_penalty: 0, + requestId, }, - temperature: 0.1, - top_p: 1, - frequency_penalty: 0, - presence_penalty: 0, - requestId, + logger, }); return { @@ -261,12 +277,14 @@ export async function observe({ llmClient, image, requestId, + logger, }: { instruction: string; domElements: string; llmClient: LLMClient; image?: Buffer; requestId: string; + logger: (message: LogLine) => void; }): Promise<{ elements: { elementId: number; description: string }[]; }> { @@ -289,22 +307,25 @@ export async function observe({ const observationResponse = await llmClient.createChatCompletion({ - messages: [ - buildObserveSystemPrompt(), - buildObserveUserMessage(instruction, domElements), - ], - image: image - ? { buffer: image, description: AnnotatedScreenshotText } - : undefined, - response_model: { - schema: observeSchema, - name: "Observation", + options: { + messages: [ + buildObserveSystemPrompt(), + buildObserveUserMessage(instruction, domElements), + ], + image: image + ? 
{ buffer: image, description: AnnotatedScreenshotText } + : undefined, + response_model: { + schema: observeSchema, + name: "Observation", + }, + temperature: 0.1, + top_p: 1, + frequency_penalty: 0, + presence_penalty: 0, + requestId, }, - temperature: 0.1, - top_p: 1, - frequency_penalty: 0, - presence_penalty: 0, - requestId, + logger, }); const parsedResponse = { @@ -317,25 +338,3 @@ export async function observe({ return parsedResponse; } - -export async function ask({ - question, - llmClient, - requestId, -}: { - question: string; - llmClient: LLMClient; - requestId: string; -}) { - const response = await llmClient.createChatCompletion({ - messages: [buildAskSystemPrompt(), buildAskUserPrompt(question)], - temperature: 0.1, - top_p: 1, - frequency_penalty: 0, - presence_penalty: 0, - requestId, - }); - - // The parsing is now handled in the LLM clients - return response.choices[0].message.content; -} diff --git a/lib/llm/AnthropicClient.ts b/lib/llm/AnthropicClient.ts index 69f217d9..9a8237fc 100644 --- a/lib/llm/AnthropicClient.ts +++ b/lib/llm/AnthropicClient.ts @@ -9,18 +9,20 @@ import { zodToJsonSchema } from "zod-to-json-schema"; import { LogLine } from "../../types/log"; import { AnthropicJsonSchemaObject, AvailableModel } from "../../types/model"; import { LLMCache } from "../cache/LLMCache"; -import { ChatCompletionOptions, LLMClient, LLMResponse } from "./LLMClient"; +import { + CreateChatCompletionOptions, + LLMClient, + LLMResponse, +} from "./LLMClient"; export class AnthropicClient extends LLMClient { public type = "anthropic" as const; private client: Anthropic; private cache: LLMCache | undefined; - public logger: (message: LogLine) => void; private enableCaching: boolean; public clientOptions: ClientOptions; constructor({ - logger, enableCaching = false, cache, modelName, @@ -34,20 +36,21 @@ export class AnthropicClient extends LLMClient { }) { super(modelName); this.client = new Anthropic(clientOptions); - this.logger = logger; this.cache = 
cache; this.enableCaching = enableCaching; this.modelName = modelName; this.clientOptions = clientOptions; } - async createChatCompletion( - options: ChatCompletionOptions & { retries?: number }, - ): Promise { + async createChatCompletion({ + options, + retries, + logger, + }: CreateChatCompletionOptions): Promise { const optionsWithoutImage = { ...options }; delete optionsWithoutImage.image; - this.logger({ + logger({ category: "anthropic", message: "creating chat completion", level: 1, @@ -66,7 +69,7 @@ export class AnthropicClient extends LLMClient { image: options.image, response_model: options.response_model, tools: options.tools, - retries: options.retries, + retries: retries, }; if (this.enableCaching) { @@ -75,7 +78,7 @@ export class AnthropicClient extends LLMClient { options.requestId, ); if (cachedResponse) { - this.logger({ + logger({ category: "llm_cache", message: "LLM cache hit - returning cached response", level: 1, @@ -96,7 +99,7 @@ export class AnthropicClient extends LLMClient { }); return cachedResponse as T; } else { - this.logger({ + logger({ category: "llm_cache", message: "LLM cache miss - no cached response found", level: 1, @@ -231,7 +234,7 @@ export class AnthropicClient extends LLMClient { temperature: options.temperature, }); - this.logger({ + logger({ category: "anthropic", message: "response", level: 1, @@ -281,7 +284,7 @@ export class AnthropicClient extends LLMClient { }, }; - this.logger({ + logger({ category: "anthropic", message: "transformed response", level: 1, @@ -307,13 +310,14 @@ export class AnthropicClient extends LLMClient { return result as T; // anthropic returns this as `unknown`, so we need to cast } else { - if (!options.retries || options.retries < 5) { + if (!retries || retries < 5) { return this.createChatCompletion({ - ...options, - retries: (options.retries ?? 0) + 1, + options, + logger, + retries: (retries ?? 
0) + 1, }); } - this.logger({ + logger({ category: "anthropic", message: "error creating chat completion", level: 1, @@ -332,7 +336,7 @@ export class AnthropicClient extends LLMClient { if (this.enableCaching) { this.cache.set(cacheOptions, transformedResponse, options.requestId); - this.logger({ + logger({ category: "anthropic", message: "cached response", level: 1, diff --git a/lib/llm/LLMClient.ts b/lib/llm/LLMClient.ts index 815aee98..9e6257c8 100644 --- a/lib/llm/LLMClient.ts +++ b/lib/llm/LLMClient.ts @@ -1,6 +1,7 @@ import { ZodType } from "zod"; import { LLMTool } from "../../types/llm"; import { AvailableModel, ClientOptions } from "../../types/model"; +import { LogLine } from "../../types/log"; export interface ChatMessage { role: "system" | "user" | "assistant"; @@ -82,6 +83,12 @@ export type LLMResponse = { }; }; +export interface CreateChatCompletionOptions { + options: ChatCompletionOptions; + logger: (message: LogLine) => void; + retries?: number; +} + export abstract class LLMClient { public type: "openai" | "anthropic" | string; public modelName: AvailableModel; @@ -94,7 +101,6 @@ export abstract class LLMClient { } abstract createChatCompletion( - options: ChatCompletionOptions, + options: CreateChatCompletionOptions, ): Promise; - abstract logger: (message: { category?: string; message: string }) => void; } diff --git a/lib/llm/OpenAIClient.ts b/lib/llm/OpenAIClient.ts index 2b96e284..6204e613 100644 --- a/lib/llm/OpenAIClient.ts +++ b/lib/llm/OpenAIClient.ts @@ -17,6 +17,7 @@ import { validateZodSchema } from "../utils"; import { ChatCompletionOptions, ChatMessage, + CreateChatCompletionOptions, LLMClient, LLMResponse, } from "./LLMClient"; @@ -25,12 +26,10 @@ export class OpenAIClient extends LLMClient { public type = "openai" as const; private client: OpenAI; private cache: LLMCache | undefined; - public logger: (message: LogLine) => void; private enableCaching: boolean; public clientOptions: ClientOptions; constructor({ - logger, 
enableCaching = false, cache, modelName, @@ -45,16 +44,16 @@ export class OpenAIClient extends LLMClient { super(modelName); this.clientOptions = clientOptions; this.client = new OpenAI(clientOptions); - this.logger = logger; this.cache = cache; this.enableCaching = enableCaching; this.modelName = modelName; } - async createChatCompletion( - optionsInitial: ChatCompletionOptions, - retries: number = 3, - ): Promise { + async createChatCompletion({ + options: optionsInitial, + logger, + retries = 3, + }: CreateChatCompletionOptions): Promise { let options: Partial = optionsInitial; // O1 models do not support most of the options. So we override them. @@ -119,7 +118,7 @@ export class OpenAIClient extends LLMClient { const { image, requestId, ...optionsWithoutImageAndRequestId } = options; - this.logger({ + logger({ category: "openai", message: "creating chat completion", level: 1, @@ -155,7 +154,7 @@ export class OpenAIClient extends LLMClient { options.requestId, ); if (cachedResponse) { - this.logger({ + logger({ category: "llm_cache", message: "LLM cache hit - returning cached response", level: 1, @@ -172,7 +171,7 @@ export class OpenAIClient extends LLMClient { }); return cachedResponse; } else { - this.logger({ + logger({ category: "llm_cache", message: "LLM cache miss - no cached response found", level: 1, @@ -220,7 +219,7 @@ export class OpenAIClient extends LLMClient { Do not include any other text, formating or markdown in your output. Do not include \`\`\` or \`\`\`json in your response. 
Only the JSON object itself.`, }); } catch (error) { - this.logger({ + logger({ category: "openai", message: "Failed to parse response model schema", level: 0, @@ -228,10 +227,11 @@ export class OpenAIClient extends LLMClient { if (retries > 0) { // as-casting to account for o1 models not supporting all options - return this.createChatCompletion( - options as ChatCompletionOptions, - retries - 1, - ); + return this.createChatCompletion({ + options: options as ChatCompletionOptions, + logger, + retries: retries - 1, + }); } throw error; @@ -252,7 +252,7 @@ export class OpenAIClient extends LLMClient { }; /* eslint-enable */ - this.logger({ + logger({ category: "openai", message: "creating chat completion", level: 1, @@ -358,7 +358,7 @@ export class OpenAIClient extends LLMClient { ]; response.choices[0].message.content = null; } catch (error) { - this.logger({ + logger({ category: "openai", message: "Failed to parse tool call response", level: 0, @@ -376,17 +376,18 @@ export class OpenAIClient extends LLMClient { if (retries > 0) { // as-casting to account for o1 models not supporting all options - return this.createChatCompletion( - options as ChatCompletionOptions, - retries - 1, - ); + return this.createChatCompletion({ + options: options as ChatCompletionOptions, + logger, + retries: retries - 1, + }); } throw error; } } - this.logger({ + logger({ category: "openai", message: "response", level: 1, @@ -409,10 +410,11 @@ export class OpenAIClient extends LLMClient { if (!validateZodSchema(options.response_model.schema, parsedData)) { if (retries > 0) { // as-casting to account for o1 models not supporting all options - return this.createChatCompletion( - options as ChatCompletionOptions, - retries - 1, - ); + return this.createChatCompletion({ + options: options as ChatCompletionOptions, + logger, + retries: retries - 1, + }); } throw new Error("Invalid response schema"); @@ -432,7 +434,7 @@ export class OpenAIClient extends LLMClient { } if (this.enableCaching) { 
- this.logger({ + logger({ category: "llm_cache", message: "caching response", level: 1, diff --git a/lib/prompt.ts b/lib/prompt.ts index 51224e65..2c19152e 100644 --- a/lib/prompt.ts +++ b/lib/prompt.ts @@ -351,21 +351,3 @@ export function buildObserveUserMessage( DOM: ${domElements}`, }; } - -// ask -const askSystemPrompt = ` -you are a simple question answering assistent given the user's question. respond with only the answer. -`; -export function buildAskSystemPrompt(): ChatMessage { - return { - role: "system", - content: askSystemPrompt, - }; -} - -export function buildAskUserPrompt(question: string): ChatMessage { - return { - role: "user", - content: `question: ${question}`, - }; -} From a41271baf351e20f4c79b4b654d8a947b615a121 Mon Sep 17 00:00:00 2001 From: Sameel Date: Wed, 8 Jan 2025 00:28:10 -0800 Subject: [PATCH 14/20] Vercel ai sdk impl (#382) * ai sdk client (WIP) * unify LLM tool type * replace anthropic types * update ollama tool usage * delete old ai sdk client * new ai sdk client * ai sdk example * add comment * changeset * fix ollama tool usage * remove changeset * use default logger * fixed messages type * message type fix * update deps * update type * migrate to new options syntax * fix * input logger in extract/observe * remove AISdkClient logger * changeset * aisdk use StagehandConfig * change aisdk model to gemini * Revert "Merge branch 'sameel/move-llm-logger' into vercel-ai-sdk-impl" This reverts commit ec63bf4f20244f9ab497c6853c5a3f9ac4e7c031, reversing changes made to e575d883f09251e9d12e06760f663d7534c5243d. 
* lint error * changeset --- .changeset/shiny-ladybugs-shave.md | 5 + examples/ai_sdk_example.ts | 40 ++ examples/external_clients/aisdk.ts | 112 +++++ lib/index.ts | 1 + package-lock.json | 698 ++++++++++++++++++++++++----- package.json | 6 +- 6 files changed, 743 insertions(+), 119 deletions(-) create mode 100644 .changeset/shiny-ladybugs-shave.md create mode 100644 examples/ai_sdk_example.ts create mode 100644 examples/external_clients/aisdk.ts diff --git a/.changeset/shiny-ladybugs-shave.md b/.changeset/shiny-ladybugs-shave.md new file mode 100644 index 00000000..f95423fd --- /dev/null +++ b/.changeset/shiny-ladybugs-shave.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +Added example implementation of the Vercel AI SDK as an LLMClient diff --git a/examples/ai_sdk_example.ts b/examples/ai_sdk_example.ts new file mode 100644 index 00000000..ef6037dc --- /dev/null +++ b/examples/ai_sdk_example.ts @@ -0,0 +1,40 @@ +import { google } from "@ai-sdk/google"; +import { z } from "zod"; +import { Stagehand } from "../lib"; +import { AISdkClient } from "./external_clients/aisdk"; +import StagehandConfig from "./stagehand.config"; + +async function example() { + const stagehand = new Stagehand({ + ...StagehandConfig, + llmClient: new AISdkClient({ + model: google("gemini-1.5-flash-latest"), + }), + }); + + await stagehand.init(); + await stagehand.page.goto("https://news.ycombinator.com"); + + const headlines = await stagehand.page.extract({ + instruction: "Extract only 3 stories from the Hacker News homepage.", + schema: z.object({ + stories: z + .array( + z.object({ + title: z.string(), + url: z.string(), + points: z.number(), + }), + ) + .length(3), + }), + }); + + console.log(headlines); + + await stagehand.close(); +} + +(async () => { + await example(); +})(); diff --git a/examples/external_clients/aisdk.ts b/examples/external_clients/aisdk.ts new file mode 100644 index 00000000..83e35cc1 --- /dev/null +++ b/examples/external_clients/aisdk.ts @@ 
-0,0 +1,112 @@ +import { + CoreAssistantMessage, + CoreMessage, + CoreSystemMessage, + CoreTool, + CoreUserMessage, + generateObject, + generateText, + ImagePart, + LanguageModel, + TextPart, +} from "ai"; +import { ChatCompletion } from "openai/resources/chat/completions"; +import { + CreateChatCompletionOptions, + LLMClient, +} from "../../lib/llm/LLMClient"; +import { AvailableModel } from "../../types/model"; + +export class AISdkClient extends LLMClient { + public type = "aisdk" as const; + private model: LanguageModel; + + constructor({ model }: { model: LanguageModel }) { + super(model.modelId as AvailableModel); + this.model = model; + } + + async createChatCompletion({ + options, + }: CreateChatCompletionOptions): Promise { + const formattedMessages: CoreMessage[] = options.messages.map((message) => { + if (Array.isArray(message.content)) { + if (message.role === "system") { + const systemMessage: CoreSystemMessage = { + role: "system", + content: message.content + .map((c) => ("text" in c ? c.text : "")) + .join("\n"), + }; + return systemMessage; + } + + const contentParts = message.content.map((content) => { + if ("image_url" in content) { + const imageContent: ImagePart = { + type: "image", + image: content.image_url.url, + }; + return imageContent; + } else { + const textContent: TextPart = { + type: "text", + text: content.text, + }; + return textContent; + } + }); + + if (message.role === "user") { + const userMessage: CoreUserMessage = { + role: "user", + content: contentParts, + }; + return userMessage; + } else { + const textOnlyParts = contentParts.map((part) => ({ + type: "text" as const, + text: part.type === "image" ? 
"[Image]" : part.text, + })); + const assistantMessage: CoreAssistantMessage = { + role: "assistant", + content: textOnlyParts, + }; + return assistantMessage; + } + } + + return { + role: message.role, + content: message.content, + }; + }); + + if (options.response_model) { + const response = await generateObject({ + model: this.model, + messages: formattedMessages, + schema: options.response_model.schema, + }); + + return response.object; + } + + const tools: Record = {}; + + for (const rawTool of options.tools) { + tools[rawTool.name] = { + description: rawTool.description, + parameters: rawTool.parameters, + }; + } + + const response = await generateText({ + model: this.model, + messages: formattedMessages, + tools, + }); + + return response as T; + } +} diff --git a/lib/index.ts b/lib/index.ts index 18997a91..cceb4d7c 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -372,6 +372,7 @@ export class Stagehand { this.llmClient = undefined; } } + this.domSettleTimeoutMs = domSettleTimeoutMs ?? 30_000; this.headless = headless ?? 
false; this.browserbaseSessionCreateParams = browserbaseSessionCreateParams; diff --git a/package-lock.json b/package-lock.json index 47f40009..da63850a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,20 +1,23 @@ { "name": "@browserbasehq/stagehand", - "version": "1.7.0", + "version": "1.8.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@browserbasehq/stagehand", - "version": "1.7.0", + "version": "1.8.0", "license": "MIT", "dependencies": { "@anthropic-ai/sdk": "^0.27.3", "@browserbasehq/sdk": "^2.0.0", "sharp": "^0.33.5", + "ws": "^8.18.0", "zod-to-json-schema": "^3.23.5" }, "devDependencies": { + "@ai-sdk/google": "^1.0.13", + "@ai-sdk/openai": "^1.0.14", "@changesets/changelog-github": "^0.5.0", "@changesets/cli": "^2.27.9", "@eslint/js": "^9.16.0", @@ -24,6 +27,7 @@ "@types/node": "^20.11.30", "@types/ws": "^8.5.13", "adm-zip": "^0.5.16", + "ai": "^4.0.26", "autoevals": "^0.0.64", "braintrust": "^0.0.171", "cheerio": "^1.0.0", @@ -48,6 +52,134 @@ "zod": "^3.23.8" } }, + "node_modules/@ai-sdk/google": { + "version": "1.0.13", + "resolved": "https://registry.npmjs.org/@ai-sdk/google/-/google-1.0.13.tgz", + "integrity": "sha512-L4ej4rd3JpHp0QVqlgr383EedwQXu9tJ3hmJl793Lt8zMLgY+VKZE816v/bz0R3zCyTtiUrxR+LDhbSMuPV7eQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "1.0.4", + "@ai-sdk/provider-utils": "2.0.6" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.0.0" + } + }, + "node_modules/@ai-sdk/google/node_modules/@ai-sdk/provider": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-1.0.4.tgz", + "integrity": "sha512-lJi5zwDosvvZER3e/pB8lj1MN3o3S7zJliQq56BRr4e9V3fcRyFtwP0JRxaRS5vHYX3OJ154VezVoQNrk0eaKw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "json-schema": "^0.4.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@ai-sdk/google/node_modules/@ai-sdk/provider-utils": { + "version": 
"2.0.6", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider-utils/-/provider-utils-2.0.6.tgz", + "integrity": "sha512-nB0rPwIBSCk0UkfdkprAxQ45ZjfKlk+Ts5zvIBQkJ5SnTCL9meg6bW65aomQrxhdvtqZML2jjaWTI8/l6AIVlQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "1.0.4", + "eventsource-parser": "^3.0.0", + "nanoid": "^3.3.8", + "secure-json-parse": "^2.7.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.0.0" + }, + "peerDependenciesMeta": { + "zod": { + "optional": true + } + } + }, + "node_modules/@ai-sdk/google/node_modules/eventsource-parser": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.0.tgz", + "integrity": "sha512-T1C0XCUimhxVQzW4zFipdx0SficT651NnkR0ZSH3yQwh+mFMdLfgjABVi4YtMTtaL4s168593DaoaRLMqryavA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@ai-sdk/openai": { + "version": "1.0.14", + "resolved": "https://registry.npmjs.org/@ai-sdk/openai/-/openai-1.0.14.tgz", + "integrity": "sha512-uyOkQNtYsHr4qyV7y0rmMAtdW4LTJoThYo1qXcvQa30RDh/MyvLEOjKYX181Siyp8LcTqYvwf6Tt+eckdVTTug==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "1.0.4", + "@ai-sdk/provider-utils": "2.0.6" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.0.0" + } + }, + "node_modules/@ai-sdk/openai/node_modules/@ai-sdk/provider": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-1.0.4.tgz", + "integrity": "sha512-lJi5zwDosvvZER3e/pB8lj1MN3o3S7zJliQq56BRr4e9V3fcRyFtwP0JRxaRS5vHYX3OJ154VezVoQNrk0eaKw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "json-schema": "^0.4.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@ai-sdk/openai/node_modules/@ai-sdk/provider-utils": { + "version": "2.0.6", + "resolved": 
"https://registry.npmjs.org/@ai-sdk/provider-utils/-/provider-utils-2.0.6.tgz", + "integrity": "sha512-nB0rPwIBSCk0UkfdkprAxQ45ZjfKlk+Ts5zvIBQkJ5SnTCL9meg6bW65aomQrxhdvtqZML2jjaWTI8/l6AIVlQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "1.0.4", + "eventsource-parser": "^3.0.0", + "nanoid": "^3.3.8", + "secure-json-parse": "^2.7.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.0.0" + }, + "peerDependenciesMeta": { + "zod": { + "optional": true + } + } + }, + "node_modules/@ai-sdk/openai/node_modules/eventsource-parser": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.0.tgz", + "integrity": "sha512-T1C0XCUimhxVQzW4zFipdx0SficT651NnkR0ZSH3yQwh+mFMdLfgjABVi4YtMTtaL4s168593DaoaRLMqryavA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/@ai-sdk/provider": { "version": "0.0.11", "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-0.0.11.tgz", @@ -62,15 +194,15 @@ } }, "node_modules/@ai-sdk/provider-utils": { - "version": "1.0.22", - "resolved": "https://registry.npmjs.org/@ai-sdk/provider-utils/-/provider-utils-1.0.22.tgz", - "integrity": "sha512-YHK2rpj++wnLVc9vPGzGFP3Pjeld2MwhKinetA0zKXOoHAT/Jit5O8kZsxcSlJPu9wvcGT1UGZEjZrtO7PfFOQ==", + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider-utils/-/provider-utils-2.0.5.tgz", + "integrity": "sha512-2M7vLhYN0ThGjNlzow7oO/lsL+DyMxvGMIYmVQvEYaCWhDzxH5dOp78VNjJIVwHzVLMbBDigX3rJuzAs853idw==", "dev": true, "license": "Apache-2.0", "dependencies": { - "@ai-sdk/provider": "0.0.26", - "eventsource-parser": "^1.1.2", - "nanoid": "^3.3.7", + "@ai-sdk/provider": "1.0.3", + "eventsource-parser": "^3.0.0", + "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "engines": { @@ -86,9 +218,9 @@ } }, "node_modules/@ai-sdk/provider-utils/node_modules/@ai-sdk/provider": { - "version": "0.0.26", - "resolved": 
"https://registry.npmjs.org/@ai-sdk/provider/-/provider-0.0.26.tgz", - "integrity": "sha512-dQkfBDs2lTYpKM8389oopPdQgIU007GQyCbuPPrV+K6MtSII3HBfE0stUIMXUb44L+LK1t6GXPP7wjSzjO6uKg==", + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-1.0.3.tgz", + "integrity": "sha512-WiuJEpHTrltOIzv3x2wx4gwksAHW0h6nK3SoDzjqCOJLu/2OJ1yASESTIX+f07ChFykHElVoP80Ol/fe9dw6tQ==", "dev": true, "license": "Apache-2.0", "dependencies": { @@ -98,15 +230,25 @@ "node": ">=18" } }, + "node_modules/@ai-sdk/provider-utils/node_modules/eventsource-parser": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.0.tgz", + "integrity": "sha512-T1C0XCUimhxVQzW4zFipdx0SficT651NnkR0ZSH3yQwh+mFMdLfgjABVi4YtMTtaL4s168593DaoaRLMqryavA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/@ai-sdk/react": { - "version": "0.0.70", - "resolved": "https://registry.npmjs.org/@ai-sdk/react/-/react-0.0.70.tgz", - "integrity": "sha512-GnwbtjW4/4z7MleLiW+TOZC2M29eCg1tOUpuEiYFMmFNZK8mkrqM0PFZMo6UsYeUYMWqEOOcPOU9OQVJMJh7IQ==", + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/@ai-sdk/react/-/react-1.0.7.tgz", + "integrity": "sha512-j2/of4iCNq+r2Bjx0O9vdRhn5C/02t2Esenis71YtnsoynPz74eQlJ3N0RYYPheThiJes50yHdfdVdH9ulxs1A==", "dev": true, "license": "Apache-2.0", "dependencies": { - "@ai-sdk/provider-utils": "1.0.22", - "@ai-sdk/ui-utils": "0.0.50", + "@ai-sdk/provider-utils": "2.0.5", + "@ai-sdk/ui-utils": "1.0.6", "swr": "^2.2.5", "throttleit": "2.1.0" }, @@ -148,6 +290,68 @@ } } }, + "node_modules/@ai-sdk/solid/node_modules/@ai-sdk/provider": { + "version": "0.0.26", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-0.0.26.tgz", + "integrity": "sha512-dQkfBDs2lTYpKM8389oopPdQgIU007GQyCbuPPrV+K6MtSII3HBfE0stUIMXUb44L+LK1t6GXPP7wjSzjO6uKg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "json-schema": "^0.4.0" + }, + "engines": { + 
"node": ">=18" + } + }, + "node_modules/@ai-sdk/solid/node_modules/@ai-sdk/provider-utils": { + "version": "1.0.22", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider-utils/-/provider-utils-1.0.22.tgz", + "integrity": "sha512-YHK2rpj++wnLVc9vPGzGFP3Pjeld2MwhKinetA0zKXOoHAT/Jit5O8kZsxcSlJPu9wvcGT1UGZEjZrtO7PfFOQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "0.0.26", + "eventsource-parser": "^1.1.2", + "nanoid": "^3.3.7", + "secure-json-parse": "^2.7.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.0.0" + }, + "peerDependenciesMeta": { + "zod": { + "optional": true + } + } + }, + "node_modules/@ai-sdk/solid/node_modules/@ai-sdk/ui-utils": { + "version": "0.0.50", + "resolved": "https://registry.npmjs.org/@ai-sdk/ui-utils/-/ui-utils-0.0.50.tgz", + "integrity": "sha512-Z5QYJVW+5XpSaJ4jYCCAVG7zIAuKOOdikhgpksneNmKvx61ACFaf98pmOd+xnjahl0pIlc/QIe6O4yVaJ1sEaw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "0.0.26", + "@ai-sdk/provider-utils": "1.0.22", + "json-schema": "^0.4.0", + "secure-json-parse": "^2.7.0", + "zod-to-json-schema": "^3.23.3" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.0.0" + }, + "peerDependenciesMeta": { + "zod": { + "optional": true + } + } + }, "node_modules/@ai-sdk/svelte": { "version": "0.0.57", "resolved": "https://registry.npmjs.org/@ai-sdk/svelte/-/svelte-0.0.57.tgz", @@ -171,7 +375,44 @@ } } }, - "node_modules/@ai-sdk/ui-utils": { + "node_modules/@ai-sdk/svelte/node_modules/@ai-sdk/provider": { + "version": "0.0.26", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-0.0.26.tgz", + "integrity": "sha512-dQkfBDs2lTYpKM8389oopPdQgIU007GQyCbuPPrV+K6MtSII3HBfE0stUIMXUb44L+LK1t6GXPP7wjSzjO6uKg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "json-schema": "^0.4.0" + }, + "engines": { + "node": ">=18" + } + }, + 
"node_modules/@ai-sdk/svelte/node_modules/@ai-sdk/provider-utils": { + "version": "1.0.22", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider-utils/-/provider-utils-1.0.22.tgz", + "integrity": "sha512-YHK2rpj++wnLVc9vPGzGFP3Pjeld2MwhKinetA0zKXOoHAT/Jit5O8kZsxcSlJPu9wvcGT1UGZEjZrtO7PfFOQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "0.0.26", + "eventsource-parser": "^1.1.2", + "nanoid": "^3.3.7", + "secure-json-parse": "^2.7.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.0.0" + }, + "peerDependenciesMeta": { + "zod": { + "optional": true + } + } + }, + "node_modules/@ai-sdk/svelte/node_modules/@ai-sdk/ui-utils": { "version": "0.0.50", "resolved": "https://registry.npmjs.org/@ai-sdk/ui-utils/-/ui-utils-0.0.50.tgz", "integrity": "sha512-Z5QYJVW+5XpSaJ4jYCCAVG7zIAuKOOdikhgpksneNmKvx61ACFaf98pmOd+xnjahl0pIlc/QIe6O4yVaJ1sEaw==", @@ -196,38 +437,123 @@ } } }, - "node_modules/@ai-sdk/ui-utils/node_modules/@ai-sdk/provider": { - "version": "0.0.26", - "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-0.0.26.tgz", - "integrity": "sha512-dQkfBDs2lTYpKM8389oopPdQgIU007GQyCbuPPrV+K6MtSII3HBfE0stUIMXUb44L+LK1t6GXPP7wjSzjO6uKg==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "json-schema": "^0.4.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/@ai-sdk/vue": { - "version": "0.0.59", - "resolved": "https://registry.npmjs.org/@ai-sdk/vue/-/vue-0.0.59.tgz", - "integrity": "sha512-+ofYlnqdc8c4F6tM0IKF0+7NagZRAiqBJpGDJ+6EYhDW8FHLUP/JFBgu32SjxSxC6IKFZxEnl68ZoP/Z38EMlw==", + "node_modules/@ai-sdk/ui-utils": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/@ai-sdk/ui-utils/-/ui-utils-1.0.6.tgz", + "integrity": "sha512-ZP6Vjj+VCnSPBIAvWAdKj2olQONJ/f4aZpkVCGkzprdhv8TjHwB6CTlXFS3zypuEGy4asg84dc1dvXKooQXFvg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "1.0.3", + "@ai-sdk/provider-utils": "2.0.5", 
+ "zod-to-json-schema": "^3.23.5" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.0.0" + }, + "peerDependenciesMeta": { + "zod": { + "optional": true + } + } + }, + "node_modules/@ai-sdk/ui-utils/node_modules/@ai-sdk/provider": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-1.0.3.tgz", + "integrity": "sha512-WiuJEpHTrltOIzv3x2wx4gwksAHW0h6nK3SoDzjqCOJLu/2OJ1yASESTIX+f07ChFykHElVoP80Ol/fe9dw6tQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "json-schema": "^0.4.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@ai-sdk/vue": { + "version": "0.0.59", + "resolved": "https://registry.npmjs.org/@ai-sdk/vue/-/vue-0.0.59.tgz", + "integrity": "sha512-+ofYlnqdc8c4F6tM0IKF0+7NagZRAiqBJpGDJ+6EYhDW8FHLUP/JFBgu32SjxSxC6IKFZxEnl68ZoP/Z38EMlw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider-utils": "1.0.22", + "@ai-sdk/ui-utils": "0.0.50", + "swrv": "^1.0.4" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "vue": "^3.3.4" + }, + "peerDependenciesMeta": { + "vue": { + "optional": true + } + } + }, + "node_modules/@ai-sdk/vue/node_modules/@ai-sdk/provider": { + "version": "0.0.26", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-0.0.26.tgz", + "integrity": "sha512-dQkfBDs2lTYpKM8389oopPdQgIU007GQyCbuPPrV+K6MtSII3HBfE0stUIMXUb44L+LK1t6GXPP7wjSzjO6uKg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "json-schema": "^0.4.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@ai-sdk/vue/node_modules/@ai-sdk/provider-utils": { + "version": "1.0.22", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider-utils/-/provider-utils-1.0.22.tgz", + "integrity": "sha512-YHK2rpj++wnLVc9vPGzGFP3Pjeld2MwhKinetA0zKXOoHAT/Jit5O8kZsxcSlJPu9wvcGT1UGZEjZrtO7PfFOQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "0.0.26", + "eventsource-parser": 
"^1.1.2", + "nanoid": "^3.3.7", + "secure-json-parse": "^2.7.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.0.0" + }, + "peerDependenciesMeta": { + "zod": { + "optional": true + } + } + }, + "node_modules/@ai-sdk/vue/node_modules/@ai-sdk/ui-utils": { + "version": "0.0.50", + "resolved": "https://registry.npmjs.org/@ai-sdk/ui-utils/-/ui-utils-0.0.50.tgz", + "integrity": "sha512-Z5QYJVW+5XpSaJ4jYCCAVG7zIAuKOOdikhgpksneNmKvx61ACFaf98pmOd+xnjahl0pIlc/QIe6O4yVaJ1sEaw==", "dev": true, "license": "Apache-2.0", "dependencies": { + "@ai-sdk/provider": "0.0.26", "@ai-sdk/provider-utils": "1.0.22", - "@ai-sdk/ui-utils": "0.0.50", - "swrv": "^1.0.4" + "json-schema": "^0.4.0", + "secure-json-parse": "^2.7.0", + "zod-to-json-schema": "^3.23.3" }, "engines": { "node": ">=18" }, "peerDependencies": { - "vue": "^3.3.4" + "zod": "^3.0.0" }, "peerDependenciesMeta": { - "vue": { + "zod": { "optional": true } } @@ -2882,58 +3208,40 @@ } }, "node_modules/ai": { - "version": "3.4.33", - "resolved": "https://registry.npmjs.org/ai/-/ai-3.4.33.tgz", - "integrity": "sha512-plBlrVZKwPoRTmM8+D1sJac9Bq8eaa2jiZlHLZIWekKWI1yMWYZvCCEezY9ASPwRhULYDJB2VhKOBUUeg3S5JQ==", + "version": "4.0.26", + "resolved": "https://registry.npmjs.org/ai/-/ai-4.0.26.tgz", + "integrity": "sha512-IDnSkiH0C+s+9jfKA5M8vO6PO279b5N/OtkbIy4gYtajLT5i52OobssG6LdZExZQxlYgBvXvTl3YskKJE/kD8Q==", "dev": true, "license": "Apache-2.0", "dependencies": { - "@ai-sdk/provider": "0.0.26", - "@ai-sdk/provider-utils": "1.0.22", - "@ai-sdk/react": "0.0.70", - "@ai-sdk/solid": "0.0.54", - "@ai-sdk/svelte": "0.0.57", - "@ai-sdk/ui-utils": "0.0.50", - "@ai-sdk/vue": "0.0.59", + "@ai-sdk/provider": "1.0.3", + "@ai-sdk/provider-utils": "2.0.5", + "@ai-sdk/react": "1.0.7", + "@ai-sdk/ui-utils": "1.0.6", "@opentelemetry/api": "1.9.0", - "eventsource-parser": "1.1.2", - "json-schema": "^0.4.0", "jsondiffpatch": "0.6.0", - "secure-json-parse": "^2.7.0", - "zod-to-json-schema": "^3.23.3" + "zod-to-json-schema": 
"^3.23.5" }, "engines": { "node": ">=18" }, "peerDependencies": { - "openai": "^4.42.0", "react": "^18 || ^19 || ^19.0.0-rc", - "sswr": "^2.1.0", - "svelte": "^3.0.0 || ^4.0.0 || ^5.0.0", "zod": "^3.0.0" }, "peerDependenciesMeta": { - "openai": { - "optional": true - }, "react": { "optional": true }, - "sswr": { - "optional": true - }, - "svelte": { - "optional": true - }, "zod": { "optional": true } } }, "node_modules/ai/node_modules/@ai-sdk/provider": { - "version": "0.0.26", - "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-0.0.26.tgz", - "integrity": "sha512-dQkfBDs2lTYpKM8389oopPdQgIU007GQyCbuPPrV+K6MtSII3HBfE0stUIMXUb44L+LK1t6GXPP7wjSzjO6uKg==", + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-1.0.3.tgz", + "integrity": "sha512-WiuJEpHTrltOIzv3x2wx4gwksAHW0h6nK3SoDzjqCOJLu/2OJ1yASESTIX+f07ChFykHElVoP80Ol/fe9dw6tQ==", "dev": true, "license": "Apache-2.0", "dependencies": { @@ -3295,6 +3603,109 @@ "zod": "^3.0.0" } }, + "node_modules/braintrust/node_modules/@ai-sdk/provider-utils": { + "version": "1.0.22", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider-utils/-/provider-utils-1.0.22.tgz", + "integrity": "sha512-YHK2rpj++wnLVc9vPGzGFP3Pjeld2MwhKinetA0zKXOoHAT/Jit5O8kZsxcSlJPu9wvcGT1UGZEjZrtO7PfFOQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "0.0.26", + "eventsource-parser": "^1.1.2", + "nanoid": "^3.3.7", + "secure-json-parse": "^2.7.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.0.0" + }, + "peerDependenciesMeta": { + "zod": { + "optional": true + } + } + }, + "node_modules/braintrust/node_modules/@ai-sdk/provider-utils/node_modules/@ai-sdk/provider": { + "version": "0.0.26", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-0.0.26.tgz", + "integrity": "sha512-dQkfBDs2lTYpKM8389oopPdQgIU007GQyCbuPPrV+K6MtSII3HBfE0stUIMXUb44L+LK1t6GXPP7wjSzjO6uKg==", + "dev": true, + "license": "Apache-2.0", 
+ "dependencies": { + "json-schema": "^0.4.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/braintrust/node_modules/@ai-sdk/react": { + "version": "0.0.70", + "resolved": "https://registry.npmjs.org/@ai-sdk/react/-/react-0.0.70.tgz", + "integrity": "sha512-GnwbtjW4/4z7MleLiW+TOZC2M29eCg1tOUpuEiYFMmFNZK8mkrqM0PFZMo6UsYeUYMWqEOOcPOU9OQVJMJh7IQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider-utils": "1.0.22", + "@ai-sdk/ui-utils": "0.0.50", + "swr": "^2.2.5", + "throttleit": "2.1.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "react": "^18 || ^19 || ^19.0.0-rc", + "zod": "^3.0.0" + }, + "peerDependenciesMeta": { + "react": { + "optional": true + }, + "zod": { + "optional": true + } + } + }, + "node_modules/braintrust/node_modules/@ai-sdk/ui-utils": { + "version": "0.0.50", + "resolved": "https://registry.npmjs.org/@ai-sdk/ui-utils/-/ui-utils-0.0.50.tgz", + "integrity": "sha512-Z5QYJVW+5XpSaJ4jYCCAVG7zIAuKOOdikhgpksneNmKvx61ACFaf98pmOd+xnjahl0pIlc/QIe6O4yVaJ1sEaw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "0.0.26", + "@ai-sdk/provider-utils": "1.0.22", + "json-schema": "^0.4.0", + "secure-json-parse": "^2.7.0", + "zod-to-json-schema": "^3.23.3" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.0.0" + }, + "peerDependenciesMeta": { + "zod": { + "optional": true + } + } + }, + "node_modules/braintrust/node_modules/@ai-sdk/ui-utils/node_modules/@ai-sdk/provider": { + "version": "0.0.26", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-0.0.26.tgz", + "integrity": "sha512-dQkfBDs2lTYpKM8389oopPdQgIU007GQyCbuPPrV+K6MtSII3HBfE0stUIMXUb44L+LK1t6GXPP7wjSzjO6uKg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "json-schema": "^0.4.0" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/braintrust/node_modules/@braintrust/core": { "version": "0.0.67", "resolved": 
"https://registry.npmjs.org/@braintrust/core/-/core-0.0.67.tgz", @@ -3681,6 +4092,68 @@ "node": ">=12" } }, + "node_modules/braintrust/node_modules/ai": { + "version": "3.4.33", + "resolved": "https://registry.npmjs.org/ai/-/ai-3.4.33.tgz", + "integrity": "sha512-plBlrVZKwPoRTmM8+D1sJac9Bq8eaa2jiZlHLZIWekKWI1yMWYZvCCEezY9ASPwRhULYDJB2VhKOBUUeg3S5JQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "0.0.26", + "@ai-sdk/provider-utils": "1.0.22", + "@ai-sdk/react": "0.0.70", + "@ai-sdk/solid": "0.0.54", + "@ai-sdk/svelte": "0.0.57", + "@ai-sdk/ui-utils": "0.0.50", + "@ai-sdk/vue": "0.0.59", + "@opentelemetry/api": "1.9.0", + "eventsource-parser": "1.1.2", + "json-schema": "^0.4.0", + "jsondiffpatch": "0.6.0", + "secure-json-parse": "^2.7.0", + "zod-to-json-schema": "^3.23.3" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "openai": "^4.42.0", + "react": "^18 || ^19 || ^19.0.0-rc", + "sswr": "^2.1.0", + "svelte": "^3.0.0 || ^4.0.0 || ^5.0.0", + "zod": "^3.0.0" + }, + "peerDependenciesMeta": { + "openai": { + "optional": true + }, + "react": { + "optional": true + }, + "sswr": { + "optional": true + }, + "svelte": { + "optional": true + }, + "zod": { + "optional": true + } + } + }, + "node_modules/braintrust/node_modules/ai/node_modules/@ai-sdk/provider": { + "version": "0.0.26", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-0.0.26.tgz", + "integrity": "sha512-dQkfBDs2lTYpKM8389oopPdQgIU007GQyCbuPPrV+K6MtSII3HBfE0stUIMXUb44L+LK1t6GXPP7wjSzjO6uKg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "json-schema": "^0.4.0" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/braintrust/node_modules/esbuild": { "version": "0.18.20", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.18.20.tgz", @@ -3962,12 +4435,16 @@ "node": ">=4" } }, - "node_modules/client-only": { - "version": "0.0.1", - "resolved": 
"https://registry.npmjs.org/client-only/-/client-only-0.0.1.tgz", - "integrity": "sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA==", + "node_modules/clsx": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/clsx/-/clsx-2.1.1.tgz", + "integrity": "sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==", "dev": true, - "license": "MIT" + "license": "MIT", + "peer": true, + "engines": { + "node": ">=6" + } }, "node_modules/color": { "version": "4.2.3", @@ -4293,6 +4770,16 @@ "node": ">= 0.8" } }, + "node_modules/dequal": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", + "integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/destroy": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.2.0.tgz", @@ -4832,15 +5319,14 @@ } }, "node_modules/esrap": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/esrap/-/esrap-1.2.3.tgz", - "integrity": "sha512-ZlQmCCK+n7SGoqo7DnfKaP1sJZa49P01/dXzmjCASSo04p72w8EksT2NMK8CEX8DhKsfJXANioIw8VyHNsBfvQ==", + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/esrap/-/esrap-1.3.2.tgz", + "integrity": "sha512-C4PXusxYhFT98GjLSmb20k9PREuUdporer50dhzGuJu9IJXktbMddVCMLAERl5dAHyAi73GWWCE4FVHGP1794g==", "dev": true, "license": "MIT", "peer": true, "dependencies": { - "@jridgewell/sourcemap-codec": "^1.4.15", - "@types/estree": "^1.0.1" + "@jridgewell/sourcemap-codec": "^1.4.15" } }, "node_modules/esrecurse": { @@ -5753,14 +6239,6 @@ "node": ">=0.10.0" } }, - "node_modules/js-tokens": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", - "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", - "dev": 
true, - "license": "MIT", - "peer": true - }, "node_modules/js-yaml": { "version": "3.14.1", "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz", @@ -5963,20 +6441,6 @@ "dev": true, "license": "MIT" }, - "node_modules/loose-envify": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", - "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", - "dev": true, - "license": "MIT", - "peer": true, - "dependencies": { - "js-tokens": "^3.0.0 || ^4.0.0" - }, - "bin": { - "loose-envify": "cli.js" - } - }, "node_modules/lru-cache": { "version": "10.4.3", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", @@ -5985,9 +6449,9 @@ "license": "ISC" }, "node_modules/magic-string": { - "version": "0.30.14", - "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.14.tgz", - "integrity": "sha512-5c99P1WKTed11ZC0HMJOj6CDIue6F8ySu+bJL+85q1zBEIY8IklrJ1eiKC2NDRh3Ct3FcvmJPyQHb9erXMTJNw==", + "version": "0.30.17", + "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.17.tgz", + "integrity": "sha512-sNPKHvyjVf7gyjwS4xGTaW/mCnF8wnjtifKBEhxfZ7E/S8tQ0rssrwGNn6q8JH/ohItJfSQp9mBtQYuTlH5QnA==", "dev": true, "license": "MIT", "peer": true, @@ -6969,15 +7433,12 @@ } }, "node_modules/react": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", - "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", + "version": "19.0.0", + "resolved": "https://registry.npmjs.org/react/-/react-19.0.0.tgz", + "integrity": "sha512-V8AVnmPIICiWpGfm6GLzCR/W5FXLchHop40W4nXBmdlEceh16rCN8O8LNWm5bh5XUX91fh7KpA+W0TgMKmgTpQ==", "dev": true, "license": "MIT", "peer": true, - "dependencies": { - "loose-envify": "^1.1.0" - }, "engines": { "node": ">=0.10.0" } @@ -7641,9 +8102,9 @@ } }, "node_modules/svelte": { - "version": "5.10.0", - "resolved": 
"https://registry.npmjs.org/svelte/-/svelte-5.10.0.tgz", - "integrity": "sha512-jGJFpB9amHLLQZBbAuQ6csH7WlTvGx4cO4wSSNcgGcx9vDGMTCZzTREf6/wKhVUQDoK+GapgvLQPZHa3e9MOAA==", + "version": "5.16.1", + "resolved": "https://registry.npmjs.org/svelte/-/svelte-5.16.1.tgz", + "integrity": "sha512-FsA1OjAKMAFSDob6j/Tv2ZV9rY4SeqPd1WXQlQkFkePAozSHLp6tbkU9qa1xJ+uTRzMSM2Vx3USdsYZBXd3H3g==", "dev": true, "license": "MIT", "peer": true, @@ -7655,8 +8116,9 @@ "acorn-typescript": "^1.4.13", "aria-query": "^5.3.1", "axobject-query": "^4.1.0", + "clsx": "^2.1.1", "esm-env": "^1.2.1", - "esrap": "^1.2.3", + "esrap": "^1.3.2", "is-reference": "^3.0.3", "locate-character": "^3.0.0", "magic-string": "^0.30.11", @@ -7667,17 +8129,17 @@ } }, "node_modules/swr": { - "version": "2.2.5", - "resolved": "https://registry.npmjs.org/swr/-/swr-2.2.5.tgz", - "integrity": "sha512-QtxqyclFeAsxEUeZIYmsaQ0UjimSq1RZ9Un7I68/0ClKK/U3LoyQunwkQfJZr2fc22DfIXLNDc2wFyTEikCUpg==", + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/swr/-/swr-2.3.0.tgz", + "integrity": "sha512-NyZ76wA4yElZWBHzSgEJc28a0u6QZvhb6w0azeL2k7+Q1gAzVK+IqQYXhVOC/mzi+HZIozrZvBVeSeOZNR2bqA==", "dev": true, "license": "MIT", "dependencies": { - "client-only": "^0.0.1", - "use-sync-external-store": "^1.2.0" + "dequal": "^2.0.3", + "use-sync-external-store": "^1.4.0" }, "peerDependencies": { - "react": "^16.11.0 || ^17.0.0 || ^18.0.0" + "react": "^16.11.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" } }, "node_modules/swrev": { diff --git a/package.json b/package.json index 9bd4f4ab..e5a5ebfa 100644 --- a/package.json +++ b/package.json @@ -6,11 +6,12 @@ "module": "./dist/index.js", "types": "./dist/index.d.ts", "scripts": { - "popup": "npm run build-dom-scripts && tsx examples/popup.ts", "2048": "npm run build-dom-scripts && tsx examples/2048.ts", + "popup": "npm run build-dom-scripts && tsx examples/popup.ts", "example": "npm run build-dom-scripts && tsx examples/example.ts", "debug-url": "npm run build-dom-scripts && tsx 
examples/debugUrl.ts", "external-client": "npm run build-dom-scripts && tsx examples/external_client.ts", + "ai-sdk-client": "npm run build-dom-scripts && tsx examples/ai_sdk_example.ts", "format": "prettier --write .", "prettier": "prettier --check .", "prettier:fix": "prettier --write .", @@ -35,6 +36,8 @@ "author": "Browserbase", "license": "MIT", "devDependencies": { + "@ai-sdk/google": "^1.0.13", + "@ai-sdk/openai": "^1.0.14", "@changesets/changelog-github": "^0.5.0", "@changesets/cli": "^2.27.9", "@eslint/js": "^9.16.0", @@ -44,6 +47,7 @@ "@types/node": "^20.11.30", "@types/ws": "^8.5.13", "adm-zip": "^0.5.16", + "ai": "^4.0.26", "autoevals": "^0.0.64", "braintrust": "^0.0.171", "cheerio": "^1.0.0", From 92b0ca088fb795d1937657099c470e93208f442a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 8 Jan 2025 02:25:30 -0800 Subject: [PATCH 15/20] Version Packages (#345) Co-authored-by: github-actions[bot] --- .changeset/calm-teachers-rescue.md | 5 ----- .changeset/chilled-kangaroos-rhyme.md | 5 ----- .changeset/fast-dodos-yawn.md | 5 ----- .changeset/few-elephants-cough.md | 5 ----- .changeset/mean-swans-fix.md | 5 ----- .changeset/ninety-timers-punch.md | 5 ----- .changeset/polite-papayas-occur.md | 5 ----- .changeset/poor-eels-sin.md | 5 ----- .changeset/shiny-ladybugs-shave.md | 5 ----- .changeset/shiny-scissors-hear.md | 5 ----- .changeset/soft-snails-lick.md | 5 ----- .changeset/spicy-singers-flow.md | 5 ----- .changeset/tender-years-crash.md | 5 ----- CHANGELOG.md | 32 +++++++++++++++++++++++++++ package.json | 2 +- 15 files changed, 33 insertions(+), 66 deletions(-) delete mode 100644 .changeset/calm-teachers-rescue.md delete mode 100644 .changeset/chilled-kangaroos-rhyme.md delete mode 100644 .changeset/fast-dodos-yawn.md delete mode 100644 .changeset/few-elephants-cough.md delete mode 100644 .changeset/mean-swans-fix.md delete mode 100644 .changeset/ninety-timers-punch.md delete mode 
100644 .changeset/polite-papayas-occur.md delete mode 100644 .changeset/poor-eels-sin.md delete mode 100644 .changeset/shiny-ladybugs-shave.md delete mode 100644 .changeset/shiny-scissors-hear.md delete mode 100644 .changeset/soft-snails-lick.md delete mode 100644 .changeset/spicy-singers-flow.md delete mode 100644 .changeset/tender-years-crash.md diff --git a/.changeset/calm-teachers-rescue.md b/.changeset/calm-teachers-rescue.md deleted file mode 100644 index c77cc7b7..00000000 --- a/.changeset/calm-teachers-rescue.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"@browserbasehq/stagehand": patch ---- - -Unified LLM input/output types for reduced dependence on OpenAI types diff --git a/.changeset/chilled-kangaroos-rhyme.md b/.changeset/chilled-kangaroos-rhyme.md deleted file mode 100644 index 0ed10ada..00000000 --- a/.changeset/chilled-kangaroos-rhyme.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"@browserbasehq/stagehand": patch ---- - -Fix $1-types exposed to the user diff --git a/.changeset/fast-dodos-yawn.md b/.changeset/fast-dodos-yawn.md deleted file mode 100644 index ac075f2c..00000000 --- a/.changeset/fast-dodos-yawn.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"@browserbasehq/stagehand": patch ---- - -Throw custom error if context is referenced without initialization, remove act/extract handler from index diff --git a/.changeset/few-elephants-cough.md b/.changeset/few-elephants-cough.md deleted file mode 100644 index f81b59ab..00000000 --- a/.changeset/few-elephants-cough.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"@browserbasehq/stagehand": minor ---- - -Pass in a Stagehand Page object into the `on("popup")` listener to allow for multi-page handling. diff --git a/.changeset/mean-swans-fix.md b/.changeset/mean-swans-fix.md deleted file mode 100644 index e8f52844..00000000 --- a/.changeset/mean-swans-fix.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"@browserbasehq/stagehand": minor ---- - -Logger in LLMClient is inherited by default from Stagehand. 
Named rather than positional arguments are used in implemented LLMClients. diff --git a/.changeset/ninety-timers-punch.md b/.changeset/ninety-timers-punch.md deleted file mode 100644 index 86078980..00000000 --- a/.changeset/ninety-timers-punch.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"@browserbasehq/stagehand": patch ---- - -Remove stagehand nav entirely diff --git a/.changeset/polite-papayas-occur.md b/.changeset/polite-papayas-occur.md deleted file mode 100644 index ab101e4a..00000000 --- a/.changeset/polite-papayas-occur.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"@browserbasehq/stagehand": patch ---- - -dont require LLM Client to use non-ai stagehand functions diff --git a/.changeset/poor-eels-sin.md b/.changeset/poor-eels-sin.md deleted file mode 100644 index cc75ecb5..00000000 --- a/.changeset/poor-eels-sin.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"@browserbasehq/stagehand": patch ---- - -pretty readme :) diff --git a/.changeset/shiny-ladybugs-shave.md b/.changeset/shiny-ladybugs-shave.md deleted file mode 100644 index f95423fd..00000000 --- a/.changeset/shiny-ladybugs-shave.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"@browserbasehq/stagehand": patch ---- - -Added example implementation of the Vercel AI SDK as an LLMClient diff --git a/.changeset/shiny-scissors-hear.md b/.changeset/shiny-scissors-hear.md deleted file mode 100644 index 93821095..00000000 --- a/.changeset/shiny-scissors-hear.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"@browserbasehq/stagehand": minor ---- - -make logs only sync diff --git a/.changeset/soft-snails-lick.md b/.changeset/soft-snails-lick.md deleted file mode 100644 index 3ab64ebc..00000000 --- a/.changeset/soft-snails-lick.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"@browserbasehq/stagehand": minor ---- - -Moved the LLMClient logger paremeter to the createChatCompletion method options. 
diff --git a/.changeset/spicy-singers-flow.md b/.changeset/spicy-singers-flow.md deleted file mode 100644 index 6c09c5cf..00000000 --- a/.changeset/spicy-singers-flow.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"@browserbasehq/stagehand": minor ---- - -exposed llmClient in stagehand constructor diff --git a/.changeset/tender-years-crash.md b/.changeset/tender-years-crash.md deleted file mode 100644 index 8ef8d8bb..00000000 --- a/.changeset/tender-years-crash.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"@browserbasehq/stagehand": patch ---- - -Remove duplicate logging and expose Page/BrowserContext types diff --git a/CHANGELOG.md b/CHANGELOG.md index f15c529e..df46599c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,37 @@ # @browserbasehq/stagehand +## 1.9.0 + +### Minor Changes + +- [#374](https://github.com/browserbase/stagehand/pull/374) [`207244e`](https://github.com/browserbase/stagehand/commit/207244e3a46c4474d4d28db039eab131164790ca) Thanks [@sameelarif](https://github.com/sameelarif)! - Pass in a Stagehand Page object into the `on("popup")` listener to allow for multi-page handling. + +- [#367](https://github.com/browserbase/stagehand/pull/367) [`75c0e20`](https://github.com/browserbase/stagehand/commit/75c0e20cde54951399753e0fa841df463e1271b8) Thanks [@kamath](https://github.com/kamath)! - Logger in LLMClient is inherited by default from Stagehand. Named rather than positional arguments are used in implemented LLMClients. + +- [#381](https://github.com/browserbase/stagehand/pull/381) [`db2ef59`](https://github.com/browserbase/stagehand/commit/db2ef5997664e81b1dfb5ca992392362f2d3bab1) Thanks [@kamath](https://github.com/kamath)! - make logs only sync + +- [#385](https://github.com/browserbase/stagehand/pull/385) [`5899ec2`](https://github.com/browserbase/stagehand/commit/5899ec2c4b73c636bfd8120ec3aac225af7dd949) Thanks [@sameelarif](https://github.com/sameelarif)! - Moved the LLMClient logger paremeter to the createChatCompletion method options. 
+ +- [#364](https://github.com/browserbase/stagehand/pull/364) [`08907eb`](https://github.com/browserbase/stagehand/commit/08907ebbc2cb47cfc3151946764656a7f4ce99c6) Thanks [@kamath](https://github.com/kamath)! - exposed llmClient in stagehand constructor + +### Patch Changes + +- [#383](https://github.com/browserbase/stagehand/pull/383) [`a77efcc`](https://github.com/browserbase/stagehand/commit/a77efccfde3a3948013eda3a52935e8a21d45b3e) Thanks [@sameelarif](https://github.com/sameelarif)! - Unified LLM input/output types for reduced dependence on OpenAI types + +- [`b7b3701`](https://github.com/browserbase/stagehand/commit/b7b370160bf35b09f5dc132f6e86f6e34fb70a85) Thanks [@kamath](https://github.com/kamath)! - Fix $1-types exposed to the user + +- [#353](https://github.com/browserbase/stagehand/pull/353) [`5c6f14b`](https://github.com/browserbase/stagehand/commit/5c6f14bade201e08cb86d2e14e246cb65707f7ee) Thanks [@kamath](https://github.com/kamath)! - Throw custom error if context is referenced without initialization, remove act/extract handler from index + +- [#360](https://github.com/browserbase/stagehand/pull/360) [`89841fc`](https://github.com/browserbase/stagehand/commit/89841fc42ae82559baddfe2a9593bc3260c082a2) Thanks [@kamath](https://github.com/kamath)! - Remove stagehand nav entirely + +- [#379](https://github.com/browserbase/stagehand/pull/379) [`b1c6579`](https://github.com/browserbase/stagehand/commit/b1c657976847de86d82324030f90c2f6a1f3f976) Thanks [@seanmcguire12](https://github.com/seanmcguire12)! - dont require LLM Client to use non-ai stagehand functions + +- [#371](https://github.com/browserbase/stagehand/pull/371) [`30e7d09`](https://github.com/browserbase/stagehand/commit/30e7d091445004c71aec1748d3a7d75fb86d1f11) Thanks [@kamath](https://github.com/kamath)! 
- pretty readme :) + +- [#382](https://github.com/browserbase/stagehand/pull/382) [`a41271b`](https://github.com/browserbase/stagehand/commit/a41271baf351e20f4c79b4b654d8a947b615a121) Thanks [@sameelarif](https://github.com/sameelarif)! - Added example implementation of the Vercel AI SDK as an LLMClient + +- [#344](https://github.com/browserbase/stagehand/pull/344) [`c1cf345`](https://github.com/browserbase/stagehand/commit/c1cf34535ed30262989b1dbe262fb0414cdf8230) Thanks [@kamath](https://github.com/kamath)! - Remove duplicate logging and expose Page/BrowserContext types + ## 1.8.0 ### Minor Changes diff --git a/package.json b/package.json index e5a5ebfa..cdc17b8f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@browserbasehq/stagehand", - "version": "1.8.0", + "version": "1.9.0", "description": "An AI web browsing framework focused on simplicity and extensibility.", "main": "./dist/index.js", "module": "./dist/index.js", From 2cee0a45ae2b48d1de6543b196e338e7021e59fe Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Wed, 8 Jan 2025 11:30:56 -0500 Subject: [PATCH 16/20] add demo (#386) * add demo * center align * add changeset --- .changeset/hot-moose-stare.md | 5 +++++ README.md | 9 +++++++++ 2 files changed, 14 insertions(+) create mode 100644 .changeset/hot-moose-stare.md diff --git a/.changeset/hot-moose-stare.md b/.changeset/hot-moose-stare.md new file mode 100644 index 00000000..97bfe6a8 --- /dev/null +++ b/.changeset/hot-moose-stare.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +add demo gif diff --git a/README.md b/README.md index 7227ae08..840b272f 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,15 @@ Visit [docs.stagehand.dev](https://docs.stagehand.dev) to view the full document ## Getting Started + + ### Quickstart To create a new Stagehand project configured to our default settings, run: From ff00965160d568ae0bc3ca437c01f95b5c6e9039 Mon Sep 17 00:00:00 2001 From: Sameel Date: Wed, 8 Jan 2025 10:22:14 
-0800 Subject: [PATCH 17/20] allow `systemPrompt` input (#373) * allow `instructions` input * changeset * remove invalid assignment * move instructions to build functions * fix user prompt builder * remove old code * patch -> minor * remove log * return empty string when no instructions are provided * user prompt improvements * enhance act prompt * instructions example * update instructions example * update act system prompt * add to instructions example * prompt fixes * rename instructions to systemPrompt * add system prompt eval * change instructions eval category --- .changeset/sweet-mice-compare.md | 5 +++ evals/evals.config.json | 4 +++ evals/initStagehand.ts | 5 ++- evals/tasks/instructions.ts | 53 ++++++++++++++++++++++++++++++++ examples/instructions.ts | 32 +++++++++++++++++++ lib/StagehandPage.ts | 4 +++ lib/handlers/actHandler.ts | 5 +++ lib/handlers/extractHandler.ts | 6 ++++ lib/handlers/observeHandler.ts | 8 +++-- lib/index.ts | 4 +++ lib/inference.ts | 16 +++++++--- lib/llm/AnthropicClient.ts | 4 +++ lib/llm/LLMClient.ts | 4 ++- lib/prompt.ts | 44 ++++++++++++++++++++++---- package.json | 1 + types/act.ts | 1 + types/stagehand.ts | 4 +++ 17 files changed, 186 insertions(+), 14 deletions(-) create mode 100644 .changeset/sweet-mice-compare.md create mode 100644 evals/tasks/instructions.ts create mode 100644 examples/instructions.ts diff --git a/.changeset/sweet-mice-compare.md b/.changeset/sweet-mice-compare.md new file mode 100644 index 00000000..08cd0626 --- /dev/null +++ b/.changeset/sweet-mice-compare.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": minor +--- + +Allow the input of custom instructions into the constructor so that users can guide, or provide guardrails to, the LLM in making decisions. 
diff --git a/evals/evals.config.json b/evals/evals.config.json index 6b3788df..af67a546 100644 --- a/evals/evals.config.json +++ b/evals/evals.config.json @@ -4,6 +4,10 @@ "name": "amazon_add_to_cart", "categories": ["act"] }, + { + "name": "instructions", + "categories": ["combination"] + }, { "name": "bidnet", "categories": ["act"] diff --git a/evals/initStagehand.ts b/evals/initStagehand.ts index 8aff6ce7..8a692286 100644 --- a/evals/initStagehand.ts +++ b/evals/initStagehand.ts @@ -11,7 +11,7 @@ */ import { enableCaching, env } from "./env"; -import { AvailableModel, LogLine, Stagehand } from "../lib"; +import { AvailableModel, ConstructorParams, LogLine, Stagehand } from "../lib"; import { EvalLogger } from "./logger"; /** @@ -54,10 +54,12 @@ export const initStagehand = async ({ modelName, domSettleTimeoutMs, logger, + configOverrides, }: { modelName: AvailableModel; domSettleTimeoutMs?: number; logger: EvalLogger; + configOverrides?: Partial; }) => { let chosenApiKey: string | undefined = process.env.OPENAI_API_KEY; if (modelName.startsWith("claude")) { @@ -74,6 +76,7 @@ export const initStagehand = async ({ logger: (logLine: LogLine) => { logger.log(logLine); }, + ...configOverrides, }; const stagehand = new Stagehand(config); diff --git a/evals/tasks/instructions.ts b/evals/tasks/instructions.ts new file mode 100644 index 00000000..78f1ff58 --- /dev/null +++ b/evals/tasks/instructions.ts @@ -0,0 +1,53 @@ +import { EvalFunction } from "../../types/evals"; +import { initStagehand } from "../initStagehand"; + +export const instructions: EvalFunction = async ({ modelName, logger }) => { + const { stagehand, initResponse } = await initStagehand({ + modelName, + logger, + configOverrides: { + systemPrompt: + "if the users says `secret12345`, click on the 'quickstart' tab", + }, + }); + + const { debugUrl, sessionUrl } = initResponse; + + try { + const page = stagehand.page; + + await page.goto("https://docs.browserbase.com/"); + + await page.act({ + action: 
"secret12345", + }); + + await page.waitForLoadState("domcontentloaded"); + + const url = page.url(); + + const isCorrectUrl = + url === "https://docs.browserbase.com/quickstart/playwright"; + + await stagehand.close(); + + return { + _success: isCorrectUrl, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } catch (error) { + console.error("Error or timeout occurred:", error); + + await stagehand.close(); + + return { + _success: false, + error: JSON.parse(JSON.stringify(error, null, 2)), + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } +}; diff --git a/examples/instructions.ts b/examples/instructions.ts new file mode 100644 index 00000000..fd47d02a --- /dev/null +++ b/examples/instructions.ts @@ -0,0 +1,32 @@ +/** + * This example shows how to use custom instructions with Stagehand. + */ +import { Stagehand } from "../lib"; +import StagehandConfig from "./stagehand.config"; + +async function example() { + const stagehand = new Stagehand({ + ...StagehandConfig, + systemPrompt: + "if the users says `secret12345`, click on the 'quickstart' tab. 
additionally, if the user says to type something, translate their input into french and type it.", + }); + await stagehand.init(); + + const page = stagehand.page; + + await page.goto("https://docs.browserbase.com/"); + + await page.act({ + action: "secret12345", + }); + + await page.act({ + action: "search for 'how to use browserbase'", + }); + + await stagehand.close(); +} + +(async () => { + await example(); +})(); diff --git a/lib/StagehandPage.ts b/lib/StagehandPage.ts index d7bcafe5..e7a21cf9 100644 --- a/lib/StagehandPage.ts +++ b/lib/StagehandPage.ts @@ -31,6 +31,7 @@ export class StagehandPage { stagehand: Stagehand, context: StagehandContext, llmClient: LLMClient, + userProvidedInstructions?: string, ) { this.intPage = Object.assign(page, { act: () => { @@ -66,16 +67,19 @@ export class StagehandPage { stagehandPage: this, stagehandContext: this.intContext, llmClient: llmClient, + userProvidedInstructions, }); this.extractHandler = new StagehandExtractHandler({ stagehand: this.stagehand, logger: this.stagehand.logger, stagehandPage: this, + userProvidedInstructions, }); this.observeHandler = new StagehandObserveHandler({ stagehand: this.stagehand, logger: this.stagehand.logger, stagehandPage: this, + userProvidedInstructions, }); } } diff --git a/lib/handlers/actHandler.ts b/lib/handlers/actHandler.ts index b4966639..d3f5165d 100644 --- a/lib/handlers/actHandler.ts +++ b/lib/handlers/actHandler.ts @@ -23,6 +23,7 @@ export class StagehandActHandler { private readonly actions: { [key: string]: { result: string; action: string }; }; + private readonly userProvidedInstructions?: string; constructor({ verbose, @@ -30,6 +31,7 @@ export class StagehandActHandler { enableCaching, logger, stagehandPage, + userProvidedInstructions, }: { verbose: 0 | 1 | 2; llmProvider: LLMProvider; @@ -38,6 +40,7 @@ export class StagehandActHandler { llmClient: LLMClient; stagehandPage: StagehandPage; stagehandContext: StagehandContext; + userProvidedInstructions?: string; }) { 
this.verbose = verbose; this.llmProvider = llmProvider; @@ -46,6 +49,7 @@ export class StagehandActHandler { this.actionCache = enableCaching ? new ActionCache(this.logger) : undefined; this.actions = {}; this.stagehandPage = stagehandPage; + this.userProvidedInstructions = userProvidedInstructions; } private async _recordAction(action: string, result: string): Promise { @@ -1133,6 +1137,7 @@ export class StagehandActHandler { logger: this.logger, requestId, variables, + userProvidedInstructions: this.userProvidedInstructions, }); this.logger({ diff --git a/lib/handlers/extractHandler.ts b/lib/handlers/extractHandler.ts index 6f57082b..850ea828 100644 --- a/lib/handlers/extractHandler.ts +++ b/lib/handlers/extractHandler.ts @@ -83,11 +83,13 @@ export class StagehandExtractHandler { private readonly stagehand: Stagehand; private readonly stagehandPage: StagehandPage; private readonly logger: (logLine: LogLine) => void; + private readonly userProvidedInstructions?: string; constructor({ stagehand, logger, stagehandPage, + userProvidedInstructions, }: { stagehand: Stagehand; logger: (message: { @@ -97,10 +99,12 @@ export class StagehandExtractHandler { auxiliary?: { [key: string]: { value: string; type: string } }; }) => void; stagehandPage: StagehandPage; + userProvidedInstructions?: string; }) { this.stagehand = stagehand; this.logger = logger; this.stagehandPage = stagehandPage; + this.userProvidedInstructions = userProvidedInstructions; } public async extract({ @@ -306,6 +310,7 @@ export class StagehandExtractHandler { chunksTotal: 1, llmClient, requestId, + userProvidedInstructions: this.userProvidedInstructions, logger: this.logger, }); @@ -435,6 +440,7 @@ export class StagehandExtractHandler { chunksTotal: chunks.length, requestId, isUsingTextExtract: false, + userProvidedInstructions: this.userProvidedInstructions, logger: this.logger, }); diff --git a/lib/handlers/observeHandler.ts b/lib/handlers/observeHandler.ts index 70d07435..10f4990c 100644 --- 
a/lib/handlers/observeHandler.ts +++ b/lib/handlers/observeHandler.ts @@ -2,9 +2,9 @@ import { LogLine } from "../../types/log"; import { Stagehand } from "../index"; import { observe } from "../inference"; import { LLMClient } from "../llm/LLMClient"; +import { StagehandPage } from "../StagehandPage"; import { generateId } from "../utils"; import { ScreenshotService } from "../vision"; -import { StagehandPage } from "../StagehandPage"; export class StagehandObserveHandler { private readonly stagehand: Stagehand; @@ -17,19 +17,22 @@ export class StagehandObserveHandler { instruction: string; }; }; - + private readonly userProvidedInstructions?: string; constructor({ stagehand, logger, stagehandPage, + userProvidedInstructions, }: { stagehand: Stagehand; logger: (logLine: LogLine) => void; stagehandPage: StagehandPage; + userProvidedInstructions?: string; }) { this.stagehand = stagehand; this.logger = logger; this.stagehandPage = stagehandPage; + this.userProvidedInstructions = userProvidedInstructions; this.observations = {}; } @@ -120,6 +123,7 @@ export class StagehandObserveHandler { llmClient, image: annotatedScreenshot, requestId, + userProvidedInstructions: this.userProvidedInstructions, logger: this.logger, }); diff --git a/lib/index.ts b/lib/index.ts index cceb4d7c..b67ccdcb 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -326,6 +326,7 @@ export class Stagehand { public variables: { [key: string]: unknown }; private contextPath?: string; private llmClient: LLMClient; + private userProvidedInstructions?: string; constructor( { @@ -344,6 +345,7 @@ export class Stagehand { browserbaseSessionID, modelName, modelClientOptions, + systemPrompt, }: ConstructorParams = { env: "BROWSERBASE", }, @@ -377,6 +379,7 @@ export class Stagehand { this.headless = headless ?? 
false; this.browserbaseSessionCreateParams = browserbaseSessionCreateParams; this.browserbaseSessionID = browserbaseSessionID; + this.userProvidedInstructions = systemPrompt; } public get logger(): (logLine: LogLine) => void { @@ -450,6 +453,7 @@ export class Stagehand { this, this.stagehandContext, this.llmClient, + this.userProvidedInstructions, ).init(); // Set the browser to headless mode if specified diff --git a/lib/inference.ts b/lib/inference.ts index 76047056..ab82c448 100644 --- a/lib/inference.ts +++ b/lib/inference.ts @@ -104,9 +104,10 @@ export async function act({ logger, requestId, variables, + userProvidedInstructions, }: ActCommandParams): Promise { const messages: ChatMessage[] = [ - buildActSystemPrompt(), + buildActSystemPrompt(userProvidedInstructions), buildActUserPrompt(action, steps, domElements, variables), ]; @@ -167,6 +168,7 @@ export async function extract({ requestId, logger, isUsingTextExtract, + userProvidedInstructions, }: { instruction: string; previouslyExtractedContent: object; @@ -177,6 +179,7 @@ export async function extract({ chunksTotal: number; requestId: string; isUsingTextExtract?: boolean; + userProvidedInstructions?: string; logger: (message: LogLine) => void; }) { type ExtractionResponse = z.infer; @@ -187,7 +190,11 @@ export async function extract({ const extractionResponse = await llmClient.createChatCompletion({ options: { messages: [ - buildExtractSystemPrompt(isUsingAnthropic, isUsingTextExtract), + buildExtractSystemPrompt( + isUsingAnthropic, + isUsingTextExtract, + userProvidedInstructions, + ), buildExtractUserPrompt(instruction, domElements, isUsingAnthropic), ], response_model: { @@ -277,6 +284,7 @@ export async function observe({ llmClient, image, requestId, + userProvidedInstructions, logger, }: { instruction: string; @@ -284,6 +292,7 @@ export async function observe({ llmClient: LLMClient; image?: Buffer; requestId: string; + userProvidedInstructions?: string; logger: (message: LogLine) => void; }): 
Promise<{ elements: { elementId: number; description: string }[]; @@ -309,7 +318,7 @@ export async function observe({ await llmClient.createChatCompletion({ options: { messages: [ - buildObserveSystemPrompt(), + buildObserveSystemPrompt(userProvidedInstructions), buildObserveUserMessage(instruction, domElements), ], image: image @@ -327,7 +336,6 @@ export async function observe({ }, logger, }); - const parsedResponse = { elements: observationResponse.elements?.map((el) => ({ diff --git a/lib/llm/AnthropicClient.ts b/lib/llm/AnthropicClient.ts index 9a8237fc..824238cb 100644 --- a/lib/llm/AnthropicClient.ts +++ b/lib/llm/AnthropicClient.ts @@ -27,12 +27,14 @@ export class AnthropicClient extends LLMClient { cache, modelName, clientOptions, + userProvidedInstructions, }: { logger: (message: LogLine) => void; enableCaching?: boolean; cache?: LLMCache; modelName: AvailableModel; clientOptions?: ClientOptions; + userProvidedInstructions?: string; }) { super(modelName); this.client = new Anthropic(clientOptions); @@ -40,6 +42,7 @@ export class AnthropicClient extends LLMClient { this.enableCaching = enableCaching; this.modelName = modelName; this.clientOptions = clientOptions; + this.userProvidedInstructions = userProvidedInstructions; } async createChatCompletion({ @@ -61,6 +64,7 @@ export class AnthropicClient extends LLMClient { }, }, }); + // Try to get cached response const cacheOptions = { model: this.modelName, diff --git a/lib/llm/LLMClient.ts b/lib/llm/LLMClient.ts index 9e6257c8..1b8a302b 100644 --- a/lib/llm/LLMClient.ts +++ b/lib/llm/LLMClient.ts @@ -94,10 +94,12 @@ export abstract class LLMClient { public modelName: AvailableModel; public hasVision: boolean; public clientOptions: ClientOptions; + public userProvidedInstructions?: string; - constructor(modelName: AvailableModel) { + constructor(modelName: AvailableModel, userProvidedInstructions?: string) { this.modelName = modelName; this.hasVision = modelsWithVision.includes(modelName); + 
this.userProvidedInstructions = userProvidedInstructions; } abstract createChatCompletion( diff --git a/lib/prompt.ts b/lib/prompt.ts index 2c19152e..556b101e 100644 --- a/lib/prompt.ts +++ b/lib/prompt.ts @@ -12,6 +12,7 @@ You will receive: 2. the steps that you've taken so far 3. a list of active DOM elements in this chunk to consider to get closer to the goal. 4. Optionally, a list of variable names that the user has provided that you may use to accomplish the goal. To use the variables, you must use the special <|VARIABLE_NAME|> syntax. +5. Optionally, custom instructions will be provided by the user. If the user's instructions are not relevant to the current task, ignore them. Otherwise, make sure to adhere to them. ## Your Goal / Specification @@ -21,7 +22,7 @@ If the user's goal will be accomplished after running the playwright action, set Note 1: If there is a popup on the page for cookies or advertising that has nothing to do with the goal, try to close it first before proceeding. As this can block the goal from being completed. Note 2: Sometimes what your are looking for is hidden behind and element you need to interact with. For example, sliders, buttons, etc... -Again, if the user's goal will be accomplished after running the playwright action, set completed to true. +Again, if the user's goal will be accomplished after running the playwright action, set completed to true. Also, if the user provides custom instructions, it is imperative that you follow them no matter what. `; const verifyActCompletionSystemPrompt = ` @@ -96,10 +97,32 @@ ${domElements} }; } -export function buildActSystemPrompt(): ChatMessage { +export function buildUserInstructionsString( + userProvidedInstructions?: string, +): string { + if (!userProvidedInstructions) { + return ""; + } + + return `\n\n# Custom Instructions Provided by the User + +Please keep the user's instructions in mind when performing actions. 
If the user's instructions are not relevant to the current task, ignore them. + +User Instructions: +${userProvidedInstructions}`; +} + +export function buildActSystemPrompt( + userProvidedInstructions?: string, +): ChatMessage { return { role: "system", - content: actSystemPrompt, + content: [ + actSystemPrompt, + buildUserInstructionsString(userProvidedInstructions), + ] + .filter(Boolean) + .join("\n\n"), }; } @@ -199,6 +222,7 @@ export const actTools: LLMTool[] = [ export function buildExtractSystemPrompt( isUsingPrintExtractedDataTool: boolean = false, useTextExtract: boolean = true, + userProvidedInstructions?: string, ): ChatMessage { const baseContent = `You are extracting content on behalf of a user. If a user asks you to extract a 'list' of information, or 'all' information, @@ -232,10 +256,14 @@ ONLY print the content using the print_extracted_data tool provided. do not miss any important information.` : ""; + const userInstructions = buildUserInstructionsString( + userProvidedInstructions, + ); + const content = `${baseContent}${contentDetail}\n\n${instructions}\n${toolInstructions}${ additionalInstructions ? `\n\n${additionalInstructions}` : "" - }`.replace(/\s+/g, " "); + }${userInstructions ? `\n\n${userInstructions}` : ""}`.replace(/\s+/g, " "); return { role: "system", @@ -332,12 +360,16 @@ You will be given: Return an array of elements that match the instruction. 
`; -export function buildObserveSystemPrompt(): ChatMessage { +export function buildObserveSystemPrompt( + userProvidedInstructions?: string, +): ChatMessage { const content = observeSystemPrompt.replace(/\s+/g, " "); return { role: "system", - content, + content: [content, buildUserInstructionsString(userProvidedInstructions)] + .filter(Boolean) + .join("\n\n"), }; } diff --git a/package.json b/package.json index cdc17b8f..049e3353 100644 --- a/package.json +++ b/package.json @@ -11,6 +11,7 @@ "example": "npm run build-dom-scripts && tsx examples/example.ts", "debug-url": "npm run build-dom-scripts && tsx examples/debugUrl.ts", "external-client": "npm run build-dom-scripts && tsx examples/external_client.ts", + "instructions": "npm run build-dom-scripts && tsx examples/instructions.ts", "ai-sdk-client": "npm run build-dom-scripts && tsx examples/ai_sdk_example.ts", "format": "prettier --write .", "prettier": "prettier --check .", diff --git a/types/act.ts b/types/act.ts index 8e51e1ff..553d4a5e 100644 --- a/types/act.ts +++ b/types/act.ts @@ -14,6 +14,7 @@ export interface ActCommandParams { logger: (message: { category?: string; message: string }) => void; requestId: string; variables?: Record; + userProvidedInstructions?: string; } // WARNING: This is NOT to be confused with the ActResult type used in `page.act()`. diff --git a/types/stagehand.ts b/types/stagehand.ts index dc035762..ef74c84a 100644 --- a/types/stagehand.ts +++ b/types/stagehand.ts @@ -22,6 +22,10 @@ export interface ConstructorParams { modelName?: AvailableModel; llmClient?: LLMClient; modelClientOptions?: ClientOptions; + /** + * Instructions for stagehand. 
+ */ + systemPrompt?: string; } export interface InitOptions { From e93561d7875210ce7bd7fe841fb52decf6011fb3 Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Wed, 8 Jan 2025 21:57:24 -0500 Subject: [PATCH 18/20] Fix: Export LLMClient types (#388) * Fix: Export LLMClient types * changeset' --- .changeset/thin-squids-listen.md | 5 +++++ lib/index.ts | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 .changeset/thin-squids-listen.md diff --git a/.changeset/thin-squids-listen.md b/.changeset/thin-squids-listen.md new file mode 100644 index 00000000..a091077f --- /dev/null +++ b/.changeset/thin-squids-listen.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +Export LLMClient type diff --git a/lib/index.ts b/lib/index.ts index b67ccdcb..af6b0daa 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -626,4 +626,4 @@ export * from "../types/model"; export * from "../types/playwright"; export * from "../types/stagehand"; export * from "../types/page"; -export { LLMClient } from "./llm/LLMClient"; +export * from "./llm/LLMClient"; From 04555d47673eacf8764c78d99034ca93bea960c6 Mon Sep 17 00:00:00 2001 From: Sean McGuire <75873287+seanmcguire12@users.noreply.github.com> Date: Thu, 9 Jan 2025 15:51:16 -0800 Subject: [PATCH 19/20] combination needs e2e:bb (#398) --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3642d4ee..97b5be99 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -136,7 +136,7 @@ jobs: run: npm run e2e:bb run-combination-evals: - needs: [run-e2e-tests, determine-evals] + needs: [run-e2e-bb-tests, run-e2e-tests, determine-evals] runs-on: ubuntu-latest timeout-minutes: 40 env: From 7c484124db7bb587ea1b23631c29e3940cdf3a4e Mon Sep 17 00:00:00 2001 From: Sean McGuire <75873287+seanmcguire12@users.noreply.github.com> Date: Thu, 9 Jan 2025 18:31:05 -0800 Subject: [PATCH 20/20] rm/replace flakey evals (#397) * 
replace extract_regulations with a new eval * rm ibm eval * move to text_extract * address comment * prettier --- evals/evals.config.json | 8 +- evals/tasks/extract_csa.ts | 151 +++++++++++++++++++++++++++++ evals/tasks/extract_regulations.ts | 137 -------------------------- evals/tasks/ibm.ts | 59 ----------- 4 files changed, 153 insertions(+), 202 deletions(-) create mode 100644 evals/tasks/extract_csa.ts delete mode 100644 evals/tasks/extract_regulations.ts delete mode 100644 evals/tasks/ibm.ts diff --git a/evals/evals.config.json b/evals/evals.config.json index af67a546..f0f3fb15 100644 --- a/evals/evals.config.json +++ b/evals/evals.config.json @@ -122,10 +122,6 @@ "name": "homedepot", "categories": ["experimental"] }, - { - "name": "ibm", - "categories": ["experimental"] - }, { "name": "rakuten_jp", "categories": ["experimental"] @@ -160,8 +156,8 @@ "categories": ["extract"] }, { - "name": "extract_regulations", - "categories": ["extract"] + "name": "extract_csa", + "categories": ["text_extract"] }, { "name": "extract_resistor_info", diff --git a/evals/tasks/extract_csa.ts b/evals/tasks/extract_csa.ts new file mode 100644 index 00000000..46536566 --- /dev/null +++ b/evals/tasks/extract_csa.ts @@ -0,0 +1,151 @@ +import { EvalFunction } from "../../types/evals"; +import { initStagehand } from "../initStagehand"; +import { z } from "zod"; + +export const extract_csa: EvalFunction = async ({ + modelName, + logger, + useTextExtract, +}) => { + const { stagehand, initResponse } = await initStagehand({ + modelName, + logger, + }); + + const { debugUrl, sessionUrl } = initResponse; + + const { page } = stagehand; + await page.goto( + "https://clerk.assembly.ca.gov/weekly-histories?from_date=&to_date=2025-01-09", + ); + + const result = await page.extract({ + instruction: + "Extract all the publications on the page including the publication date, session type, publication type, and annotation", + schema: z.object({ + publications: z.array( + z.object({ + 
publication_date: z.string(), + session_type: z.string(), + publication_type: z.string(), + annotation: z.string(), + }), + ), + }), + modelName, + useTextExtract, + }); + + await stagehand.close(); + + const publications = result.publications; + const expectedLength = 15; + + const expectedFirstItem = { + publication_date: "12-20-2024", + session_type: "Regular Session", + publication_type: "Assembly Weekly History", + annotation: "", + }; + + const expectedLastItem = { + publication_date: "11-30-2016", + session_type: "1st Extraordinary Session", + publication_type: "Assembly Weekly History", + annotation: "", + }; + + if (publications.length !== expectedLength) { + logger.error({ + message: "Incorrect number of publications extracted", + level: 0, + auxiliary: { + expected: { + value: expectedLength.toString(), + type: "integer", + }, + actual: { + value: publications.length.toString(), + type: "integer", + }, + }, + }); + return { + _success: false, + error: "Incorrect number of publications extracted", + logs: logger.getLogs(), + debugUrl, + sessionUrl, + }; + } + const firstItemMatches = + publications[0].publication_date === expectedFirstItem.publication_date && + publications[0].session_type === expectedFirstItem.session_type && + publications[0].publication_type === expectedFirstItem.publication_type && + publications[0].annotation === expectedFirstItem.annotation; + + if (!firstItemMatches) { + logger.error({ + message: "First publication extracted does not match expected", + level: 0, + auxiliary: { + expected: { + value: JSON.stringify(expectedFirstItem), + type: "object", + }, + actual: { + value: JSON.stringify(publications[0]), + type: "object", + }, + }, + }); + return { + _success: false, + error: "First publication extracted does not match expected", + logs: logger.getLogs(), + debugUrl, + sessionUrl, + }; + } + + const lastItemMatches = + publications[publications.length - 1].publication_date === + expectedLastItem.publication_date && + 
publications[publications.length - 1].session_type === + expectedLastItem.session_type && + publications[publications.length - 1].publication_type === + expectedLastItem.publication_type && + publications[publications.length - 1].annotation === + expectedLastItem.annotation; + + if (!lastItemMatches) { + logger.error({ + message: "Last publication extracted does not match expected", + level: 0, + auxiliary: { + expected: { + value: JSON.stringify(expectedLastItem), + type: "object", + }, + actual: { + value: JSON.stringify(publications[publications.length - 1]), + type: "object", + }, + }, + }); + return { + _success: false, + error: "Last publication extracted does not match expected", + logs: logger.getLogs(), + debugUrl, + sessionUrl, + }; + } + + return { + _success: true, + logs: logger.getLogs(), + debugUrl, + sessionUrl, + }; +}; diff --git a/evals/tasks/extract_regulations.ts b/evals/tasks/extract_regulations.ts deleted file mode 100644 index 7ebec4d3..00000000 --- a/evals/tasks/extract_regulations.ts +++ /dev/null @@ -1,137 +0,0 @@ -import { EvalFunction } from "../../types/evals"; -import { initStagehand } from "../initStagehand"; -import { z } from "zod"; - -export const extract_regulations: EvalFunction = async ({ - modelName, - logger, - useTextExtract, -}) => { - const { stagehand, initResponse } = await initStagehand({ - modelName, - logger, - }); - - const { debugUrl, sessionUrl } = initResponse; - - await stagehand.page.goto("https://www.jsc.gov.jo/Links2/en/Regulations"); - - const result = await stagehand.page.extract({ - instruction: - "Extract the list of regulations with their descriptions and issue dates", - schema: z.object({ - regulations: z.array( - z.object({ - description: z.string(), - issue_date: z.string(), - }), - ), - }), - modelName, - useTextExtract, - }); - - await stagehand.close(); - - const regulations = result.regulations; - const expectedLength = 4; - - const expectedFirstItem = { - description: - "The Regulation of 
Investors Protection Fund in Securities No. (47) for the Year 2018 Amended Pursuant to Regulation No. (24) for the Year 2019", - issue_date: "2019", - }; - - const expectedLastItem = { - description: "Islamic Finance sukuk conrract regulation", - issue_date: "2014", - }; - - if (regulations.length !== expectedLength) { - logger.error({ - message: "Incorrect number of regulations extracted", - level: 0, - auxiliary: { - expected: { - value: expectedLength.toString(), - type: "integer", - }, - actual: { - value: regulations.length.toString(), - type: "integer", - }, - }, - }); - return { - _success: false, - error: "Incorrect number of regulations extracted", - logs: logger.getLogs(), - debugUrl, - sessionUrl, - }; - } - const firstItemMatches = - regulations[0].description === expectedFirstItem.description && - regulations[0].issue_date === expectedFirstItem.issue_date; - - if (!firstItemMatches) { - logger.error({ - message: "First regulation extracted does not match expected", - level: 0, - auxiliary: { - expected: { - value: JSON.stringify(expectedFirstItem), - type: "object", - }, - actual: { - value: JSON.stringify(regulations[0]), - type: "object", - }, - }, - }); - return { - _success: false, - error: "First regulation extracted does not match expected", - logs: logger.getLogs(), - debugUrl, - sessionUrl, - }; - } - - const lastItemMatches = - regulations[regulations.length - 1].description === - expectedLastItem.description && - regulations[regulations.length - 1].issue_date === - expectedLastItem.issue_date; - - if (!lastItemMatches) { - logger.error({ - message: "Last regulation extracted does not match expected", - level: 0, - auxiliary: { - expected: { - value: JSON.stringify(expectedLastItem), - type: "object", - }, - actual: { - value: JSON.stringify(regulations[regulations.length - 1]), - type: "object", - }, - }, - }); - return { - _success: false, - error: "Last regulation extracted does not match expected", - logs: logger.getLogs(), - debugUrl, - 
sessionUrl, - }; - } - - return { - _success: true, - logs: logger.getLogs(), - debugUrl, - sessionUrl, - }; -}; diff --git a/evals/tasks/ibm.ts b/evals/tasks/ibm.ts deleted file mode 100644 index d3ddcbef..00000000 --- a/evals/tasks/ibm.ts +++ /dev/null @@ -1,59 +0,0 @@ -import { EvalFunction } from "../../types/evals"; -import { initStagehand } from "../initStagehand"; -import { z } from "zod"; - -export const ibm: EvalFunction = async ({ modelName, logger }) => { - const { stagehand, initResponse } = await initStagehand({ - modelName, - logger, - }); - - const { debugUrl, sessionUrl } = initResponse; - - try { - await stagehand.page.goto("https://www.ibm.com/artificial-intelligence"); - - await stagehand.page.act({ - action: "if there is a cookies popup, accept it", - }); - - const { title } = await stagehand.page.extract({ - instruction: "extract the title of the article", - schema: z.object({ - title: z.string().describe("the title of the article"), - }), - }); - - await stagehand.page.act({ - action: "click on the 'explore AI use cases' button", - }); - - await stagehand.page.waitForLoadState("networkidle"); - - const url = await stagehand.page.url(); - - await stagehand.close(); - - const titleCheck = title.toLowerCase().includes("ai"); - const urlCheck = url === "https://www.ibm.com/watsonx/use-cases"; - - return { - _success: titleCheck && urlCheck, - debugUrl, - sessionUrl, - logs: logger.getLogs(), - }; - } catch (error) { - console.error("Error or timeout occurred:", error); - - await stagehand.close(); - - return { - _success: false, - error: JSON.parse(JSON.stringify(error, null, 2)), - debugUrl, - sessionUrl, - logs: logger.getLogs(), - }; - } -};