diff --git a/.changeset/nervous-dolls-clean.md b/.changeset/nervous-dolls-clean.md new file mode 100644 index 00000000..b5c00412 --- /dev/null +++ b/.changeset/nervous-dolls-clean.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +We now wrap the Playwright page/context within StagehandPage and StagehandContext objects. This lets us augment the Stagehand experience by extending the underlying Playwright page and context. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f8da0104..bf1df41f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,10 +55,39 @@ jobs: - name: Run Build run: npm run build - run-extract-evals: + run-e2e-tests: needs: [run-lint, run-build] runs-on: ubuntu-latest timeout-minutes: 50 + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }} + BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }} + HEADLESS: true + + steps: + - name: Check out repository code + uses: actions/checkout@v4 + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Install dependencies + run: npm install --no-frozen-lockfile + + - name: Install Playwright browsers + run: npm exec playwright install --with-deps + + - name: Run E2E Tests + run: npm run e2e + + run-extract-evals: + needs: [run-lint, run-build, run-e2e-tests] + runs-on: ubuntu-latest + timeout-minutes: 50 env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} diff --git a/.gitignore b/.gitignore index 806b4de1..c259e84d 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ evals/public evals/playground.ts tmp/ eval-summary.json +pnpm-lock.yaml \ No newline at end of file diff --git a/evals/deterministic/auxiliary/logo.png b/evals/deterministic/auxiliary/logo.png new file mode 100644 index 00000000..7ae5d485 Binary files /dev/null and 
b/evals/deterministic/auxiliary/logo.png differ diff --git a/evals/deterministic/playwright.config.ts b/evals/deterministic/playwright.config.ts new file mode 100644 index 00000000..c6b9ef3c --- /dev/null +++ b/evals/deterministic/playwright.config.ts @@ -0,0 +1,31 @@ +import { defineConfig, devices } from "@playwright/test"; + +/** + * See https://playwright.dev/docs/test-configuration. + */ +export default defineConfig({ + testDir: "./tests", + + /* Fail the build on CI if you accidentally left test.only in the source code. */ + /* Run tests in files in parallel */ + fullyParallel: true, + /* Reporter to use. See https://playwright.dev/docs/test-reporters */ + // reporter: "html", + reporter: "line", + /* Retry on CI only */ + retries: 2, + + /* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */ + use: { + /* Collect trace when retrying the failed test. See https://playwright.dev/docs/trace-viewer */ + trace: "on-first-retry", + }, + + /* Configure projects for major browsers */ + projects: [ + { + name: "chromium", + use: { ...devices["Desktop Chrome"] }, + }, + ], +}); diff --git a/evals/deterministic/stagehand.config.ts b/evals/deterministic/stagehand.config.ts new file mode 100644 index 00000000..62406a27 --- /dev/null +++ b/evals/deterministic/stagehand.config.ts @@ -0,0 +1,26 @@ +import type { ConstructorParams, LogLine } from "../../lib"; + +const StagehandConfig: ConstructorParams = { + env: "BROWSERBASE" /* Environment to run Stagehand in */, + apiKey: process.env.BROWSERBASE_API_KEY /* API key for authentication */, + projectId: process.env.BROWSERBASE_PROJECT_ID /* Project identifier */, + verbose: 1 /* Logging verbosity level (0=quiet, 1=normal, 2=verbose) */, + debugDom: true /* Enable DOM debugging features */, + headless: false /* Run browser in headless mode */, + logger: (message: LogLine) => + console.log( + `[stagehand::${message.category}] ${message.message}`, + ) /* Custom logging function 
*/, + domSettleTimeoutMs: 30_000 /* Timeout for DOM to settle in milliseconds */, + browserbaseSessionCreateParams: { + projectId: process.env.BROWSERBASE_PROJECT_ID!, + }, + enableCaching: true /* Enable caching functionality */, + browserbaseSessionID: + undefined /* Session ID for resuming Browserbase sessions */, + modelName: "gpt-4o" /* Name of the model to use */, + modelClientOptions: { + apiKey: process.env.OPENAI_API_KEY, + } /* Configuration options for the model client */, +}; +export default StagehandConfig; diff --git a/evals/deterministic/tests/contexts.test.ts b/evals/deterministic/tests/contexts.test.ts new file mode 100644 index 00000000..26afef98 --- /dev/null +++ b/evals/deterministic/tests/contexts.test.ts @@ -0,0 +1,145 @@ +import Browserbase from "@browserbasehq/sdk"; +import { Stagehand } from "../../../lib"; +import { expect, test } from "@playwright/test"; +import StagehandConfig from "../stagehand.config"; + +// Configuration +const CONTEXT_TEST_URL = "https://docs.browserbase.com"; +const BROWSERBASE_PROJECT_ID = process.env["BROWSERBASE_PROJECT_ID"]!; +const BROWSERBASE_API_KEY = process.env["BROWSERBASE_API_KEY"]!; + +const bb = new Browserbase({ + apiKey: BROWSERBASE_API_KEY, +}); + +// Helper functions +function addHour(date: Date): number { + const SECOND = 1000; + return new Date(date.getTime() + 60 * 60 * 1000).getTime() / SECOND; +} + +async function findCookie(stagehand: Stagehand, name: string) { + const defaultContext = stagehand.context; + const cookies = await defaultContext?.cookies(); + return cookies?.find((cookie) => cookie.name === name); +} + +async function createContext() { + console.log("Creating a new context..."); + const context = await bb.contexts.create({ + projectId: BROWSERBASE_PROJECT_ID, + }); + const contextId = context.id; + console.log(`Context created with ID: ${contextId}`); + return contextId; +} + +async function setRandomCookie(contextId: string, stagehand: Stagehand) { + console.log( + `Populating 
context ${contextId} during session ${stagehand.browserbaseSessionID}`, + ); + const page = stagehand.page; + + await page.goto(CONTEXT_TEST_URL, { waitUntil: "domcontentloaded" }); + + const now = new Date(); + const testCookieName = `bb_${now.getTime().toString()}`; + const testCookieValue = now.toISOString(); + + await stagehand.context.addCookies([ + { + domain: `.${new URL(CONTEXT_TEST_URL).hostname}`, + expires: addHour(now), + name: testCookieName, + path: "/", + value: testCookieValue, + }, + ]); + // findCookie is async: await it so the assertion checks the cookie, not a Promise (which is always defined) + expect(await findCookie(stagehand, testCookieName)).toBeDefined(); + console.log(`Set test cookie: ${testCookieName}=${testCookieValue}`); + return { testCookieName, testCookieValue }; +} + +test.describe("Contexts", () => { + test("Persists and re-uses a context", async () => { + let contextId: string; + let testCookieName: string; + let testCookieValue: string; + let stagehand: Stagehand; + + await test.step("Create a context", async () => { + contextId = await createContext(); + }); + + await test.step("Instantiate Stagehand with the context to persist", async () => { + // We will be adding cookies to the context in this session, so we need to mark persist=true + stagehand = new Stagehand({ + ...StagehandConfig, + browserbaseSessionCreateParams: { + projectId: BROWSERBASE_PROJECT_ID, + browserSettings: { + context: { + id: contextId, + persist: true, + }, + }, + }, + }); + await stagehand.init(); + }); + + await test.step("Set a random cookie on the page", async () => { + ({ testCookieName, testCookieValue } = await setRandomCookie(contextId, stagehand)); + // testCookieValue is captured above because the session-reuse step compares it with the persisted cookie + const page = stagehand.page; + await page.goto("https://www.google.com", { + waitUntil: "domcontentloaded", + }); + await page.goBack(); + }); + + await test.step("Validate cookie persistence between pages", async () => { + const cookie = await findCookie(stagehand, testCookieName); + const found = !!cookie; + expect(found).toBe(true); + console.log("Cookie persisted between pages:", found); + + await stagehand.close(); + // 
Wait for context to persist + console.log("Waiting for context to persist..."); + await new Promise((resolve) => setTimeout(resolve, 5000)); + }); + + await test.step("Create another session with the same context", async () => { + // We don't need to persist cookies in this session, so we can mark persist=false + const newStagehand = new Stagehand({ + ...StagehandConfig, + browserbaseSessionCreateParams: { + projectId: BROWSERBASE_PROJECT_ID, + browserSettings: { + context: { + id: contextId, + persist: false, + }, + }, + }, + }); + await newStagehand.init(); + console.log( + `Reusing context ${contextId} during session ${newStagehand.browserbaseSessionID}`, + ); + const newPage = newStagehand.page; + await newPage.goto(CONTEXT_TEST_URL, { waitUntil: "domcontentloaded" }); + + const foundCookie = await findCookie(newStagehand, testCookieName); + console.log("Cookie found in new session:", !!foundCookie); + console.log( + "Cookie value matches:", + foundCookie?.value === testCookieValue, + ); + + await newStagehand.close(); + }); + }); +}); diff --git a/evals/deterministic/tests/downloads.test.ts b/evals/deterministic/tests/downloads.test.ts new file mode 100644 index 00000000..293e7029 --- /dev/null +++ b/evals/deterministic/tests/downloads.test.ts @@ -0,0 +1,69 @@ +import { test, expect } from "@playwright/test"; +import AdmZip from "adm-zip"; +import StagehandConfig from "../stagehand.config"; +import { Stagehand } from "../../../lib"; +import Browserbase from "@browserbasehq/sdk"; + +const downloadRe = /sandstorm-(\d{13})+\.mp3/; + +test("Downloads", async () => { + const stagehand = new Stagehand(StagehandConfig); + await stagehand.init(); + const page = stagehand.page; + const context = stagehand.context; + + const client = await context.newCDPSession(page); + await client.send("Browser.setDownloadBehavior", { + behavior: "allow", + // `downloadPath` gets appended to the browser's default download directory. 
+ // set to "downloads", it ends up being "/app/apps/browser/downloads/". + downloadPath: "downloads", + eventsEnabled: true, + }); + + await page.goto("https://browser-tests-alpha.vercel.app/api/download-test"); + + const [download] = await Promise.all([ + page.waitForEvent("download"), + page.locator("#download").click(), + ]); + + const downloadError = await download.failure(); + + await stagehand.close(); + + if (downloadError !== null) { + throw new Error( + `Download for session ${stagehand.browserbaseSessionID} failed: ${downloadError}`, + ); + } + // toPass() returns a promise: await it so the retried check finishes (and can fail) before the test ends + await expect(async () => { + const bb = new Browserbase(); + const zipBuffer = await bb.sessions.downloads.list( + stagehand.browserbaseSessionID, + ); + if (!zipBuffer) { + throw new Error( + `Download buffer is empty for session ${stagehand.browserbaseSessionID}`, + ); + } + + const zip = new AdmZip(Buffer.from(await zipBuffer.arrayBuffer())); + const zipEntries = zip.getEntries(); + const mp3Entry = zipEntries.find((entry) => + downloadRe.test(entry.entryName), + ); + + if (!mp3Entry) { + throw new Error( + `Session ${stagehand.browserbaseSessionID} is missing a file matching "${downloadRe.toString()}" in its zip entries: ${JSON.stringify(zipEntries.map((entry) => entry.entryName))}`, + ); + } + + const expectedFileSize = 6137541; + expect(mp3Entry.header.size).toBe(expectedFileSize); + }).toPass({ + timeout: 30_000, + }); +}); diff --git a/evals/deterministic/tests/uploads.test.ts b/evals/deterministic/tests/uploads.test.ts new file mode 100644 index 00000000..9cfb1baf --- /dev/null +++ b/evals/deterministic/tests/uploads.test.ts @@ -0,0 +1,36 @@ +import { join } from "node:path"; +import { test, expect } from "@playwright/test"; +import { Stagehand } from "../../../lib"; +import StagehandConfig from "../stagehand.config"; + +test.describe("Playwright Upload", () => { + let stagehand: Stagehand; + + test.beforeAll(async () => { + stagehand = new Stagehand(StagehandConfig); + await stagehand.init(); + }); + + 
test.afterAll(async () => { + await stagehand.close(); + }); + + test("uploads a file", async () => { + const page = stagehand.page; + await page.goto("https://browser-tests-alpha.vercel.app/api/upload-test"); + + const fileInput = page.locator("#fileUpload"); + await fileInput.setInputFiles( + join(__dirname, "..", "auxiliary", "logo.png"), + ); + + const fileNameSpan = page.locator("#fileName"); + const fileName = await fileNameSpan.innerText(); + + const fileSizeSpan = page.locator("#fileSize"); + const fileSize = Number(await fileSizeSpan.innerText()); + + expect(fileName).toBe("logo.png"); + expect(fileSize).toBeGreaterThan(0); + }); +}); diff --git a/examples/example.ts b/examples/example.ts index 3d1ecd22..ac3ff650 100644 --- a/examples/example.ts +++ b/examples/example.ts @@ -1,66 +1,18 @@ -import StagehandConfig from "./stagehand.config.js"; -import { Stagehand } from "../lib/index.js"; -import { z } from "zod"; +/** + * This file is meant to be used as a scratchpad for developing new evals. + * To create a Stagehand project with best practices and configuration, run: + * + * npx create-browser-app@latest my-browser-app + */ -async function main() { - const stagehand = new Stagehand({ - ...StagehandConfig, - }); - await stagehand.init(); - const page = stagehand.page; - - // You can use the `page` instance to write any Playwright code - // For more info: https://playwright.dev/docs/pom - await page.goto("https://www.google.com"); - - // In the event that your Playwright code fails, you can use the `act` method to - // let Stagehand AI take over and complete the action. 
- try { - throw new Error("Comment me out to run the base Playwright code!"); - await page.locator('textarea[name="q"]').click(); - await page.locator('textarea[name="q"]').fill("Stagehand GitHub"); - await page.keyboard.press("Enter"); - await page.waitForLoadState("networkidle"); - } catch { - await stagehand.act({ - action: "type in 'Stagehand GitHub' in the search bar and hit enter", - }); - } +import { Stagehand } from "../lib"; +import StagehandConfig from "./stagehand.config"; - const githubResult = await stagehand.extract({ - instruction: "find the github link in the search results", - // Zod is a schema validation library for TypeScript. - // For more information on Zod, visit: https://zod.dev/ - schema: z.object({ - title: z.string(), - link: z.string(), - description: z.string(), - }), - }); - console.log( - `The top result is ${githubResult.title}: ${githubResult.link}. ${githubResult.description}`, - ); - - // Click the first link in the search results to to the GitHub page - try { - // Stagehand's `observe` method returns a list of selectors that can be used to interact with the page - // NOTE: you could also just do stagehand.act() to click the top result, but this is a good example of how to use observe - const observeResult = await stagehand.observe({ - instruction: "Find the link to click to click the top result", - }); - console.log("We can click:", observeResult); - - // Click the selector at the top of the list - await page.locator(`${observeResult[0].selector}`).click(); - await page.waitForLoadState("networkidle"); - } catch { - await stagehand.act({ - action: "click the first link in the search results", - }); - } - await stagehand.close(); +async function example() { + const stagehand = new Stagehand(StagehandConfig); + await stagehand.init(); } (async () => { - await main().catch(console.error); + await example(); })(); diff --git a/lib/StagehandContext.ts b/lib/StagehandContext.ts new file mode 100644 index 00000000..026a413a --- 
/dev/null +++ b/lib/StagehandContext.ts @@ -0,0 +1,29 @@ +import type { BrowserContext as PlaywrightContext } from "@playwright/test"; +import { Stagehand } from "./index"; + +export class StagehandContext { + private readonly stagehand: Stagehand; + private readonly intContext: PlaywrightContext; + + private constructor(context: PlaywrightContext, stagehand: Stagehand) { + this.intContext = context; + this.stagehand = stagehand; + } + + static async init( + context: PlaywrightContext, + stagehand: Stagehand, + ): Promise { + const proxyContext = new Proxy(context, { + get: (target, prop) => { + return target[prop as keyof PlaywrightContext]; + }, + }); + const instance = new StagehandContext(proxyContext, stagehand); + return instance; + } + + public get context(): PlaywrightContext { + return this.intContext; + } +} diff --git a/lib/StagehandPage.ts b/lib/StagehandPage.ts new file mode 100644 index 00000000..ad75fcab --- /dev/null +++ b/lib/StagehandPage.ts @@ -0,0 +1,151 @@ +import type { Page as PlaywrightPage } from "@playwright/test"; +import { GotoOptions, Stagehand } from "./index"; + +export class StagehandPage { + private stagehand: Stagehand; + private intPage: PlaywrightPage; + + constructor(page: PlaywrightPage, stagehand: Stagehand) { + this.intPage = page; + this.stagehand = stagehand; + } + + async init( + page: PlaywrightPage, + stagehand: Stagehand, + ): Promise { + this.intPage = new Proxy(page, { + get: (target, prop) => { + // Override the goto method to add debugDom and waitForSettledDom + if (prop === "goto") + return async (url: string, options: GotoOptions) => { + const result = await page.goto(url, options); + if (stagehand.debugDom) { + await page.evaluate( + (debugDom) => (window.showChunks = debugDom), + stagehand.debugDom, + ); + } + await page.waitForLoadState("domcontentloaded"); + await this._waitForSettledDom(); + return result; + }; + + return target[prop as keyof PlaywrightPage]; + }, + }); + await this._waitForSettledDom(); + 
return this; + } + + public get page(): PlaywrightPage { + return this.intPage; + } + + // We can make methods public because StagehandPage is private to the Stagehand class. + // When a user gets stagehand.page, they are getting a proxy to the Playwright page. + // We can override the methods on the proxy to add our own behavior + public async _waitForSettledDom(timeoutMs?: number) { + try { + const timeout = timeoutMs ?? this.stagehand.domSettleTimeoutMs; + let timeoutHandle: NodeJS.Timeout; + + await this.page.waitForLoadState("domcontentloaded"); + + const timeoutPromise = new Promise((resolve) => { + timeoutHandle = setTimeout(() => { + this.stagehand.log({ + category: "dom", + message: "DOM settle timeout exceeded, continuing anyway", + level: 1, + auxiliary: { + timeout_ms: { + value: timeout.toString(), + type: "integer", + }, + }, + }); + resolve(); + }, timeout); + }); + + try { + await Promise.race([ + this.page.evaluate(() => { + return new Promise((resolve) => { + if (typeof window.waitForDomSettle === "function") { + window.waitForDomSettle().then(resolve); + } else { + console.warn( + "waitForDomSettle is not defined, considering DOM as settled", + ); + resolve(); + } + }); + }), + this.page.waitForLoadState("domcontentloaded"), + this.page.waitForSelector("body"), + timeoutPromise, + ]); + } finally { + clearTimeout(timeoutHandle!); + } + } catch (e) { + this.stagehand.log({ + category: "dom", + message: "Error in waitForSettledDom", + level: 1, + auxiliary: { + error: { + value: e.message, + type: "string", + }, + trace: { + value: e.stack, + type: "string", + }, + }, + }); + } + } + + public async startDomDebug() { + try { + // The evaluate callback runs in the browser, where `this.stagehand` does not exist; report back and log here. + const hasDebugDom = await this.page.evaluate(() => { + if (typeof window.debugDom !== "function") return false; + window.debugDom(); + return true; + }); + if (!hasDebugDom) { + this.stagehand.log({ + category: "dom", + message: "debugDom is not defined", + level: 1, + }); + } + } catch (e) { + this.stagehand.log({ + category: "dom", + message: "Error in 
startDomDebug", + level: 1, + auxiliary: { + error: { + value: e.message, + type: "string", + }, + trace: { + value: e.stack, + type: "string", + }, + }, + }); + } + } + + public async cleanupDomDebug() { + if (this.stagehand.debugDom) { + await this.page.evaluate(() => window.cleanupDebug()).catch(() => {}); + } + } +} diff --git a/lib/handlers/actHandler.ts b/lib/handlers/actHandler.ts index 1de2630a..1b07eb8d 100644 --- a/lib/handlers/actHandler.ts +++ b/lib/handlers/actHandler.ts @@ -11,20 +11,18 @@ import { LLMClient } from "../llm/LLMClient"; import { LLMProvider } from "../llm/LLMProvider"; import { generateId } from "../utils"; import { ScreenshotService } from "../vision"; - +import { StagehandPage } from "../StagehandPage"; export class StagehandActHandler { private readonly stagehand: Stagehand; + private readonly stagehandPage: StagehandPage; private readonly verbose: 0 | 1 | 2; private readonly llmProvider: LLMProvider; private readonly enableCaching: boolean; private readonly logger: (logLine: LogLine) => void; - private readonly waitForSettledDom: ( - domSettleTimeoutMs?: number, - ) => Promise; private readonly actionCache: ActionCache | undefined; - private readonly startDomDebug: () => Promise; - private readonly cleanupDomDebug: () => Promise; - private actions: { [key: string]: { result: string; action: string } }; + private readonly actions: { + [key: string]: { result: string; action: string }; + }; constructor({ stagehand, @@ -32,30 +30,24 @@ export class StagehandActHandler { llmProvider, enableCaching, logger, - waitForSettledDom, - startDomDebug, - cleanupDomDebug, + stagehandPage, }: { stagehand: Stagehand; verbose: 0 | 1 | 2; llmProvider: LLMProvider; enableCaching: boolean; logger: (logLine: LogLine) => void; - waitForSettledDom: (domSettleTimeoutMs?: number) => Promise; llmClient: LLMClient; - startDomDebug: () => Promise; - cleanupDomDebug: () => Promise; + stagehandPage: StagehandPage; }) { this.stagehand = stagehand; this.verbose = 
verbose; this.llmProvider = llmProvider; this.enableCaching = enableCaching; this.logger = logger; - this.waitForSettledDom = waitForSettledDom; this.actionCache = enableCaching ? new ActionCache(this.logger) : undefined; - this.startDomDebug = startDomDebug; - this.cleanupDomDebug = cleanupDomDebug; this.actions = {}; + this.stagehandPage = stagehandPage; } private async _recordAction(action: string, result: string): Promise { @@ -83,7 +75,7 @@ export class StagehandActHandler { llmClient: LLMClient; domSettleTimeoutMs?: number; }): Promise { - await this.waitForSettledDom(domSettleTimeoutMs); + await this.stagehandPage._waitForSettledDom(domSettleTimeoutMs); // o1 is overkill for this task + this task uses a lot of tokens. So we switch it 4o let verifyLLmClient = llmClient; @@ -98,7 +90,7 @@ export class StagehandActHandler { ); } - const { selectorMap } = await this.stagehand.page.evaluate(() => { + const { selectorMap } = await this.stagehandPage.page.evaluate(() => { return window.processAllOfDom(); }); @@ -441,15 +433,13 @@ export class StagehandActHandler { }, }); await newOpenedTab.close(); - await this.stagehand.page.goto(newOpenedTab.url()); - await this.stagehand.page.waitForLoadState("domcontentloaded"); - await this.waitForSettledDom(domSettleTimeoutMs); + await this.stagehandPage.page.goto(newOpenedTab.url()); + await this.stagehandPage.page.waitForLoadState("domcontentloaded"); + await this.stagehandPage._waitForSettledDom(domSettleTimeoutMs); } - // Wait for the network to be idle with timeout of 5s (will only wait if loading a new page) - // await this.waitForSettledDom(domSettleTimeoutMs); await Promise.race([ - this.stagehand.page.waitForLoadState("networkidle"), + this.stagehandPage.page.waitForLoadState("networkidle"), new Promise((resolve) => setTimeout(resolve, 5_000)), ]).catch((e) => { this.logger({ @@ -475,14 +465,14 @@ export class StagehandActHandler { level: 1, }); - if (this.stagehand.page.url() !== initialUrl) { + if 
(this.stagehandPage.page.url() !== initialUrl) { this.logger({ category: "action", message: "new page detected with URL", level: 1, auxiliary: { url: { - value: this.stagehand.page.url(), + value: this.stagehandPage.page.url(), type: "string", }, }, @@ -507,7 +497,7 @@ export class StagehandActHandler { ); } - await this.waitForSettledDom(domSettleTimeoutMs); + await this.stagehandPage._waitForSettledDom(domSettleTimeoutMs); } private async _getComponentString(locator: Locator) { @@ -988,8 +978,8 @@ export class StagehandActHandler { domSettleTimeoutMs?: number; }): Promise<{ success: boolean; message: string; action: string }> { try { - await this.waitForSettledDom(domSettleTimeoutMs); - await this.startDomDebug(); + await this.stagehandPage._waitForSettledDom(domSettleTimeoutMs); + await this.stagehandPage.startDomDebug(); if (this.enableCaching && !skipActionCacheForThisStep) { const response = await this._runCachedActionIfAvailable({ @@ -1153,7 +1143,7 @@ export class StagehandActHandler { }, }); - await this.cleanupDomDebug(); + await this.stagehandPage.cleanupDomDebug(); if (!response) { if (chunksSeen.length + 1 < chunks.length) { diff --git a/lib/handlers/extractHandler.ts b/lib/handlers/extractHandler.ts index d09e07ca..143d599f 100644 --- a/lib/handlers/extractHandler.ts +++ b/lib/handlers/extractHandler.ts @@ -1,11 +1,11 @@ -import { LLMProvider } from "../llm/LLMProvider"; -import { Stagehand } from "../index"; import { z } from "zod"; import { LogLine } from "../../types/log"; import { TextAnnotation } from "../../types/textannotation"; import { extract } from "../inference"; import { LLMClient } from "../llm/LLMClient"; import { formatText } from "../utils"; +import { StagehandPage } from "../StagehandPage"; +import { Stagehand } from "../index"; const PROXIMITY_THRESHOLD = 15; @@ -81,26 +81,13 @@ const PROXIMITY_THRESHOLD = 15; export class StagehandExtractHandler { private readonly stagehand: Stagehand; - + private readonly stagehandPage: 
StagehandPage; private readonly logger: (logLine: LogLine) => void; - private readonly waitForSettledDom: ( - domSettleTimeoutMs?: number, - ) => Promise; - private readonly startDomDebug: () => Promise; - private readonly cleanupDomDebug: () => Promise; - private readonly llmProvider: LLMProvider; - private readonly llmClient: LLMClient; - private readonly verbose: 0 | 1 | 2; constructor({ stagehand, logger, - waitForSettledDom, - startDomDebug, - cleanupDomDebug, - llmProvider, - llmClient, - verbose, + stagehandPage, }: { stagehand: Stagehand; logger: (message: { @@ -109,21 +96,11 @@ export class StagehandExtractHandler { level?: number; auxiliary?: { [key: string]: { value: string; type: string } }; }) => void; - waitForSettledDom: (domSettleTimeoutMs?: number) => Promise; - startDomDebug: () => Promise; - cleanupDomDebug: () => Promise; - llmProvider: LLMProvider; - llmClient: LLMClient; - verbose: 0 | 1 | 2; + stagehandPage: StagehandPage; }) { this.stagehand = stagehand; this.logger = logger; - this.waitForSettledDom = waitForSettledDom; - this.startDomDebug = startDomDebug; - this.cleanupDomDebug = cleanupDomDebug; - this.llmProvider = llmProvider; - this.llmClient = llmClient; - this.verbose = verbose; + this.stagehandPage = stagehandPage; } public async extract({ @@ -195,13 +172,13 @@ export class StagehandExtractHandler { }); // **1:** Wait for the DOM to settle and start DOM debugging - await this.waitForSettledDom(domSettleTimeoutMs); - await this.startDomDebug(); + await this.stagehandPage._waitForSettledDom(domSettleTimeoutMs); + await this.stagehandPage.startDomDebug(); // **2:** Store the original DOM before any mutations // we need to store the original DOM here because calling createTextBoundingBoxes() // will mutate the DOM by adding spans around every word - const originalDOM = await this.stagehand.page.evaluate(() => + const originalDOM = await this.stagehandPage.page.evaluate(() => window.storeDOM(), ); @@ -244,7 +221,7 @@ export class 
StagehandExtractHandler { top: number; width: number; height: number; - }> = await this.stagehand.page.evaluate( + }> = await this.stagehandPage.page.evaluate( (xpath) => window.getElementBoundingBoxes(xpath), xpath, ); @@ -311,7 +288,7 @@ export class StagehandExtractHandler { } // **7:** Restore the original DOM after mutations - await this.stagehand.page.evaluate( + await this.stagehandPage.page.evaluate( (dom) => window.restoreDOM(dom), originalDOM, ); @@ -335,7 +312,7 @@ export class StagehandExtractHandler { metadata: { completed }, ...output } = extractionResponse; - await this.cleanupDomDebug(); + await this.stagehandPage.cleanupDomDebug(); // **10:** Handle the extraction response and log the results this.logger({ @@ -408,8 +385,8 @@ export class StagehandExtractHandler { // **1:** Wait for the DOM to settle and start DOM debugging // This ensures the page is stable before extracting any data. - await this.waitForSettledDom(domSettleTimeoutMs); - await this.startDomDebug(); + await this.stagehandPage._waitForSettledDom(domSettleTimeoutMs); + await this.stagehandPage.startDomDebug(); // **2:** Call processDom() to handle chunk-based extraction // processDom determines which chunk of the page to process next. 
@@ -464,7 +441,7 @@ export class StagehandExtractHandler { ...output } = extractionResponse; - await this.cleanupDomDebug(); + await this.stagehandPage.cleanupDomDebug(); this.logger({ category: "extraction", @@ -507,7 +484,7 @@ export class StagehandExtractHandler { }, }, }); - await this.waitForSettledDom(domSettleTimeoutMs); + await this.stagehandPage._waitForSettledDom(domSettleTimeoutMs); // Recursively continue with the next chunk return this.domExtract({ diff --git a/lib/handlers/observeHandler.ts b/lib/handlers/observeHandler.ts index 1bed25f8..3e53f9c8 100644 --- a/lib/handlers/observeHandler.ts +++ b/lib/handlers/observeHandler.ts @@ -2,21 +2,15 @@ import { LogLine } from "../../types/log"; import { Stagehand } from "../index"; import { observe } from "../inference"; import { LLMClient } from "../llm/LLMClient"; -import { LLMProvider } from "../llm/LLMProvider"; import { generateId } from "../utils"; import { ScreenshotService } from "../vision"; +import { StagehandPage } from "../StagehandPage"; export class StagehandObserveHandler { private readonly stagehand: Stagehand; private readonly logger: (logLine: LogLine) => void; - private readonly waitForSettledDom: ( - domSettleTimeoutMs?: number, - ) => Promise; - private readonly startDomDebug: () => Promise; - private readonly cleanupDomDebug: () => Promise; - private readonly llmProvider: LLMProvider; + private readonly stagehandPage: StagehandPage; private readonly verbose: 0 | 1 | 2; - private readonly llmClient: LLMClient; private observations: { [key: string]: { result: { selector: string; description: string }[]; @@ -27,30 +21,15 @@ export class StagehandObserveHandler { constructor({ stagehand, logger, - waitForSettledDom, - startDomDebug, - cleanupDomDebug, - llmProvider, - verbose, - llmClient, + stagehandPage, }: { stagehand: Stagehand; logger: (logLine: LogLine) => void; - waitForSettledDom: (domSettleTimeoutMs?: number) => Promise; - startDomDebug: () => Promise; - cleanupDomDebug: () => 
Promise; - llmProvider: LLMProvider; - verbose: 0 | 1 | 2; - llmClient: LLMClient; + stagehandPage: StagehandPage; }) { this.stagehand = stagehand; this.logger = logger; - this.waitForSettledDom = waitForSettledDom; - this.startDomDebug = startDomDebug; - this.cleanupDomDebug = cleanupDomDebug; - this.llmProvider = llmProvider; - this.verbose = verbose; - this.llmClient = llmClient; + this.stagehandPage = stagehandPage; this.observations = {}; } @@ -95,8 +74,8 @@ export class StagehandObserveHandler { }, }); - await this.waitForSettledDom(domSettleTimeoutMs); - await this.startDomDebug(); + await this.stagehandPage._waitForSettledDom(domSettleTimeoutMs); + await this.stagehandPage.startDomDebug(); const evalResult = await this.stagehand.page.evaluate( (fullPage: boolean) => fullPage ? window.processAllOfDom() : window.processDom([]), @@ -154,7 +133,7 @@ export class StagehandObserveHandler { }, ); - await this.cleanupDomDebug(); + await this.stagehandPage.cleanupDomDebug(); this.logger({ category: "observation", diff --git a/lib/index.ts b/lib/index.ts index e6b28b51..14dd2d41 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -1,5 +1,5 @@ import { Browserbase } from "@browserbasehq/sdk"; -import { type BrowserContext, chromium, type Page } from "@playwright/test"; +import { type BrowserContext, chromium, Page } from "@playwright/test"; import { randomUUID } from "crypto"; import dotenv from "dotenv"; import fs from "fs"; @@ -29,6 +29,8 @@ import { StagehandObserveHandler } from "./handlers/observeHandler"; import { LLMClient } from "./llm/LLMClient"; import { LLMProvider } from "./llm/LLMProvider"; import { logLineToString } from "./utils"; +import { StagehandPage } from "./StagehandPage"; +import { StagehandContext } from "./StagehandContext"; dotenv.config({ path: ".env" }); @@ -194,7 +196,7 @@ async function getBrowser( const context = browser.contexts()[0]; - return { browser, context, debugUrl, sessionUrl, sessionId }; + return { browser, context, debugUrl, 
sessionUrl, sessionId, env }; } else { logger({ category: "init", @@ -260,7 +262,7 @@ async function getBrowser( await applyStealthScripts(context); - return { context, contextPath: tmpDir }; + return { context, contextPath: tmpDir, env: "LOCAL" }; } } @@ -304,19 +306,19 @@ async function applyStealthScripts(context: BrowserContext) { export class Stagehand { private llmProvider: LLMProvider; private llmClient: LLMClient; - public page: Page; - public context: BrowserContext; + private stagehandPage!: StagehandPage; + private stagehandContext!: StagehandContext; public browserbaseSessionID?: string; - private env: "LOCAL" | "BROWSERBASE"; + private intEnv: "LOCAL" | "BROWSERBASE"; + public readonly domSettleTimeoutMs: number; + public readonly debugDom: boolean; + public readonly headless: boolean; + private logger: (logLine: LogLine) => void; private apiKey: string | undefined; private projectId: string | undefined; private verbose: 0 | 1 | 2; - private debugDom: boolean; - private headless: boolean; - private logger: (logLine: LogLine) => void; private externalLogger?: (logLine: LogLine) => void; - private domSettleTimeoutMs: number; private browserbaseSessionCreateParams?: Browserbase.Sessions.SessionCreateParams; private enableCaching: boolean; private variables: { [key: string]: unknown }; @@ -353,7 +355,7 @@ export class Stagehand { (process.env.ENABLE_CACHING && process.env.ENABLE_CACHING === "true"); this.llmProvider = llmProvider || new LLMProvider(this.logger, this.enableCaching); - this.env = env; + this.intEnv = env; this.apiKey = apiKey ?? process.env.BROWSERBASE_API_KEY; this.projectId = projectId ?? process.env.BROWSERBASE_PROJECT_ID; this.verbose = verbose ?? 
0; @@ -368,6 +370,25 @@ export class Stagehand { this.browserbaseSessionID = browserbaseSessionID; } + public get page(): Page { + // End users should not be able to access the StagehandPage directly + // This is a proxy to the underlying Playwright Page + if (!this.stagehandPage) { + throw new Error( + "Stagehand not initialized. Make sure to await stagehand.init() first.", + ); + } + return this.stagehandPage.page; + } + + public get env(): "LOCAL" | "BROWSERBASE" { + return this.intEnv; + } + + public get context(): BrowserContext { + return this.stagehandContext.context; + } + async init( /** @deprecated Use constructor options instead */ initOptions?: InitOptions, @@ -377,7 +398,7 @@ export class Stagehand { "Passing parameters to init() is deprecated and will be removed in the next major version. Use constructor options instead.", ); } - const { context, debugUrl, sessionUrl, contextPath, sessionId } = + const { context, debugUrl, sessionUrl, contextPath, sessionId, env } = await getBrowser( this.apiKey, this.projectId, @@ -393,27 +414,18 @@ export class Stagehand { debugUrl: undefined, sessionUrl: undefined, sessionId: undefined, + env: this.env, }; return br; }); + this.intEnv = env; this.contextPath = contextPath; - this.context = context; - this.page = context.pages()[0]; - // Redundant but needed for users who are re-connecting to a previously-created session - await this.page.waitForLoadState("domcontentloaded"); - await this._waitForSettledDom(); - - // Overload the page.goto method - const originalGoto = this.page.goto.bind(this.page); - this.page.goto = async (url: string, options: GotoOptions) => { - const result = await originalGoto(url, options); - if (this.debugDom) { - await this.page.evaluate(() => (window.showChunks = this.debugDom)); - } - await this.page.waitForLoadState("domcontentloaded"); - await this._waitForSettledDom(); - return result; - }; + this.stagehandContext = await StagehandContext.init(context, this); + const defaultPage = 
this.context.pages()[0]; + this.stagehandPage = await new StagehandPage(defaultPage, this).init( + defaultPage, + this, + ); // Set the browser to headless mode if specified if (this.headless) { @@ -430,33 +442,22 @@ export class Stagehand { llmProvider: this.llmProvider, enableCaching: this.enableCaching, logger: this.logger, - waitForSettledDom: this._waitForSettledDom.bind(this), - startDomDebug: this.startDomDebug.bind(this), - cleanupDomDebug: this.cleanupDomDebug.bind(this), + stagehandPage: this.stagehandPage, llmClient: this.llmClient, }); this.extractHandler = new StagehandExtractHandler({ stagehand: this, logger: this.logger, - waitForSettledDom: this._waitForSettledDom.bind(this), - startDomDebug: this.startDomDebug.bind(this), - cleanupDomDebug: this.cleanupDomDebug.bind(this), - llmProvider: this.llmProvider, - verbose: this.verbose, - llmClient: this.llmClient, + stagehandPage: this.stagehandPage, }); this.observeHandler = new StagehandObserveHandler({ stagehand: this, logger: this.logger, - waitForSettledDom: this._waitForSettledDom.bind(this), - startDomDebug: this.startDomDebug.bind(this), - cleanupDomDebug: this.cleanupDomDebug.bind(this), - llmProvider: this.llmProvider, - verbose: this.verbose, - llmClient: this.llmClient, + stagehandPage: this.stagehandPage, }); + this.browserbaseSessionID = sessionId; return { debugUrl, sessionUrl, sessionId }; @@ -469,8 +470,8 @@ export class Stagehand { console.warn( "initFromPage is deprecated and will be removed in the next major version. 
To instantiate from a page, use `browserbaseSessionID` in the constructor.", ); - this.page = page; - this.context = page.context(); + this.stagehandPage = await new StagehandPage(page, this).init(page, this); + this.stagehandContext = await StagehandContext.init(page.context(), this); const originalGoto = this.page.goto.bind(this.page); this.page.goto = async (url: string, options?: GotoOptions) => { @@ -479,7 +480,7 @@ export class Stagehand { await this.page.evaluate(() => (window.showChunks = this.debugDom)); } await this.page.waitForLoadState("domcontentloaded"); - await this._waitForSettledDom(); + await this.stagehandPage._waitForSettledDom(); return result; }; @@ -534,7 +535,7 @@ export class Stagehand { private async _log_to_browserbase(logObj: LogLine) { logObj.level = logObj.level || 1; - if (!this.page) { + if (!this.stagehandPage) { return; } @@ -580,108 +581,6 @@ export class Stagehand { } } - private async _waitForSettledDom(timeoutMs?: number) { - try { - const timeout = timeoutMs ?? 
this.domSettleTimeoutMs; - let timeoutHandle: NodeJS.Timeout; - - const timeoutPromise = new Promise((resolve) => { - timeoutHandle = setTimeout(() => { - this.log({ - category: "dom", - message: "DOM settle timeout exceeded, continuing anyway", - level: 1, - auxiliary: { - timeout_ms: { - value: timeout.toString(), - type: "integer", - }, - }, - }); - resolve(); - }, timeout); - }); - - try { - await Promise.race([ - this.page.evaluate(() => { - return new Promise((resolve) => { - if (typeof window.waitForDomSettle === "function") { - window.waitForDomSettle().then(resolve); - } else { - console.warn( - "waitForDomSettle is not defined, considering DOM as settled", - ); - resolve(); - } - }); - }), - this.page.waitForLoadState("domcontentloaded"), - this.page.waitForSelector("body"), - timeoutPromise, - ]); - } finally { - clearTimeout(timeoutHandle!); - } - } catch (e) { - this.log({ - category: "dom", - message: "Error in waitForSettledDom", - level: 1, - auxiliary: { - error: { - value: e.message, - type: "string", - }, - trace: { - value: e.stack, - type: "string", - }, - }, - }); - } - } - - private async startDomDebug() { - try { - await this.page - .evaluate(() => { - if (typeof window.debugDom === "function") { - window.debugDom(); - } else { - this.log({ - category: "dom", - message: "debugDom is not defined", - level: 1, - }); - } - }) - .catch(() => {}); - } catch (e) { - this.log({ - category: "dom", - message: "Error in startDomDebug", - level: 1, - auxiliary: { - error: { - value: e.message, - type: "string", - }, - trace: { - value: e.stack, - type: "string", - }, - }, - }); - } - } - - private async cleanupDomDebug() { - if (this.debugDom) { - await this.page.evaluate(() => window.cleanupDebug()).catch(() => {}); - } - } - async act({ action, modelName, diff --git a/package-lock.json b/package-lock.json index 6053a447..7db917fb 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@browserbasehq/stagehand", - 
"version": "1.6.0", + "version": "1.7.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@browserbasehq/stagehand", - "version": "1.6.0", + "version": "1.7.0", "license": "MIT", "dependencies": { "@anthropic-ai/sdk": "^0.27.3", @@ -18,9 +18,11 @@ "@changesets/changelog-github": "^0.5.0", "@changesets/cli": "^2.27.9", "@eslint/js": "^9.16.0", + "@types/adm-zip": "^0.5.7", "@types/cheerio": "^0.22.35", "@types/express": "^4.17.21", "@types/node": "^20.11.30", + "adm-zip": "^0.5.16", "autoevals": "^0.0.64", "braintrust": "^0.0.171", "cheerio": "^1.0.0", @@ -2312,6 +2314,16 @@ "win32" ] }, + "node_modules/@types/adm-zip": { + "version": "0.5.7", + "resolved": "https://registry.npmjs.org/@types/adm-zip/-/adm-zip-0.5.7.tgz", + "integrity": "sha512-DNEs/QvmyRLurdQPChqq0Md4zGvPwHerAJYWk9l2jCbD1VPpnzRJorOdiq4zsw09NFbYnhfsoEhWtxIzXpn2yw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/body-parser": { "version": "1.19.5", "resolved": "https://registry.npmjs.org/@types/body-parser/-/body-parser-1.19.5.tgz", @@ -2836,6 +2848,16 @@ "acorn": ">=8.9.0" } }, + "node_modules/adm-zip": { + "version": "0.5.16", + "resolved": "https://registry.npmjs.org/adm-zip/-/adm-zip-0.5.16.tgz", + "integrity": "sha512-TGw5yVi4saajsSEgz25grObGHEUaDrniwvA2qwSC060KfqGPdglhvPMA2lPIoxs3PQIItj2iag35fONcQqgUaQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12.0" + } + }, "node_modules/agentkeepalive": { "version": "4.5.0", "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.5.0.tgz", diff --git a/package.json b/package.json index 055b85e7..e0eeaee7 100644 --- a/package.json +++ b/package.json @@ -15,6 +15,7 @@ "eslint": "eslint .", "cache:clear": "rm -rf .cache", "evals": "npm run build-dom-scripts && tsx evals/index.eval.ts", + "e2e": "npm run build-dom-scripts && cd evals/deterministic && npx playwright test", "build-dom-scripts": "tsx lib/dom/genDomScripts.ts", "build-types": 
"tsc --emitDeclarationOnly --outDir dist", "build-js": "tsup lib/index.ts --dts", @@ -28,15 +29,17 @@ "lib/**" ], "keywords": [], - "author": "Paul Klein IV", + "author": "Browserbase", "license": "MIT", "devDependencies": { "@changesets/changelog-github": "^0.5.0", "@changesets/cli": "^2.27.9", "@eslint/js": "^9.16.0", + "@types/adm-zip": "^0.5.7", "@types/cheerio": "^0.22.35", "@types/express": "^4.17.21", "@types/node": "^20.11.30", + "adm-zip": "^0.5.16", "autoevals": "^0.0.64", "braintrust": "^0.0.171", "cheerio": "^1.0.0", diff --git a/types/browser.ts b/types/browser.ts index 9796fa60..d108fd0f 100644 --- a/types/browser.ts +++ b/types/browser.ts @@ -1,6 +1,7 @@ import { Browser, BrowserContext } from "@playwright/test"; export interface BrowserResult { + env: "LOCAL" | "BROWSERBASE"; browser?: Browser; context: BrowserContext; debugUrl?: string;