Merge remote-tracking branch 'origin/main' into fix/text_extract_coll…

…isions
browserbase · Jan 10, 2025 · b940d9b · b940d9b
2 parents 7133c95 + 7c48412
commit b940d9b
Show file tree

Hide file tree

Showing 53 changed files with 2,460 additions and 1,300 deletions.
diff --git a/.changeset/fast-dodos-yawn.md b/.changeset/fast-dodos-yawn.md
diff --git a/.changeset/ninety-timers-punch.md → .changeset/hot-moose-stare.md b/.changeset/ninety-timers-punch.md → .changeset/hot-moose-stare.md
@@ -2,4 +2,4 @@
 "@browserbasehq/stagehand": patch
 ---
 
-Remove stagehand nav entirely
+add demo gif
diff --git a/.changeset/sweet-mice-compare.md b/.changeset/sweet-mice-compare.md
@@ -0,0 +1,5 @@
+---
+"@browserbasehq/stagehand": minor
+---
+
+Allow the input of custom instructions into the constructor so that users can guide, or provide guardrails to, the LLM in making decisions.
diff --git a/.changeset/tender-years-crash.md b/.changeset/tender-years-crash.md
diff --git a/.changeset/chilled-kangaroos-rhyme.md → .changeset/thin-squids-listen.md b/.changeset/chilled-kangaroos-rhyme.md → .changeset/thin-squids-listen.md
@@ -2,4 +2,4 @@
 "@browserbasehq/stagehand": patch
 ---
 
-Fix $1-types exposed to the user
+Export LLMClient type
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -1,13 +1,11 @@
 name: Evals
 
 on:
-  push:
-    branches:
-      - main
   pull_request:
     types:
       - opened
       - synchronize
+      - labeled
 
 env:
   EVAL_MODELS: "gpt-4o,gpt-4o-mini,claude-3-5-sonnet-latest"
@@ -18,6 +16,32 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
+  determine-evals:  # Publishes one true/false output per eval category; the eval jobs gate on these via `if:`.
+    runs-on: ubuntu-latest
+    outputs:  # Re-export the check-labels step outputs at job level.
+      run-extract: ${{ steps.check-labels.outputs.run-extract }}
+      run-act: ${{ steps.check-labels.outputs.run-act }}
+      run-observe: ${{ steps.check-labels.outputs.run-observe }}
+      run-text-extract: ${{ steps.check-labels.outputs.run-text-extract }}
+    steps:
+      - id: check-labels  # NOTE(review): only pull_request triggers are visible in this workflow; if github.ref is refs/pull/<n>/merge there, the main-branch shortcut never fires - confirm.
+        run: |
+          # Default to running all tests on main branch
+          if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then
+            echo "Running all tests for main branch"
+            echo "run-extract=true" >> $GITHUB_OUTPUT
+            echo "run-act=true" >> $GITHUB_OUTPUT
+            echo "run-observe=true" >> $GITHUB_OUTPUT
+            echo "run-text-extract=true" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+
+          # Check for specific labels
+          echo "run-extract=${{ contains(github.event.pull_request.labels.*.name, 'extract') }}" >> $GITHUB_OUTPUT
+          echo "run-act=${{ contains(github.event.pull_request.labels.*.name, 'act') }}" >> $GITHUB_OUTPUT
+          echo "run-observe=${{ contains(github.event.pull_request.labels.*.name, 'observe') }}" >> $GITHUB_OUTPUT
+          echo "run-text-extract=${{ contains(github.event.pull_request.labels.*.name, 'text-extract') }}" >> $GITHUB_OUTPUT
+
   run-lint:
     runs-on: ubuntu-latest
     steps:
@@ -56,6 +80,36 @@ jobs:
     needs: [run-lint, run-build]
     runs-on: ubuntu-latest
     timeout-minutes: 50
+    env:
+      HEADLESS: true
+
+    steps:
+      - name: Check out repository code
+        uses: actions/checkout@v4
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+
+      - name: Install dependencies
+        run: npm install --no-frozen-lockfile
+
+      - name: Install Playwright browsers
+        run: npm exec playwright install --with-deps
+
+      - name: Run E2E Tests (Deterministic Playwright)
+        run: npm run e2e
+
+  run-e2e-bb-tests:
+    needs: [run-e2e-tests]
+    runs-on: ubuntu-latest
+    timeout-minutes: 50
+
+    if: >
+      github.event_name == 'push' ||
+      (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository)
+
     env:
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
@@ -78,13 +132,58 @@ jobs:
       - name: Install Playwright browsers
         run: npm exec playwright install --with-deps
 
-      - name: Run E2E Tests
-        run: npm run e2e
+      - name: Run E2E Tests (browserbase)
+        run: npm run e2e:bb
+
+  run-combination-evals:
+    needs: [run-e2e-bb-tests, run-e2e-tests, determine-evals]
+    runs-on: ubuntu-latest
+    timeout-minutes: 40
+    env:
+      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+      BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
+      BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
+      BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
+      HEADLESS: true
+      EVAL_ENV: browserbase
+
+    steps:
+      - name: Check out repository code
+        uses: actions/checkout@v4
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+
+      - name: Install dependencies
+        run: npm install --no-frozen-lockfile
+
+      - name: Install Playwright browsers
+        run: npm exec playwright install --with-deps
+
+      - name: Run Combination Evals
+        run: npm run evals category combination
+
+      - name: Log Combination Evals Performance  # Reads eval-summary.json only after confirming it exists, so a missing file reaches the explicit failure message.
+        run: |
+          if [ -f eval-summary.json ]; then
+            experimentName=$(jq -r '.experimentName' eval-summary.json)
+            echo "View results at https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentName}"
+            combination_score=$(jq '.categories.combination' eval-summary.json)
+            echo "Combination category score: $combination_score%"
+            exit 0
+          else
+            echo "Eval summary not found for combination category. Failing CI."
+            exit 1
+          fi
 
   run-act-evals:
+    needs: [run-e2e-tests, determine-evals, run-combination-evals]
+    if: needs.determine-evals.outputs.run-act == 'true'
     runs-on: ubuntu-latest
     timeout-minutes: 25
-    needs: [run-text-extract-evals]
     env:
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
@@ -129,7 +228,8 @@ jobs:
           fi
 
   run-extract-evals:
-    needs: [run-lint, run-build, run-e2e-tests]
+    needs: [run-e2e-tests, determine-evals, run-combination-evals]
+    if: needs.determine-evals.outputs.run-extract == 'true'
     runs-on: ubuntu-latest
     timeout-minutes: 50
     env:
@@ -187,7 +287,8 @@ jobs:
           fi
 
   run-text-extract-evals:
-    needs: [run-extract-evals]
+    needs: [run-e2e-tests, determine-evals, run-combination-evals]
+    if: needs.determine-evals.outputs.run-text-extract == 'true'
     runs-on: ubuntu-latest
     timeout-minutes: 120
     env:
@@ -245,9 +346,10 @@ jobs:
           fi
 
   run-observe-evals:
+    needs: [run-e2e-tests, determine-evals, run-combination-evals]
+    if: needs.determine-evals.outputs.run-observe == 'true'
     runs-on: ubuntu-latest
     timeout-minutes: 25
-    needs: [run-act-evals]
     env:
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
@@ -290,47 +392,3 @@ jobs:
             echo "Eval summary not found for observe category. Failing CI."
             exit 1
           fi
-
-  run-combination-evals:
-    runs-on: ubuntu-latest
-    timeout-minutes: 40
-    needs: [run-observe-evals]
-    env:
-      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-      BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
-      BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
-      BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
-      HEADLESS: true
-      EVAL_ENV: browserbase
-
-    steps:
-      - name: Check out repository code
-        uses: actions/checkout@v4
-
-      - name: Set up Node.js
-        uses: actions/setup-node@v4
-        with:
-          node-version: "20"
-
-      - name: Install dependencies
-        run: npm install --no-frozen-lockfile
-
-      - name: Install Playwright browsers
-        run: npm exec playwright install --with-deps
-
-      - name: Run Combination Evals
-        run: npm run evals category combination
-
-      - name: Log Combination Evals Performance
-        run: |
-          experimentName=$(jq -r '.experimentName' eval-summary.json)
-          echo "View results at https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentName}"
-          if [ -f eval-summary.json ]; then
-            combination_score=$(jq '.categories.combination' eval-summary.json)
-            echo "Combination category score: $combination_score%"
-            exit 0
-          else
-            echo "Eval summary not found for combination category. Failing CI."
-            exit 1
-          fi
diff --git a/.gitignore b/.gitignore
@@ -16,4 +16,5 @@ evals/public
 evals/playground.ts
 tmp/
 eval-summary.json
-pnpm-lock.yaml
+pnpm-lock.yaml
+evals/deterministic/tests/BrowserContext/tmp-test.har
diff --git a/.prettierignore b/.prettierignore
@@ -1 +1,2 @@
 pnpm-lock.yaml
+README.md
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,37 @@
 # @browserbasehq/stagehand
 
+## 1.9.0
+
+### Minor Changes
+
+- [#374](https://github.com/browserbase/stagehand/pull/374) [`207244e`](https://github.com/browserbase/stagehand/commit/207244e3a46c4474d4d28db039eab131164790ca) Thanks [@sameelarif](https://github.com/sameelarif)! - Pass in a Stagehand Page object into the `on("popup")` listener to allow for multi-page handling.
+
+- [#367](https://github.com/browserbase/stagehand/pull/367) [`75c0e20`](https://github.com/browserbase/stagehand/commit/75c0e20cde54951399753e0fa841df463e1271b8) Thanks [@kamath](https://github.com/kamath)! - Logger in LLMClient is inherited by default from Stagehand. Named rather than positional arguments are used in implemented LLMClients.
+
+- [#381](https://github.com/browserbase/stagehand/pull/381) [`db2ef59`](https://github.com/browserbase/stagehand/commit/db2ef5997664e81b1dfb5ca992392362f2d3bab1) Thanks [@kamath](https://github.com/kamath)! - make logs only sync
+
+- [#385](https://github.com/browserbase/stagehand/pull/385) [`5899ec2`](https://github.com/browserbase/stagehand/commit/5899ec2c4b73c636bfd8120ec3aac225af7dd949) Thanks [@sameelarif](https://github.com/sameelarif)! - Moved the LLMClient logger parameter to the createChatCompletion method options.
+
+- [#364](https://github.com/browserbase/stagehand/pull/364) [`08907eb`](https://github.com/browserbase/stagehand/commit/08907ebbc2cb47cfc3151946764656a7f4ce99c6) Thanks [@kamath](https://github.com/kamath)! - exposed llmClient in stagehand constructor
+
+### Patch Changes
+
+- [#383](https://github.com/browserbase/stagehand/pull/383) [`a77efcc`](https://github.com/browserbase/stagehand/commit/a77efccfde3a3948013eda3a52935e8a21d45b3e) Thanks [@sameelarif](https://github.com/sameelarif)! - Unified LLM input/output types for reduced dependence on OpenAI types
+
+- [`b7b3701`](https://github.com/browserbase/stagehand/commit/b7b370160bf35b09f5dc132f6e86f6e34fb70a85) Thanks [@kamath](https://github.com/kamath)! - Fix $1-types exposed to the user
+
+- [#353](https://github.com/browserbase/stagehand/pull/353) [`5c6f14b`](https://github.com/browserbase/stagehand/commit/5c6f14bade201e08cb86d2e14e246cb65707f7ee) Thanks [@kamath](https://github.com/kamath)! - Throw custom error if context is referenced without initialization, remove act/extract handler from index
+
+- [#360](https://github.com/browserbase/stagehand/pull/360) [`89841fc`](https://github.com/browserbase/stagehand/commit/89841fc42ae82559baddfe2a9593bc3260c082a2) Thanks [@kamath](https://github.com/kamath)! - Remove stagehand nav entirely
+
+- [#379](https://github.com/browserbase/stagehand/pull/379) [`b1c6579`](https://github.com/browserbase/stagehand/commit/b1c657976847de86d82324030f90c2f6a1f3f976) Thanks [@seanmcguire12](https://github.com/seanmcguire12)! - don't require LLM Client to use non-ai stagehand functions
+
+- [#371](https://github.com/browserbase/stagehand/pull/371) [`30e7d09`](https://github.com/browserbase/stagehand/commit/30e7d091445004c71aec1748d3a7d75fb86d1f11) Thanks [@kamath](https://github.com/kamath)! - pretty readme :)
+
+- [#382](https://github.com/browserbase/stagehand/pull/382) [`a41271b`](https://github.com/browserbase/stagehand/commit/a41271baf351e20f4c79b4b654d8a947b615a121) Thanks [@sameelarif](https://github.com/sameelarif)! - Added example implementation of the Vercel AI SDK as an LLMClient
+
+- [#344](https://github.com/browserbase/stagehand/pull/344) [`c1cf345`](https://github.com/browserbase/stagehand/commit/c1cf34535ed30262989b1dbe262fb0414cdf8230) Thanks [@kamath](https://github.com/kamath)! - Remove duplicate logging and expose Page/BrowserContext types
+
 ## 1.8.0
 
 ### Minor Changes