mirror of
https://github.com/browser-use/browser-use
synced 2026-04-22 17:45:09 +02:00
Merge remote-tracking branch 'origin/main' into fix/handle-lmnr-type-error-on-import-4046
This commit is contained in:
2
.github/workflows/cloud_evals.yml
vendored
2
.github/workflows/cloud_evals.yml
vendored
@@ -16,6 +16,8 @@ on:
|
||||
description: Commit hash of the library to build the Cloud eval image for
|
||||
required: false
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
trigger_cloud_eval_image_build:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
118
.github/workflows/install-script.yml
vendored
118
.github/workflows/install-script.yml
vendored
@@ -13,6 +13,9 @@ on:
|
||||
- '.github/workflows/install-script.yml'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# Cancel in-progress runs when a new commit is pushed
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
@@ -26,16 +29,15 @@ env:
|
||||
|
||||
jobs:
|
||||
# ===========================================================================
|
||||
# Test install.sh with different modes on all platforms
|
||||
# Test install.sh on all platforms
|
||||
# ===========================================================================
|
||||
|
||||
test-install-sh-linux:
|
||||
name: install.sh ${{ matrix.mode }} (Linux ${{ matrix.os }})
|
||||
name: install.sh (Linux ${{ matrix.os }})
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, ubuntu-22.04]
|
||||
mode: [--remote-only, --local-only, --full]
|
||||
runs-on: ${{ matrix.os }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -45,8 +47,8 @@ jobs:
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Run install.sh ${{ matrix.mode }}
|
||||
run: bash browser_use/skill_cli/install.sh ${{ matrix.mode }}
|
||||
- name: Run install.sh
|
||||
run: bash browser_use/skill_cli/install.sh
|
||||
|
||||
- name: Add to PATH
|
||||
run: |
|
||||
@@ -58,65 +60,31 @@ jobs:
|
||||
source ~/.browser-use-env/bin/activate
|
||||
browser-use --help
|
||||
|
||||
- name: Verify install-config.json
|
||||
run: |
|
||||
cat ~/.browser-use/install-config.json
|
||||
# Verify expected modes based on install flag
|
||||
if [[ "${{ matrix.mode }}" == "--remote-only" ]]; then
|
||||
grep -q '"remote"' ~/.browser-use/install-config.json
|
||||
grep -q '"default_mode": "remote"' ~/.browser-use/install-config.json
|
||||
elif [[ "${{ matrix.mode }}" == "--local-only" ]]; then
|
||||
grep -q '"chromium"' ~/.browser-use/install-config.json
|
||||
grep -q '"default_mode": "chromium"' ~/.browser-use/install-config.json
|
||||
elif [[ "${{ matrix.mode }}" == "--full" ]]; then
|
||||
grep -q '"chromium"' ~/.browser-use/install-config.json
|
||||
grep -q '"remote"' ~/.browser-use/install-config.json
|
||||
fi
|
||||
|
||||
- name: Verify Chromium installed (local/full only)
|
||||
if: matrix.mode != '--remote-only'
|
||||
- name: Verify Chromium installed
|
||||
run: |
|
||||
source ~/.browser-use-env/bin/activate
|
||||
# Check playwright browsers are installed
|
||||
uvx playwright install --dry-run chromium 2>&1 | grep -i "chromium" || true
|
||||
# Verify chromium binary exists in playwright cache
|
||||
ls ~/.cache/ms-playwright/chromium-*/chrome-linux/chrome 2>/dev/null || \
|
||||
ls ~/.cache/ms-playwright/chromium-*/chrome-linux/chromium 2>/dev/null || \
|
||||
echo "Chromium binary check completed"
|
||||
|
||||
- name: Verify cloudflared installed (remote/full only)
|
||||
if: matrix.mode != '--local-only'
|
||||
run: |
|
||||
which cloudflared || ls ~/.local/bin/cloudflared
|
||||
cloudflared --version
|
||||
|
||||
- name: Verify cloudflared NOT installed (local-only)
|
||||
if: matrix.mode == '--local-only'
|
||||
run: |
|
||||
if command -v cloudflared &> /dev/null; then
|
||||
echo "ERROR: cloudflared should not be installed in local-only mode"
|
||||
exit 1
|
||||
fi
|
||||
echo "Confirmed: cloudflared not installed (expected for local-only)"
|
||||
|
||||
- name: Run browser-use doctor
|
||||
run: |
|
||||
source ~/.browser-use-env/bin/activate
|
||||
browser-use doctor
|
||||
|
||||
test-install-sh-macos:
|
||||
name: install.sh ${{ matrix.mode }} (macOS ${{ matrix.os }})
|
||||
name: install.sh (macOS ${{ matrix.os }})
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [macos-latest, macos-14]
|
||||
mode: [--remote-only, --local-only, --full]
|
||||
runs-on: ${{ matrix.os }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Run install.sh ${{ matrix.mode }}
|
||||
run: bash browser_use/skill_cli/install.sh ${{ matrix.mode }}
|
||||
- name: Run install.sh
|
||||
run: bash browser_use/skill_cli/install.sh
|
||||
|
||||
- name: Add to PATH
|
||||
run: |
|
||||
@@ -128,22 +96,7 @@ jobs:
|
||||
source ~/.browser-use-env/bin/activate
|
||||
browser-use --help
|
||||
|
||||
- name: Verify install-config.json
|
||||
run: |
|
||||
cat ~/.browser-use/install-config.json
|
||||
if [[ "${{ matrix.mode }}" == "--remote-only" ]]; then
|
||||
grep -q '"remote"' ~/.browser-use/install-config.json
|
||||
grep -q '"default_mode": "remote"' ~/.browser-use/install-config.json
|
||||
elif [[ "${{ matrix.mode }}" == "--local-only" ]]; then
|
||||
grep -q '"chromium"' ~/.browser-use/install-config.json
|
||||
grep -q '"default_mode": "chromium"' ~/.browser-use/install-config.json
|
||||
elif [[ "${{ matrix.mode }}" == "--full" ]]; then
|
||||
grep -q '"chromium"' ~/.browser-use/install-config.json
|
||||
grep -q '"remote"' ~/.browser-use/install-config.json
|
||||
fi
|
||||
|
||||
- name: Verify Chromium installed (local/full only)
|
||||
if: matrix.mode != '--remote-only'
|
||||
- name: Verify Chromium installed
|
||||
run: |
|
||||
source ~/.browser-use-env/bin/activate
|
||||
# Check playwright cache for chromium
|
||||
@@ -151,32 +104,13 @@ jobs:
|
||||
ls ~/Library/Caches/ms-playwright/chromium-*/Chromium.app 2>/dev/null || \
|
||||
echo "Chromium binary check completed"
|
||||
|
||||
- name: Verify cloudflared installed (remote/full only)
|
||||
if: matrix.mode != '--local-only'
|
||||
run: |
|
||||
which cloudflared || ls ~/.local/bin/cloudflared
|
||||
cloudflared --version
|
||||
|
||||
- name: Verify cloudflared NOT installed (local-only)
|
||||
if: matrix.mode == '--local-only'
|
||||
run: |
|
||||
if command -v cloudflared &> /dev/null; then
|
||||
echo "ERROR: cloudflared should not be installed in local-only mode"
|
||||
exit 1
|
||||
fi
|
||||
echo "Confirmed: cloudflared not installed (expected for local-only)"
|
||||
|
||||
- name: Run browser-use doctor
|
||||
run: |
|
||||
source ~/.browser-use-env/bin/activate
|
||||
browser-use doctor
|
||||
|
||||
test-install-sh-windows:
|
||||
name: install.sh ${{ matrix.mode }} (Windows)
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
mode: [--remote-only, --local-only, --full]
|
||||
name: install.sh (Windows)
|
||||
runs-on: windows-latest
|
||||
defaults:
|
||||
run:
|
||||
@@ -192,8 +126,8 @@ jobs:
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Run install.sh ${{ matrix.mode }}
|
||||
run: bash browser_use/skill_cli/install.sh ${{ matrix.mode }}
|
||||
- name: Run install.sh
|
||||
run: bash browser_use/skill_cli/install.sh
|
||||
|
||||
- name: Add to PATH
|
||||
run: |
|
||||
@@ -205,18 +139,6 @@ jobs:
|
||||
source ~/.browser-use-env/Scripts/activate
|
||||
browser-use --help
|
||||
|
||||
- name: Verify install-config.json
|
||||
run: |
|
||||
cat ~/.browser-use/install-config.json
|
||||
if [[ "${{ matrix.mode }}" == "--remote-only" ]]; then
|
||||
grep -q '"remote"' ~/.browser-use/install-config.json
|
||||
elif [[ "${{ matrix.mode }}" == "--local-only" ]]; then
|
||||
grep -q '"chromium"' ~/.browser-use/install-config.json
|
||||
elif [[ "${{ matrix.mode }}" == "--full" ]]; then
|
||||
grep -q '"chromium"' ~/.browser-use/install-config.json
|
||||
grep -q '"remote"' ~/.browser-use/install-config.json
|
||||
fi
|
||||
|
||||
- name: Run browser-use doctor
|
||||
run: |
|
||||
source ~/.browser-use-env/Scripts/activate
|
||||
@@ -245,7 +167,7 @@ jobs:
|
||||
# Install from current branch
|
||||
uv pip install .
|
||||
|
||||
- name: Run browser-use install (installs Chromium only, not cloudflared)
|
||||
- name: Run browser-use install (installs Chromium)
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
browser-use install
|
||||
@@ -262,9 +184,6 @@ jobs:
|
||||
ls ~/.cache/ms-playwright/chromium-*/chrome-linux/chromium 2>/dev/null || \
|
||||
echo "Chromium check completed"
|
||||
|
||||
# Note: browser-use install only installs Chromium, not cloudflared
|
||||
# Users should install cloudflared separately if needed for tunneling
|
||||
|
||||
- name: Run browser-use doctor
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
@@ -295,7 +214,6 @@ jobs:
|
||||
|
||||
- name: Test uvx with local wheel
|
||||
run: |
|
||||
# Install the wheel we just built
|
||||
WHEEL=$(ls dist/*.whl)
|
||||
uvx --from "$WHEEL" browser-use --help
|
||||
|
||||
@@ -310,8 +228,6 @@ jobs:
|
||||
ls ~/.cache/ms-playwright/chromium-*/chrome-linux/chromium 2>/dev/null || \
|
||||
echo "Chromium check completed"
|
||||
|
||||
# Note: browser-use install only installs Chromium, not cloudflared
|
||||
|
||||
- name: Test uvx browser-use doctor
|
||||
run: |
|
||||
WHEEL=$(ls dist/*.whl)
|
||||
@@ -345,7 +261,5 @@ jobs:
|
||||
ls ~/.cache/ms-playwright/chromium-*/chrome-linux/chromium 2>/dev/null || \
|
||||
echo "Chromium check completed"
|
||||
|
||||
# Note: browser-use install only installs Chromium, not cloudflared
|
||||
|
||||
- name: Test uvx browser-use doctor
|
||||
run: uvx "browser-use[cli]" doctor
|
||||
|
||||
6
.github/workflows/lint.yml
vendored
6
.github/workflows/lint.yml
vendored
@@ -16,6 +16,9 @@ on:
|
||||
pull_request:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
lint-syntax:
|
||||
name: syntax-errors
|
||||
@@ -35,7 +38,8 @@ jobs:
|
||||
- uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
enable-cache: true
|
||||
- run: uv sync --dev --all-extras # install extras for examples to avoid pyright missing imports errors
|
||||
- run: uv python install 3.11
|
||||
- run: uv sync --dev --all-extras --python 3.11
|
||||
- run: uv run --no-sync pre-commit run --all-files --show-diff-on-failure
|
||||
|
||||
lint-typecheck:
|
||||
|
||||
3
.github/workflows/package.yaml
vendored
3
.github/workflows/package.yaml
vendored
@@ -15,6 +15,9 @@ on:
|
||||
- '*'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: pip-build
|
||||
|
||||
2
.github/workflows/stale-bot.yml
vendored
2
.github/workflows/stale-bot.yml
vendored
@@ -12,7 +12,7 @@ jobs:
|
||||
stale:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/stale@v9
|
||||
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9
|
||||
with:
|
||||
# General settings
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
default_language_version:
|
||||
python: python3.11
|
||||
|
||||
repos:
|
||||
- repo: https://github.com/asottile/yesqa
|
||||
rev: v1.5.0
|
||||
|
||||
12
AGENTS.md
12
AGENTS.md
@@ -36,7 +36,7 @@ uv sync
|
||||
To get started with Browser Use you need to install the package and create an `.env` file with your API key.
|
||||
|
||||
<Note icon="key" color="#FFC107" iconType="regular">
|
||||
`ChatBrowserUse` offers the [fastest and most cost-effective models](https://browser-use.com/posts/speed-matters/), completing tasks 3-5x faster. Get started with \$10 of [free LLM credits](https://cloud.browser-use.com/new-api-key).
|
||||
`ChatBrowserUse` offers the [fastest and most cost-effective models](https://browser-use.com/posts/speed-matters/), completing tasks 3-5x faster. Get your API key at [cloud.browser-use.com](https://cloud.browser-use.com/new-api-key).
|
||||
</Note>
|
||||
|
||||
## 1. Installing Browser-Use
|
||||
@@ -61,7 +61,7 @@ uvx browser-use install
|
||||
Create a `.env` file and add your API key.
|
||||
|
||||
<Callout icon="key" iconType="regular">
|
||||
We recommend using ChatBrowserUse which is optimized for browser automation tasks (highest accuracy + fastest speed + lowest token cost). Don't have one? We give you **\$10** to try it out [here](https://cloud.browser-use.com/new-api-key).
|
||||
We recommend using ChatBrowserUse which is optimized for browser automation tasks (highest accuracy + fastest speed + lowest token cost). Get your API key [here](https://cloud.browser-use.com/new-api-key).
|
||||
</Callout>
|
||||
|
||||
```bash .env theme={null}
|
||||
@@ -76,7 +76,7 @@ Then add your API key to the file.
|
||||
```bash Browser Use theme={null}
|
||||
# add your key to .env file
|
||||
BROWSER_USE_API_KEY=
|
||||
# Get 10$ of free credits at https://cloud.browser-use.com/new-api-key
|
||||
# Get your API key at https://cloud.browser-use.com/new-api-key
|
||||
```
|
||||
|
||||
```bash Google theme={null}
|
||||
@@ -256,7 +256,7 @@ Your cloud browser is already logged in!
|
||||
|
||||
***
|
||||
|
||||
For more sandbox parameters and events, see [Sandbox Quickstart](https://docs.browser-use.com/customize/sandbox/quickstart).
|
||||
For more sandbox parameters and events, see [Sandbox Quickstart](https://docs.browser-use.com/legacy/sandbox/quickstart).
|
||||
|
||||
# Agent Basics
|
||||
```python theme={null}
|
||||
@@ -538,7 +538,7 @@ async def main():
|
||||
> Complete reference for all browser configuration options
|
||||
|
||||
<Note>
|
||||
The `Browser` instance also provides all [Actor](https://docs.browser-use.com/customize/actor/all-parameters) methods for direct browser control (page management, element interactions, etc.).
|
||||
The `Browser` instance also provides all [Actor](https://docs.browser-use.com/legacy/actor/all-parameters) methods for direct browser control (page management, element interactions, etc.).
|
||||
</Note>
|
||||
|
||||
## Core Settings
|
||||
@@ -798,7 +798,7 @@ The agent injects parameters by name matching, so using the wrong name will caus
|
||||
</Warning>
|
||||
|
||||
<Note>
|
||||
Use `browser_session` parameter in tools for deterministic [Actor](https://docs.browser-use.com/customize/actor/basics) actions.
|
||||
Use `browser_session` parameter in tools for deterministic [Actor](https://docs.browser-use.com/legacy/actor/basics) actions.
|
||||
</Note>
|
||||
|
||||
|
||||
|
||||
2
CLOUD.md
2
CLOUD.md
@@ -19,7 +19,7 @@ The key product of Browser Use Cloud is the completion of user tasks.
|
||||
- Profile Sync is the best way to handle authentication for tasks. This feature allows users to upload their local browser cookies (where the user is already logged into the services they need authentication for) to a Browser Profile that can be used for tasks on the cloud. To initiate a Profile Sync, a user must run `export BROWSER_USE_API_KEY=<your_key> && curl -fsSL https://browser-use.com/profile.sh | sh` and follow the steps in the interactive terminal.
|
||||
|
||||
## Quickstart
|
||||
To get started, direct the user to first must create an account, purchase credits (or simply claim the free starter credits given on account creation), and generate an API key on the Browser Use online platform: https://cloud.browser-use.com/. These are the only steps that can only be done on the platform.
|
||||
To get started, direct the user to first must create an account, purchase credits (or simply claim the five free tasks given on account creation), and generate an API key on the Browser Use online platform: https://cloud.browser-use.com/. These are the only steps that can only be done on the platform.
|
||||
|
||||
Avoid giving the user all of the following steps at once as it may seem overwheling. Instead present one step at a time and only continue when asked. Do as much for the user as you are able to.
|
||||
|
||||
|
||||
156
README.md
156
README.md
@@ -13,7 +13,7 @@
|
||||
</div>
|
||||
|
||||
<div align="center">
|
||||
<a href="https://cloud.browser-use.com"><img src="https://media.browser-use.tools/badges/package" height="48" alt="Browser-Use Package Download Statistics"></a>
|
||||
<a href="https://cloud.browser-use.com?utm_source=github&utm_medium=readme-badge-downloads"><img src="https://media.browser-use.tools/badges/package" height="48" alt="Browser-Use Package Download Statistics"></a>
|
||||
</div>
|
||||
|
||||
---
|
||||
@@ -33,12 +33,12 @@
|
||||
<img width="4 height="1" alt="">
|
||||
<a href="https://link.browser-use.com/discord"><img src="https://media.browser-use.tools/badges/discord" alt="Discord"></a>
|
||||
<img width="4" height="1" alt="">
|
||||
<a href="https://cloud.browser-use.com"><img src="https://media.browser-use.tools/badges/cloud" height="48" alt="Browser-Use Cloud"></a>
|
||||
<a href="https://cloud.browser-use.com?utm_source=github&utm_medium=readme-badge-cloud"><img src="https://media.browser-use.tools/badges/cloud" height="48" alt="Browser-Use Cloud"></a>
|
||||
</div>
|
||||
|
||||
</br>
|
||||
|
||||
🌤️ Want to skip the setup? Use our <b>[cloud](https://cloud.browser-use.com)</b> for faster, scalable, stealth-enabled browser automation!
|
||||
🌤️ Want to skip the setup? Use our <b>[cloud](https://cloud.browser-use.com?utm_source=github&utm_medium=readme-skip-setup)</b> for faster, scalable, stealth-enabled browser automation!
|
||||
|
||||
# 🤖 LLM Quickstart
|
||||
|
||||
@@ -49,77 +49,99 @@
|
||||
|
||||
# 👋 Human Quickstart
|
||||
|
||||
**1. Create environment with [uv](https://docs.astral.sh/uv/) (Python>=3.11):**
|
||||
**1. Create environment and install Browser-Use with [uv](https://docs.astral.sh/uv/) (Python>=3.11):**
|
||||
```bash
|
||||
uv init
|
||||
uv init && uv add browser-use && uv sync
|
||||
# uvx browser-use install # Run if you don't have Chromium installed
|
||||
```
|
||||
|
||||
**2. Install Browser-Use package:**
|
||||
```bash
|
||||
# We ship every day - use the latest version!
|
||||
uv add browser-use
|
||||
uv sync
|
||||
```
|
||||
|
||||
**3. Get your API key from [Browser Use Cloud](https://cloud.browser-use.com/new-api-key) and add it to your `.env` file (new signups get $10 free credits):**
|
||||
**2. [Optional] Get your API key from [Browser Use Cloud](https://cloud.browser-use.com/new-api-key?utm_source=github&utm_medium=readme-quickstart-api-key):**
|
||||
```
|
||||
# .env
|
||||
BROWSER_USE_API_KEY=your-key
|
||||
# GOOGLE_API_KEY=your-key
|
||||
# ANTHROPIC_API_KEY=your-key
|
||||
```
|
||||
|
||||
**4. Install Chromium browser:**
|
||||
```bash
|
||||
uvx browser-use install
|
||||
```
|
||||
|
||||
**5. Run your first agent:**
|
||||
**3. Run your first agent:**
|
||||
```python
|
||||
from browser_use import Agent, Browser, ChatBrowserUse
|
||||
# from browser_use import ChatGoogle # ChatGoogle(model='gemini-3-flash-preview')
|
||||
# from browser_use import ChatAnthropic # ChatAnthropic(model='claude-sonnet-4-6')
|
||||
import asyncio
|
||||
|
||||
async def example():
|
||||
async def main():
|
||||
browser = Browser(
|
||||
# use_cloud=True, # Uncomment to use a stealth browser on Browser Use Cloud
|
||||
# use_cloud=True, # Use a stealth browser on Browser Use Cloud
|
||||
)
|
||||
|
||||
llm = ChatBrowserUse()
|
||||
|
||||
agent = Agent(
|
||||
task="Find the number of stars of the browser-use repo",
|
||||
llm=llm,
|
||||
llm=ChatBrowserUse(),
|
||||
# llm=ChatGoogle(model='gemini-3-flash-preview'),
|
||||
# llm=ChatAnthropic(model='claude-sonnet-4-6'),
|
||||
browser=browser,
|
||||
)
|
||||
|
||||
history = await agent.run()
|
||||
return history
|
||||
await agent.run()
|
||||
|
||||
if __name__ == "__main__":
|
||||
history = asyncio.run(example())
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
Check out the [library docs](https://docs.browser-use.com) and the [cloud docs](https://docs.cloud.browser-use.com) for more!
|
||||
Check out the [library docs](https://docs.browser-use.com/open-source/introduction) and the [cloud docs](https://docs.cloud.browser-use.com?utm_source=github&utm_medium=readme-cloud-docs) for more!
|
||||
|
||||
<br/>
|
||||
|
||||
# 🔥 Deploy on Sandboxes
|
||||
# Open Source vs Cloud
|
||||
|
||||
We handle agents, browsers, persistence, auth, cookies, and LLMs. The agent runs right next to the browser for minimal latency.
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: light)" srcset="static/accuracy_by_model_light.png">
|
||||
<source media="(prefers-color-scheme: dark)" srcset="static/accuracy_by_model_dark.png">
|
||||
<img alt="BU Bench V1 - LLM Success Rates" src="static/accuracy_by_model_light.png" width="100%">
|
||||
</picture>
|
||||
|
||||
```python
|
||||
from browser_use import Browser, sandbox, ChatBrowserUse
|
||||
from browser_use.agent.service import Agent
|
||||
import asyncio
|
||||
We benchmark Browser Use across 100 real-world browser tasks. Full benchmark is open source: **[browser-use/benchmark](https://github.com/browser-use/benchmark)**.
|
||||
|
||||
@sandbox()
|
||||
async def my_task(browser: Browser):
|
||||
agent = Agent(task="Find the top HN post", browser=browser, llm=ChatBrowserUse())
|
||||
await agent.run()
|
||||
**Use the Open-Source Agent**
|
||||
- You need [custom tools](https://docs.browser-use.com/customize/tools/basics) or deep code-level integration
|
||||
- We recommend pairing with our [cloud browsers](https://docs.browser-use.com/open-source/customize/browser/remote) for leading stealth, proxy rotation, and scaling
|
||||
- Or self-host the open-source agent fully on your own machines
|
||||
|
||||
# Just call it like any async function
|
||||
asyncio.run(my_task())
|
||||
```
|
||||
**Use the [Fully-Hosted Cloud Agent](https://cloud.browser-use.com?utm_source=github&utm_medium=readme-hosted-agent) (recommended)**
|
||||
- Much more powerful agent for complex tasks (see plot above)
|
||||
- Easiest way to start and scale
|
||||
- Best stealth with proxy rotation and captcha solving
|
||||
- 1000+ integrations (Gmail, Slack, Notion, and more)
|
||||
- Persistent filesystem and memory
|
||||
|
||||
See [Going to Production](https://docs.browser-use.com/production) for more details.
|
||||
<br/>
|
||||
|
||||
# Demos
|
||||
|
||||
|
||||
### 📋 Form-Filling
|
||||
#### Task = "Fill in this job application with my resume and information."
|
||||

|
||||
[Example code ↗](https://github.com/browser-use/browser-use/blob/main/examples/use-cases/apply_to_job.py)
|
||||
|
||||
|
||||
### 🍎 Grocery-Shopping
|
||||
#### Task = "Put this list of items into my instacart."
|
||||
|
||||
https://github.com/user-attachments/assets/a6813fa7-4a7c-40a6-b4aa-382bf88b1850
|
||||
|
||||
[Example code ↗](https://github.com/browser-use/browser-use/blob/main/examples/use-cases/buy_groceries.py)
|
||||
|
||||
|
||||
### 💻 Personal-Assistant.
|
||||
#### Task = "Help me find parts for a custom PC."
|
||||
|
||||
https://github.com/user-attachments/assets/ac34f75c-057a-43ef-ad06-5b2c9d42bf06
|
||||
|
||||
[Example code ↗](https://github.com/browser-use/browser-use/blob/main/examples/use-cases/pcpartpicker.py)
|
||||
|
||||
|
||||
### 💡See [more examples here ↗](https://docs.browser-use.com/examples) and give us a star!
|
||||
|
||||
<br/>
|
||||
|
||||
@@ -170,35 +192,6 @@ curl -o ~/.claude/skills/browser-use/SKILL.md \
|
||||
|
||||
<br/>
|
||||
|
||||
# Demos
|
||||
|
||||
|
||||
### 📋 Form-Filling
|
||||
#### Task = "Fill in this job application with my resume and information."
|
||||

|
||||
[Example code ↗](https://github.com/browser-use/browser-use/blob/main/examples/use-cases/apply_to_job.py)
|
||||
|
||||
|
||||
### 🍎 Grocery-Shopping
|
||||
#### Task = "Put this list of items into my instacart."
|
||||
|
||||
https://github.com/user-attachments/assets/a6813fa7-4a7c-40a6-b4aa-382bf88b1850
|
||||
|
||||
[Example code ↗](https://github.com/browser-use/browser-use/blob/main/examples/use-cases/buy_groceries.py)
|
||||
|
||||
|
||||
### 💻 Personal-Assistant.
|
||||
#### Task = "Help me find parts for a custom PC."
|
||||
|
||||
https://github.com/user-attachments/assets/ac34f75c-057a-43ef-ad06-5b2c9d42bf06
|
||||
|
||||
[Example code ↗](https://github.com/browser-use/browser-use/blob/main/examples/use-cases/pcpartpicker.py)
|
||||
|
||||
|
||||
### 💡See [more examples here ↗](https://docs.browser-use.com/examples) and give us a star!
|
||||
|
||||
<br/>
|
||||
|
||||
## Integrations, hosting, custom tools, MCP, and more on our [Docs ↗](https://docs.browser-use.com)
|
||||
|
||||
<br/>
|
||||
@@ -218,6 +211,15 @@ We optimized **ChatBrowserUse()** specifically for browser automation tasks. On
|
||||
For other LLM providers, see our [supported models documentation](https://docs.browser-use.com/supported-models).
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>Should I use the Browser Use system prompt with the open-source preview model?</b></summary>
|
||||
|
||||
Yes. If you use `ChatBrowserUse(model='browser-use/bu-30b-a3b-preview')` with a normal `Agent(...)`, Browser Use still sends its default agent system prompt for you.
|
||||
|
||||
You do **not** need to add a separate custom "Browser Use system message" just because you switched to the open-source preview model. Only use `extend_system_message` or `override_system_message` when you intentionally want to customize the default behavior for your task.
|
||||
|
||||
If you want the best default speed/accuracy, we still recommend the newer hosted `bu-*` models. If you want the open-source preview model, the setup stays the same apart from the `model=` value.
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>Can I use custom tools with the agent?</b></summary>
|
||||
@@ -249,6 +251,12 @@ agent = Agent(
|
||||
Yes! Browser-Use is open source and free to use. You only need to choose an LLM provider (like OpenAI, Google, ChatBrowserUse, or run local models with Ollama).
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>Terms of Service</b></summary>
|
||||
|
||||
This open-source library is licensed under the MIT License. For Browser Use services & data policy, see our [Terms of Service](https://browser-use.com/legal/terms-of-service) and [Privacy Policy](https://browser-use.com/privacy/).
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>How do I handle authentication?</b></summary>
|
||||
|
||||
@@ -263,7 +271,7 @@ These examples show how to maintain sessions and handle authentication seamlessl
|
||||
<details>
|
||||
<summary><b>How do I solve CAPTCHAs?</b></summary>
|
||||
|
||||
For CAPTCHA handling, you need better browser fingerprinting and proxies. Use [Browser Use Cloud](https://cloud.browser-use.com) which provides stealth browsers designed to avoid detection and CAPTCHA challenges.
|
||||
For CAPTCHA handling, you need better browser fingerprinting and proxies. Use [Browser Use Cloud](https://cloud.browser-use.com?utm_source=github&utm_medium=readme-faq-captcha) which provides stealth browsers designed to avoid detection and CAPTCHA challenges.
|
||||
</details>
|
||||
|
||||
<details>
|
||||
@@ -271,7 +279,7 @@ For CAPTCHA handling, you need better browser fingerprinting and proxies. Use [B
|
||||
|
||||
Chrome can consume a lot of memory, and running many agents in parallel can be tricky to manage.
|
||||
|
||||
For production use cases, use our [Browser Use Cloud API](https://cloud.browser-use.com) which handles:
|
||||
For production use cases, use our [Browser Use Cloud API](https://cloud.browser-use.com?utm_source=github&utm_medium=readme-faq-production) which handles:
|
||||
- Scalable browser infrastructure
|
||||
- Memory management
|
||||
- Proxy rotation
|
||||
|
||||
@@ -52,7 +52,6 @@ if TYPE_CHECKING:
|
||||
from browser_use.agent.views import ActionModel, ActionResult, AgentHistoryList
|
||||
from browser_use.browser import BrowserProfile, BrowserSession
|
||||
from browser_use.browser import BrowserSession as Browser
|
||||
from browser_use.code_use.service import CodeAgent
|
||||
from browser_use.dom.service import DomService
|
||||
from browser_use.llm import models
|
||||
from browser_use.llm.anthropic.chat import ChatAnthropic
|
||||
@@ -60,6 +59,7 @@ if TYPE_CHECKING:
|
||||
from browser_use.llm.browser_use.chat import ChatBrowserUse
|
||||
from browser_use.llm.google.chat import ChatGoogle
|
||||
from browser_use.llm.groq.chat import ChatGroq
|
||||
from browser_use.llm.litellm.chat import ChatLiteLLM
|
||||
from browser_use.llm.mistral.chat import ChatMistral
|
||||
from browser_use.llm.oci_raw.chat import ChatOCIRaw
|
||||
from browser_use.llm.ollama.chat import ChatOllama
|
||||
@@ -72,8 +72,6 @@ if TYPE_CHECKING:
|
||||
_LAZY_IMPORTS = {
|
||||
# Agent service (heavy due to dependencies)
|
||||
# 'Agent': ('browser_use.agent.service', 'Agent'),
|
||||
# Code-use agent (Jupyter notebook-like execution)
|
||||
'CodeAgent': ('browser_use.code_use.service', 'CodeAgent'),
|
||||
'Agent': ('browser_use.agent.service', 'Agent'),
|
||||
# System prompt (moderate weight due to agent.views imports)
|
||||
'SystemPrompt': ('browser_use.agent.prompts', 'SystemPrompt'),
|
||||
@@ -95,6 +93,7 @@ _LAZY_IMPORTS = {
|
||||
'ChatAnthropic': ('browser_use.llm.anthropic.chat', 'ChatAnthropic'),
|
||||
'ChatBrowserUse': ('browser_use.llm.browser_use.chat', 'ChatBrowserUse'),
|
||||
'ChatGroq': ('browser_use.llm.groq.chat', 'ChatGroq'),
|
||||
'ChatLiteLLM': ('browser_use.llm.litellm.chat', 'ChatLiteLLM'),
|
||||
'ChatMistral': ('browser_use.llm.mistral.chat', 'ChatMistral'),
|
||||
'ChatAzureOpenAI': ('browser_use.llm.azure.chat', 'ChatAzureOpenAI'),
|
||||
'ChatOCIRaw': ('browser_use.llm.oci_raw.chat', 'ChatOCIRaw'),
|
||||
@@ -131,8 +130,6 @@ def __getattr__(name: str):
|
||||
|
||||
__all__ = [
|
||||
'Agent',
|
||||
'CodeAgent',
|
||||
# 'CodeAgent',
|
||||
'BrowserSession',
|
||||
'Browser', # Alias for BrowserSession
|
||||
'BrowserProfile',
|
||||
@@ -148,6 +145,7 @@ __all__ = [
|
||||
'ChatAnthropic',
|
||||
'ChatBrowserUse',
|
||||
'ChatGroq',
|
||||
'ChatLiteLLM',
|
||||
'ChatMistral',
|
||||
'ChatAzureOpenAI',
|
||||
'ChatOCIRaw',
|
||||
|
||||
@@ -8,7 +8,7 @@ from bubus import BaseEvent
|
||||
from pydantic import Field, field_validator
|
||||
from uuid_extensions import uuid7str
|
||||
|
||||
MAX_STRING_LENGTH = 100000 # 100K chars ~ 25k tokens should be enough
|
||||
MAX_STRING_LENGTH = 500000 # 100K chars ~ 25k tokens should be enough
|
||||
MAX_URL_LENGTH = 100000
|
||||
MAX_TASK_LENGTH = 100000
|
||||
MAX_COMMENT_LENGTH = 2000
|
||||
@@ -38,6 +38,8 @@ class UpdateAgentTaskEvent(BaseEvent):
|
||||
raise ValueError('Agent must have _task_start_time attribute')
|
||||
|
||||
done_output = agent.history.final_result() if agent.history else None
|
||||
if done_output and len(done_output) > MAX_STRING_LENGTH:
|
||||
done_output = done_output[:MAX_STRING_LENGTH]
|
||||
return cls(
|
||||
id=str(agent.task_id),
|
||||
user_id='', # To be filled by cloud handler
|
||||
|
||||
@@ -108,7 +108,6 @@ def create_history_gif(
|
||||
font_name = os.path.join(CONFIG.WIN_FONT_DIR, font_name + '.ttf')
|
||||
regular_font = ImageFont.truetype(font_name, font_size)
|
||||
title_font = ImageFont.truetype(font_name, title_font_size)
|
||||
goal_font = ImageFont.truetype(font_name, goal_font_size)
|
||||
font_loaded = True
|
||||
break
|
||||
except OSError:
|
||||
@@ -121,8 +120,6 @@ def create_history_gif(
|
||||
regular_font = ImageFont.load_default()
|
||||
title_font = ImageFont.load_default()
|
||||
|
||||
goal_font = regular_font
|
||||
|
||||
# Load logo if requested
|
||||
logo = None
|
||||
if show_logo:
|
||||
@@ -236,8 +233,6 @@ def _create_task_frame(
|
||||
# Start with base font size (regular + 16)
|
||||
base_font_size = regular_font.size + 16
|
||||
min_font_size = max(regular_font.size - 10, 16) # Don't go below 16pt
|
||||
max_font_size = base_font_size # Cap at the base font size
|
||||
|
||||
# Calculate dynamic font size based on text length and complexity
|
||||
# Longer texts get progressively smaller fonts
|
||||
text_length = len(task)
|
||||
|
||||
@@ -88,6 +88,8 @@ def construct_judge_messages(
|
||||
)
|
||||
)
|
||||
|
||||
current_date = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')
|
||||
|
||||
# System prompt for judge - conditionally add ground truth section
|
||||
ground_truth_section = ''
|
||||
if ground_truth:
|
||||
@@ -168,7 +170,7 @@ Set `reached_captcha` to true if:
|
||||
- **evaluate for action** - For each key step of the trace, double check whether the action that the agent tried to performed actually happened. If the required action did not actually occur, the verdict should be false.
|
||||
- **screenshot is not entire content** - The agent has the entire DOM content, but the screenshot is only part of the content. If the agent extracts information from the page, but you do not see it in the screenshot, you can assume this information is there.
|
||||
- **Penalize poor tool usage** - Wrong tools, inefficient approaches, ignoring available information.
|
||||
- **ignore unexpected dates and times** - These agent traces are from varying dates, you can assume the dates the agent uses for search or filtering are correct.
|
||||
- **current date/time is {current_date}** - content with recent dates is real, not fabricated.
|
||||
- **IMPORTANT**: be very picky about the user's request - Have very high standard for the agent completing the task exactly to the user's request.
|
||||
- **IMPORTANT**: be initially doubtful of the agent's self reported success, be sure to verify that its methods are valid and fulfill the user's desires to a tee.
|
||||
|
||||
@@ -221,54 +223,3 @@ Evaluate this agent execution given the criteria and respond with the exact JSON
|
||||
SystemMessage(content=system_prompt),
|
||||
UserMessage(content=content_parts),
|
||||
]
|
||||
|
||||
|
||||
def construct_simple_judge_messages(
|
||||
task: str,
|
||||
final_result: str,
|
||||
) -> list[BaseMessage]:
|
||||
"""Construct lightweight judge messages to validate agent success claims.
|
||||
|
||||
Always runs regardless of use_judge setting. Text-only — no screenshots,
|
||||
no trajectory. Just task + final result.
|
||||
"""
|
||||
task_truncated = _truncate_text(task, 20000)
|
||||
final_result_truncated = _truncate_text(final_result, 20000)
|
||||
|
||||
current_date = datetime.now(timezone.utc).strftime('%Y-%m-%d')
|
||||
|
||||
system_prompt = f"""You are a strict verifier checking whether a browser automation agent actually completed its task.
|
||||
|
||||
Today's date is {current_date}. The agent ran recently — dates near today are expected and NOT fabricated.
|
||||
|
||||
Given the task and the agent's final response, determine if the response genuinely satisfies ALL requirements.
|
||||
|
||||
Check for these common failure patterns:
|
||||
1. **Incorrect data**: Wrong number of items, missing filters/criteria, wrong format
|
||||
2. **Unverified actions**: Agent claims to have submitted a form, posted a comment, or saved a file but there's no evidence
|
||||
3. **Incomplete results**: Some requirements from the task are not addressed in the response
|
||||
4. **Fabricated content**: Data that looks plausible but wasn't actually extracted from any page. NOTE: dates and times close to today's date ({current_date}) are NOT fabricated — the agent browses live websites and extracts real-time content.
|
||||
5. **Partial completion reported as success**: Response acknowledges failure or blockers (captcha, access denied, etc.) but still claims success
|
||||
|
||||
Respond with EXACTLY this JSON structure:
|
||||
{{
|
||||
"is_correct": true or false,
|
||||
"reason": "Brief explanation if not correct, empty string if correct"
|
||||
}}
|
||||
|
||||
Be strict: if the response doesn't clearly satisfy every requirement, set is_correct to false."""
|
||||
|
||||
user_prompt = f"""<task>
|
||||
{task_truncated or 'No task provided'}
|
||||
</task>
|
||||
|
||||
<agent_final_response>
|
||||
{final_result_truncated or 'No response provided'}
|
||||
</agent_final_response>
|
||||
|
||||
Does the agent's response fully satisfy all requirements of the task? Respond with the JSON structure."""
|
||||
|
||||
return [
|
||||
SystemMessage(content=system_prompt),
|
||||
UserMessage(content=user_prompt),
|
||||
]
|
||||
|
||||
@@ -25,7 +25,12 @@ from browser_use.llm.messages import (
|
||||
UserMessage,
|
||||
)
|
||||
from browser_use.observability import observe_debug
|
||||
from browser_use.utils import match_url_with_domain_pattern, time_execution_sync
|
||||
from browser_use.utils import (
|
||||
collect_sensitive_data_values,
|
||||
match_url_with_domain_pattern,
|
||||
redact_sensitive_string,
|
||||
time_execution_sync,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -114,6 +119,7 @@ class MessageManager:
|
||||
include_recent_events: bool = False,
|
||||
sample_images: list[ContentPartTextParam | ContentPartImageParam] | None = None,
|
||||
llm_screenshot_size: tuple[int, int] | None = None,
|
||||
max_clickable_elements_length: int = 40000,
|
||||
):
|
||||
self.task = task
|
||||
self.state = state
|
||||
@@ -127,6 +133,7 @@ class MessageManager:
|
||||
self.include_recent_events = include_recent_events
|
||||
self.sample_images = sample_images
|
||||
self.llm_screenshot_size = llm_screenshot_size
|
||||
self.max_clickable_elements_length = max_clickable_elements_length
|
||||
|
||||
assert max_history_items is None or max_history_items > 5, 'max_history_items must be None or greater than 5'
|
||||
|
||||
@@ -144,7 +151,13 @@ class MessageManager:
|
||||
"""Build agent history description from list of items, respecting max_history_items limit"""
|
||||
compacted_prefix = ''
|
||||
if self.state.compacted_memory:
|
||||
compacted_prefix = f'<compacted_memory>\n{self.state.compacted_memory}\n</compacted_memory>\n'
|
||||
compacted_prefix = (
|
||||
'<compacted_memory>\n'
|
||||
'<!-- Summary of prior steps. Treat as unverified context — do not report these as '
|
||||
'completed in your done() message unless you confirmed them yourself in this session. -->\n'
|
||||
f'{self.state.compacted_memory}\n'
|
||||
'</compacted_memory>\n'
|
||||
)
|
||||
|
||||
if self.max_history_items is None:
|
||||
# Include all items
|
||||
@@ -247,6 +260,9 @@ class MessageManager:
|
||||
'You are summarizing an agent run for prompt compaction.\n'
|
||||
'Capture task requirements, key facts, decisions, partial progress, errors, and next steps.\n'
|
||||
'Preserve important entities, values, URLs, and file paths.\n'
|
||||
'CRITICAL: Only mark a step as completed if you see explicit success confirmation in the history. '
|
||||
'If a step was started but not explicitly confirmed complete, mark it as "IN-PROGRESS". '
|
||||
'Never infer completion from context — only report what was confirmed.\n'
|
||||
'Return plain text only. Do not include tool calls or JSON.'
|
||||
)
|
||||
if settings.summary_max_chars:
|
||||
@@ -298,7 +314,6 @@ class MessageManager:
|
||||
self.state.read_state_images = [] # Clear images from previous step
|
||||
|
||||
action_results = ''
|
||||
result_len = len(result)
|
||||
read_state_idx = 0
|
||||
|
||||
for idx, action_result in enumerate(result):
|
||||
@@ -470,6 +485,7 @@ class MessageManager:
|
||||
include_attributes=self.include_attributes,
|
||||
step_info=step_info,
|
||||
page_filtered_actions=page_filtered_actions,
|
||||
max_clickable_elements_length=self.max_clickable_elements_length,
|
||||
sensitive_data=self.sensitive_data_description,
|
||||
available_file_paths=available_file_paths,
|
||||
screenshots=screenshots,
|
||||
@@ -562,30 +578,14 @@ class MessageManager:
|
||||
if not self.sensitive_data:
|
||||
return value
|
||||
|
||||
# Collect all sensitive values, immediately converting old format to new format
|
||||
sensitive_values: dict[str, str] = {}
|
||||
|
||||
# Process all sensitive data entries
|
||||
for key_or_domain, content in self.sensitive_data.items():
|
||||
if isinstance(content, dict):
|
||||
# Already in new format: {domain: {key: value}}
|
||||
for key, val in content.items():
|
||||
if val: # Skip empty values
|
||||
sensitive_values[key] = val
|
||||
elif content: # Old format: {key: value} - convert to new format internally
|
||||
# We treat this as if it was {'http*://*': {key_or_domain: content}}
|
||||
sensitive_values[key_or_domain] = content
|
||||
sensitive_values = collect_sensitive_data_values(self.sensitive_data)
|
||||
|
||||
# If there are no valid sensitive data entries, just return the original value
|
||||
if not sensitive_values:
|
||||
logger.warning('No valid entries found in sensitive_data dictionary')
|
||||
return value
|
||||
|
||||
# Replace all valid sensitive data values with their placeholder tags
|
||||
for key, val in sensitive_values.items():
|
||||
value = value.replace(val, f'<secret>{key}</secret>')
|
||||
|
||||
return value
|
||||
return redact_sensitive_string(value, sensitive_values)
|
||||
|
||||
if isinstance(message.content, str):
|
||||
message.content = replace_sensitive(message.content)
|
||||
|
||||
@@ -157,6 +157,7 @@ class AgentMessagePrompt:
|
||||
'images': 0,
|
||||
'interactive_elements': 0,
|
||||
'total_elements': 0,
|
||||
'text_chars': 0,
|
||||
}
|
||||
|
||||
if not self.browser_state.dom_state or not self.browser_state.dom_state._root:
|
||||
@@ -203,6 +204,9 @@ class AgentMessagePrompt:
|
||||
else:
|
||||
stats['shadow_open'] += 1
|
||||
|
||||
elif original.node_type == NodeType.TEXT_NODE:
|
||||
stats['text_chars'] += len(original.node_value.strip())
|
||||
|
||||
elif original.node_type == NodeType.DOCUMENT_FRAGMENT_NODE:
|
||||
# Shadow DOM fragment - these are the actual shadow roots
|
||||
# But don't double-count since we count them at the host level above
|
||||
@@ -224,6 +228,9 @@ class AgentMessagePrompt:
|
||||
stats_text = '<page_stats>'
|
||||
if page_stats['total_elements'] < 10:
|
||||
stats_text += 'Page appears empty (SPA not loaded?) - '
|
||||
# Skeleton screen: many elements but almost no text = loading placeholders
|
||||
elif page_stats['total_elements'] > 20 and page_stats['text_chars'] < page_stats['total_elements'] * 5:
|
||||
stats_text += 'Page appears to show skeleton/placeholder content (still loading?) - '
|
||||
stats_text += f'{page_stats["links"]} links, {page_stats["interactive_elements"]} interactive, '
|
||||
stats_text += f'{page_stats["iframes"]} iframes'
|
||||
if page_stats['shadow_open'] > 0 or page_stats['shadow_closed'] > 0:
|
||||
@@ -252,14 +259,11 @@ class AgentMessagePrompt:
|
||||
pages_below = pi.pixels_below / pi.viewport_height if pi.viewport_height > 0 else 0
|
||||
has_content_above = pages_above > 0
|
||||
has_content_below = pages_below > 0
|
||||
total_pages = pi.page_height / pi.viewport_height if pi.viewport_height > 0 else 0
|
||||
current_page_position = pi.scroll_y / max(pi.page_height - pi.viewport_height, 1)
|
||||
page_info_text = '<page_info>'
|
||||
page_info_text += f'{pages_above:.1f} above, '
|
||||
page_info_text += f'{pages_below:.1f} below '
|
||||
|
||||
page_info_text += f'{pages_above:.1f} pages above, {pages_below:.1f} pages below'
|
||||
if pages_below > 0.2:
|
||||
page_info_text += ' — scroll down to reveal more content'
|
||||
page_info_text += '</page_info>\n'
|
||||
# , at {current_page_position:.0%} of page
|
||||
if elements_text != '':
|
||||
if not has_content_above:
|
||||
elements_text = f'[Start of page]\n{elements_text}'
|
||||
|
||||
@@ -36,7 +36,7 @@ from pydantic import BaseModel, ValidationError
|
||||
from uuid_extensions import uuid7str
|
||||
|
||||
from browser_use import Browser, BrowserProfile, BrowserSession
|
||||
from browser_use.agent.judge import construct_judge_messages, construct_simple_judge_messages
|
||||
from browser_use.agent.judge import construct_judge_messages
|
||||
|
||||
# Lazy import for gif to avoid heavy agent.views import at startup
|
||||
# from browser_use.agent.gif import create_history_gif
|
||||
@@ -59,7 +59,6 @@ from browser_use.agent.views import (
|
||||
JudgementResult,
|
||||
MessageCompactionSettings,
|
||||
PlanItem,
|
||||
SimpleJudgeResult,
|
||||
StepMetadata,
|
||||
)
|
||||
from browser_use.browser.events import _get_timeout
|
||||
@@ -188,6 +187,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
file_system_path: str | None = None,
|
||||
task_id: str | None = None,
|
||||
calculate_cost: bool = False,
|
||||
pricing_url: str | None = None,
|
||||
display_files_in_done_text: bool = True,
|
||||
include_tool_call_examples: bool = False,
|
||||
vision_detail_level: Literal['auto', 'low', 'high'] = 'auto',
|
||||
@@ -204,7 +204,9 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
loop_detection_enabled: bool = True,
|
||||
llm_screenshot_size: tuple[int, int] | None = None,
|
||||
message_compaction: MessageCompactionSettings | bool | None = True,
|
||||
max_clickable_elements_length: int = 40000,
|
||||
_url_shortening_limit: int = 25,
|
||||
enable_signal_handler: bool = True,
|
||||
**kwargs,
|
||||
):
|
||||
# Validate llm_screenshot_size
|
||||
@@ -409,16 +411,20 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
loop_detection_window=loop_detection_window,
|
||||
loop_detection_enabled=loop_detection_enabled,
|
||||
message_compaction=message_compaction,
|
||||
max_clickable_elements_length=max_clickable_elements_length,
|
||||
)
|
||||
|
||||
# Token cost service
|
||||
self.token_cost_service = TokenCost(include_cost=calculate_cost)
|
||||
self.token_cost_service = TokenCost(include_cost=calculate_cost, pricing_url=pricing_url)
|
||||
self.token_cost_service.register_llm(llm)
|
||||
self.token_cost_service.register_llm(page_extraction_llm)
|
||||
self.token_cost_service.register_llm(judge_llm)
|
||||
if self.settings.message_compaction and self.settings.message_compaction.compaction_llm:
|
||||
self.token_cost_service.register_llm(self.settings.message_compaction.compaction_llm)
|
||||
|
||||
# Store signal handler setting (not part of AgentSettings as it's runtime behavior)
|
||||
self.enable_signal_handler = enable_signal_handler
|
||||
|
||||
# Initialize state
|
||||
self.state = injected_agent_state or AgentState()
|
||||
|
||||
@@ -514,6 +520,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
include_recent_events=self.include_recent_events,
|
||||
sample_images=self.sample_images,
|
||||
llm_screenshot_size=llm_screenshot_size,
|
||||
max_clickable_elements_length=self.settings.max_clickable_elements_length,
|
||||
)
|
||||
|
||||
if self.sensitive_data:
|
||||
@@ -1022,9 +1029,35 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
browser_state_summary = None
|
||||
|
||||
try:
|
||||
if self.browser_session:
|
||||
try:
|
||||
captcha_wait = await self.browser_session.wait_if_captcha_solving()
|
||||
if captcha_wait and captcha_wait.waited:
|
||||
# Reset step timing to exclude the captcha wait from step duration metrics
|
||||
self.step_start_time = time.time()
|
||||
duration_s = captcha_wait.duration_ms / 1000
|
||||
outcome = captcha_wait.result # 'success' | 'failed' | 'timeout'
|
||||
msg = f'Waited {duration_s:.1f}s for {captcha_wait.vendor} CAPTCHA to be solved. Result: {outcome}.'
|
||||
self.logger.info(f'🔒 {msg}')
|
||||
# Inject the outcome so the LLM sees what happened
|
||||
captcha_result = ActionResult(long_term_memory=msg)
|
||||
if self.state.last_result:
|
||||
self.state.last_result.append(captcha_result)
|
||||
else:
|
||||
self.state.last_result = [captcha_result]
|
||||
except Exception as e:
|
||||
self.logger.warning(f'Phase 0 captcha wait failed (non-fatal): {e}')
|
||||
|
||||
# Phase 1: Prepare context and timing
|
||||
browser_state_summary = await self._prepare_context(step_info)
|
||||
|
||||
# Clear previous step state after context preparation (which needs
|
||||
# them for the "previous action result" prompt) but before the LLM
|
||||
# call, so a timeout during _get_next_action or _execute_actions
|
||||
# won't leave stale data from the previous step.
|
||||
self.state.last_model_output = None
|
||||
self.state.last_result = None
|
||||
|
||||
# Phase 2: Get model output and execute actions
|
||||
await self._get_next_action(browser_state_summary)
|
||||
await self._execute_actions()
|
||||
@@ -1220,12 +1253,31 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
self.logger.warning(f'{error_msg}')
|
||||
return
|
||||
|
||||
# Handle browser closed/disconnected errors - stop immediately instead of retrying
|
||||
if self._is_browser_closed_error(error):
|
||||
self.logger.warning(f'🛑 Browser closed or disconnected: {error}')
|
||||
self.state.stopped = True
|
||||
self._external_pause_event.set()
|
||||
return
|
||||
# Handle browser closed/disconnected errors
|
||||
if self._is_connection_like_error(error):
|
||||
# If reconnection is in progress, wait for it instead of stopping
|
||||
if self.browser_session.is_reconnecting:
|
||||
wait_timeout = self.browser_session.RECONNECT_WAIT_TIMEOUT
|
||||
self.logger.warning(
|
||||
f'🔄 Connection error during reconnection, waiting up to {wait_timeout}s for reconnect: {error}'
|
||||
)
|
||||
try:
|
||||
await asyncio.wait_for(self.browser_session._reconnect_event.wait(), timeout=wait_timeout)
|
||||
except TimeoutError:
|
||||
pass
|
||||
|
||||
# Check if reconnection succeeded
|
||||
if self.browser_session.is_cdp_connected:
|
||||
self.logger.info('🔄 Reconnection succeeded, retrying step...')
|
||||
self.state.last_result = [ActionResult(error=f'Connection lost and recovered: {error}')]
|
||||
return
|
||||
|
||||
# Not reconnecting or reconnection failed — check if truly terminal
|
||||
if self._is_browser_closed_error(error):
|
||||
self.logger.warning(f'🛑 Browser closed or disconnected: {error}')
|
||||
self.state.stopped = True
|
||||
self._external_pause_event.set()
|
||||
return
|
||||
|
||||
# Handle all other exceptions
|
||||
include_trace = self.logger.isEnabledFor(logging.DEBUG)
|
||||
@@ -1249,14 +1301,35 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
self.state.last_result = [ActionResult(error=error_msg)]
|
||||
return None
|
||||
|
||||
def _is_connection_like_error(self, error: Exception) -> bool:
|
||||
"""Check if the error looks like a CDP/WebSocket connection failure.
|
||||
|
||||
Unlike _is_browser_closed_error(), this does NOT check if the CDP client is None
|
||||
or if reconnection is in progress — it purely looks at the error signature.
|
||||
"""
|
||||
error_str = str(error).lower()
|
||||
return (
|
||||
isinstance(error, ConnectionError)
|
||||
or 'websocket connection closed' in error_str
|
||||
or 'connection closed' in error_str
|
||||
or 'browser has been closed' in error_str
|
||||
or 'browser closed' in error_str
|
||||
or 'no browser' in error_str
|
||||
)
|
||||
|
||||
def _is_browser_closed_error(self, error: Exception) -> bool:
|
||||
"""Check if the browser has been closed or disconnected.
|
||||
|
||||
Only returns True when the error itself is a CDP/WebSocket connection failure
|
||||
AND the CDP client is gone. Avoids false positives on unrelated errors
|
||||
(element not found, timeouts, parse errors) that happen to coincide with
|
||||
a transient None state during reconnects or resets.
|
||||
AND the CDP client is gone AND we're not actively reconnecting.
|
||||
Avoids false positives on unrelated errors (element not found, timeouts,
|
||||
parse errors) that happen to coincide with a transient None state during
|
||||
reconnects or resets.
|
||||
"""
|
||||
# During reconnection, don't treat connection errors as terminal
|
||||
if self.browser_session.is_reconnecting:
|
||||
return False
|
||||
|
||||
error_str = str(error).lower()
|
||||
is_connection_error = (
|
||||
isinstance(error, ConnectionError)
|
||||
@@ -1504,46 +1577,6 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
self._message_manager._add_context_message(UserMessage(content=msg))
|
||||
self.AgentOutput = self.DoneAgentOutput
|
||||
|
||||
async def _run_simple_judge(self) -> None:
|
||||
"""Lightweight always-on judge that overrides agent success when it overclaims.
|
||||
|
||||
Runs regardless of use_judge setting. Only checks tasks where the agent
|
||||
claimed success — if the agent already reports failure, there's nothing to correct.
|
||||
"""
|
||||
last_result = self.history.history[-1].result[-1]
|
||||
if not last_result.is_done or not last_result.success:
|
||||
return
|
||||
|
||||
task = self.task
|
||||
final_result = self.history.final_result() or ''
|
||||
|
||||
messages = construct_simple_judge_messages(
|
||||
task=task,
|
||||
final_result=final_result,
|
||||
)
|
||||
|
||||
try:
|
||||
response = await self.llm.ainvoke(messages, output_format=SimpleJudgeResult)
|
||||
result: SimpleJudgeResult = response.completion # type: ignore[assignment]
|
||||
if not result.is_correct:
|
||||
reason = result.reason or 'Task requirements not fully met'
|
||||
self.logger.info(f'⚠️ Simple judge overriding success to failure: {reason}')
|
||||
last_result.success = False
|
||||
note = f'[Simple judge: {reason}]'
|
||||
# When structured output is expected, don't append judge text to extracted_content
|
||||
# as it would corrupt the JSON and break end-user parsers
|
||||
if self.output_model_schema is not None:
|
||||
if last_result.metadata is None:
|
||||
last_result.metadata = {}
|
||||
last_result.metadata['simple_judge'] = note
|
||||
elif last_result.extracted_content:
|
||||
last_result.extracted_content += f'\n\n{note}'
|
||||
else:
|
||||
last_result.extracted_content = note
|
||||
except Exception as e:
|
||||
self.logger.warning(f'Simple judge failed with error: {e}')
|
||||
# Don't override on error — keep the agent's self-report
|
||||
|
||||
@observe(ignore_input=True, ignore_output=False)
|
||||
async def _judge_trace(self) -> JudgementResult | None:
|
||||
"""Judge the trace of the agent"""
|
||||
@@ -1614,8 +1647,10 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
if judgement.failure_reason:
|
||||
judge_log += f' Failure Reason: {judgement.failure_reason}\n'
|
||||
if judgement.reached_captcha:
|
||||
judge_log += ' 🤖 Captcha Detected: Agent encountered captcha challenges\n'
|
||||
judge_log += ' 👉 🥷 Use Browser Use Cloud for the most stealth browser infra: https://docs.browser-use.com/customize/browser/remote\n'
|
||||
self.logger.warning(
|
||||
'Agent was blocked by a captcha. Cloud browsers include stealth fingerprinting and proxy rotation to avoid this.\n'
|
||||
' Try: Browser(use_cloud=True) | Get an API key: https://cloud.browser-use.com?utm_source=oss&utm_medium=captcha_nudge'
|
||||
)
|
||||
judge_log += f' {judgement.reasoning}\n'
|
||||
self.logger.info(judge_log)
|
||||
|
||||
@@ -2023,8 +2058,6 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
if not (self.logger.isEnabledFor(logging.DEBUG) and parsed.action):
|
||||
return
|
||||
|
||||
action_count = len(parsed.action)
|
||||
|
||||
# Collect action details
|
||||
action_details = []
|
||||
for i, action in enumerate(parsed.action):
|
||||
@@ -2129,11 +2162,10 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
has_captcha_issue = any(keyword in final_result_str for keyword in captcha_keywords)
|
||||
|
||||
if has_captcha_issue:
|
||||
# Suggest use_cloud=True for captcha/cloudflare issues
|
||||
task_preview = self.task[:10] if len(self.task) > 10 else self.task
|
||||
self.logger.info('')
|
||||
self.logger.info('Failed because of CAPTCHA? For better browser stealth, try:')
|
||||
self.logger.info(f' agent = Agent(task="{task_preview}...", browser=Browser(use_cloud=True))')
|
||||
self.logger.warning(
|
||||
'Agent was blocked by a captcha. Cloud browsers include stealth fingerprinting and proxy rotation to avoid this.\n'
|
||||
' Try: Browser(use_cloud=True) | Get an API key: https://cloud.browser-use.com?utm_source=oss&utm_medium=captcha_nudge'
|
||||
)
|
||||
|
||||
# General failure message
|
||||
self.logger.info('')
|
||||
@@ -2225,9 +2257,6 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
await self.step(step_info)
|
||||
|
||||
if self.history.is_done():
|
||||
# Always run simple judge to align agent success with reality
|
||||
await self._run_simple_judge()
|
||||
|
||||
await self.log_completion()
|
||||
|
||||
# Run full judge before done callback if enabled
|
||||
@@ -2424,14 +2453,15 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
await self._demo_mode_log(error_msg, 'error', {'step': step + 1})
|
||||
self.state.consecutive_failures += 1
|
||||
self.state.last_result = [ActionResult(error=error_msg)]
|
||||
# Ensure step counter advances on timeout — _finalize() may have
|
||||
# been skipped or returned early due to the cancellation.
|
||||
if self.state.n_steps == step + 1:
|
||||
self.state.n_steps += 1
|
||||
|
||||
if on_step_end is not None:
|
||||
await on_step_end(self)
|
||||
|
||||
if self.history.is_done():
|
||||
# Always run simple judge to align agent success with reality
|
||||
await self._run_simple_judge()
|
||||
|
||||
await self.log_completion()
|
||||
|
||||
# Run full judge before done callback if enabled
|
||||
@@ -2480,6 +2510,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
resume_callback=self.resume,
|
||||
custom_exit_callback=on_force_exit_log_telemetry, # Pass the new telemetrycallback
|
||||
exit_on_second_int=True,
|
||||
disabled=not self.enable_signal_handler,
|
||||
)
|
||||
signal_handler.register()
|
||||
|
||||
@@ -2672,7 +2703,6 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
to pre-action values. Any change aborts the remaining queue.
|
||||
"""
|
||||
results: list[ActionResult] = []
|
||||
time_elapsed = 0
|
||||
total_actions = len(actions)
|
||||
|
||||
assert self.browser_session is not None, 'BrowserSession is not set up'
|
||||
@@ -2682,19 +2712,20 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
and self.browser_session._cached_browser_state_summary.dom_state is not None
|
||||
):
|
||||
cached_selector_map = dict(self.browser_session._cached_browser_state_summary.dom_state.selector_map)
|
||||
cached_element_hashes = {e.parent_branch_hash() for e in cached_selector_map.values()}
|
||||
else:
|
||||
cached_selector_map = {}
|
||||
cached_element_hashes = set()
|
||||
except Exception as e:
|
||||
self.logger.error(f'Error getting cached selector map: {e}')
|
||||
cached_selector_map = {}
|
||||
cached_element_hashes = set()
|
||||
|
||||
for i, action in enumerate(actions):
|
||||
# Get action name from the action model BEFORE try block to ensure it's always available in except
|
||||
action_data = action.model_dump(exclude_unset=True)
|
||||
action_name = next(iter(action_data.keys())) if action_data else 'unknown'
|
||||
|
||||
if i > 0:
|
||||
# ONLY ALLOW TO CALL `done` IF IT IS A SINGLE ACTION
|
||||
if action.model_dump(exclude_unset=True).get('done') is not None:
|
||||
if action_data.get('done') is not None:
|
||||
msg = f'Done action is allowed only as a single action - stopped after action {i} / {total_actions}.'
|
||||
self.logger.debug(msg)
|
||||
break
|
||||
@@ -2706,9 +2737,6 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
|
||||
try:
|
||||
await self._check_stop_or_pause()
|
||||
# Get action name from the action model
|
||||
action_data = action.model_dump(exclude_unset=True)
|
||||
action_name = next(iter(action_data.keys())) if action_data else 'unknown'
|
||||
|
||||
# Log action before execution
|
||||
await self._log_action(action, action_name, i + 1, total_actions)
|
||||
@@ -2717,8 +2745,6 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
pre_action_url = await self.browser_session.get_current_page_url()
|
||||
pre_action_focus = self.browser_session.agent_focus_target_id
|
||||
|
||||
time_start = time.time()
|
||||
|
||||
result = await self.tools.act(
|
||||
action=action,
|
||||
browser_session=self.browser_session,
|
||||
@@ -2729,9 +2755,6 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
extraction_schema=self.extraction_schema,
|
||||
)
|
||||
|
||||
time_end = time.time()
|
||||
time_elapsed = time_end - time_start
|
||||
|
||||
if result.error:
|
||||
await self._demo_mode_log(
|
||||
f'Action "{action_name}" failed: {result.error}',
|
||||
@@ -3429,7 +3452,6 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
hist_node = historical_elem.node_name.lower() if historical_elem else ''
|
||||
similar_elements = []
|
||||
if historical_elem and historical_elem.attributes:
|
||||
hist_aria = historical_elem.attributes.get('aria-label', '')
|
||||
for idx, elem in selector_map.items():
|
||||
if elem.node_name.lower() == hist_node and elem.attributes:
|
||||
elem_aria = elem.attributes.get('aria-label', '')
|
||||
@@ -3911,6 +3933,17 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
# Kill the browser session - this dispatches BrowserStopEvent,
|
||||
# stops the EventBus with clear=True, and recreates a fresh EventBus
|
||||
await self.browser_session.kill()
|
||||
else:
|
||||
# keep_alive=True sessions shouldn't keep the event loop alive after agent.run()
|
||||
await self.browser_session.event_bus.stop(
|
||||
clear=False,
|
||||
timeout=_get_timeout('TIMEOUT_BrowserSessionEventBusStopOnAgentClose', 1.0),
|
||||
)
|
||||
try:
|
||||
self.browser_session.event_bus.event_queue = None
|
||||
self.browser_session.event_bus._on_idle = None
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Close skill service if configured
|
||||
if self.skill_service is not None:
|
||||
|
||||
@@ -40,18 +40,25 @@ USER REQUEST: This is your ultimate objective and always remains visible.
|
||||
1. Browser State will be given as:
|
||||
Current URL: URL of the page you are currently viewing.
|
||||
Open Tabs: Open tabs with their ids.
|
||||
Interactive Elements: All interactive elements will be provided in format as [index]<type>text</type> where
|
||||
- index: Numeric identifier for interaction
|
||||
- type: HTML element type (button, input, etc.)
|
||||
- text: Element description
|
||||
Interactive Elements: All interactive elements will be provided in a tree-style XML format:
|
||||
- Format: `[index]<tagname attribute=value />` for interactive elements
|
||||
- Text content appears as child nodes on separate lines (not inside tags)
|
||||
- Indentation with tabs shows parent/child relationships
|
||||
Examples:
|
||||
[33]<div>User form</div>
|
||||
\t*[35]<button aria-label='Submit form'>Submit</button>
|
||||
[33]<div />
|
||||
User form
|
||||
[35]<input type=text placeholder=Enter name />
|
||||
*[38]<button aria-label=Submit form />
|
||||
Submit
|
||||
[40]<a />
|
||||
About us
|
||||
Note that:
|
||||
- Only elements with numeric indexes in [] are interactive
|
||||
- (stacked) indentation (with \t) is important and means that the element is a (html) child of the element above (with a lower index)
|
||||
- Elements tagged with a star `*[` are the new interactive elements that appeared on the website since the last step - if url has not changed. Your previous actions caused that change. Think if you need to interact with them, e.g. after input you might need to select the right option from the list.
|
||||
- Pure text elements without [] are not interactive.
|
||||
- Pure text elements without [] are not interactive
|
||||
- `|SCROLL|` prefix indicates scrollable containers with scroll position info
|
||||
- `|SHADOW(open)|` or `|SHADOW(closed)|` prefix indicates shadow DOM elements
|
||||
</browser_state>
|
||||
<browser_vision>
|
||||
If you used screenshot before, you will be provided with a screenshot of the current page with bounding boxes around interactive elements. This is your GROUND TRUTH: reason about the image in your thinking to evaluate your progress.
|
||||
@@ -65,14 +72,14 @@ Strictly follow these rules while using the browser and navigating the web:
|
||||
- If research is needed, open a **new tab** instead of reusing the current one.
|
||||
- If the page changes after, for example, an input text action, analyse if you need to interact with new elements, e.g. selecting the right option from the list.
|
||||
- By default, only elements in the visible viewport are listed.
|
||||
- If a captcha appears, attempt solving it if possible. If not, use fallback strategies (e.g., alternative site, backtrack). Do not spend more than 3-4 steps on a single captcha - if blocked, try alternative approaches or report the limitation.
|
||||
- CAPTCHAs are automatically solved by the browser. If you encounter a CAPTCHA, it will be handled for you and you will be notified of the result. Do not attempt to solve CAPTCHAs manually — just continue with your task after the CAPTCHA is resolved.
|
||||
- If the page is not fully loaded, use the wait action.
|
||||
- You can call extract on specific pages to gather structured semantic information from the entire page, including parts not currently visible.
|
||||
- Call extract only if the information you are looking for is not visible in your <browser_state> otherwise always just use the needed text from the <browser_state>.
|
||||
- Calling the extract tool is expensive! DO NOT query the same page with the same extract query multiple times. Make sure that you are on the page with relevant information based on the screenshot before calling this tool.
|
||||
- Use search_page to quickly find specific text or patterns on the page — it's free and instant. Great for: verifying content exists, finding where data is located, checking for error messages, locating prices/dates/IDs.
|
||||
- Use find_elements with CSS selectors to explore DOM structure — also free and instant. Great for: counting items (e.g. table rows, product cards), getting links or attributes, understanding page layout before extracting.
|
||||
- Prefer search_page and find_elements over scrolling when looking for specific content not visible in browser_state.
|
||||
- Prefer search_page over scrolling when looking for specific text content not visible in browser_state. Use find_elements when you need to understand element structure or extract attributes.
|
||||
- If you fill an input field and your action sequence is interrupted, most often something changed e.g. suggestions popped up under the field.
|
||||
- If the action sequence was interrupted in previous step due to page changes, make sure to complete any remaining actions that were not executed. For example, if you tried to input text and click a search button but the click was not executed because the page changed, you should retry the click action in your next step.
|
||||
- If the <user_request> includes specific page information such as product type, rating, price, location, etc., ALWAYS look for filter/sort options FIRST before browsing results. Apply all relevant filters before scrolling through results.
|
||||
@@ -84,7 +91,7 @@ Strictly follow these rules while using the browser and navigating the web:
|
||||
1. Very specific step by step instructions:
|
||||
- Follow them as very precise and don't skip steps. Try to complete everything as requested.
|
||||
2. Open ended tasks. Plan yourself, be creative in achieving them.
|
||||
- If you get stuck e.g. with logins or captcha in open-ended tasks you can re-evaluate the task and try alternative ways, e.g. sometimes accidentally login pops up, even though there some part of the page is accessible or you get some information via web search.
|
||||
- If you get stuck e.g. with logins in open-ended tasks you can re-evaluate the task and try alternative ways, e.g. sometimes accidentally login pops up, even though there some part of the page is accessible or you get some information via web search. CAPTCHAs are handled automatically.
|
||||
- If you reach a PDF viewer, the file is automatically downloaded and you can see its path in <available_file_paths>. You can either read the file or scroll in the page to see more.
|
||||
- Handle popups, modals, cookie banners, and overlays immediately before attempting other actions. Look for close buttons (X, Close, Dismiss, No thanks, Skip) or accept/reject options. If a popup blocks interaction with the main page, handle it first.
|
||||
- If you encounter access denied (403), bot detection, or rate limiting, do NOT repeatedly retry the same URL. Try alternative approaches or report the limitation.
|
||||
@@ -138,9 +145,9 @@ BEFORE calling `done` with `success=true`, you MUST perform this verification:
|
||||
3. **Verify actions actually completed:**
|
||||
- If you submitted a form, posted a comment, or saved a file — check the page state or screenshot to confirm it happened.
|
||||
- If you took a screenshot or downloaded a file — verify it exists in your file system.
|
||||
4. **Check for fabricated content:**
|
||||
- Every fact, price, name, and date in your response must come from the page you visited — never generate plausible-sounding data.
|
||||
5. **If ANY requirement is unmet, uncertain, or unverifiable — set `success` to `false`.**
|
||||
4. **Verify data grounding:** Every URL, price, name, and value must appear verbatim in your tool outputs or browser_state. Do NOT use your training knowledge to fill gaps — if information was not found on the page during this session, say so explicitly. Never fabricate or invent values.
|
||||
5. **Blocking error check:** If you hit an unresolved blocker (payment declined, login failed without credentials, email/verification wall, required paywall, access denied not bypassed) → set `success=false`. Temporary obstacles you overcame (auto-solved CAPTCHAs, dismissed popups, retried errors) do NOT count.
|
||||
6. **If ANY requirement is unmet, uncertain, or unverifiable — set `success` to `false`.**
|
||||
Partial results with `success=false` are more valuable than overclaiming success.
|
||||
</pre_done_verification>
|
||||
</task_completion_rules>
|
||||
@@ -154,9 +161,11 @@ Check the browser state each step to verify your previous action achieved its go
|
||||
You can output multiple actions in one step. Try to be efficient where it makes sense. Do not predict actions which do not make sense for the current page.
|
||||
|
||||
**Action categories:**
|
||||
- **Page-changing (always last):** `navigate`, `search`, `go_back`, `switch` — these always change the page. Remaining actions after them are skipped automatically.
|
||||
- **Potentially page-changing:** `click` (on links/buttons that navigate), `evaluate` (with JS navigation) — monitored at runtime; if the page changes, remaining actions are skipped.
|
||||
- **Safe to chain:** `input`, `scroll`, `find_text`, `extract`, `search_page`, file operations — these do not change the page and can be freely combined.
|
||||
- **Page-changing (always last):** `navigate`, `search`, `go_back`, `switch`, `evaluate` — these always change the page. Remaining actions after them are skipped automatically. Note: `evaluate` runs arbitrary JS that can modify the DOM, so it is never safe to chain other actions after it.
|
||||
- **Potentially page-changing:** `click` (on links/buttons that navigate) — monitored at runtime; if the page changes, remaining actions are skipped.
|
||||
- **Safe to chain:** `input`, `scroll`, `find_text`, `extract`, `search_page`, `find_elements`, file operations — these do not change the page and can be freely combined.
|
||||
|
||||
**Shadow DOM:** Elements inside shadow DOM that have `[index]` markers are directly clickable with `click(index)`. Do NOT use `evaluate` to click them.
|
||||
|
||||
**Recommended combinations:**
|
||||
- `input` + `input` + `input` + `click` → Fill multiple form fields then submit
|
||||
@@ -239,7 +248,7 @@ Action list should NEVER be empty.
|
||||
3. ALWAYS apply filters when user specifies criteria (price, rating, location, etc.)
|
||||
4. NEVER repeat the same failing action more than 2-3 times - try alternatives
|
||||
5. NEVER assume success - always verify from screenshot or browser state
|
||||
6. If blocked by captcha/login/403, try alternative approaches rather than retrying
|
||||
6. CAPTCHAs are solved automatically. If blocked by login/403, try alternative approaches rather than retrying
|
||||
7. Put ALL relevant findings in done action's text field
|
||||
8. Match user's requested output format exactly
|
||||
9. Track progress in memory to avoid loops
|
||||
@@ -253,7 +262,7 @@ When encountering errors or unexpected states:
|
||||
2. Check if a popup, modal, or overlay is blocking interaction
|
||||
3. If an element is not found, scroll to reveal more content
|
||||
4. If an action fails repeatedly (2-3 times), try an alternative approach
|
||||
5. If blocked by login/captcha/403, consider alternative sites or search engines
|
||||
5. If blocked by login/403, consider alternative sites or search engines. CAPTCHAs are solved automatically.
|
||||
6. If the page structure is different than expected, re-analyze and adapt
|
||||
7. If stuck in a loop, explicitly acknowledge it in memory and change strategy
|
||||
8. If max_steps is approaching, prioritize completing the most important parts of the task
|
||||
|
||||
@@ -31,7 +31,7 @@ Strictly follow these rules while using the browser and navigating the web:
|
||||
- If research is needed, open a **new tab** instead of reusing the current one.
|
||||
- If the page changes after, for example, an input text action, analyse if you need to interact with new elements, e.g. selecting the right option from the list.
|
||||
- By default, only elements in the visible viewport are listed. Scroll to see more elements if needed.
|
||||
- If a captcha appears, attempt solving it if possible. If not, use fallback strategies (e.g., alternative site, backtrack). Do not spend more than 3-4 steps on a single captcha - if blocked, try alternative approaches or report the limitation.
|
||||
- CAPTCHAs are automatically solved by the browser. If you encounter a CAPTCHA, it will be handled for you and you will be notified of the result. Do not attempt to solve CAPTCHAs manually — just continue with your task after the CAPTCHA is resolved.
|
||||
- If the page is not fully loaded, use the wait action to allow content to render.
|
||||
- You can call extract on specific pages to gather structured semantic information from the entire page, including parts not currently visible.
|
||||
- Call extract only if the information you are looking for is not visible in your <browser_state> otherwise always just use the needed text from the <browser_state>.
|
||||
@@ -46,7 +46,7 @@ Strictly follow these rules while using the browser and navigating the web:
|
||||
- There are 2 types of tasks:
|
||||
1. Very specific step by step instructions: Follow them as very precise and don't skip steps. Try to complete everything as requested.
|
||||
2. Open ended tasks. Plan yourself, be creative in achieving them.
|
||||
- If you get stuck e.g. with logins or captcha in open-ended tasks you can re-evaluate the task and try alternative ways, e.g. sometimes accidentally login pops up, even though there some part of the page is accessible or you get some information via web search.
|
||||
- If you get stuck e.g. with logins in open-ended tasks you can re-evaluate the task and try alternative ways, e.g. sometimes accidentally login pops up, even though there some part of the page is accessible or you get some information via web search. CAPTCHAs are handled automatically.
|
||||
- If you reach a PDF viewer, the file is automatically downloaded and you can see its path in <available_file_paths>. You can either read the file or scroll in the page to see more.
|
||||
- Handle popups, modals, cookie banners, and overlays immediately before attempting other actions. Look for close buttons (X, Close, Dismiss, No thanks, Skip) or accept/reject options. If a popup blocks interaction with the main page, handle it first. Many websites show cookie consent dialogs, newsletter popups, or promotional overlays that must be dismissed.
|
||||
- If you encounter access denied (403), bot detection, or rate limiting, do NOT repeatedly retry the same URL. Try alternative approaches or report the limitation. Consider using a search engine to find alternative sources for the same information.
|
||||
@@ -93,9 +93,9 @@ BEFORE calling `done` with `success=true`, you MUST perform this verification:
|
||||
3. **Verify actions actually completed:**
|
||||
- If you submitted a form, posted a comment, or saved a file — check the page state or screenshot to confirm it happened.
|
||||
- If you took a screenshot or downloaded a file — verify it exists in your file system.
|
||||
4. **Check for fabricated content:**
|
||||
- Every fact, price, name, and date in your response must come from the page you visited — never generate plausible-sounding data.
|
||||
5. **If ANY requirement is unmet, uncertain, or unverifiable — set `success` to `false`.**
|
||||
4. **Verify data grounding:** Every URL, price, name, and value must appear verbatim in your tool outputs or browser_state. Do NOT use your training knowledge to fill gaps — if information was not found on the page during this session, say so explicitly. Never fabricate or invent values.
|
||||
5. **Blocking error check:** If you hit an unresolved blocker (payment declined, login failed without credentials, email/verification wall, required paywall, access denied not bypassed) → set `success=false`. Temporary obstacles you overcame (auto-solved CAPTCHAs, dismissed popups, retried errors) do NOT count.
|
||||
6. **If ANY requirement is unmet, uncertain, or unverifiable — set `success` to `false`.**
|
||||
Partial results with `success=false` are more valuable than overclaiming success.
|
||||
</pre_done_verification>
|
||||
</task_completion_rules>
|
||||
@@ -166,7 +166,7 @@ Always put `memory` field before the `action` field.
|
||||
Your memory field should include your reasoning. Apply these patterns:
|
||||
- Did the previous action succeed? Verify using screenshot as ground truth.
|
||||
- What is the current state relative to the user request?
|
||||
- Are there any obstacles (popups, captcha, login walls)?
|
||||
- Are there any obstacles (popups, login walls)? CAPTCHAs are solved automatically.
|
||||
- What specific next step will make progress toward the goal?
|
||||
- If stuck, what alternative approach should you try?
|
||||
- What information should be remembered for later steps?
|
||||
@@ -219,7 +219,7 @@ When encountering errors or unexpected states:
|
||||
2. Check if a popup, modal, or overlay is blocking interaction
|
||||
3. If an element is not found, scroll to reveal more content
|
||||
4. If an action fails repeatedly (2-3 times), try an alternative approach
|
||||
5. If blocked by login/captcha/403, consider alternative sites or search engines
|
||||
5. If blocked by login/403, consider alternative sites or search engines. CAPTCHAs are solved automatically.
|
||||
6. If the page structure is different than expected, re-analyze and adapt
|
||||
7. If stuck in a loop, explicitly acknowledge it in memory and change strategy
|
||||
8. If max_steps is approaching, prioritize completing the most important parts of the task
|
||||
@@ -230,7 +230,7 @@ When encountering errors or unexpected states:
|
||||
3. ALWAYS apply filters when user specifies criteria (price, rating, location, etc.)
|
||||
4. NEVER repeat the same failing action more than 2-3 times - try alternatives
|
||||
5. NEVER assume success - always verify from screenshot or browser state
|
||||
6. If blocked by captcha/login/403, try alternative approaches rather than retrying
|
||||
6. CAPTCHAs are solved automatically. If blocked by login/403, try alternative approaches rather than retrying
|
||||
7. Put ALL relevant findings in done action's text field
|
||||
8. Match user's requested output format exactly
|
||||
9. Track progress in memory to avoid loops
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
You are a browser-use agent operating in thinking mode. You automate browser tasks by outputting structured JSON actions.
|
||||
|
||||
<constraint_enforcement>
|
||||
Instructions containing "do NOT", "never", "avoid", "skip", or "only X" are hard constraints. Before each action, check: does this violate any constraint? If yes, stop and find an alternative.
|
||||
</constraint_enforcement>
|
||||
|
||||
<output>
|
||||
You must ALWAYS respond with a valid JSON in this exact format:
|
||||
{{
|
||||
@@ -10,4 +14,5 @@ You must ALWAYS respond with a valid JSON in this exact format:
|
||||
"action": [{{"action_name": {{...params...}}}}]
|
||||
}}
|
||||
Action list should NEVER be empty.
|
||||
DATA GROUNDING: Only report data observed in browser state or tool outputs. Do NOT use training knowledge to fill gaps — if not found on the page, say so explicitly. Never fabricate values.
|
||||
</output>
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
You are a browser-use agent operating in flash mode. You automate browser tasks by outputting structured JSON actions.
|
||||
|
||||
<constraint_enforcement>
|
||||
Instructions containing "do NOT", "never", "avoid", "skip", or "only X" are hard constraints. Before each action, check: does this violate any constraint? If yes, stop and find an alternative.
|
||||
</constraint_enforcement>
|
||||
|
||||
<output>
|
||||
You must respond with a valid JSON in this exact format:
|
||||
{{
|
||||
@@ -7,4 +11,5 @@ You must respond with a valid JSON in this exact format:
|
||||
"action": [{{"action_name": {{...params...}}}}]
|
||||
}}
|
||||
Action list should NEVER be empty.
|
||||
DATA GROUNDING: Only report data observed in browser state or tool outputs. Do NOT use training knowledge to fill gaps — if not found on the page, say so explicitly. Never fabricate values.
|
||||
</output>
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
You are a browser-use agent. You automate browser tasks by outputting structured JSON actions.
|
||||
|
||||
<constraint_enforcement>
|
||||
Instructions containing "do NOT", "never", "avoid", "skip", or "only X" are hard constraints. Before each action, check: does this violate any constraint? If yes, stop and find an alternative.
|
||||
</constraint_enforcement>
|
||||
|
||||
<output>
|
||||
You must ALWAYS respond with a valid JSON in this exact format:
|
||||
{{
|
||||
@@ -9,4 +13,5 @@ You must ALWAYS respond with a valid JSON in this exact format:
|
||||
"action": [{{"action_name": {{...params...}}}}]
|
||||
}}
|
||||
Action list should NEVER be empty.
|
||||
DATA GROUNDING: Only report data observed in browser state or tool outputs. Do NOT use training knowledge to fill gaps — if not found on the page, say so explicitly. Never fabricate values.
|
||||
</output>
|
||||
|
||||
@@ -12,4 +12,5 @@ You are allowed to use a maximum of {max_actions} actions per step. Check the br
|
||||
"action":[{{"navigate": {{ "url": "url_value"}}}}]
|
||||
}}
|
||||
Before calling `done` with `success=true`: re-read the user request, verify every requirement is met (correct count, filters applied, format matched), confirm actions actually completed via page state/screenshot, and ensure no data was fabricated. If anything is unmet or uncertain, set `success` to `false`.
|
||||
DATA GROUNDING: Only report data observed in browser state or tool outputs. Do NOT use training knowledge to fill gaps — if not found in the browser state or tool outputs, say so explicitly. Never fabricate values.
|
||||
</output>
|
||||
|
||||
@@ -27,4 +27,5 @@ You are allowed to use a maximum of {max_actions} actions per step. Check the br
|
||||
|
||||
Always put `memory` field before the `action` field.
|
||||
Before calling `done` with `success=true`: re-read the user request, verify every requirement is met (correct count, filters applied, format matched), confirm actions actually completed via page state/screenshot, and ensure no data was fabricated. If anything is unmet or uncertain, set `success` to `false`.
|
||||
DATA GROUNDING: Only report data observed in browser state or tool outputs. Do NOT use training knowledge to fill gaps — if not found on the page, say so explicitly. Never fabricate values.
|
||||
</output>
|
||||
|
||||
@@ -65,7 +65,7 @@ Strictly follow these rules while using the browser and navigating the web:
|
||||
- If research is needed, open a **new tab** instead of reusing the current one.
|
||||
- If the page changes after, for example, an input text action, analyse if you need to interact with new elements, e.g. selecting the right option from the list.
|
||||
- By default, only elements in the visible viewport are listed.
|
||||
- If a captcha appears, attempt solving it if possible. If not, use fallback strategies (e.g., alternative site, backtrack). Do not spend more than 3-4 steps on a single captcha - if blocked, try alternative approaches or report the limitation.
|
||||
- CAPTCHAs are automatically solved by the browser. If you encounter a CAPTCHA, it will be handled for you and you will be notified of the result. Do not attempt to solve CAPTCHAs manually — just continue with your task after the CAPTCHA is resolved.
|
||||
- If the page is not fully loaded, use the wait action.
|
||||
- You can call extract on specific pages to gather structured semantic information from the entire page, including parts not currently visible.
|
||||
- Call extract only if the information you are looking for is not visible in your <browser_state> otherwise always just use the needed text from the <browser_state>.
|
||||
@@ -81,7 +81,7 @@ Strictly follow these rules while using the browser and navigating the web:
|
||||
1. Very specific step by step instructions:
|
||||
- Follow them as very precise and don't skip steps. Try to complete everything as requested.
|
||||
2. Open ended tasks. Plan yourself, be creative in achieving them.
|
||||
- If you get stuck e.g. with logins or captcha in open-ended tasks you can re-evaluate the task and try alternative ways, e.g. sometimes accidentally login pops up, even though there some part of the page is accessible or you get some information via web search.
|
||||
- If you get stuck e.g. with logins in open-ended tasks you can re-evaluate the task and try alternative ways, e.g. sometimes accidentally login pops up, even though there some part of the page is accessible or you get some information via web search. CAPTCHAs are handled automatically.
|
||||
- If you reach a PDF viewer, the file is automatically downloaded and you can see its path in <available_file_paths>. You can either read the file or scroll in the page to see more.
|
||||
- Handle popups, modals, cookie banners, and overlays immediately before attempting other actions. Look for close buttons (X, Close, Dismiss, No thanks, Skip) or accept/reject options. If a popup blocks interaction with the main page, handle it first.
|
||||
- If you encounter access denied (403), bot detection, or rate limiting, do NOT repeatedly retry the same URL. Try alternative approaches or report the limitation.
|
||||
@@ -130,9 +130,9 @@ BEFORE calling `done` with `success=true`, you MUST perform this verification:
|
||||
3. **Verify actions actually completed:**
|
||||
- If you submitted a form, posted a comment, or saved a file — check the page state or screenshot to confirm it happened.
|
||||
- If you took a screenshot or downloaded a file — verify it exists in your file system.
|
||||
4. **Check for fabricated content:**
|
||||
- Every fact, price, name, and date in your response must come from the page you visited — never generate plausible-sounding data.
|
||||
5. **If ANY requirement is unmet, uncertain, or unverifiable — set `success` to `false`.**
|
||||
4. **Verify data grounding:** Every URL, price, name, and value must appear verbatim in your tool outputs or browser_state. Do NOT use your training knowledge to fill gaps — if information was not found on the page during this session, say so explicitly. Never fabricate or invent values.
|
||||
5. **Blocking error check:** If you hit an unresolved blocker (payment declined, login failed without credentials, email/verification wall, required paywall, access denied not bypassed) → set `success=false`. Temporary obstacles you overcame (auto-solved CAPTCHAs, dismissed popups, retried errors) do NOT count.
|
||||
6. **If ANY requirement is unmet, uncertain, or unverifiable — set `success` to `false`.**
|
||||
Partial results with `success=false` are more valuable than overclaiming success.
|
||||
</pre_done_verification>
|
||||
</task_completion_rules>
|
||||
@@ -224,7 +224,7 @@ Action list should NEVER be empty.
|
||||
3. ALWAYS apply filters when user specifies criteria (price, rating, location, etc.)
|
||||
4. NEVER repeat the same failing action more than 2-3 times - try alternatives
|
||||
5. NEVER assume success - always verify from screenshot or browser state
|
||||
6. If blocked by captcha/login/403, try alternative approaches rather than retrying
|
||||
6. CAPTCHAs are solved automatically. If blocked by login/403, try alternative approaches rather than retrying
|
||||
7. Put ALL relevant findings in done action's text field
|
||||
8. Match user's requested output format exactly
|
||||
9. Track progress in memory to avoid loops
|
||||
@@ -238,7 +238,7 @@ When encountering errors or unexpected states:
|
||||
2. Check if a popup, modal, or overlay is blocking interaction
|
||||
3. If an element is not found, scroll to reveal more content
|
||||
4. If an action fails repeatedly (2-3 times), try an alternative approach
|
||||
5. If blocked by login/captcha/403, consider alternative sites or search engines
|
||||
5. If blocked by login/403, consider alternative sites or search engines. CAPTCHAs are solved automatically.
|
||||
6. If the page structure is different than expected, re-analyze and adapt
|
||||
7. If stuck in a loop, explicitly acknowledge it in memory and change strategy
|
||||
8. If max_steps is approaching, prioritize completing the most important parts of the task
|
||||
|
||||
@@ -27,6 +27,7 @@ from browser_use.filesystem.file_system import FileSystemState
|
||||
from browser_use.llm.base import BaseChatModel
|
||||
from browser_use.tokens.views import UsageSummary
|
||||
from browser_use.tools.registry.views import ActionModel
|
||||
from browser_use.utils import collect_sensitive_data_values, redact_sensitive_string
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -35,7 +36,7 @@ class MessageCompactionSettings(BaseModel):
|
||||
"""Summarizes older history into a compact memory block to reduce prompt size."""
|
||||
|
||||
enabled: bool = True
|
||||
compact_every_n_steps: int = 15
|
||||
compact_every_n_steps: int = 25
|
||||
trigger_char_count: int | None = None # Min char floor; set via trigger_token_count if preferred
|
||||
trigger_token_count: int | None = None # Alternative to trigger_char_count (~4 chars/token)
|
||||
chars_per_token: float = 4.0
|
||||
@@ -88,6 +89,7 @@ class AgentSettings(BaseModel):
|
||||
# Loop detection settings
|
||||
loop_detection_window: int = 20 # Rolling window size for action similarity tracking
|
||||
loop_detection_enabled: bool = True # Whether to enable loop detection nudges
|
||||
max_clickable_elements_length: int = 40000 # Max characters for clickable elements in prompt
|
||||
|
||||
|
||||
class PageFingerprint(BaseModel):
|
||||
@@ -302,13 +304,6 @@ class JudgementResult(BaseModel):
|
||||
)
|
||||
|
||||
|
||||
class SimpleJudgeResult(BaseModel):
|
||||
"""Result of lightweight always-on judge that validates agent success claims."""
|
||||
|
||||
is_correct: bool = Field(description='True if the agent response genuinely satisfies the task requirements')
|
||||
reason: str = Field(default='', description='Brief explanation if not correct')
|
||||
|
||||
|
||||
class ActionResult(BaseModel):
|
||||
"""Result of executing an action"""
|
||||
|
||||
@@ -518,29 +513,13 @@ class AgentHistory(BaseModel):
|
||||
if not sensitive_data:
|
||||
return value
|
||||
|
||||
# Collect all sensitive values, immediately converting old format to new format
|
||||
sensitive_values: dict[str, str] = {}
|
||||
|
||||
# Process all sensitive data entries
|
||||
for key_or_domain, content in sensitive_data.items():
|
||||
if isinstance(content, dict):
|
||||
# Already in new format: {domain: {key: value}}
|
||||
for key, val in content.items():
|
||||
if val: # Skip empty values
|
||||
sensitive_values[key] = val
|
||||
elif content: # Old format: {key: value} - convert to new format internally
|
||||
# We treat this as if it was {'http*://*': {key_or_domain: content}}
|
||||
sensitive_values[key_or_domain] = content
|
||||
sensitive_values = collect_sensitive_data_values(sensitive_data)
|
||||
|
||||
# If there are no valid sensitive data entries, just return the original value
|
||||
if not sensitive_values:
|
||||
return value
|
||||
|
||||
# Replace all valid sensitive data values with their placeholder tags
|
||||
for key, val in sensitive_values.items():
|
||||
value = value.replace(val, f'<secret>{key}</secret>')
|
||||
|
||||
return value
|
||||
return redact_sensitive_string(value, sensitive_values)
|
||||
|
||||
def _filter_sensitive_data_from_dict(
|
||||
self, data: dict[str, Any], sensitive_data: dict[str, str | dict[str, str]] | None
|
||||
@@ -651,7 +630,7 @@ class AgentHistoryList(BaseModel, Generic[AgentStructuredOutput]):
|
||||
Path(filepath).parent.mkdir(parents=True, exist_ok=True)
|
||||
data = self.model_dump(sensitive_data=sensitive_data)
|
||||
with open(filepath, 'w', encoding='utf-8') as f:
|
||||
json.dump(data, f, indent=2)
|
||||
json.dump(data, f, indent=2, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
@@ -696,14 +675,18 @@ class AgentHistoryList(BaseModel, Generic[AgentStructuredOutput]):
|
||||
@classmethod
|
||||
def load_from_dict(cls, data: dict[str, Any], output_model: type[AgentOutput]) -> AgentHistoryList:
|
||||
# loop through history and validate output_model actions to enrich with custom actions
|
||||
for h in data['history']:
|
||||
if h['model_output']:
|
||||
if isinstance(h['model_output'], dict):
|
||||
h['model_output'] = output_model.model_validate(h['model_output'])
|
||||
for h in data.get('history', []):
|
||||
# Use .get() to avoid KeyError on incomplete or legacy history entries
|
||||
model_output = h.get('model_output')
|
||||
if model_output:
|
||||
if isinstance(model_output, dict):
|
||||
h['model_output'] = output_model.model_validate(model_output)
|
||||
else:
|
||||
h['model_output'] = None
|
||||
if 'interacted_element' not in h['state']:
|
||||
h['state']['interacted_element'] = None
|
||||
state = h.get('state') or {}
|
||||
if 'interacted_element' not in state:
|
||||
state['interacted_element'] = None
|
||||
h['state'] = state
|
||||
|
||||
history = cls.model_validate(data)
|
||||
return history
|
||||
@@ -733,8 +716,10 @@ class AgentHistoryList(BaseModel, Generic[AgentStructuredOutput]):
|
||||
|
||||
def final_result(self) -> None | str:
|
||||
"""Final result from history"""
|
||||
if self.history and self.history[-1].result[-1].extracted_content:
|
||||
return self.history[-1].result[-1].extracted_content
|
||||
if self.history and len(self.history[-1].result) > 0:
|
||||
last_result = self.history[-1].result[-1]
|
||||
if last_result.extracted_content:
|
||||
return last_result.extracted_content
|
||||
return None
|
||||
|
||||
def is_done(self) -> bool:
|
||||
|
||||
@@ -50,7 +50,8 @@ class CloudBrowserClient:
|
||||
|
||||
if not api_token:
|
||||
raise CloudBrowserAuthError(
|
||||
'No authentication token found. Please set BROWSER_USE_API_KEY environment variable to authenticate with the cloud service. You can also create an API key at https://cloud.browser-use.com/new-api-key'
|
||||
'BROWSER_USE_API_KEY is not set. To use cloud browsers, get a key at:\n'
|
||||
'https://cloud.browser-use.com/new-api-key?utm_source=oss&utm_medium=use_cloud'
|
||||
)
|
||||
|
||||
headers = {'X-Browser-Use-API-Key': api_token, 'Content-Type': 'application/json', **(extra_headers or {})}
|
||||
@@ -65,7 +66,8 @@ class CloudBrowserClient:
|
||||
|
||||
if response.status_code == 401:
|
||||
raise CloudBrowserAuthError(
|
||||
'Authentication failed. Please make sure you have set BROWSER_USE_API_KEY environment variable to authenticate with the cloud service. You can also create an API key at https://cloud.browser-use.com/new-api-key'
|
||||
'BROWSER_USE_API_KEY is invalid. Get a new key at:\n'
|
||||
'https://cloud.browser-use.com/new-api-key?utm_source=oss&utm_medium=use_cloud'
|
||||
)
|
||||
elif response.status_code == 403:
|
||||
raise CloudBrowserAuthError('Access forbidden. Please check your browser-use cloud subscription status.')
|
||||
@@ -137,7 +139,8 @@ class CloudBrowserClient:
|
||||
|
||||
if not api_token:
|
||||
raise CloudBrowserAuthError(
|
||||
'No authentication token found. Please set BROWSER_USE_API_KEY environment variable to authenticate with the cloud service. You can also create an API key at https://cloud.browser-use.com/new-api-key'
|
||||
'BROWSER_USE_API_KEY is not set. To use cloud browsers, get a key at:\n'
|
||||
'https://cloud.browser-use.com/new-api-key?utm_source=oss&utm_medium=use_cloud'
|
||||
)
|
||||
|
||||
headers = {'X-Browser-Use-API-Key': api_token, 'Content-Type': 'application/json', **(extra_headers or {})}
|
||||
@@ -192,7 +195,10 @@ class CloudBrowserClient:
|
||||
raise CloudBrowserError(f'Unexpected error stopping cloud browser: {e}')
|
||||
|
||||
async def close(self):
|
||||
"""Close the HTTP client and cleanup any active sessions."""
|
||||
"""Close the HTTP client and cleanup any active sessions.
|
||||
|
||||
Safe to call multiple times — subsequent calls are no-ops.
|
||||
"""
|
||||
# Try to stop current session if active
|
||||
if self.current_session_id:
|
||||
try:
|
||||
@@ -200,4 +206,5 @@ class CloudBrowserClient:
|
||||
except Exception as e:
|
||||
logger.debug(f'Failed to stop cloud browser session during cleanup: {e}')
|
||||
|
||||
await self.client.aclose()
|
||||
if not self.client.is_closed:
|
||||
await self.client.aclose()
|
||||
|
||||
@@ -59,6 +59,13 @@ class CreateBrowserRequest(BaseModel):
|
||||
title='Cloud Timeout',
|
||||
)
|
||||
|
||||
enable_recording: bool = Field(
|
||||
default=False,
|
||||
alias='enableRecording',
|
||||
description='Enable session recording for playback in the cloud dashboard.',
|
||||
title='Enable Recording',
|
||||
)
|
||||
|
||||
|
||||
CloudBrowserParams = CreateBrowserRequest # alias for easier readability
|
||||
|
||||
|
||||
@@ -119,7 +119,7 @@ class NavigateToUrlEvent(BaseEvent[None]):
|
||||
# existing_tab: PageHandle | None = None # TODO
|
||||
|
||||
# time limits enforced by bubus, not exposed to LLM:
|
||||
event_timeout: float | None = Field(default_factory=lambda: _get_timeout('TIMEOUT_NavigateToUrlEvent', 15.0)) # seconds
|
||||
event_timeout: float | None = Field(default_factory=lambda: _get_timeout('TIMEOUT_NavigateToUrlEvent', 30.0)) # seconds
|
||||
|
||||
|
||||
class ClickElementEvent(ElementSelectedEvent[dict[str, Any] | None]):
|
||||
@@ -406,7 +406,7 @@ class TabClosedEvent(BaseEvent):
|
||||
# new_focus_target_id: int | None = None
|
||||
# new_focus_url: str | None = None
|
||||
|
||||
event_timeout: float | None = Field(default_factory=lambda: _get_timeout('TIMEOUT_TabClosedEvent', 10.0)) # seconds
|
||||
event_timeout: float | None = Field(default_factory=lambda: _get_timeout('TIMEOUT_TabClosedEvent', 3.0)) # seconds
|
||||
|
||||
|
||||
# TODO: emit this when DOM changes significantly, inner frame navigates, form submits, history.pushState(), etc.
|
||||
@@ -471,6 +471,26 @@ class BrowserErrorEvent(BaseEvent):
|
||||
event_timeout: float | None = Field(default_factory=lambda: _get_timeout('TIMEOUT_BrowserErrorEvent', 30.0)) # seconds
|
||||
|
||||
|
||||
class BrowserReconnectingEvent(BaseEvent):
|
||||
"""WebSocket reconnection attempt is starting."""
|
||||
|
||||
cdp_url: str
|
||||
attempt: int
|
||||
max_attempts: int
|
||||
|
||||
event_timeout: float | None = Field(default_factory=lambda: _get_timeout('TIMEOUT_BrowserReconnectingEvent', 30.0)) # seconds
|
||||
|
||||
|
||||
class BrowserReconnectedEvent(BaseEvent):
|
||||
"""WebSocket reconnection succeeded."""
|
||||
|
||||
cdp_url: str
|
||||
attempt: int
|
||||
downtime_seconds: float
|
||||
|
||||
event_timeout: float | None = Field(default_factory=lambda: _get_timeout('TIMEOUT_BrowserReconnectedEvent', 30.0)) # seconds
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Storage State Events
|
||||
# ============================================================================
|
||||
@@ -576,6 +596,42 @@ class DialogOpenedEvent(BaseEvent):
|
||||
# target_id: TargetID # TODO: add this to avoid needing target_id_from_frame() later
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Captcha Solver Events
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class CaptchaSolverStartedEvent(BaseEvent):
|
||||
"""Captcha solving started by the browser proxy.
|
||||
|
||||
Emitted when the browser proxy detects a CAPTCHA and begins solving it.
|
||||
The agent should wait for a corresponding CaptchaSolverFinishedEvent before proceeding.
|
||||
"""
|
||||
|
||||
target_id: TargetID
|
||||
vendor: str # e.g. 'cloudflare', 'recaptcha', 'hcaptcha', 'datadome', 'perimeterx', 'geetest'
|
||||
url: str
|
||||
started_at: int # Unix millis
|
||||
|
||||
event_timeout: float | None = Field(default_factory=lambda: _get_timeout('TIMEOUT_CaptchaSolverStartedEvent', 5.0))
|
||||
|
||||
|
||||
class CaptchaSolverFinishedEvent(BaseEvent):
|
||||
"""Captcha solving finished by the browser proxy.
|
||||
|
||||
Emitted when the browser proxy finishes solving a CAPTCHA (successfully or not).
|
||||
"""
|
||||
|
||||
target_id: TargetID
|
||||
vendor: str
|
||||
url: str
|
||||
duration_ms: int
|
||||
finished_at: int # Unix millis
|
||||
success: bool # Whether the captcha was solved successfully
|
||||
|
||||
event_timeout: float | None = Field(default_factory=lambda: _get_timeout('TIMEOUT_CaptchaSolverFinishedEvent', 5.0))
|
||||
|
||||
|
||||
# Note: Model rebuilding for forward references is handled in the importing modules
|
||||
# Events with 'EnhancedDOMTreeNode' forward references (ClickElementEvent, TypeTextEvent,
|
||||
# ScrollEvent, UploadFileEvent) need model_rebuild() called after imports are complete
|
||||
|
||||
@@ -124,7 +124,7 @@ CHROME_DEFAULT_ARGS = [
|
||||
'--disable-back-forward-cache', # Avoids surprises like main request not being intercepted during page.goBack().
|
||||
'--disable-breakpad',
|
||||
'--disable-client-side-phishing-detection',
|
||||
'--disable-component-extensions-with-background-pages',
|
||||
# '--disable-component-extensions-with-background-pages', # kills user-loaded extensions on Chrome 145+
|
||||
'--disable-component-update', # Avoids unneeded network activity after startup.
|
||||
'--no-default-browser-check',
|
||||
# '--disable-default-apps',
|
||||
@@ -150,7 +150,7 @@ CHROME_DEFAULT_ARGS = [
|
||||
# added by us:
|
||||
'--enable-features=NetworkService,NetworkServiceInProcess',
|
||||
'--enable-network-information-downlink-max',
|
||||
'--test-type=gpu',
|
||||
# '--test-type=gpu', # blocks unpacked extension loading on Chrome 145+
|
||||
'--disable-sync',
|
||||
'--allow-legacy-extension-manifests',
|
||||
'--allow-pre-commit-input',
|
||||
@@ -430,14 +430,14 @@ class BrowserLaunchArgs(BaseModel):
|
||||
if self.downloads_path is None:
|
||||
import uuid
|
||||
|
||||
# Create unique directory in /tmp for downloads
|
||||
# Create unique directory in system temp folder for downloads
|
||||
unique_id = str(uuid.uuid4())[:8] # 8 characters
|
||||
downloads_path = Path(f'/tmp/browser-use-downloads-{unique_id}')
|
||||
downloads_path = Path(tempfile.gettempdir()) / f'browser-use-downloads-{unique_id}'
|
||||
|
||||
# Ensure path doesn't already exist (extremely unlikely but possible)
|
||||
while downloads_path.exists():
|
||||
unique_id = str(uuid.uuid4())[:8]
|
||||
downloads_path = Path(f'/tmp/browser-use-downloads-{unique_id}')
|
||||
downloads_path = Path(tempfile.gettempdir()) / f'browser-use-downloads-{unique_id}'
|
||||
|
||||
self.downloads_path = downloads_path
|
||||
self.downloads_path.mkdir(parents=True, exist_ok=True)
|
||||
@@ -602,6 +602,10 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
|
||||
default_factory=_get_enable_default_extensions_default,
|
||||
description="Enable automation-optimized extensions: ad blocking (uBlock Origin), cookie handling (I still don't care about cookies), and URL cleaning (ClearURLs). All extensions work automatically without manual intervention. Extensions are automatically downloaded and loaded when enabled. Can be disabled via BROWSER_USE_DISABLE_EXTENSIONS=1 environment variable.",
|
||||
)
|
||||
captcha_solver: bool = Field(
|
||||
default=True,
|
||||
description='Enable the captcha solver watchdog that listens for captcha events from the browser proxy. Automatically pauses agent steps while a CAPTCHA is being solved. Only active when the browser emits BrowserUse CDP events (e.g. Browser Use cloud browsers). Harmless when disabled or when events are not emitted.',
|
||||
)
|
||||
demo_mode: bool = Field(
|
||||
default=False,
|
||||
description='Enable demo mode side panel that streams agent logs directly inside the browser window (requires headless=False).',
|
||||
@@ -933,6 +937,25 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
|
||||
|
||||
return args
|
||||
|
||||
@staticmethod
|
||||
def _check_extension_manifest_version(ext_dir: Path, ext_name: str) -> bool:
|
||||
"""Check that an extension uses Manifest V3. Returns False for MV2 extensions (unsupported by Chrome 145+)."""
|
||||
import json
|
||||
|
||||
manifest_path = ext_dir / 'manifest.json'
|
||||
if not manifest_path.exists():
|
||||
return False
|
||||
try:
|
||||
with open(manifest_path, encoding='utf-8') as f:
|
||||
manifest = json.load(f)
|
||||
mv = manifest.get('manifest_version', 2)
|
||||
if mv < 3:
|
||||
logger.warning(f'Skipping {ext_name} extension: Manifest V{mv} is no longer supported by Chrome')
|
||||
return False
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
def _ensure_default_extensions_downloaded(self) -> list[str]:
|
||||
"""
|
||||
Ensure default extensions are downloaded and cached locally.
|
||||
@@ -940,23 +963,18 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
|
||||
"""
|
||||
|
||||
# Extension definitions - optimized for automation and content extraction
|
||||
# Combines uBlock Origin (ad blocking) + "I still don't care about cookies" (cookie banner handling)
|
||||
# uBlock Origin Lite (ad blocking, MV3) + "I still don't care about cookies" (cookie banner handling)
|
||||
extensions = [
|
||||
{
|
||||
'name': 'uBlock Origin',
|
||||
'id': 'cjpalhdlnbpafiamejdnhcphjbkeiagm',
|
||||
'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=133&acceptformat=crx3&x=id%3Dcjpalhdlnbpafiamejdnhcphjbkeiagm%26uc',
|
||||
'name': 'uBlock Origin Lite',
|
||||
'id': 'ddkjiahejlhfcafbddmgiahcphecmpfh',
|
||||
'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=133&acceptformat=crx3&x=id%3Dddkjiahejlhfcafbddmgiahcphecmpfh%26uc',
|
||||
},
|
||||
{
|
||||
'name': "I still don't care about cookies",
|
||||
'id': 'edibdbjcniadpccecjdfdjjppcpchdlm',
|
||||
'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=133&acceptformat=crx3&x=id%3Dedibdbjcniadpccecjdfdjjppcpchdlm%26uc',
|
||||
},
|
||||
{
|
||||
'name': 'ClearURLs',
|
||||
'id': 'lckanjgmijmafbedllaakclkaicjfmnk',
|
||||
'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=133&acceptformat=crx3&x=id%3Dlckanjgmijmafbedllaakclkaicjfmnk%26uc',
|
||||
},
|
||||
{
|
||||
'name': 'Force Background Tab',
|
||||
'id': 'gidlfommnbibbmegmgajdbikelkdcmcl',
|
||||
@@ -994,7 +1012,8 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
|
||||
|
||||
# Check if extension is already extracted
|
||||
if ext_dir.exists() and (ext_dir / 'manifest.json').exists():
|
||||
# logger.debug(f'✅ Using cached {ext["name"]} extension from {_log_pretty_path(ext_dir)}')
|
||||
if not self._check_extension_manifest_version(ext_dir, ext['name']):
|
||||
continue
|
||||
extension_paths.append(str(ext_dir))
|
||||
loaded_extension_names.append(ext['name'])
|
||||
continue
|
||||
@@ -1011,6 +1030,9 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
|
||||
logger.info(f'📂 Extracting {ext["name"]} extension...')
|
||||
self._extract_extension(crx_file, ext_dir)
|
||||
|
||||
if not self._check_extension_manifest_version(ext_dir, ext['name']):
|
||||
continue
|
||||
|
||||
extension_paths.append(str(ext_dir))
|
||||
loaded_extension_names.append(ext['name'])
|
||||
|
||||
@@ -1149,7 +1171,6 @@ async function initialize(checkInitialized, magic) {{
|
||||
zip_data = f.read()
|
||||
|
||||
# Write ZIP data to temp file and extract
|
||||
import tempfile
|
||||
|
||||
with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as temp_zip:
|
||||
temp_zip.write(zip_data)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -401,6 +401,8 @@ class SessionManager:
|
||||
if '-32001' not in error_str and 'Session with given id not found' not in error_str:
|
||||
self.logger.debug(f'[SessionManager] Auto-attach failed for {target_type}: {e}')
|
||||
|
||||
from browser_use.browser.session import Target
|
||||
|
||||
async with self._lock:
|
||||
# Track this session for the target
|
||||
if target_id not in self._target_sessions:
|
||||
@@ -409,23 +411,22 @@ class SessionManager:
|
||||
self._target_sessions[target_id].add(session_id)
|
||||
self._session_to_target[session_id] = target_id
|
||||
|
||||
# Create or update Target (source of truth for url/title)
|
||||
if target_id not in self._targets:
|
||||
from browser_use.browser.session import Target
|
||||
|
||||
target = Target(
|
||||
target_id=target_id,
|
||||
target_type=target_type,
|
||||
url=target_info.get('url', 'about:blank'),
|
||||
title=target_info.get('title', 'Unknown title'),
|
||||
)
|
||||
self._targets[target_id] = target
|
||||
self.logger.debug(f'[SessionManager] Created target {target_id[:8]}... (type={target_type})')
|
||||
else:
|
||||
# Update existing target info
|
||||
existing_target = self._targets[target_id]
|
||||
existing_target.url = target_info.get('url', existing_target.url)
|
||||
existing_target.title = target_info.get('title', existing_target.title)
|
||||
# Create or update Target inside the same lock so that get_target() is never
|
||||
# called in the window between _target_sessions being set and _targets being set.
|
||||
if target_id not in self._targets:
|
||||
target = Target(
|
||||
target_id=target_id,
|
||||
target_type=target_type,
|
||||
url=target_info.get('url', 'about:blank'),
|
||||
title=target_info.get('title', 'Unknown title'),
|
||||
)
|
||||
self._targets[target_id] = target
|
||||
self.logger.debug(f'[SessionManager] Created target {target_id[:8]}... (type={target_type})')
|
||||
else:
|
||||
# Update existing target info
|
||||
existing_target = self._targets[target_id]
|
||||
existing_target.url = target_info.get('url', existing_target.url)
|
||||
existing_target.title = target_info.get('title', existing_target.title)
|
||||
|
||||
# Create CDPSession (communication channel)
|
||||
from browser_use.browser.session import CDPSession
|
||||
@@ -441,6 +442,21 @@ class SessionManager:
|
||||
# Add to sessions dict
|
||||
self._sessions[session_id] = cdp_session
|
||||
|
||||
# If proxy auth is configured, enable Fetch auth handling on this session
|
||||
# Avoids overwriting Target.attachedToTarget handlers elsewhere
|
||||
try:
|
||||
proxy_cfg = self.browser_session.browser_profile.proxy
|
||||
username = proxy_cfg.username if proxy_cfg else None
|
||||
password = proxy_cfg.password if proxy_cfg else None
|
||||
if username and password:
|
||||
await cdp_session.cdp_client.send.Fetch.enable(
|
||||
params={'handleAuthRequests': True},
|
||||
session_id=cdp_session.session_id,
|
||||
)
|
||||
self.logger.debug(f'[SessionManager] Fetch.enable(handleAuthRequests=True) on session {session_id[:8]}...')
|
||||
except Exception as e:
|
||||
self.logger.debug(f'[SessionManager] Fetch.enable on attached session failed: {type(e).__name__}: {e}')
|
||||
|
||||
self.logger.debug(
|
||||
f'[SessionManager] Created session {session_id[:8]}... for target {target_id[:8]}... '
|
||||
f'(total sessions: {len(self._sessions)})'
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
"""Base watchdog class for browser monitoring components."""
|
||||
|
||||
import asyncio
|
||||
import inspect
|
||||
import time
|
||||
from collections.abc import Iterable
|
||||
@@ -73,10 +74,54 @@ class BaseWatchdog(BaseModel):
|
||||
watchdog_instance = getattr(handler, '__self__', None)
|
||||
watchdog_class_name = watchdog_instance.__class__.__name__ if watchdog_instance else 'Unknown'
|
||||
|
||||
# Events that should always run even when CDP is disconnected (lifecycle management)
|
||||
LIFECYCLE_EVENT_NAMES = frozenset(
|
||||
{
|
||||
'BrowserStartEvent',
|
||||
'BrowserStopEvent',
|
||||
'BrowserStoppedEvent',
|
||||
'BrowserLaunchEvent',
|
||||
'BrowserErrorEvent',
|
||||
'BrowserKillEvent',
|
||||
'BrowserReconnectingEvent',
|
||||
'BrowserReconnectedEvent',
|
||||
}
|
||||
)
|
||||
|
||||
# Create a wrapper function with unique name to avoid duplicate handler warnings
|
||||
# Capture handler by value to avoid closure issues
|
||||
def make_unique_handler(actual_handler):
|
||||
async def unique_handler(event):
|
||||
# Circuit breaker: skip handler if CDP WebSocket is dead
|
||||
# (prevents handlers from hanging on broken connections until timeout)
|
||||
# Lifecycle events are exempt — they manage browser start/stop
|
||||
if event.event_type not in LIFECYCLE_EVENT_NAMES and not browser_session.is_cdp_connected:
|
||||
# If reconnection is in progress, wait for it instead of silently skipping
|
||||
if browser_session.is_reconnecting:
|
||||
wait_timeout = browser_session.RECONNECT_WAIT_TIMEOUT
|
||||
browser_session.logger.debug(
|
||||
f'🚌 [{watchdog_class_name}.{actual_handler.__name__}] ⏳ Waiting for reconnection ({wait_timeout}s)...'
|
||||
)
|
||||
try:
|
||||
await asyncio.wait_for(browser_session._reconnect_event.wait(), timeout=wait_timeout)
|
||||
except TimeoutError:
|
||||
raise ConnectionError(
|
||||
f'[{watchdog_class_name}.{actual_handler.__name__}] '
|
||||
f'Reconnection wait timed out after {wait_timeout}s'
|
||||
)
|
||||
# After wait: check if reconnection actually succeeded
|
||||
if not browser_session.is_cdp_connected:
|
||||
raise ConnectionError(
|
||||
f'[{watchdog_class_name}.{actual_handler.__name__}] Reconnection failed — CDP still not connected'
|
||||
)
|
||||
# Reconnection succeeded — fall through to execute handler normally
|
||||
else:
|
||||
# Not reconnecting — intentional stop, backward compat silent skip
|
||||
browser_session.logger.debug(
|
||||
f'🚌 [{watchdog_class_name}.{actual_handler.__name__}] ⚡ Skipped — CDP not connected'
|
||||
)
|
||||
return None
|
||||
|
||||
# just for debug logging, not used for anything else
|
||||
parent_event = event_bus.event_history.get(event.event_parent_id) if event.event_parent_id else None
|
||||
grandparent_event = (
|
||||
|
||||
@@ -59,11 +59,14 @@ class AboutBlankWatchdog(BaseWatchdog):
|
||||
|
||||
async def on_TabClosedEvent(self, event: TabClosedEvent) -> None:
|
||||
"""Check tabs when a tab is closed and proactively create about:blank if needed."""
|
||||
# logger.debug('[AboutBlankWatchdog] Tab closing, checking if we need to create about:blank tab')
|
||||
|
||||
# Don't create new tabs if browser is shutting down
|
||||
if self._stopping:
|
||||
# logger.debug('[AboutBlankWatchdog] Browser is stopping, not creating new tabs')
|
||||
return
|
||||
|
||||
# Don't attempt CDP operations if the WebSocket is dead — dispatching
|
||||
# NavigateToUrlEvent on a broken connection will hang until timeout
|
||||
if not self.browser_session.is_cdp_connected:
|
||||
self.logger.debug('[AboutBlankWatchdog] CDP not connected, skipping tab recovery')
|
||||
return
|
||||
|
||||
# Check if we're about to close the last tab (event happens BEFORE tab closes)
|
||||
@@ -89,6 +92,9 @@ class AboutBlankWatchdog(BaseWatchdog):
|
||||
async def _check_and_ensure_about_blank_tab(self) -> None:
|
||||
"""Check current tabs and ensure exactly one about:blank tab with animation exists."""
|
||||
try:
|
||||
if not self.browser_session.is_cdp_connected:
|
||||
return
|
||||
|
||||
# For quick checks, just get page targets without titles to reduce noise
|
||||
page_targets = await self.browser_session._cdp_get_all_pages()
|
||||
|
||||
|
||||
207
browser_use/browser/watchdogs/captcha_watchdog.py
Normal file
207
browser_use/browser/watchdogs/captcha_watchdog.py
Normal file
@@ -0,0 +1,207 @@
|
||||
"""Captcha solver watchdog — monitors captcha events from the browser proxy.
|
||||
|
||||
Listens for BrowserUse.captchaSolverStarted/Finished CDP events and exposes a
|
||||
wait_if_captcha_solving() method that the agent step loop uses to block until
|
||||
a captcha is resolved (with a configurable timeout).
|
||||
|
||||
NOTE: Only a single captcha solve is tracked at a time. If multiple captchas
|
||||
overlap (e.g. rapid successive navigations), only the latest one is tracked and
|
||||
earlier in-flight waits may return prematurely.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, ClassVar, Literal
|
||||
|
||||
from bubus import BaseEvent
|
||||
from cdp_use.cdp.browseruse.events import CaptchaSolverFinishedEvent as CDPCaptchaSolverFinishedEvent
|
||||
from cdp_use.cdp.browseruse.events import CaptchaSolverStartedEvent as CDPCaptchaSolverStartedEvent
|
||||
from pydantic import PrivateAttr
|
||||
|
||||
from browser_use.browser.events import (
|
||||
BrowserConnectedEvent,
|
||||
BrowserStoppedEvent,
|
||||
CaptchaSolverFinishedEvent,
|
||||
CaptchaSolverStartedEvent,
|
||||
_get_timeout,
|
||||
)
|
||||
from browser_use.browser.watchdog_base import BaseWatchdog
|
||||
|
||||
CaptchaResultType = Literal['success', 'failed', 'timeout', 'unknown']
|
||||
|
||||
|
||||
@dataclass
|
||||
class CaptchaWaitResult:
|
||||
"""Result returned by wait_if_captcha_solving() when the agent had to wait."""
|
||||
|
||||
waited: bool
|
||||
vendor: str
|
||||
url: str
|
||||
duration_ms: int
|
||||
result: CaptchaResultType
|
||||
|
||||
|
||||
class CaptchaWatchdog(BaseWatchdog):
|
||||
"""Monitors captcha solver events from the browser proxy.
|
||||
|
||||
When the proxy detects a CAPTCHA and starts solving it, a CDP event
|
||||
``BrowserUse.captchaSolverStarted`` is sent over the WebSocket. This
|
||||
watchdog catches that event and blocks the agent's step loop (via
|
||||
``wait_if_captcha_solving``) until ``BrowserUse.captchaSolverFinished``
|
||||
arrives or the configurable timeout expires.
|
||||
"""
|
||||
|
||||
# Event contracts
|
||||
LISTENS_TO: ClassVar[list[type[BaseEvent]]] = [
|
||||
BrowserConnectedEvent,
|
||||
BrowserStoppedEvent,
|
||||
]
|
||||
EMITS: ClassVar[list[type[BaseEvent]]] = [
|
||||
CaptchaSolverStartedEvent,
|
||||
CaptchaSolverFinishedEvent,
|
||||
]
|
||||
|
||||
# --- private state ---
|
||||
_captcha_solving: bool = PrivateAttr(default=False)
|
||||
_captcha_solved_event: asyncio.Event = PrivateAttr(default_factory=asyncio.Event)
|
||||
_captcha_info: dict[str, Any] = PrivateAttr(default_factory=dict)
|
||||
_captcha_result: CaptchaResultType = PrivateAttr(default='unknown')
|
||||
_captcha_duration_ms: int = PrivateAttr(default=0)
|
||||
_cdp_handlers_registered: bool = PrivateAttr(default=False)
|
||||
|
||||
def model_post_init(self, __context: Any) -> None:
|
||||
# Start in "not blocked" state so callers never wait when there is no captcha.
|
||||
self._captcha_solved_event.set()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Event handlers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def on_BrowserConnectedEvent(self, event: BrowserConnectedEvent) -> None:
|
||||
"""Register CDP event handlers for BrowserUse captcha solver events."""
|
||||
if self._cdp_handlers_registered:
|
||||
self.logger.debug('CaptchaWatchdog: CDP handlers already registered, skipping')
|
||||
return
|
||||
|
||||
cdp_client = self.browser_session.cdp_client
|
||||
|
||||
def _on_captcha_started(event_data: CDPCaptchaSolverStartedEvent, session_id: str | None) -> None:
|
||||
try:
|
||||
self._captcha_solving = True
|
||||
self._captcha_result = 'unknown'
|
||||
self._captcha_duration_ms = 0
|
||||
self._captcha_info = {
|
||||
'vendor': event_data.get('vendor', 'unknown'),
|
||||
'url': event_data.get('url', ''),
|
||||
'targetId': event_data.get('targetId', ''),
|
||||
'startedAt': event_data.get('startedAt', 0),
|
||||
}
|
||||
# Block any waiter
|
||||
self._captcha_solved_event.clear()
|
||||
|
||||
vendor = self._captcha_info['vendor']
|
||||
url = self._captcha_info['url']
|
||||
self.logger.info(f'🔒 Captcha solving started: {vendor} on {url}')
|
||||
|
||||
self.event_bus.dispatch(
|
||||
CaptchaSolverStartedEvent(
|
||||
target_id=event_data.get('targetId', ''),
|
||||
vendor=vendor,
|
||||
url=url,
|
||||
started_at=event_data.get('startedAt', 0),
|
||||
)
|
||||
)
|
||||
except Exception:
|
||||
self.logger.exception('Error handling captchaSolverStarted CDP event')
|
||||
# Ensure consistent state: unblock any waiter
|
||||
self._captcha_solving = False
|
||||
self._captcha_solved_event.set()
|
||||
|
||||
def _on_captcha_finished(event_data: CDPCaptchaSolverFinishedEvent, session_id: str | None) -> None:
|
||||
try:
|
||||
success = event_data.get('success', False)
|
||||
self._captcha_solving = False
|
||||
self._captcha_duration_ms = event_data.get('durationMs', 0)
|
||||
self._captcha_result = 'success' if success else 'failed'
|
||||
|
||||
vendor = event_data.get('vendor', self._captcha_info.get('vendor', 'unknown'))
|
||||
url = event_data.get('url', self._captcha_info.get('url', ''))
|
||||
duration_s = self._captcha_duration_ms / 1000
|
||||
|
||||
self.logger.info(f'🔓 Captcha solving finished: {self._captcha_result} — {vendor} on {url} ({duration_s:.1f}s)')
|
||||
|
||||
# Unblock any waiter
|
||||
self._captcha_solved_event.set()
|
||||
|
||||
self.event_bus.dispatch(
|
||||
CaptchaSolverFinishedEvent(
|
||||
target_id=event_data.get('targetId', ''),
|
||||
vendor=vendor,
|
||||
url=url,
|
||||
duration_ms=self._captcha_duration_ms,
|
||||
finished_at=event_data.get('finishedAt', 0),
|
||||
success=success,
|
||||
)
|
||||
)
|
||||
except Exception:
|
||||
self.logger.exception('Error handling captchaSolverFinished CDP event')
|
||||
# Ensure consistent state: unblock any waiter
|
||||
self._captcha_solving = False
|
||||
self._captcha_solved_event.set()
|
||||
|
||||
cdp_client.register.BrowserUse.captchaSolverStarted(_on_captcha_started)
|
||||
cdp_client.register.BrowserUse.captchaSolverFinished(_on_captcha_finished)
|
||||
self._cdp_handlers_registered = True
|
||||
self.logger.debug('🔒 CaptchaWatchdog: registered CDP event handlers for BrowserUse captcha events')
|
||||
|
||||
async def on_BrowserStoppedEvent(self, event: BrowserStoppedEvent) -> None:
|
||||
"""Clear captcha state when the browser disconnects so nothing hangs."""
|
||||
self._captcha_solving = False
|
||||
self._captcha_result = 'unknown'
|
||||
self._captcha_duration_ms = 0
|
||||
self._captcha_info = {}
|
||||
self._captcha_solved_event.set()
|
||||
self._cdp_handlers_registered = False
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def wait_if_captcha_solving(self, timeout: float | None = None) -> CaptchaWaitResult | None:
|
||||
"""Wait if a captcha is currently being solved.
|
||||
|
||||
Returns:
|
||||
``None`` if no captcha was in progress.
|
||||
A ``CaptchaWaitResult`` with the outcome otherwise.
|
||||
"""
|
||||
if not self._captcha_solving:
|
||||
return None
|
||||
|
||||
if timeout is None:
|
||||
timeout = _get_timeout('TIMEOUT_CaptchaSolverWait', 120.0)
|
||||
assert timeout is not None
|
||||
vendor = self._captcha_info.get('vendor', 'unknown')
|
||||
url = self._captcha_info.get('url', '')
|
||||
self.logger.info(f'⏳ Waiting for {vendor} captcha to be solved on {url} (timeout={timeout}s)...')
|
||||
|
||||
try:
|
||||
await asyncio.wait_for(self._captcha_solved_event.wait(), timeout=timeout)
|
||||
return CaptchaWaitResult(
|
||||
waited=True,
|
||||
vendor=vendor,
|
||||
url=url,
|
||||
duration_ms=self._captcha_duration_ms,
|
||||
result=self._captcha_result,
|
||||
)
|
||||
except TimeoutError:
|
||||
# Timed out — unblock and report
|
||||
self._captcha_solving = False
|
||||
self._captcha_solved_event.set()
|
||||
self.logger.warning(f'⏰ Captcha wait timed out after {timeout}s for {vendor} on {url}')
|
||||
return CaptchaWaitResult(
|
||||
waited=True,
|
||||
vendor=vendor,
|
||||
url=url,
|
||||
duration_ms=int(timeout * 1000),
|
||||
result='timeout',
|
||||
)
|
||||
@@ -518,6 +518,11 @@ class DefaultActionWatchdog(BaseWatchdog):
|
||||
raise BrowserError(error_msg)
|
||||
|
||||
try:
|
||||
|
||||
def invalidate_dom_cache() -> None:
|
||||
if self.browser_session._dom_watchdog:
|
||||
self.browser_session._dom_watchdog.clear_cache()
|
||||
|
||||
# Convert direction and amount to pixels
|
||||
# Positive pixels = scroll down, negative = scroll up
|
||||
pixels = event.amount if event.direction == 'down' else -event.amount
|
||||
@@ -547,6 +552,7 @@ class DefaultActionWatchdog(BaseWatchdog):
|
||||
# Wait a bit for the scroll to settle and DOM to update
|
||||
await asyncio.sleep(0.2)
|
||||
|
||||
invalidate_dom_cache()
|
||||
return None
|
||||
|
||||
# Perform target-level scroll
|
||||
@@ -554,6 +560,7 @@ class DefaultActionWatchdog(BaseWatchdog):
|
||||
|
||||
# Note: We don't clear cached state here - let multi_act handle DOM change detection
|
||||
# by explicitly rebuilding and comparing when needed
|
||||
invalidate_dom_cache()
|
||||
|
||||
# Log success
|
||||
self.logger.debug(f'📜 Scrolled {event.direction} by {event.amount} pixels')
|
||||
@@ -612,10 +619,48 @@ class DefaultActionWatchdog(BaseWatchdog):
|
||||
|
||||
|
||||
// Simple containment-based clickability logic
|
||||
const isClickable = this === elementAtPoint ||
|
||||
let isClickable = this === elementAtPoint ||
|
||||
this.contains(elementAtPoint) ||
|
||||
elementAtPoint.contains(this);
|
||||
|
||||
// Check label-input associations when containment check fails
|
||||
if (!isClickable) {
|
||||
const target = this;
|
||||
const atPoint = elementAtPoint;
|
||||
|
||||
// Case 1: target is <input>, atPoint is its associated <label> (or child of that label)
|
||||
if (target.tagName === 'INPUT' && target.id) {
|
||||
const escapedId = CSS.escape(target.id);
|
||||
const assocLabel = document.querySelector('label[for="' + escapedId + '"]');
|
||||
if (assocLabel && (assocLabel === atPoint || assocLabel.contains(atPoint))) {
|
||||
isClickable = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Case 2: target is <input>, atPoint is inside a <label> ancestor that wraps the target
|
||||
if (!isClickable && target.tagName === 'INPUT') {
|
||||
let ancestor = atPoint;
|
||||
for (let i = 0; i < 3 && ancestor; i++) {
|
||||
if (ancestor.tagName === 'LABEL' && ancestor.contains(target)) {
|
||||
isClickable = true;
|
||||
break;
|
||||
}
|
||||
ancestor = ancestor.parentElement;
|
||||
}
|
||||
}
|
||||
|
||||
// Case 3: target is <label>, atPoint is the associated <input>
|
||||
if (!isClickable && target.tagName === 'LABEL') {
|
||||
if (target.htmlFor && atPoint.tagName === 'INPUT' && atPoint.id === target.htmlFor) {
|
||||
isClickable = true;
|
||||
}
|
||||
// Also check if atPoint is an input inside the label
|
||||
if (!isClickable && atPoint.tagName === 'INPUT' && target.contains(atPoint)) {
|
||||
isClickable = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
targetInfo: getElementInfo(this),
|
||||
elementAtPointInfo: getElementInfo(elementAtPoint),
|
||||
@@ -686,6 +731,32 @@ class DefaultActionWatchdog(BaseWatchdog):
|
||||
# Get element bounds
|
||||
backend_node_id = element_node.backend_node_id
|
||||
|
||||
# For checkbox/radio: capture pre-click state to verify toggle worked
|
||||
is_toggle_element = tag_name == 'input' and element_type in ('checkbox', 'radio')
|
||||
pre_click_checked: bool | None = None
|
||||
checkbox_object_id: str | None = None
|
||||
if is_toggle_element and backend_node_id:
|
||||
try:
|
||||
resolve_res = await cdp_session.cdp_client.send.DOM.resolveNode(
|
||||
params={'backendNodeId': backend_node_id}, session_id=session_id
|
||||
)
|
||||
obj_info = resolve_res.get('object', {})
|
||||
checkbox_object_id = obj_info.get('objectId') if obj_info else None
|
||||
if not checkbox_object_id:
|
||||
raise Exception('Failed to resolve checkbox element objectId')
|
||||
state_res = await cdp_session.cdp_client.send.Runtime.callFunctionOn(
|
||||
params={
|
||||
'functionDeclaration': 'function() { return this.checked; }',
|
||||
'objectId': checkbox_object_id,
|
||||
'returnByValue': True,
|
||||
},
|
||||
session_id=session_id,
|
||||
)
|
||||
pre_click_checked = state_res.get('result', {}).get('value')
|
||||
self.logger.debug(f'Checkbox pre-click state: checked={pre_click_checked}')
|
||||
except Exception as e:
|
||||
self.logger.debug(f'Could not capture pre-click checkbox state: {e}')
|
||||
|
||||
# Get viewport dimensions for visibility checks
|
||||
layout_metrics = await cdp_session.cdp_client.send.Page.getLayoutMetrics(session_id=session_id)
|
||||
viewport_width = layout_metrics['layoutViewport']['clientWidth']
|
||||
@@ -883,6 +954,43 @@ class DefaultActionWatchdog(BaseWatchdog):
|
||||
|
||||
self.logger.debug('🖱️ Clicked successfully using x,y coordinates')
|
||||
|
||||
# For checkbox/radio: verify state toggled, fall back to JS element.click() if not
|
||||
if is_toggle_element and pre_click_checked is not None and checkbox_object_id:
|
||||
try:
|
||||
await asyncio.sleep(0.05)
|
||||
state_res = await cdp_session.cdp_client.send.Runtime.callFunctionOn(
|
||||
params={
|
||||
'functionDeclaration': 'function() { return this.checked; }',
|
||||
'objectId': checkbox_object_id,
|
||||
'returnByValue': True,
|
||||
},
|
||||
session_id=session_id,
|
||||
)
|
||||
post_click_checked = state_res.get('result', {}).get('value')
|
||||
if post_click_checked == pre_click_checked:
|
||||
# CDP mouse events didn't toggle the checkbox — try JS element.click()
|
||||
self.logger.debug(
|
||||
f'Checkbox state unchanged after CDP click (checked={pre_click_checked}), using JS fallback'
|
||||
)
|
||||
await cdp_session.cdp_client.send.Runtime.callFunctionOn(
|
||||
params={'functionDeclaration': 'function() { this.click(); }', 'objectId': checkbox_object_id},
|
||||
session_id=session_id,
|
||||
)
|
||||
await asyncio.sleep(0.05)
|
||||
final_res = await cdp_session.cdp_client.send.Runtime.callFunctionOn(
|
||||
params={
|
||||
'functionDeclaration': 'function() { return this.checked; }',
|
||||
'objectId': checkbox_object_id,
|
||||
'returnByValue': True,
|
||||
},
|
||||
session_id=session_id,
|
||||
)
|
||||
post_click_checked = final_res.get('result', {}).get('value')
|
||||
self.logger.debug(f'Checkbox post-click state: checked={post_click_checked}')
|
||||
return {'click_x': center_x, 'click_y': center_y, 'checked': post_click_checked}
|
||||
except Exception as e:
|
||||
self.logger.debug(f'Checkbox state verification failed (non-critical): {e}')
|
||||
|
||||
# Return coordinates as dict for metadata
|
||||
return {'click_x': center_x, 'click_y': center_y}
|
||||
|
||||
@@ -1294,10 +1402,8 @@ class DefaultActionWatchdog(BaseWatchdog):
|
||||
return True
|
||||
else:
|
||||
self.logger.debug(f'⚠️ JavaScript clear partially failed, field still contains: "{final_text}"')
|
||||
return False
|
||||
else:
|
||||
self.logger.debug(f'❌ JavaScript clear failed: {clear_info.get("error", "Unknown error")}')
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
self.logger.debug(f'JavaScript clear failed with exception: {e}')
|
||||
|
||||
@@ -264,12 +264,16 @@ class DOMWatchdog(BaseWatchdog):
|
||||
not_a_meaningful_website = page_url.lower().split(':', 1)[0] not in ('http', 'https')
|
||||
|
||||
# Check for pending network requests BEFORE waiting (so we can see what's loading)
|
||||
# Timeout after 2s — on slow CI machines or heavy pages, this call can hang
|
||||
# for 15s+ eating into the 30s BrowserStateRequestEvent budget.
|
||||
pending_requests_before_wait = []
|
||||
if not not_a_meaningful_website:
|
||||
try:
|
||||
pending_requests_before_wait = await self._get_pending_network_requests()
|
||||
pending_requests_before_wait = await asyncio.wait_for(self._get_pending_network_requests(), timeout=2.0)
|
||||
if pending_requests_before_wait:
|
||||
self.logger.debug(f'🔍 Found {len(pending_requests_before_wait)} pending requests before stability wait')
|
||||
except TimeoutError:
|
||||
self.logger.debug('Pending network request check timed out (2s), skipping')
|
||||
except Exception as e:
|
||||
self.logger.debug(f'Failed to get pending requests before wait: {e}')
|
||||
pending_requests = pending_requests_before_wait
|
||||
|
||||
@@ -62,8 +62,8 @@ class DownloadsWatchdog(BaseWatchdog):
|
||||
_download_cdp_session: Any = PrivateAttr(default=None) # Store CDP session reference
|
||||
_cdp_event_tasks: set[asyncio.Task] = PrivateAttr(default_factory=set) # Track CDP event handler tasks
|
||||
_cdp_downloads_info: dict[str, dict[str, Any]] = PrivateAttr(default_factory=dict) # Map guid -> info
|
||||
_use_js_fetch_for_local: bool = PrivateAttr(default=False) # Guard JS fetch path for local regular downloads
|
||||
_session_pdf_urls: dict[str, str] = PrivateAttr(default_factory=dict) # URL -> path for PDFs downloaded this session
|
||||
_initial_downloads_snapshot: set[str] = PrivateAttr(default_factory=set) # Files present when watchdog started
|
||||
_network_monitored_targets: set[str] = PrivateAttr(default_factory=set) # Track targets with network monitoring enabled
|
||||
_detected_downloads: set[str] = PrivateAttr(default_factory=set) # Track detected download URLs to avoid duplicates
|
||||
_network_callback_registered: bool = PrivateAttr(default=False) # Track if global network callback is registered
|
||||
@@ -120,6 +120,15 @@ class DownloadsWatchdog(BaseWatchdog):
|
||||
expanded_path.mkdir(parents=True, exist_ok=True)
|
||||
self.logger.debug(f'[DownloadsWatchdog] Ensured downloads directory exists: {expanded_path}')
|
||||
|
||||
# Capture initial files to detect new downloads reliably
|
||||
if expanded_path.exists():
|
||||
for f in expanded_path.iterdir():
|
||||
if f.is_file() and not f.name.startswith('.'):
|
||||
self._initial_downloads_snapshot.add(f.name)
|
||||
self.logger.debug(
|
||||
f'[DownloadsWatchdog] Captured initial downloads: {len(self._initial_downloads_snapshot)} files'
|
||||
)
|
||||
|
||||
async def on_TabCreatedEvent(self, event: TabCreatedEvent) -> None:
|
||||
"""Monitor new tabs for downloads."""
|
||||
# logger.info(f'[DownloadsWatchdog] TabCreatedEvent received for tab {event.target_id[-4:]}: {event.url}')
|
||||
@@ -192,6 +201,7 @@ class DownloadsWatchdog(BaseWatchdog):
|
||||
self._session_pdf_urls.clear()
|
||||
self._network_monitored_targets.clear()
|
||||
self._detected_downloads.clear()
|
||||
self._initial_downloads_snapshot.clear()
|
||||
self._network_callback_registered = False
|
||||
|
||||
async def on_NavigationCompleteEvent(self, event: NavigationCompleteEvent) -> None:
|
||||
@@ -326,10 +336,31 @@ class DownloadsWatchdog(BaseWatchdog):
|
||||
except (KeyError, AttributeError):
|
||||
pass
|
||||
else:
|
||||
# No local file path provided, local polling in _handle_cdp_download will handle it
|
||||
self.logger.debug(
|
||||
'[DownloadsWatchdog] No filePath in progress event (local); polling will handle detection'
|
||||
)
|
||||
# No filePath provided - detect by comparing with initial snapshot
|
||||
self.logger.debug('[DownloadsWatchdog] No filePath in progress event; detecting via filesystem')
|
||||
downloads_path = self.browser_session.browser_profile.downloads_path
|
||||
if downloads_path:
|
||||
downloads_dir = Path(downloads_path).expanduser().resolve()
|
||||
if downloads_dir.exists():
|
||||
for f in downloads_dir.iterdir():
|
||||
if (
|
||||
f.is_file()
|
||||
and not f.name.startswith('.')
|
||||
and f.name not in self._initial_downloads_snapshot
|
||||
):
|
||||
# Check file has content before processing
|
||||
if f.stat().st_size > 4:
|
||||
# Found a new file! Add to snapshot immediately to prevent duplicate detection
|
||||
self._initial_downloads_snapshot.add(f.name)
|
||||
self.logger.debug(f'[DownloadsWatchdog] Detected new download: {f.name}')
|
||||
self._track_download(str(f))
|
||||
# Mark as handled
|
||||
try:
|
||||
if guid in self._cdp_downloads_info:
|
||||
self._cdp_downloads_info[guid]['handled'] = True
|
||||
except (KeyError, AttributeError):
|
||||
pass
|
||||
break
|
||||
else:
|
||||
# Remote browser: do not touch local filesystem. Fallback to downloadPath+suggestedFilename
|
||||
info = self._cdp_downloads_info.get(guid, {})
|
||||
@@ -456,17 +487,24 @@ class DownloadsWatchdog(BaseWatchdog):
|
||||
response = event.get('response', {})
|
||||
url = response.get('url', '')
|
||||
content_type = response.get('mimeType', '').lower()
|
||||
headers = response.get('headers', {})
|
||||
headers = {
|
||||
k.lower(): v for k, v in response.get('headers', {}).items()
|
||||
} # Normalize for case-insensitive lookup
|
||||
request_type = event.get('type', '')
|
||||
|
||||
# Skip non-HTTP URLs (data:, about:, chrome-extension:, etc.)
|
||||
if not url.startswith('http'):
|
||||
return
|
||||
|
||||
# Skip fetch/XHR - real browsers don't download PDFs from programmatic requests
|
||||
if request_type in ('Fetch', 'XHR'):
|
||||
return
|
||||
|
||||
# Check if it's a PDF
|
||||
is_pdf = 'application/pdf' in content_type
|
||||
|
||||
# Check if it's marked as download via Content-Disposition header
|
||||
content_disposition = headers.get('content-disposition', '').lower()
|
||||
content_disposition = str(headers.get('content-disposition', '')).lower()
|
||||
is_download_attachment = 'attachment' in content_disposition
|
||||
|
||||
# Filter out image/video/audio files even if marked as attachment
|
||||
@@ -518,6 +556,14 @@ class DownloadsWatchdog(BaseWatchdog):
|
||||
if not (is_pdf or is_download_attachment):
|
||||
return
|
||||
|
||||
# If already downloaded this URL and file still exists, do nothing
|
||||
existing_path = self._session_pdf_urls.get(url)
|
||||
if existing_path:
|
||||
if os.path.exists(existing_path):
|
||||
return
|
||||
# Stale cache entry, allow re-download
|
||||
del self._session_pdf_urls[url]
|
||||
|
||||
# Check if we've already processed this URL in this session
|
||||
if url in self._detected_downloads:
|
||||
self.logger.debug(f'[DownloadsWatchdog] Already detected download: {url[:80]}...')
|
||||
@@ -543,6 +589,7 @@ class DownloadsWatchdog(BaseWatchdog):
|
||||
|
||||
# Trigger download asynchronously in background (don't block event handler)
|
||||
async def download_in_background():
|
||||
# Don't permanently block re-processing this URL if download fails
|
||||
try:
|
||||
download_path = await self.download_file_from_url(
|
||||
url=url,
|
||||
@@ -557,6 +604,9 @@ class DownloadsWatchdog(BaseWatchdog):
|
||||
self.logger.warning(f'[DownloadsWatchdog] ⚠️ Failed to download: {url[:80]}...')
|
||||
except Exception as e:
|
||||
self.logger.error(f'[DownloadsWatchdog] Error downloading in background: {type(e).__name__}: {e}')
|
||||
finally:
|
||||
# Allow future detections of the same URL
|
||||
self._detected_downloads.discard(url)
|
||||
|
||||
# Create background task
|
||||
task = create_task_with_error_handling(
|
||||
@@ -611,8 +661,13 @@ class DownloadsWatchdog(BaseWatchdog):
|
||||
# Check if already downloaded in this session
|
||||
if url in self._session_pdf_urls:
|
||||
existing_path = self._session_pdf_urls[url]
|
||||
self.logger.debug(f'[DownloadsWatchdog] File already downloaded in session: {existing_path}')
|
||||
return existing_path
|
||||
if os.path.exists(existing_path):
|
||||
self.logger.debug(f'[DownloadsWatchdog] File already downloaded in session: {existing_path}')
|
||||
return existing_path
|
||||
|
||||
# Stale cache entry: the file was removed/cleaned up after we cached it.
|
||||
self.logger.debug(f'[DownloadsWatchdog] Cached download path no longer exists, re-downloading: {existing_path}')
|
||||
del self._session_pdf_urls[url]
|
||||
|
||||
try:
|
||||
# Get or create CDP session for this target
|
||||
@@ -814,107 +869,6 @@ class DownloadsWatchdog(BaseWatchdog):
|
||||
# We just need to wait for it to appear in the downloads directory
|
||||
expected_path = downloads_dir / suggested_filename
|
||||
|
||||
# Debug: List current directory contents
|
||||
self.logger.debug(f'[DownloadsWatchdog] Downloads directory: {downloads_dir}')
|
||||
if downloads_dir.exists():
|
||||
files_before = list(downloads_dir.iterdir())
|
||||
self.logger.debug(f'[DownloadsWatchdog] Files before download: {[f.name for f in files_before]}')
|
||||
|
||||
# Try manual JavaScript fetch as a fallback for local browsers (disabled for regular local downloads)
|
||||
if self.browser_session.is_local and self._use_js_fetch_for_local:
|
||||
self.logger.debug(f'[DownloadsWatchdog] Attempting JS fetch fallback for {download_url}')
|
||||
|
||||
unique_filename = None
|
||||
file_size = None
|
||||
download_result = None
|
||||
try:
|
||||
# Escape the URL for JavaScript
|
||||
import json
|
||||
|
||||
escaped_url = json.dumps(download_url)
|
||||
|
||||
# Get the proper session for the frame that initiated the download
|
||||
cdp_session = await self.browser_session.cdp_client_for_frame(event.get('frameId'))
|
||||
assert cdp_session
|
||||
|
||||
result = await cdp_session.cdp_client.send.Runtime.evaluate(
|
||||
params={
|
||||
'expression': f"""
|
||||
(async () => {{
|
||||
try {{
|
||||
const response = await fetch({escaped_url});
|
||||
if (!response.ok) {{
|
||||
throw new Error(`HTTP error! status: ${{response.status}}`);
|
||||
}}
|
||||
const blob = await response.blob();
|
||||
const arrayBuffer = await blob.arrayBuffer();
|
||||
const uint8Array = new Uint8Array(arrayBuffer);
|
||||
return {{
|
||||
data: Array.from(uint8Array),
|
||||
size: uint8Array.length,
|
||||
contentType: response.headers.get('content-type') || 'application/octet-stream'
|
||||
}};
|
||||
}} catch (error) {{
|
||||
throw new Error(`Fetch failed: ${{error.message}}`);
|
||||
}}
|
||||
}})()
|
||||
""",
|
||||
'awaitPromise': True,
|
||||
'returnByValue': True,
|
||||
},
|
||||
session_id=cdp_session.session_id,
|
||||
)
|
||||
download_result = result.get('result', {}).get('value')
|
||||
|
||||
if download_result and download_result.get('data'):
|
||||
# Save the file
|
||||
file_data = bytes(download_result['data'])
|
||||
file_size = len(file_data)
|
||||
|
||||
# Ensure unique filename
|
||||
unique_filename = await self._get_unique_filename(str(downloads_dir), suggested_filename)
|
||||
final_path = downloads_dir / unique_filename
|
||||
|
||||
# Write the file
|
||||
import anyio
|
||||
|
||||
async with await anyio.open_file(final_path, 'wb') as f:
|
||||
await f.write(file_data)
|
||||
|
||||
self.logger.debug(f'[DownloadsWatchdog] ✅ Downloaded and saved file: {final_path} ({file_size} bytes)')
|
||||
expected_path = final_path
|
||||
# Emit download event immediately
|
||||
file_ext = expected_path.suffix.lower().lstrip('.')
|
||||
file_type = file_ext if file_ext else None
|
||||
self.event_bus.dispatch(
|
||||
FileDownloadedEvent(
|
||||
guid=guid,
|
||||
url=download_url,
|
||||
path=str(expected_path),
|
||||
file_name=unique_filename or expected_path.name,
|
||||
file_size=file_size or 0,
|
||||
file_type=file_type,
|
||||
mime_type=(download_result.get('contentType') if download_result else None),
|
||||
from_cache=False,
|
||||
auto_download=False,
|
||||
)
|
||||
)
|
||||
# Mark as handled to prevent duplicate dispatch from progress/polling paths
|
||||
try:
|
||||
if guid in self._cdp_downloads_info:
|
||||
self._cdp_downloads_info[guid]['handled'] = True
|
||||
except (KeyError, AttributeError):
|
||||
pass
|
||||
self.logger.debug(
|
||||
f'[DownloadsWatchdog] ✅ File download completed via CDP: {suggested_filename} ({file_size} bytes) saved to {expected_path}'
|
||||
)
|
||||
return
|
||||
else:
|
||||
self.logger.error('[DownloadsWatchdog] ❌ No data received from fetch')
|
||||
|
||||
except Exception as fetch_error:
|
||||
self.logger.error(f'[DownloadsWatchdog] ❌ Failed to download file via fetch: {fetch_error}')
|
||||
|
||||
# For remote browsers, don't poll local filesystem; downloadProgress handler will emit the event
|
||||
if not self.browser_session.is_local:
|
||||
return
|
||||
@@ -925,24 +879,23 @@ class DownloadsWatchdog(BaseWatchdog):
|
||||
# Poll the downloads directory for new files
|
||||
self.logger.debug(f'[DownloadsWatchdog] Checking if browser auto-download saved the file for us: {suggested_filename}')
|
||||
|
||||
# Get initial list of files in downloads directory
|
||||
initial_files = set()
|
||||
if Path(downloads_dir).exists():
|
||||
for f in Path(downloads_dir).iterdir():
|
||||
if f.is_file() and not f.name.startswith('.'):
|
||||
initial_files.add(f.name)
|
||||
|
||||
# Poll for new files
|
||||
max_wait = 20 # seconds
|
||||
start_time = asyncio.get_event_loop().time()
|
||||
|
||||
while asyncio.get_event_loop().time() - start_time < max_wait:
|
||||
while asyncio.get_event_loop().time() - start_time < max_wait: # noqa: ASYNC110
|
||||
await asyncio.sleep(5.0) # Check every 5 seconds
|
||||
|
||||
if Path(downloads_dir).exists():
|
||||
for file_path in Path(downloads_dir).iterdir():
|
||||
# Skip hidden files and files that were already there
|
||||
if file_path.is_file() and not file_path.name.startswith('.') and file_path.name not in initial_files:
|
||||
if (
|
||||
file_path.is_file()
|
||||
and not file_path.name.startswith('.')
|
||||
and file_path.name not in self._initial_downloads_snapshot
|
||||
):
|
||||
# Add to snapshot immediately to prevent duplicate detection
|
||||
self._initial_downloads_snapshot.add(file_path.name)
|
||||
# Check if file has content (> 4 bytes)
|
||||
try:
|
||||
file_size = file_path.stat().st_size
|
||||
@@ -971,13 +924,13 @@ class DownloadsWatchdog(BaseWatchdog):
|
||||
file_type=file_type,
|
||||
)
|
||||
)
|
||||
# Mark as handled after dispatch
|
||||
try:
|
||||
if guid in self._cdp_downloads_info:
|
||||
self._cdp_downloads_info[guid]['handled'] = True
|
||||
except (KeyError, AttributeError):
|
||||
pass
|
||||
return
|
||||
# Mark as handled after dispatch
|
||||
try:
|
||||
if guid in self._cdp_downloads_info:
|
||||
self._cdp_downloads_info[guid]['handled'] = True
|
||||
except (KeyError, AttributeError):
|
||||
pass
|
||||
return
|
||||
except Exception as e:
|
||||
self.logger.debug(f'[DownloadsWatchdog] Error checking file {file_path}: {e}')
|
||||
|
||||
|
||||
@@ -665,7 +665,7 @@ class HarRecordingWatchdog(BaseWatchdog):
|
||||
|
||||
tmp_path = self._har_path.with_suffix(self._har_path.suffix + '.tmp')
|
||||
# Write as bytes explicitly to avoid any text/binary mode confusion in different environments
|
||||
tmp_path.write_bytes(json.dumps(har_obj, indent=2).encode('utf-8'))
|
||||
tmp_path.write_bytes(json.dumps(har_obj, indent=2, ensure_ascii=False).encode('utf-8'))
|
||||
tmp_path.replace(self._har_path)
|
||||
|
||||
def _format_page_started_datetime(self, timestamp: float | None) -> str:
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
"""Local browser watchdog for managing browser subprocess lifecycle."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import shutil
|
||||
@@ -21,7 +23,7 @@ from browser_use.browser.watchdog_base import BaseWatchdog
|
||||
from browser_use.observability import observe_debug
|
||||
|
||||
if TYPE_CHECKING:
|
||||
pass
|
||||
from browser_use.browser.profile import BrowserChannel
|
||||
|
||||
|
||||
class LocalBrowserWatchdog(BaseWatchdog):
|
||||
@@ -124,8 +126,8 @@ class LocalBrowserWatchdog(BaseWatchdog):
|
||||
self.logger.debug(f'[LocalBrowserWatchdog] 📦 Using custom local browser executable_path= {browser_path}')
|
||||
else:
|
||||
# self.logger.debug('[LocalBrowserWatchdog] 🔍 Looking for local browser binary path...')
|
||||
# Try fallback paths first (system browsers preferred)
|
||||
browser_path = self._find_installed_browser_path()
|
||||
# Try fallback paths first (Playwright's Chromium preferred by default)
|
||||
browser_path = self._find_installed_browser_path(channel=profile.channel)
|
||||
if not browser_path:
|
||||
self.logger.error(
|
||||
'[LocalBrowserWatchdog] ⚠️ No local browser binary found, installing browser using playwright subprocess...'
|
||||
@@ -215,14 +217,18 @@ class LocalBrowserWatchdog(BaseWatchdog):
|
||||
raise RuntimeError(f'Failed to launch browser after {max_retries} attempts')
|
||||
|
||||
@staticmethod
|
||||
def _find_installed_browser_path() -> str | None:
|
||||
def _find_installed_browser_path(channel: BrowserChannel | None = None) -> str | None:
|
||||
"""Try to find browser executable from common fallback locations.
|
||||
|
||||
If a channel is specified, paths for that browser are searched first.
|
||||
Falls back to all known browser paths if the channel-specific search fails.
|
||||
|
||||
Prioritizes:
|
||||
1. System Chrome Stable
|
||||
1. Playwright chromium
|
||||
2. Other system native browsers (Chromium -> Chrome Canary/Dev -> Brave)
|
||||
3. Playwright headless-shell fallback
|
||||
1. Channel-specific paths (if channel is set to a non-default value)
|
||||
2. Playwright bundled Chromium (when no channel or default channel specified)
|
||||
3. System Chrome stable
|
||||
4. Other system native browsers (Chromium -> Chrome Canary/Dev -> Brave -> Edge)
|
||||
5. Playwright headless-shell fallback
|
||||
|
||||
Returns:
|
||||
Path to browser executable or None if not found
|
||||
@@ -231,60 +237,90 @@ class LocalBrowserWatchdog(BaseWatchdog):
|
||||
import platform
|
||||
from pathlib import Path
|
||||
|
||||
from browser_use.browser.profile import BROWSERUSE_DEFAULT_CHANNEL, BrowserChannel
|
||||
|
||||
system = platform.system()
|
||||
patterns = []
|
||||
|
||||
# Get playwright browsers path from environment variable if set
|
||||
playwright_path = os.environ.get('PLAYWRIGHT_BROWSERS_PATH')
|
||||
|
||||
# Build tagged pattern lists per OS: (browser_group, path)
|
||||
# browser_group is used to match against the requested channel
|
||||
if system == 'Darwin': # macOS
|
||||
if not playwright_path:
|
||||
playwright_path = '~/Library/Caches/ms-playwright'
|
||||
patterns = [
|
||||
'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
|
||||
f'{playwright_path}/chromium-*/chrome-mac/Chromium.app/Contents/MacOS/Chromium',
|
||||
'/Applications/Chromium.app/Contents/MacOS/Chromium',
|
||||
'/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
|
||||
'/Applications/Brave Browser.app/Contents/MacOS/Brave Browser',
|
||||
f'{playwright_path}/chromium_headless_shell-*/chrome-mac/Chromium.app/Contents/MacOS/Chromium',
|
||||
all_patterns = [
|
||||
('chrome', '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'),
|
||||
('chromium', f'{playwright_path}/chromium-*/chrome-mac/Chromium.app/Contents/MacOS/Chromium'),
|
||||
('chromium', '/Applications/Chromium.app/Contents/MacOS/Chromium'),
|
||||
('chrome-canary', '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary'),
|
||||
('brave', '/Applications/Brave Browser.app/Contents/MacOS/Brave Browser'),
|
||||
('msedge', '/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge'),
|
||||
('chromium', f'{playwright_path}/chromium_headless_shell-*/chrome-mac/Chromium.app/Contents/MacOS/Chromium'),
|
||||
]
|
||||
elif system == 'Linux':
|
||||
if not playwright_path:
|
||||
playwright_path = '~/.cache/ms-playwright'
|
||||
patterns = [
|
||||
'/usr/bin/google-chrome-stable',
|
||||
'/usr/bin/google-chrome',
|
||||
'/usr/local/bin/google-chrome',
|
||||
f'{playwright_path}/chromium-*/chrome-linux*/chrome',
|
||||
'/usr/bin/chromium',
|
||||
'/usr/bin/chromium-browser',
|
||||
'/usr/local/bin/chromium',
|
||||
'/snap/bin/chromium',
|
||||
'/usr/bin/google-chrome-beta',
|
||||
'/usr/bin/google-chrome-dev',
|
||||
'/usr/bin/brave-browser',
|
||||
f'{playwright_path}/chromium_headless_shell-*/chrome-linux*/chrome',
|
||||
all_patterns = [
|
||||
('chrome', '/usr/bin/google-chrome-stable'),
|
||||
('chrome', '/usr/bin/google-chrome'),
|
||||
('chrome', '/usr/local/bin/google-chrome'),
|
||||
('chromium', f'{playwright_path}/chromium-*/chrome-linux*/chrome'),
|
||||
('chromium', '/usr/bin/chromium'),
|
||||
('chromium', '/usr/bin/chromium-browser'),
|
||||
('chromium', '/usr/local/bin/chromium'),
|
||||
('chromium', '/snap/bin/chromium'),
|
||||
('chrome-beta', '/usr/bin/google-chrome-beta'),
|
||||
('chrome-dev', '/usr/bin/google-chrome-dev'),
|
||||
('brave', '/usr/bin/brave-browser'),
|
||||
('msedge', '/usr/bin/microsoft-edge-stable'),
|
||||
('msedge', '/usr/bin/microsoft-edge'),
|
||||
('chromium', f'{playwright_path}/chromium_headless_shell-*/chrome-linux*/chrome'),
|
||||
]
|
||||
elif system == 'Windows':
|
||||
if not playwright_path:
|
||||
playwright_path = r'%LOCALAPPDATA%\ms-playwright'
|
||||
patterns = [
|
||||
r'C:\Program Files\Google\Chrome\Application\chrome.exe',
|
||||
r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe',
|
||||
r'%LOCALAPPDATA%\Google\Chrome\Application\chrome.exe',
|
||||
r'%PROGRAMFILES%\Google\Chrome\Application\chrome.exe',
|
||||
r'%PROGRAMFILES(X86)%\Google\Chrome\Application\chrome.exe',
|
||||
f'{playwright_path}\\chromium-*\\chrome-win\\chrome.exe',
|
||||
r'C:\Program Files\Chromium\Application\chrome.exe',
|
||||
r'C:\Program Files (x86)\Chromium\Application\chrome.exe',
|
||||
r'%LOCALAPPDATA%\Chromium\Application\chrome.exe',
|
||||
r'C:\Program Files\BraveSoftware\Brave-Browser\Application\brave.exe',
|
||||
r'C:\Program Files (x86)\BraveSoftware\Brave-Browser\Application\brave.exe',
|
||||
r'C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe',
|
||||
r'C:\Program Files\Microsoft\Edge\Application\msedge.exe',
|
||||
r'%LOCALAPPDATA%\Microsoft\Edge\Application\msedge.exe',
|
||||
f'{playwright_path}\\chromium_headless_shell-*\\chrome-win\\chrome.exe',
|
||||
all_patterns = [
|
||||
('chrome', r'C:\Program Files\Google\Chrome\Application\chrome.exe'),
|
||||
('chrome', r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe'),
|
||||
('chrome', r'%LOCALAPPDATA%\Google\Chrome\Application\chrome.exe'),
|
||||
('chrome', r'%PROGRAMFILES%\Google\Chrome\Application\chrome.exe'),
|
||||
('chrome', r'%PROGRAMFILES(X86)%\Google\Chrome\Application\chrome.exe'),
|
||||
('chromium', f'{playwright_path}\\chromium-*\\chrome-win\\chrome.exe'),
|
||||
('chromium', r'C:\Program Files\Chromium\Application\chrome.exe'),
|
||||
('chromium', r'C:\Program Files (x86)\Chromium\Application\chrome.exe'),
|
||||
('chromium', r'%LOCALAPPDATA%\Chromium\Application\chrome.exe'),
|
||||
('brave', r'C:\Program Files\BraveSoftware\Brave-Browser\Application\brave.exe'),
|
||||
('brave', r'C:\Program Files (x86)\BraveSoftware\Brave-Browser\Application\brave.exe'),
|
||||
('msedge', r'C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe'),
|
||||
('msedge', r'C:\Program Files\Microsoft\Edge\Application\msedge.exe'),
|
||||
('msedge', r'%LOCALAPPDATA%\Microsoft\Edge\Application\msedge.exe'),
|
||||
('chromium', f'{playwright_path}\\chromium_headless_shell-*\\chrome-win\\chrome.exe'),
|
||||
]
|
||||
else:
|
||||
all_patterns = []
|
||||
|
||||
# Map channel enum values to browser group tags
|
||||
_channel_to_group: dict[BrowserChannel, str] = {
|
||||
BrowserChannel.CHROME: 'chrome',
|
||||
BrowserChannel.CHROME_BETA: 'chrome-beta',
|
||||
BrowserChannel.CHROME_DEV: 'chrome-dev',
|
||||
BrowserChannel.CHROME_CANARY: 'chrome-canary',
|
||||
BrowserChannel.CHROMIUM: 'chromium',
|
||||
BrowserChannel.MSEDGE: 'msedge',
|
||||
BrowserChannel.MSEDGE_BETA: 'msedge',
|
||||
BrowserChannel.MSEDGE_DEV: 'msedge',
|
||||
BrowserChannel.MSEDGE_CANARY: 'msedge',
|
||||
}
|
||||
|
||||
# Prioritize the target browser group, then fall back to the rest.
|
||||
if channel and channel != BROWSERUSE_DEFAULT_CHANNEL and channel in _channel_to_group:
|
||||
target_group = _channel_to_group[channel]
|
||||
else:
|
||||
target_group = _channel_to_group[BROWSERUSE_DEFAULT_CHANNEL]
|
||||
prioritized = [p for g, p in all_patterns if g == target_group]
|
||||
rest = [p for g, p in all_patterns if g != target_group]
|
||||
patterns = prioritized + rest
|
||||
|
||||
for pattern in patterns:
|
||||
# Expand user home directory
|
||||
@@ -326,7 +362,7 @@ class LocalBrowserWatchdog(BaseWatchdog):
|
||||
import platform
|
||||
|
||||
# Build command - only use --with-deps on Linux (it fails on Windows/macOS)
|
||||
cmd = ['uvx', 'playwright', 'install', 'chrome']
|
||||
cmd = ['uvx', 'playwright', 'install', 'chromium']
|
||||
if platform.system() == 'Linux':
|
||||
cmd.append('--with-deps')
|
||||
|
||||
@@ -344,7 +380,7 @@ class LocalBrowserWatchdog(BaseWatchdog):
|
||||
if browser_path:
|
||||
return browser_path
|
||||
self.logger.error(f'[LocalBrowserWatchdog] ❌ Playwright local browser installation error: \n{stdout}\n{stderr}')
|
||||
raise RuntimeError('No local browser path found after: uvx playwright install chrome')
|
||||
raise RuntimeError('No local browser path found after: uvx playwright install chromium')
|
||||
except TimeoutError:
|
||||
# Kill the subprocess if it times out
|
||||
process.kill()
|
||||
|
||||
@@ -52,8 +52,26 @@ class ScreenshotWatchdog(BaseWatchdog):
|
||||
|
||||
cdp_session = await self.browser_session.get_or_create_cdp_session(target_id, focus=True)
|
||||
|
||||
# Remove highlights BEFORE taking the screenshot so they don't appear in the image.
|
||||
# Done here (not in finally) so CancelledError is never swallowed — any await in a
|
||||
# finally block can suppress external task cancellation.
|
||||
# remove_highlights() has its own asyncio.timeout(3.0) internally so it won't block.
|
||||
try:
|
||||
await self.browser_session.remove_highlights()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Prepare screenshot parameters
|
||||
params = CaptureScreenshotParameters(format='png', captureBeyondViewport=False)
|
||||
params_dict: dict[str, Any] = {'format': 'png', 'captureBeyondViewport': event.full_page}
|
||||
if event.clip:
|
||||
params_dict['clip'] = {
|
||||
'x': event.clip['x'],
|
||||
'y': event.clip['y'],
|
||||
'width': event.clip['width'],
|
||||
'height': event.clip['height'],
|
||||
'scale': 1,
|
||||
}
|
||||
params = CaptureScreenshotParameters(**params_dict)
|
||||
|
||||
# Take screenshot using CDP
|
||||
self.logger.debug(f'[ScreenshotWatchdog] Taking screenshot with params: {params}')
|
||||
@@ -68,9 +86,3 @@ class ScreenshotWatchdog(BaseWatchdog):
|
||||
except Exception as e:
|
||||
self.logger.error(f'[ScreenshotWatchdog] Screenshot failed: {e}')
|
||||
raise
|
||||
finally:
|
||||
# Try to remove highlights even on failure
|
||||
try:
|
||||
await self.browser_session.remove_highlights()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -68,7 +68,6 @@ class SecurityWatchdog(BaseWatchdog):
|
||||
await session.cdp_client.send.Page.navigate(params={'url': 'about:blank'}, session_id=session.session_id)
|
||||
self.logger.info(f'⛔️ Navigated to about:blank after blocked URL: {event.url}')
|
||||
except Exception as e:
|
||||
pass
|
||||
self.logger.error(f'⛔️ Failed to navigate to about:blank: {type(e).__name__} {e}')
|
||||
|
||||
async def on_TabCreatedEvent(self, event: TabCreatedEvent) -> None:
|
||||
|
||||
@@ -202,7 +202,7 @@ class StorageStateWatchdog(BaseWatchdog):
|
||||
|
||||
# Write atomically
|
||||
temp_path = json_path.with_suffix('.json.tmp')
|
||||
temp_path.write_text(json.dumps(merged_state, indent=4))
|
||||
temp_path.write_text(json.dumps(merged_state, indent=4, ensure_ascii=False), encoding='utf-8')
|
||||
|
||||
# Backup existing file
|
||||
if json_path.exists():
|
||||
@@ -249,25 +249,60 @@ class StorageStateWatchdog(BaseWatchdog):
|
||||
|
||||
# Apply cookies if present
|
||||
if 'cookies' in storage and storage['cookies']:
|
||||
await self.browser_session._cdp_set_cookies(storage['cookies'])
|
||||
# Playwright exports session cookies with expires=0/-1. CDP treats expires=0 as expired.
|
||||
# Normalize session cookies by omitting expires
|
||||
normalized_cookies: list[Cookie] = []
|
||||
for cookie in storage['cookies']:
|
||||
if not isinstance(cookie, dict):
|
||||
normalized_cookies.append(cookie) # type: ignore[arg-type]
|
||||
continue
|
||||
c = dict(cookie)
|
||||
expires = c.get('expires')
|
||||
if expires in (0, 0.0, -1, -1.0):
|
||||
c.pop('expires', None)
|
||||
normalized_cookies.append(Cookie(**c))
|
||||
|
||||
await self.browser_session._cdp_set_cookies(normalized_cookies)
|
||||
self._last_cookie_state = storage['cookies'].copy()
|
||||
self.logger.debug(f'[StorageStateWatchdog] Added {len(storage["cookies"])} cookies from storage state')
|
||||
|
||||
# Apply origins (localStorage/sessionStorage) if present
|
||||
if 'origins' in storage and storage['origins']:
|
||||
for origin in storage['origins']:
|
||||
if 'localStorage' in origin:
|
||||
origin_value = origin.get('origin')
|
||||
if not origin_value:
|
||||
continue
|
||||
|
||||
# Scope storage restoration to its origin to avoid cross-site pollution.
|
||||
if origin.get('localStorage'):
|
||||
lines = []
|
||||
for item in origin['localStorage']:
|
||||
script = f"""
|
||||
window.localStorage.setItem({json.dumps(item['name'])}, {json.dumps(item['value'])});
|
||||
"""
|
||||
await self.browser_session._cdp_add_init_script(script)
|
||||
if 'sessionStorage' in origin:
|
||||
lines.append(f'window.localStorage.setItem({json.dumps(item["name"])}, {json.dumps(item["value"])});')
|
||||
script = (
|
||||
'(function(){\n'
|
||||
f' if (window.location && window.location.origin !== {json.dumps(origin_value)}) return;\n'
|
||||
' try {\n'
|
||||
f' {" ".join(lines)}\n'
|
||||
' } catch (e) {}\n'
|
||||
'})();'
|
||||
)
|
||||
await self.browser_session._cdp_add_init_script(script)
|
||||
|
||||
if origin.get('sessionStorage'):
|
||||
lines = []
|
||||
for item in origin['sessionStorage']:
|
||||
script = f"""
|
||||
window.sessionStorage.setItem({json.dumps(item['name'])}, {json.dumps(item['value'])});
|
||||
"""
|
||||
await self.browser_session._cdp_add_init_script(script)
|
||||
lines.append(
|
||||
f'window.sessionStorage.setItem({json.dumps(item["name"])}, {json.dumps(item["value"])});'
|
||||
)
|
||||
script = (
|
||||
'(function(){\n'
|
||||
f' if (window.location && window.location.origin !== {json.dumps(origin_value)}) return;\n'
|
||||
' try {\n'
|
||||
f' {" ".join(lines)}\n'
|
||||
' } catch (e) {}\n'
|
||||
'})();'
|
||||
)
|
||||
await self.browser_session._cdp_add_init_script(script)
|
||||
self.logger.debug(
|
||||
f'[StorageStateWatchdog] Applied localStorage/sessionStorage from {len(storage["origins"])} origins'
|
||||
)
|
||||
|
||||
@@ -129,7 +129,7 @@ if '--template' in sys.argv:
|
||||
click.echo(' uv pip install browser-use')
|
||||
click.echo(' 2. Set up your API key in .env file or environment:')
|
||||
click.echo(' BROWSER_USE_API_KEY=your-key')
|
||||
click.echo(' (Get your key at https://cloud.browser-use.com/new-api-key)')
|
||||
click.echo(' (Get your key at https://cloud.browser-use.com/new-api-key?utm_source=oss&utm_medium=cli)')
|
||||
click.echo(' 3. Run your script:')
|
||||
click.echo(f' python {output_path.name}')
|
||||
except Exception as e:
|
||||
@@ -178,9 +178,12 @@ except ImportError:
|
||||
try:
|
||||
import readline
|
||||
|
||||
_add_history = getattr(readline, 'add_history', None)
|
||||
if _add_history is None:
|
||||
raise ImportError('readline missing add_history')
|
||||
READLINE_AVAILABLE = True
|
||||
except ImportError:
|
||||
# readline not available on Windows by default
|
||||
_add_history = None
|
||||
READLINE_AVAILABLE = False
|
||||
|
||||
|
||||
@@ -294,8 +297,8 @@ def save_user_config(config: dict[str, Any]) -> None:
|
||||
|
||||
# Save to separate history file
|
||||
history_file = CONFIG.BROWSER_USE_CONFIG_DIR / 'command_history.json'
|
||||
with open(history_file, 'w') as f:
|
||||
json.dump(history, f, indent=2)
|
||||
with open(history_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(history, f, indent=2, ensure_ascii=False)
|
||||
|
||||
|
||||
def update_config_with_click_args(config: dict[str, Any], ctx: click.Context) -> dict[str, Any]:
|
||||
@@ -341,12 +344,11 @@ def update_config_with_click_args(config: dict[str, Any], ctx: click.Context) ->
|
||||
|
||||
def setup_readline_history(history: list[str]) -> None:
|
||||
"""Set up readline with command history."""
|
||||
if not READLINE_AVAILABLE:
|
||||
if not _add_history:
|
||||
return
|
||||
|
||||
# Add history items to readline
|
||||
for item in history:
|
||||
readline.add_history(item)
|
||||
_add_history(item)
|
||||
|
||||
|
||||
def get_llm(config: dict[str, Any]):
|
||||
@@ -694,8 +696,6 @@ class BrowserUseApp(App):
|
||||
'trafilatura.htmlprocessing',
|
||||
'trafilatura',
|
||||
'groq',
|
||||
'portalocker',
|
||||
'portalocker.utils',
|
||||
]:
|
||||
third_party = logging.getLogger(logger_name)
|
||||
third_party.setLevel(logging.ERROR)
|
||||
@@ -720,9 +720,9 @@ class BrowserUseApp(App):
|
||||
# Step 2: Set up input history
|
||||
logger.debug('Setting up readline history...')
|
||||
try:
|
||||
if READLINE_AVAILABLE and self.task_history:
|
||||
if READLINE_AVAILABLE and self.task_history and _add_history:
|
||||
for item in self.task_history:
|
||||
readline.add_history(item)
|
||||
_add_history(item)
|
||||
logger.debug(f'Added {len(self.task_history)} items to readline history')
|
||||
else:
|
||||
logger.debug('No readline history to set up')
|
||||
@@ -1129,7 +1129,7 @@ class BrowserUseApp(App):
|
||||
|
||||
# Exit the application
|
||||
self.exit()
|
||||
print('\nTry running tasks on our cloud: https://browser-use.com')
|
||||
print('\nTry running tasks on our cloud: https://browser-use.com?utm_source=oss&utm_medium=cli')
|
||||
|
||||
def compose(self) -> ComposeResult:
|
||||
"""Create the UI layout."""
|
||||
@@ -1144,7 +1144,11 @@ class BrowserUseApp(App):
|
||||
with Container(id='links-panel'):
|
||||
with HorizontalGroup(classes='link-row'):
|
||||
yield Static('Run at scale on cloud: [blink]☁️[/] ', markup=True, classes='link-label')
|
||||
yield Link('https://browser-use.com', url='https://browser-use.com', classes='link-white link-url')
|
||||
yield Link(
|
||||
'https://browser-use.com',
|
||||
url='https://browser-use.com?utm_source=oss&utm_medium=cli',
|
||||
classes='link-white link-url',
|
||||
)
|
||||
|
||||
yield Static('') # Empty line
|
||||
|
||||
@@ -2224,7 +2228,7 @@ def _run_template_generation(template: str, output: str | None, force: bool):
|
||||
click.echo(' uv pip install browser-use')
|
||||
click.echo(' 2. Set up your API key in .env file or environment:')
|
||||
click.echo(' BROWSER_USE_API_KEY=your-key')
|
||||
click.echo(' (Get your key at https://cloud.browser-use.com/new-api-key)')
|
||||
click.echo(' (Get your key at https://cloud.browser-use.com/new-api-key?utm_source=oss&utm_medium=cli)')
|
||||
click.echo(' 3. Run your script:')
|
||||
click.echo(f' python {output_path.name}')
|
||||
else:
|
||||
@@ -2353,7 +2357,7 @@ def init(
|
||||
click.echo(' uv pip install browser-use')
|
||||
click.echo(' 2. Set up your API key in .env file or environment:')
|
||||
click.echo(' BROWSER_USE_API_KEY=your-key')
|
||||
click.echo(' (Get your key at https://cloud.browser-use.com/new-api-key)')
|
||||
click.echo(' (Get your key at https://cloud.browser-use.com/new-api-key?utm_source=oss&utm_medium=cli)')
|
||||
click.echo(' 3. Run your script:')
|
||||
click.echo(f' python {output_path.name}')
|
||||
else:
|
||||
|
||||
@@ -1,84 +0,0 @@
|
||||
# Code-Use Mode
|
||||
|
||||
Code-Use Mode is a Notebook-like code execution system for browser automation. Instead of the agent choosing from a predefined set of actions, the LLM writes Python code that gets executed in a persistent namespace with all browser control functions available.
|
||||
|
||||
## Problem Solved
|
||||
|
||||
**Code-Use Mode solves this** by giving the agent a Python execution environment where it can:
|
||||
- Store extracted data in variables
|
||||
- Loop through pages programmatically
|
||||
- Combine results from multiple extractions
|
||||
- Process and filter data before saving
|
||||
- Use conditional logic to decide what to do next
|
||||
- Output more tokens than the LLM writes
|
||||
|
||||
### Namespace
|
||||
The namespace is initialized with:
|
||||
|
||||
**Browser Control Functions:**
|
||||
- `navigate(url)` - Navigate to a URL
|
||||
- `click(index)` - Click an element
|
||||
- `input(index, text)` - Type text
|
||||
- `scroll(down, pages)` - Scroll the page
|
||||
- `upload_file(path)` - Upload a file
|
||||
- `evaluate(code, variables={})` - Execute JavaScript
|
||||
- `done(text, success, files_to_display=[])` - Mark task complete
|
||||
|
||||
**Custom evaluate() Function:**
|
||||
```python
|
||||
# Returns values directly, not wrapped in ActionResult
|
||||
result = await evaluate('''
|
||||
(function(){
|
||||
return Array.from(document.querySelectorAll('.product')).map(p => ({
|
||||
name: p.querySelector('.name').textContent,
|
||||
price: p.querySelector('.price').textContent
|
||||
}))
|
||||
})()
|
||||
''')
|
||||
# result is now a list of dicts, ready to use!
|
||||
```
|
||||
|
||||
**Utilities:**
|
||||
The agent can just utilize packages like `requests`, `pandas`, `numpy`, `matplotlib`, `BeautifulSoup`, `tabulate`, `csv`, ...
|
||||
|
||||
The agent will write code like:
|
||||
|
||||
### Step 1: Navigate
|
||||
```python
|
||||
# Navigate to first page
|
||||
await navigate(url='https://example.com/products?page=1')
|
||||
```
|
||||
### Step 2 analyse our DOM state and write code to extract the data we need.
|
||||
|
||||
```js extract_products
|
||||
(function(){
|
||||
return Array.from(document.querySelectorAll('.product')).map(p => ({
|
||||
name: p.querySelector('.name')?.textContent || '',
|
||||
price: p.querySelector('.price')?.textContent || '',
|
||||
rating: p.querySelector('.rating')?.textContent || ''
|
||||
}))
|
||||
})()
|
||||
```
|
||||
|
||||
```python
|
||||
# Extract products using JavaScript
|
||||
all_products = []
|
||||
for page in range(1, 6):
|
||||
if page > 1:
|
||||
await navigate(url=f'https://example.com/products?page={page}')
|
||||
|
||||
products = await evaluate(extract_products)
|
||||
all_products.extend(products)
|
||||
print(f'Page {page}: Found {len(products)} products')
|
||||
```
|
||||
|
||||
### Step 3: Analyse output & save the data to a file
|
||||
```python
|
||||
# Save to file
|
||||
import json
|
||||
with open('products.json', 'w') as f:
|
||||
json.dump(all_products, f, indent=2)
|
||||
|
||||
print(f'Total: {len(all_products)} products saved to products.json')
|
||||
await done(text='Extracted all products', success=True, files_to_display=['products.json'])
|
||||
```
|
||||
@@ -1,16 +0,0 @@
|
||||
"""Code-use mode - Jupyter notebook-like code execution for browser automation."""
|
||||
|
||||
from browser_use.code_use.namespace import create_namespace
|
||||
from browser_use.code_use.notebook_export import export_to_ipynb, session_to_python_script
|
||||
from browser_use.code_use.service import CodeAgent
|
||||
from browser_use.code_use.views import CodeCell, ExecutionStatus, NotebookSession
|
||||
|
||||
__all__ = [
|
||||
'CodeAgent',
|
||||
'create_namespace',
|
||||
'export_to_ipynb',
|
||||
'session_to_python_script',
|
||||
'CodeCell',
|
||||
'ExecutionStatus',
|
||||
'NotebookSession',
|
||||
]
|
||||
@@ -1,190 +0,0 @@
|
||||
"""Browser state formatting helpers for code-use agent."""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from browser_use.browser.session import BrowserSession
|
||||
from browser_use.browser.views import BrowserStateSummary
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def format_browser_state_for_llm(
|
||||
state: BrowserStateSummary,
|
||||
namespace: dict[str, Any],
|
||||
browser_session: BrowserSession,
|
||||
) -> str:
|
||||
"""
|
||||
Format browser state summary for LLM consumption in code-use mode.
|
||||
|
||||
Args:
|
||||
state: Browser state summary from browser_session.get_browser_state_summary()
|
||||
namespace: The code execution namespace (for showing available variables)
|
||||
browser_session: Browser session for additional checks (jQuery, etc.)
|
||||
|
||||
Returns:
|
||||
Formatted browser state text for LLM
|
||||
"""
|
||||
assert state.dom_state is not None
|
||||
dom_state = state.dom_state
|
||||
|
||||
# Use eval_representation (compact serializer for code agents)
|
||||
dom_html = dom_state.eval_representation()
|
||||
if dom_html == '':
|
||||
dom_html = 'Empty DOM tree (you might have to wait for the page to load)'
|
||||
|
||||
# Format with URL and title header
|
||||
lines = ['## Browser State']
|
||||
lines.append(f'**URL:** {state.url}')
|
||||
lines.append(f'**Title:** {state.title}')
|
||||
lines.append('')
|
||||
|
||||
# Add tabs info if multiple tabs exist
|
||||
if len(state.tabs) > 1:
|
||||
lines.append('**Tabs:**')
|
||||
current_target_candidates = []
|
||||
# Find tabs that match current URL and title
|
||||
for tab in state.tabs:
|
||||
if tab.url == state.url and tab.title == state.title:
|
||||
current_target_candidates.append(tab.target_id)
|
||||
current_target_id = current_target_candidates[0] if len(current_target_candidates) == 1 else None
|
||||
|
||||
for tab in state.tabs:
|
||||
is_current = ' (current)' if tab.target_id == current_target_id else ''
|
||||
lines.append(f' - Tab {tab.target_id[-4:]}: {tab.url} - {tab.title[:30]}{is_current}')
|
||||
lines.append('')
|
||||
|
||||
# Add page scroll info if available
|
||||
if state.page_info:
|
||||
pi = state.page_info
|
||||
pages_above = pi.pixels_above / pi.viewport_height if pi.viewport_height > 0 else 0
|
||||
pages_below = pi.pixels_below / pi.viewport_height if pi.viewport_height > 0 else 0
|
||||
total_pages = pi.page_height / pi.viewport_height if pi.viewport_height > 0 else 0
|
||||
|
||||
scroll_info = f'**Page:** {pages_above:.1f} pages above, {pages_below:.1f} pages below'
|
||||
if total_pages > 1.2: # Only mention total if significantly > 1 page
|
||||
scroll_info += f', {total_pages:.1f} total pages'
|
||||
lines.append(scroll_info)
|
||||
lines.append('')
|
||||
|
||||
# Add network loading info if there are pending requests
|
||||
if state.pending_network_requests:
|
||||
# Remove duplicates by URL (keep first occurrence with earliest duration)
|
||||
seen_urls = set()
|
||||
unique_requests = []
|
||||
for req in state.pending_network_requests:
|
||||
if req.url not in seen_urls:
|
||||
seen_urls.add(req.url)
|
||||
unique_requests.append(req)
|
||||
|
||||
lines.append(f'**⏳ Loading:** {len(unique_requests)} network requests still loading')
|
||||
# Show up to 20 unique requests with truncated URLs (30 chars max)
|
||||
for req in unique_requests[:20]:
|
||||
duration_sec = req.loading_duration_ms / 1000
|
||||
url_display = req.url if len(req.url) <= 30 else req.url[:27] + '...'
|
||||
logger.info(f' - [{duration_sec:.1f}s] {url_display}')
|
||||
lines.append(f' - [{duration_sec:.1f}s] {url_display}')
|
||||
if len(unique_requests) > 20:
|
||||
lines.append(f' - ... and {len(unique_requests) - 20} more')
|
||||
lines.append('**Tip:** Content may still be loading. Consider waiting with `await asyncio.sleep(1)` if data is missing.')
|
||||
lines.append('')
|
||||
|
||||
# Add available variables and functions BEFORE DOM structure
|
||||
# Show useful utilities (json, asyncio, etc.) and user-defined vars, but hide system objects
|
||||
skip_vars = {
|
||||
'browser',
|
||||
'file_system', # System objects
|
||||
'np',
|
||||
'pd',
|
||||
'plt',
|
||||
'numpy',
|
||||
'pandas',
|
||||
'matplotlib',
|
||||
'requests',
|
||||
'BeautifulSoup',
|
||||
'bs4',
|
||||
'pypdf',
|
||||
'PdfReader',
|
||||
'wait',
|
||||
}
|
||||
|
||||
# Highlight code block variables separately from regular variables
|
||||
code_block_vars = []
|
||||
regular_vars = []
|
||||
tracked_code_blocks = namespace.get('_code_block_vars', set())
|
||||
for name in namespace.keys():
|
||||
# Skip private vars and system objects/actions
|
||||
if not name.startswith('_') and name not in skip_vars:
|
||||
if name in tracked_code_blocks:
|
||||
code_block_vars.append(name)
|
||||
else:
|
||||
regular_vars.append(name)
|
||||
|
||||
# Sort for consistent display
|
||||
available_vars_sorted = sorted(regular_vars)
|
||||
code_block_vars_sorted = sorted(code_block_vars)
|
||||
|
||||
# Build available line with code blocks and variables
|
||||
parts = []
|
||||
if code_block_vars_sorted:
|
||||
# Show detailed info for code block variables
|
||||
code_block_details = []
|
||||
for var_name in code_block_vars_sorted:
|
||||
value = namespace.get(var_name)
|
||||
if value is not None:
|
||||
type_name = type(value).__name__
|
||||
value_str = str(value) if not isinstance(value, str) else value
|
||||
|
||||
# Check if it's a function (starts with "(function" or "(async function")
|
||||
is_function = value_str.strip().startswith('(function') or value_str.strip().startswith('(async function')
|
||||
|
||||
if is_function:
|
||||
# For functions, only show name and type
|
||||
detail = f'{var_name}({type_name})'
|
||||
else:
|
||||
# For non-functions, show first and last 20 chars
|
||||
first_20 = value_str[:20].replace('\n', '\\n').replace('\t', '\\t')
|
||||
last_20 = value_str[-20:].replace('\n', '\\n').replace('\t', '\\t') if len(value_str) > 20 else ''
|
||||
|
||||
if last_20 and first_20 != last_20:
|
||||
detail = f'{var_name}({type_name}): "{first_20}...{last_20}"'
|
||||
else:
|
||||
detail = f'{var_name}({type_name}): "{first_20}"'
|
||||
code_block_details.append(detail)
|
||||
|
||||
parts.append(f'**Code block variables:** {" | ".join(code_block_details)}')
|
||||
if available_vars_sorted:
|
||||
parts.append(f'**Variables:** {", ".join(available_vars_sorted)}')
|
||||
|
||||
lines.append(f'**Available:** {" | ".join(parts)}')
|
||||
lines.append('')
|
||||
|
||||
# Add DOM structure
|
||||
lines.append('**DOM Structure:**')
|
||||
|
||||
# Add scroll position hints for DOM
|
||||
if state.page_info:
|
||||
pi = state.page_info
|
||||
pages_above = pi.pixels_above / pi.viewport_height if pi.viewport_height > 0 else 0
|
||||
pages_below = pi.pixels_below / pi.viewport_height if pi.viewport_height > 0 else 0
|
||||
|
||||
if pages_above > 0:
|
||||
dom_html = f'... {pages_above:.1f} pages above \n{dom_html}'
|
||||
else:
|
||||
dom_html = '[Start of page]\n' + dom_html
|
||||
|
||||
if pages_below <= 0:
|
||||
dom_html += '\n[End of page]'
|
||||
|
||||
# Truncate DOM if too long and notify LLM
|
||||
max_dom_length = 60000
|
||||
if len(dom_html) > max_dom_length:
|
||||
lines.append(dom_html[:max_dom_length])
|
||||
lines.append(
|
||||
f'\n[DOM truncated after {max_dom_length} characters. Full page contains {len(dom_html)} characters total. Use evaluate to explore more.]'
|
||||
)
|
||||
else:
|
||||
lines.append(dom_html)
|
||||
|
||||
browser_state_text = '\n'.join(lines)
|
||||
return browser_state_text
|
||||
@@ -1,665 +0,0 @@
|
||||
"""Namespace initialization for code-use mode.
|
||||
|
||||
This module creates a namespace with all browser tools available as functions,
|
||||
similar to a Jupyter notebook environment.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import csv
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
from browser_use.browser import BrowserSession
|
||||
from browser_use.filesystem.file_system import FileSystem
|
||||
from browser_use.llm.base import BaseChatModel
|
||||
from browser_use.tools.service import CodeAgentTools, Tools
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Try to import optional data science libraries
|
||||
try:
|
||||
import numpy as np # type: ignore
|
||||
|
||||
NUMPY_AVAILABLE = True
|
||||
except ImportError:
|
||||
NUMPY_AVAILABLE = False
|
||||
|
||||
try:
|
||||
import pandas as pd # type: ignore
|
||||
|
||||
PANDAS_AVAILABLE = True
|
||||
except ImportError:
|
||||
PANDAS_AVAILABLE = False
|
||||
|
||||
try:
|
||||
import matplotlib.pyplot as plt # type: ignore
|
||||
|
||||
MATPLOTLIB_AVAILABLE = True
|
||||
except ImportError:
|
||||
MATPLOTLIB_AVAILABLE = False
|
||||
|
||||
try:
|
||||
from bs4 import BeautifulSoup # type: ignore
|
||||
|
||||
BS4_AVAILABLE = True
|
||||
except ImportError:
|
||||
BS4_AVAILABLE = False
|
||||
|
||||
try:
|
||||
from pypdf import PdfReader # type: ignore
|
||||
|
||||
PYPDF_AVAILABLE = True
|
||||
except ImportError:
|
||||
PYPDF_AVAILABLE = False
|
||||
|
||||
try:
|
||||
from tabulate import tabulate # type: ignore
|
||||
|
||||
TABULATE_AVAILABLE = True
|
||||
except ImportError:
|
||||
TABULATE_AVAILABLE = False
|
||||
|
||||
|
||||
def _strip_js_comments(js_code: str) -> str:
|
||||
"""
|
||||
Remove JavaScript comments before CDP evaluation.
|
||||
CDP's Runtime.evaluate doesn't handle comments in all contexts.
|
||||
|
||||
Args:
|
||||
js_code: JavaScript code potentially containing comments
|
||||
|
||||
Returns:
|
||||
JavaScript code with comments stripped
|
||||
"""
|
||||
# Remove multi-line comments (/* ... */)
|
||||
js_code = re.sub(r'/\*.*?\*/', '', js_code, flags=re.DOTALL)
|
||||
|
||||
# Remove single-line comments - only lines that START with // (after whitespace)
|
||||
# This avoids breaking XPath strings, URLs, regex patterns, etc.
|
||||
js_code = re.sub(r'^\s*//.*$', '', js_code, flags=re.MULTILINE)
|
||||
|
||||
return js_code
|
||||
|
||||
|
||||
class EvaluateError(Exception):
|
||||
"""Special exception raised by evaluate() to stop Python execution immediately."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
async def validate_task_completion(
|
||||
task: str,
|
||||
output: str | None,
|
||||
llm: BaseChatModel,
|
||||
) -> tuple[bool, str]:
|
||||
"""
|
||||
Validate if task is truly complete by asking LLM without system prompt or history.
|
||||
|
||||
Args:
|
||||
task: The original task description
|
||||
output: The output from the done() call
|
||||
llm: The LLM to use for validation
|
||||
|
||||
Returns:
|
||||
Tuple of (is_complete, reasoning)
|
||||
"""
|
||||
from browser_use.llm.messages import UserMessage
|
||||
|
||||
# Build validation prompt
|
||||
validation_prompt = f"""You are a task completion validator. Analyze if the agent has truly completed the user's task.
|
||||
|
||||
**Original Task:**
|
||||
{task}
|
||||
|
||||
**Agent's Output:**
|
||||
{output[:100000] if output else '(No output provided)'}
|
||||
|
||||
**Your Task:**
|
||||
Determine if the agent has successfully completed the user's task. Consider:
|
||||
1. Has the agent delivered what the user requested?
|
||||
2. If data extraction was requested, is there actual data?
|
||||
3. If the task is impossible (e.g., localhost website, login required but no credentials), is it truly impossible?
|
||||
4. Could the agent continue and make meaningful progress?
|
||||
|
||||
**Response Format:**
|
||||
Reasoning: [Your analysis of whether the task is complete]
|
||||
Verdict: [YES or NO]
|
||||
|
||||
YES = Task is complete OR truly impossible to complete
|
||||
NO = Agent should continue working"""
|
||||
|
||||
try:
|
||||
# Call LLM with just the validation prompt (no system prompt, no history)
|
||||
response = await llm.ainvoke([UserMessage(content=validation_prompt)])
|
||||
response_text = response.completion
|
||||
|
||||
# Parse the response
|
||||
reasoning = ''
|
||||
verdict = 'NO'
|
||||
|
||||
# Extract reasoning and verdict
|
||||
lines = response_text.split('\n')
|
||||
for line in lines:
|
||||
if line.strip().lower().startswith('reasoning:'):
|
||||
reasoning = line.split(':', 1)[1].strip()
|
||||
elif line.strip().lower().startswith('verdict:'):
|
||||
verdict_text = line.split(':', 1)[1].strip().upper()
|
||||
if 'YES' in verdict_text:
|
||||
verdict = 'YES'
|
||||
elif 'NO' in verdict_text:
|
||||
verdict = 'NO'
|
||||
|
||||
# If we couldn't parse, try to find YES/NO in the response
|
||||
if not reasoning:
|
||||
reasoning = response_text
|
||||
|
||||
is_complete = verdict == 'YES'
|
||||
|
||||
logger.info(f'Task validation: {verdict}')
|
||||
logger.debug(f'Validation reasoning: {reasoning}')
|
||||
|
||||
return is_complete, reasoning
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f'Failed to validate task completion: {e}')
|
||||
# On error, assume the agent knows what they're doing
|
||||
return True, f'Validation failed: {e}'
|
||||
|
||||
|
||||
async def evaluate(code: str, browser_session: BrowserSession) -> Any:
|
||||
"""
|
||||
Execute JavaScript code in the browser and return the result.
|
||||
|
||||
Args:
|
||||
code: JavaScript code to execute (must be wrapped in IIFE)
|
||||
|
||||
Returns:
|
||||
The result of the JavaScript execution
|
||||
|
||||
Raises:
|
||||
EvaluateError: If JavaScript execution fails. This stops Python execution immediately.
|
||||
|
||||
Example:
|
||||
result = await evaluate('''
|
||||
(function(){
|
||||
return Array.from(document.querySelectorAll('.product')).map(p => ({
|
||||
name: p.querySelector('.name').textContent,
|
||||
price: p.querySelector('.price').textContent
|
||||
}))
|
||||
})()
|
||||
''')
|
||||
"""
|
||||
# Strip JavaScript comments before CDP evaluation (CDP doesn't support them in all contexts)
|
||||
code = _strip_js_comments(code)
|
||||
|
||||
cdp_session = await browser_session.get_or_create_cdp_session()
|
||||
|
||||
try:
|
||||
# Execute JavaScript with proper error handling
|
||||
result = await cdp_session.cdp_client.send.Runtime.evaluate(
|
||||
params={'expression': code, 'returnByValue': True, 'awaitPromise': True},
|
||||
session_id=cdp_session.session_id,
|
||||
)
|
||||
|
||||
# Check for JavaScript execution errors
|
||||
if result.get('exceptionDetails'):
|
||||
exception = result['exceptionDetails']
|
||||
error_text = exception.get('text', 'Unknown error')
|
||||
|
||||
# Try to get more details from the exception
|
||||
error_details = []
|
||||
if 'exception' in exception:
|
||||
exc_obj = exception['exception']
|
||||
if 'description' in exc_obj:
|
||||
error_details.append(exc_obj['description'])
|
||||
elif 'value' in exc_obj:
|
||||
error_details.append(str(exc_obj['value']))
|
||||
|
||||
# Build comprehensive error message with full CDP context
|
||||
error_msg = f'JavaScript execution error: {error_text}'
|
||||
if error_details:
|
||||
error_msg += f'\nDetails: {" | ".join(error_details)}'
|
||||
|
||||
# Raise special exception that will stop Python execution immediately
|
||||
raise EvaluateError(error_msg)
|
||||
|
||||
# Get the result data
|
||||
result_data = result.get('result', {})
|
||||
|
||||
# Get the actual value
|
||||
value = result_data.get('value')
|
||||
|
||||
# Return the value directly
|
||||
if value is None:
|
||||
return None if 'value' in result_data else 'undefined'
|
||||
elif isinstance(value, (dict, list)):
|
||||
# Complex objects - already deserialized by returnByValue
|
||||
return value
|
||||
else:
|
||||
# Primitive values
|
||||
return value
|
||||
|
||||
except EvaluateError:
|
||||
# Re-raise EvaluateError as-is to stop Python execution
|
||||
raise
|
||||
except Exception as e:
|
||||
# Wrap other exceptions in EvaluateError
|
||||
raise EvaluateError(f'Failed to execute JavaScript: {type(e).__name__}: {e}') from e
|
||||
|
||||
|
||||
def create_namespace(
|
||||
browser_session: BrowserSession,
|
||||
tools: Tools | None = None,
|
||||
page_extraction_llm: BaseChatModel | None = None,
|
||||
file_system: FileSystem | None = None,
|
||||
available_file_paths: list[str] | None = None,
|
||||
sensitive_data: dict[str, str | dict[str, str]] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Create a namespace with all browser tools available as functions.
|
||||
|
||||
This function creates a dictionary of functions that can be used to interact
|
||||
with the browser, similar to a Jupyter notebook environment.
|
||||
|
||||
Args:
|
||||
browser_session: The browser session to use
|
||||
tools: Optional Tools instance (will create default if not provided)
|
||||
page_extraction_llm: Optional LLM for page extraction
|
||||
file_system: Optional file system for file operations
|
||||
available_file_paths: Optional list of available file paths
|
||||
sensitive_data: Optional sensitive data dictionary
|
||||
|
||||
Returns:
|
||||
Dictionary containing all available functions and objects
|
||||
|
||||
Example:
|
||||
namespace = create_namespace(browser_session)
|
||||
await namespace['navigate'](url='https://google.com')
|
||||
result = await namespace['evaluate']('document.title')
|
||||
"""
|
||||
if tools is None:
|
||||
# Use CodeAgentTools with default exclusions optimized for code-use mode
|
||||
# For code-use, we keep: navigate, evaluate, wait, done
|
||||
# and exclude: most browser interaction, file system actions (use Python instead)
|
||||
tools = CodeAgentTools()
|
||||
|
||||
if available_file_paths is None:
|
||||
available_file_paths = []
|
||||
|
||||
namespace: dict[str, Any] = {
|
||||
# Core objects
|
||||
'browser': browser_session,
|
||||
'file_system': file_system,
|
||||
# Standard library modules (always available)
|
||||
'json': json,
|
||||
'asyncio': asyncio,
|
||||
'Path': Path,
|
||||
'csv': csv,
|
||||
're': re,
|
||||
'datetime': datetime,
|
||||
'requests': requests,
|
||||
}
|
||||
|
||||
# Add optional data science libraries if available
|
||||
if NUMPY_AVAILABLE:
|
||||
namespace['np'] = np
|
||||
namespace['numpy'] = np
|
||||
if PANDAS_AVAILABLE:
|
||||
namespace['pd'] = pd
|
||||
namespace['pandas'] = pd
|
||||
if MATPLOTLIB_AVAILABLE:
|
||||
namespace['plt'] = plt
|
||||
namespace['matplotlib'] = plt
|
||||
if BS4_AVAILABLE:
|
||||
namespace['BeautifulSoup'] = BeautifulSoup
|
||||
namespace['bs4'] = BeautifulSoup
|
||||
if PYPDF_AVAILABLE:
|
||||
namespace['PdfReader'] = PdfReader
|
||||
namespace['pypdf'] = PdfReader
|
||||
if TABULATE_AVAILABLE:
|
||||
namespace['tabulate'] = tabulate
|
||||
|
||||
# Track failed evaluate() calls to detect repeated failed approaches
|
||||
if '_evaluate_failures' not in namespace:
|
||||
namespace['_evaluate_failures'] = []
|
||||
|
||||
# Add custom evaluate function that returns values directly
|
||||
async def evaluate_wrapper(
|
||||
code: str | None = None, variables: dict[str, Any] | None = None, *_args: Any, **kwargs: Any
|
||||
) -> Any:
|
||||
# Handle both positional and keyword argument styles
|
||||
if code is None:
|
||||
# Check if code was passed as keyword arg
|
||||
code = kwargs.get('code', kwargs.get('js_code', kwargs.get('expression', '')))
|
||||
# Extract variables if passed as kwarg
|
||||
if variables is None:
|
||||
variables = kwargs.get('variables')
|
||||
|
||||
if not code:
|
||||
raise ValueError('No JavaScript code provided to evaluate()')
|
||||
|
||||
# Inject variables if provided
|
||||
if variables:
|
||||
vars_json = json.dumps(variables)
|
||||
stripped = code.strip()
|
||||
|
||||
# Check if code is already a function expression expecting params
|
||||
# Pattern: (function(params) { ... }) or (async function(params) { ... })
|
||||
if re.match(r'\((?:async\s+)?function\s*\(\s*\w+\s*\)', stripped):
|
||||
# Already expects params, wrap to call it with our variables
|
||||
code = f'(function(){{ const params = {vars_json}; return {stripped}(params); }})()'
|
||||
else:
|
||||
# Not a parameterized function, inject params in scope
|
||||
# Check if already wrapped in IIFE (including arrow function IIFEs)
|
||||
is_wrapped = (
|
||||
(stripped.startswith('(function()') and '})()' in stripped[-10:])
|
||||
or (stripped.startswith('(async function()') and '})()' in stripped[-10:])
|
||||
or (stripped.startswith('(() =>') and ')()' in stripped[-10:])
|
||||
or (stripped.startswith('(async () =>') and ')()' in stripped[-10:])
|
||||
)
|
||||
if is_wrapped:
|
||||
# Already wrapped, inject params at the start
|
||||
# Try to match regular function IIFE
|
||||
match = re.match(r'(\((?:async\s+)?function\s*\(\s*\)\s*\{)', stripped)
|
||||
if match:
|
||||
prefix = match.group(1)
|
||||
rest = stripped[len(prefix) :]
|
||||
code = f'{prefix} const params = {vars_json}; {rest}'
|
||||
else:
|
||||
# Try to match arrow function IIFE
|
||||
# Patterns: (() => expr)() or (() => { ... })() or (async () => ...)()
|
||||
arrow_match = re.match(r'(\((?:async\s+)?\(\s*\)\s*=>\s*\{)', stripped)
|
||||
if arrow_match:
|
||||
# Arrow function with block body: (() => { ... })()
|
||||
prefix = arrow_match.group(1)
|
||||
rest = stripped[len(prefix) :]
|
||||
code = f'{prefix} const params = {vars_json}; {rest}'
|
||||
else:
|
||||
# Arrow function with expression body or fallback: wrap in outer function
|
||||
code = f'(function(){{ const params = {vars_json}; return {stripped}; }})()'
|
||||
else:
|
||||
# Not wrapped, wrap with params
|
||||
code = f'(function(){{ const params = {vars_json}; {code} }})()'
|
||||
# Skip auto-wrap below
|
||||
return await evaluate(code, browser_session)
|
||||
|
||||
# Auto-wrap in IIFE if not already wrapped (and no variables were injected)
|
||||
if not variables:
|
||||
stripped = code.strip()
|
||||
# Check for regular function IIFEs, async function IIFEs, and arrow function IIFEs
|
||||
is_wrapped = (
|
||||
(stripped.startswith('(function()') and '})()' in stripped[-10:])
|
||||
or (stripped.startswith('(async function()') and '})()' in stripped[-10:])
|
||||
or (stripped.startswith('(() =>') and ')()' in stripped[-10:])
|
||||
or (stripped.startswith('(async () =>') and ')()' in stripped[-10:])
|
||||
)
|
||||
if not is_wrapped:
|
||||
code = f'(function(){{{code}}})()'
|
||||
|
||||
# Execute and track failures
|
||||
try:
|
||||
result = await evaluate(code, browser_session)
|
||||
|
||||
# Print result structure for debugging
|
||||
if isinstance(result, list) and result and isinstance(result[0], dict):
|
||||
result_preview = f'list of dicts - len={len(result)}, example 1:\n'
|
||||
sample_result = result[0]
|
||||
for key, value in list(sample_result.items())[:10]:
|
||||
value_str = str(value)[:10] if not isinstance(value, (int, float, bool, type(None))) else str(value)
|
||||
result_preview += f' {key}: {value_str}...\n'
|
||||
if len(sample_result) > 10:
|
||||
result_preview += f' ... {len(sample_result) - 10} more keys'
|
||||
print(result_preview)
|
||||
|
||||
elif isinstance(result, list):
|
||||
if len(result) == 0:
|
||||
print('type=list, len=0')
|
||||
else:
|
||||
result_preview = str(result)[:100]
|
||||
print(f'type=list, len={len(result)}, preview={result_preview}...')
|
||||
elif isinstance(result, dict):
|
||||
result_preview = f'type=dict, len={len(result)}, sample keys:\n'
|
||||
for key, value in list(result.items())[:10]:
|
||||
value_str = str(value)[:10] if not isinstance(value, (int, float, bool, type(None))) else str(value)
|
||||
result_preview += f' {key}: {value_str}...\n'
|
||||
if len(result) > 10:
|
||||
result_preview += f' ... {len(result) - 10} more keys'
|
||||
print(result_preview)
|
||||
|
||||
else:
|
||||
print(f'type={type(result).__name__}, value={repr(result)[:50]}')
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
# Track errors for pattern detection
|
||||
namespace['_evaluate_failures'].append({'error': str(e), 'type': 'exception'})
|
||||
raise
|
||||
|
||||
namespace['evaluate'] = evaluate_wrapper
|
||||
|
||||
# Add get_selector_from_index helper for code_use mode
|
||||
async def get_selector_from_index_wrapper(index: int) -> str:
|
||||
"""
|
||||
Get the CSS selector for an element by its interactive index.
|
||||
|
||||
This allows you to use the element's index from the browser state to get
|
||||
its CSS selector for use in JavaScript evaluate() calls.
|
||||
|
||||
Args:
|
||||
index: The interactive index from the browser state (e.g., [123])
|
||||
|
||||
Returns:
|
||||
str: CSS selector that can be used in JavaScript
|
||||
|
||||
Example:
|
||||
selector = await get_selector_from_index(123)
|
||||
await evaluate(f'''
|
||||
(function(){{
|
||||
const el = document.querySelector({json.dumps(selector)});
|
||||
if (el) el.click();
|
||||
}})()
|
||||
''')
|
||||
"""
|
||||
from browser_use.dom.utils import generate_css_selector_for_element
|
||||
|
||||
# Get element by index from browser session
|
||||
node = await browser_session.get_element_by_index(index)
|
||||
if node is None:
|
||||
msg = f'Element index {index} not available - page may have changed. Try refreshing browser state.'
|
||||
logger.warning(f'⚠️ {msg}')
|
||||
raise RuntimeError(msg)
|
||||
|
||||
# Check if element is in shadow DOM
|
||||
shadow_hosts = []
|
||||
current = node.parent_node
|
||||
while current:
|
||||
if current.shadow_root_type is not None:
|
||||
# This is a shadow host
|
||||
host_tag = current.tag_name.lower()
|
||||
host_id = current.attributes.get('id', '') if current.attributes else ''
|
||||
host_desc = f'{host_tag}#{host_id}' if host_id else host_tag
|
||||
shadow_hosts.insert(0, host_desc)
|
||||
current = current.parent_node
|
||||
|
||||
# Check if in iframe
|
||||
in_iframe = False
|
||||
current = node.parent_node
|
||||
while current:
|
||||
if current.tag_name.lower() == 'iframe':
|
||||
in_iframe = True
|
||||
break
|
||||
current = current.parent_node
|
||||
|
||||
# Use the robust selector generation function (now handles special chars in IDs)
|
||||
selector = generate_css_selector_for_element(node)
|
||||
|
||||
# Log shadow DOM/iframe info if detected
|
||||
if shadow_hosts:
|
||||
shadow_path = ' > '.join(shadow_hosts)
|
||||
logger.info(f'Element [{index}] is inside Shadow DOM. Path: {shadow_path}')
|
||||
logger.info(f' Selector: {selector}')
|
||||
logger.info(
|
||||
f' To access: document.querySelector("{shadow_hosts[0].split("#")[0]}").shadowRoot.querySelector("{selector}")'
|
||||
)
|
||||
if in_iframe:
|
||||
logger.info(f"Element [{index}] is inside an iframe. Regular querySelector won't work.")
|
||||
|
||||
if selector:
|
||||
return selector
|
||||
|
||||
# Fallback: just use tag name if available
|
||||
if node.tag_name:
|
||||
return node.tag_name.lower()
|
||||
|
||||
raise ValueError(f'Could not generate selector for element index {index}')
|
||||
|
||||
namespace['get_selector_from_index'] = get_selector_from_index_wrapper
|
||||
|
||||
# Inject all tools as functions into the namespace
|
||||
# Skip 'evaluate' since we have a custom implementation above
|
||||
for action_name, action in tools.registry.registry.actions.items():
|
||||
if action_name == 'evaluate':
|
||||
continue # Skip - use custom evaluate that returns Python objects directly
|
||||
param_model = action.param_model
|
||||
action_function = action.function
|
||||
|
||||
# Create a closure to capture the current action_name, param_model, and action_function
|
||||
def make_action_wrapper(act_name, par_model, act_func):
|
||||
async def action_wrapper(*args, **kwargs):
|
||||
# Convert positional args to kwargs based on param model fields
|
||||
if args:
|
||||
# Get the field names from the pydantic model
|
||||
field_names = list(par_model.model_fields.keys())
|
||||
for i, arg in enumerate(args):
|
||||
if i < len(field_names):
|
||||
kwargs[field_names[i]] = arg
|
||||
|
||||
# Create params from kwargs
|
||||
try:
|
||||
params = par_model(**kwargs)
|
||||
except Exception as e:
|
||||
raise ValueError(f'Invalid parameters for {act_name}: {e}') from e
|
||||
|
||||
# Special validation for done() - enforce minimal code cell
|
||||
if act_name == 'done':
|
||||
consecutive_failures = namespace.get('_consecutive_errors')
|
||||
if consecutive_failures and consecutive_failures > 3:
|
||||
pass
|
||||
|
||||
else:
|
||||
# Check if there are multiple Python blocks in this response
|
||||
all_blocks = namespace.get('_all_code_blocks', {})
|
||||
python_blocks = [k for k in sorted(all_blocks.keys()) if k.startswith('python_')]
|
||||
|
||||
if len(python_blocks) > 1:
|
||||
msg = (
|
||||
'done() should be the ONLY code block in the response.\n'
|
||||
'You have multiple Python blocks in this response. Consider calling done() in a separate response '
|
||||
'Now verify the last output and if it satisfies the task, call done(), else continue working.'
|
||||
)
|
||||
print(msg)
|
||||
|
||||
# Get the current cell code from namespace (injected by service.py before execution)
|
||||
current_code = namespace.get('_current_cell_code')
|
||||
if current_code and isinstance(current_code, str):
|
||||
# Count non-empty, non-comment lines
|
||||
lines = [line.strip() for line in current_code.strip().split('\n')]
|
||||
code_lines = [line for line in lines if line and not line.startswith('#')]
|
||||
|
||||
# Check if the line above await done() contains an if block
|
||||
done_line_index = -1
|
||||
for i, line in enumerate(reversed(code_lines)):
|
||||
if 'await done()' in line or 'await done(' in line:
|
||||
done_line_index = len(code_lines) - 1 - i
|
||||
break
|
||||
|
||||
has_if_above = False
|
||||
has_else_above = False
|
||||
has_elif_above = False
|
||||
if done_line_index > 0:
|
||||
line_above = code_lines[done_line_index - 1]
|
||||
has_if_above = line_above.strip().startswith('if ') and line_above.strip().endswith(':')
|
||||
has_else_above = line_above.strip().startswith('else:')
|
||||
has_elif_above = line_above.strip().startswith('elif ')
|
||||
if has_if_above or has_else_above or has_elif_above:
|
||||
msg = (
|
||||
'done() should be called individually after verifying the result from any logic.\n'
|
||||
'Consider validating your output first, THEN call done() in a final step without if/else/elif blocks only if the task is truly complete.'
|
||||
)
|
||||
logger.error(msg)
|
||||
print(msg)
|
||||
raise RuntimeError(msg)
|
||||
|
||||
# Build special context
|
||||
special_context = {
|
||||
'browser_session': browser_session,
|
||||
'page_extraction_llm': page_extraction_llm,
|
||||
'available_file_paths': available_file_paths,
|
||||
'has_sensitive_data': False, # Can be handled separately if needed
|
||||
'file_system': file_system,
|
||||
}
|
||||
|
||||
# Execute the action
|
||||
result = await act_func(params=params, **special_context)
|
||||
|
||||
# For code-use mode, we want to return the result directly
|
||||
# not wrapped in ActionResult
|
||||
if hasattr(result, 'extracted_content'):
|
||||
# Special handling for done action - mark task as complete
|
||||
if act_name == 'done' and hasattr(result, 'is_done') and result.is_done:
|
||||
namespace['_task_done'] = True
|
||||
# Store the extracted content as the final result
|
||||
if result.extracted_content:
|
||||
namespace['_task_result'] = result.extracted_content
|
||||
# Store the self-reported success status
|
||||
if hasattr(result, 'success'):
|
||||
namespace['_task_success'] = result.success
|
||||
|
||||
# If there's extracted content, return it
|
||||
if result.extracted_content:
|
||||
return result.extracted_content
|
||||
# If there's an error, raise it
|
||||
if result.error:
|
||||
raise RuntimeError(result.error)
|
||||
# Otherwise return None
|
||||
return None
|
||||
return result
|
||||
|
||||
return action_wrapper
|
||||
|
||||
# Rename 'input' to 'input_text' to avoid shadowing Python's built-in input()
|
||||
namespace_action_name = 'input_text' if action_name == 'input' else action_name
|
||||
|
||||
# Add the wrapper to the namespace
|
||||
namespace[namespace_action_name] = make_action_wrapper(action_name, param_model, action_function)
|
||||
|
||||
return namespace
|
||||
|
||||
|
||||
def get_namespace_documentation(namespace: dict[str, Any]) -> str:
|
||||
"""
|
||||
Generate documentation for all available functions in the namespace.
|
||||
|
||||
Args:
|
||||
namespace: The namespace dictionary
|
||||
|
||||
Returns:
|
||||
Markdown-formatted documentation string
|
||||
"""
|
||||
docs = ['# Available Functions\n']
|
||||
|
||||
# Document each function
|
||||
for name, obj in sorted(namespace.items()):
|
||||
if callable(obj) and not name.startswith('_'):
|
||||
# Get function signature and docstring
|
||||
if hasattr(obj, '__doc__') and obj.__doc__:
|
||||
docs.append(f'## {name}\n')
|
||||
docs.append(f'{obj.__doc__}\n')
|
||||
|
||||
return '\n'.join(docs)
|
||||
@@ -1,276 +0,0 @@
|
||||
"""Export code-use session to Jupyter notebook format."""
|
||||
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from browser_use.code_use.service import CodeAgent
|
||||
|
||||
from .views import CellType, NotebookExport
|
||||
|
||||
|
||||
def export_to_ipynb(agent: CodeAgent, output_path: str | Path) -> Path:
|
||||
"""
|
||||
Export a NotebookSession to a Jupyter notebook (.ipynb) file.
|
||||
Now includes JavaScript code blocks that were stored in the namespace.
|
||||
|
||||
Args:
|
||||
session: The NotebookSession to export
|
||||
output_path: Path where to save the notebook file
|
||||
agent: Optional CodeAgent instance to access namespace for JavaScript blocks
|
||||
|
||||
Returns:
|
||||
Path to the saved notebook file
|
||||
|
||||
Example:
|
||||
```python
|
||||
session = await agent.run()
|
||||
notebook_path = export_to_ipynb(agent, 'my_automation.ipynb')
|
||||
print(f'Notebook saved to {notebook_path}')
|
||||
```
|
||||
"""
|
||||
output_path = Path(output_path)
|
||||
|
||||
# Create notebook structure
|
||||
notebook = NotebookExport(
|
||||
metadata={
|
||||
'kernelspec': {'display_name': 'Python 3', 'language': 'python', 'name': 'python3'},
|
||||
'language_info': {
|
||||
'name': 'python',
|
||||
'version': '3.11.0',
|
||||
'mimetype': 'text/x-python',
|
||||
'codemirror_mode': {'name': 'ipython', 'version': 3},
|
||||
'pygments_lexer': 'ipython3',
|
||||
'nbconvert_exporter': 'python',
|
||||
'file_extension': '.py',
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
# Add setup cell at the beginning with proper type hints
|
||||
setup_code = """import asyncio
|
||||
import json
|
||||
from typing import Any
|
||||
from browser_use import BrowserSession
|
||||
from browser_use.code_use import create_namespace
|
||||
|
||||
# Initialize browser and namespace
|
||||
browser = BrowserSession()
|
||||
await browser.start()
|
||||
|
||||
# Create namespace with all browser control functions
|
||||
namespace: dict[str, Any] = create_namespace(browser)
|
||||
|
||||
# Import all functions into the current namespace
|
||||
globals().update(namespace)
|
||||
|
||||
# Type hints for better IDE support (these are now available globally)
|
||||
# navigate, click, input, evaluate, search, extract, scroll, done, etc.
|
||||
|
||||
print("Browser-use environment initialized!")
|
||||
print("Available functions: navigate, click, input, evaluate, search, extract, done, etc.")"""
|
||||
|
||||
setup_cell = {
|
||||
'cell_type': 'code',
|
||||
'metadata': {},
|
||||
'source': setup_code.split('\n'),
|
||||
'execution_count': None,
|
||||
'outputs': [],
|
||||
}
|
||||
notebook.cells.append(setup_cell)
|
||||
|
||||
# Add JavaScript code blocks as variables FIRST
|
||||
if hasattr(agent, 'namespace') and agent.namespace:
|
||||
# Look for JavaScript variables in the namespace
|
||||
code_block_vars = agent.namespace.get('_code_block_vars', set())
|
||||
|
||||
for var_name in sorted(code_block_vars):
|
||||
var_value = agent.namespace.get(var_name)
|
||||
if isinstance(var_value, str) and var_value.strip():
|
||||
# Check if this looks like JavaScript code
|
||||
# Look for common JS patterns
|
||||
js_patterns = [
|
||||
r'function\s+\w+\s*\(',
|
||||
r'\(\s*function\s*\(\)',
|
||||
r'=>\s*{',
|
||||
r'document\.',
|
||||
r'Array\.from\(',
|
||||
r'\.querySelector',
|
||||
r'\.textContent',
|
||||
r'\.innerHTML',
|
||||
r'return\s+',
|
||||
r'console\.log',
|
||||
r'window\.',
|
||||
r'\.map\(',
|
||||
r'\.filter\(',
|
||||
r'\.forEach\(',
|
||||
]
|
||||
|
||||
is_js = any(re.search(pattern, var_value, re.IGNORECASE) for pattern in js_patterns)
|
||||
|
||||
if is_js:
|
||||
# Create a code cell with the JavaScript variable
|
||||
js_cell = {
|
||||
'cell_type': 'code',
|
||||
'metadata': {},
|
||||
'source': [f'# JavaScript Code Block: {var_name}\n', f'{var_name} = """{var_value}"""'],
|
||||
'execution_count': None,
|
||||
'outputs': [],
|
||||
}
|
||||
notebook.cells.append(js_cell)
|
||||
|
||||
# Convert cells
|
||||
python_cell_count = 0
|
||||
for cell in agent.session.cells:
|
||||
notebook_cell: dict = {
|
||||
'cell_type': cell.cell_type.value,
|
||||
'metadata': {},
|
||||
'source': cell.source.splitlines(keepends=True),
|
||||
}
|
||||
|
||||
if cell.cell_type == CellType.CODE:
|
||||
python_cell_count += 1
|
||||
notebook_cell['execution_count'] = cell.execution_count
|
||||
notebook_cell['outputs'] = []
|
||||
|
||||
# Add output if available
|
||||
if cell.output:
|
||||
notebook_cell['outputs'].append(
|
||||
{
|
||||
'output_type': 'stream',
|
||||
'name': 'stdout',
|
||||
'text': cell.output.split('\n'),
|
||||
}
|
||||
)
|
||||
|
||||
# Add error if available
|
||||
if cell.error:
|
||||
notebook_cell['outputs'].append(
|
||||
{
|
||||
'output_type': 'error',
|
||||
'ename': 'Error',
|
||||
'evalue': cell.error.split('\n')[0] if cell.error else '',
|
||||
'traceback': cell.error.split('\n') if cell.error else [],
|
||||
}
|
||||
)
|
||||
|
||||
# Add browser state as a separate output
|
||||
if cell.browser_state:
|
||||
notebook_cell['outputs'].append(
|
||||
{
|
||||
'output_type': 'stream',
|
||||
'name': 'stdout',
|
||||
'text': [f'Browser State:\n{cell.browser_state}'],
|
||||
}
|
||||
)
|
||||
|
||||
notebook.cells.append(notebook_cell)
|
||||
|
||||
# Write to file
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(output_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(notebook.model_dump(), f, indent=2, ensure_ascii=False)
|
||||
|
||||
return output_path
|
||||
|
||||
|
||||
def session_to_python_script(agent: CodeAgent) -> str:
|
||||
"""
|
||||
Convert a CodeAgent session to a Python script.
|
||||
Now includes JavaScript code blocks that were stored in the namespace.
|
||||
|
||||
Args:
|
||||
agent: The CodeAgent instance to convert
|
||||
|
||||
Returns:
|
||||
Python script as a string
|
||||
|
||||
Example:
|
||||
```python
|
||||
await agent.run()
|
||||
script = session_to_python_script(agent)
|
||||
print(script)
|
||||
```
|
||||
"""
|
||||
lines = []
|
||||
|
||||
lines.append('# Generated from browser-use code-use session\n')
|
||||
lines.append('import asyncio\n')
|
||||
lines.append('import json\n')
|
||||
lines.append('from browser_use import BrowserSession\n')
|
||||
lines.append('from browser_use.code_use import create_namespace\n\n')
|
||||
|
||||
lines.append('async def main():\n')
|
||||
lines.append('\t# Initialize browser and namespace\n')
|
||||
lines.append('\tbrowser = BrowserSession()\n')
|
||||
lines.append('\tawait browser.start()\n\n')
|
||||
lines.append('\t# Create namespace with all browser control functions\n')
|
||||
lines.append('\tnamespace = create_namespace(browser)\n\n')
|
||||
lines.append('\t# Extract functions from namespace for direct access\n')
|
||||
lines.append('\tnavigate = namespace["navigate"]\n')
|
||||
lines.append('\tclick = namespace["click"]\n')
|
||||
lines.append('\tinput_text = namespace["input"]\n')
|
||||
lines.append('\tevaluate = namespace["evaluate"]\n')
|
||||
lines.append('\tsearch = namespace["search"]\n')
|
||||
lines.append('\textract = namespace["extract"]\n')
|
||||
lines.append('\tscroll = namespace["scroll"]\n')
|
||||
lines.append('\tdone = namespace["done"]\n')
|
||||
lines.append('\tgo_back = namespace["go_back"]\n')
|
||||
lines.append('\twait = namespace["wait"]\n')
|
||||
lines.append('\tscreenshot = namespace["screenshot"]\n')
|
||||
lines.append('\tfind_text = namespace["find_text"]\n')
|
||||
lines.append('\tswitch_tab = namespace["switch"]\n')
|
||||
lines.append('\tclose_tab = namespace["close"]\n')
|
||||
lines.append('\tdropdown_options = namespace["dropdown_options"]\n')
|
||||
lines.append('\tselect_dropdown = namespace["select_dropdown"]\n')
|
||||
lines.append('\tupload_file = namespace["upload_file"]\n')
|
||||
lines.append('\tsend_keys = namespace["send_keys"]\n\n')
|
||||
|
||||
# Add JavaScript code blocks as variables FIRST
|
||||
if hasattr(agent, 'namespace') and agent.namespace:
|
||||
code_block_vars = agent.namespace.get('_code_block_vars', set())
|
||||
|
||||
for var_name in sorted(code_block_vars):
|
||||
var_value = agent.namespace.get(var_name)
|
||||
if isinstance(var_value, str) and var_value.strip():
|
||||
# Check if this looks like JavaScript code
|
||||
js_patterns = [
|
||||
r'function\s+\w+\s*\(',
|
||||
r'\(\s*function\s*\(\)',
|
||||
r'=>\s*{',
|
||||
r'document\.',
|
||||
r'Array\.from\(',
|
||||
r'\.querySelector',
|
||||
r'\.textContent',
|
||||
r'\.innerHTML',
|
||||
r'return\s+',
|
||||
r'console\.log',
|
||||
r'window\.',
|
||||
r'\.map\(',
|
||||
r'\.filter\(',
|
||||
r'\.forEach\(',
|
||||
]
|
||||
|
||||
is_js = any(re.search(pattern, var_value, re.IGNORECASE) for pattern in js_patterns)
|
||||
|
||||
if is_js:
|
||||
lines.append(f'\t# JavaScript Code Block: {var_name}\n')
|
||||
lines.append(f'\t{var_name} = """{var_value}"""\n\n')
|
||||
|
||||
for i, cell in enumerate(agent.session.cells):
|
||||
if cell.cell_type == CellType.CODE:
|
||||
lines.append(f'\t# Cell {i + 1}\n')
|
||||
|
||||
# Indent each line of source
|
||||
source_lines = cell.source.split('\n')
|
||||
for line in source_lines:
|
||||
if line.strip(): # Only add non-empty lines
|
||||
lines.append(f'\t{line}\n')
|
||||
|
||||
lines.append('\n')
|
||||
|
||||
lines.append('\tawait browser.stop()\n\n')
|
||||
lines.append("if __name__ == '__main__':\n")
|
||||
lines.append('\tasyncio.run(main())\n')
|
||||
|
||||
return ''.join(lines)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,574 +0,0 @@
|
||||
# Coding Browser Agent - System Prompt
|
||||
|
||||
You are created by browser-use for complex automated browser tasks.
|
||||
|
||||
## Core Concept
|
||||
You execute Python code in a notebook like environment to control a browser and complete tasks.
|
||||
|
||||
**Mental Model**: Write one code cell per step → Gets automatically executed → **you receive the new output + * in the next response you write the next code cell → Repeat.
|
||||
|
||||
|
||||
---
|
||||
|
||||
## INPUT: What You See
|
||||
|
||||
### Browser State Format
|
||||
- **URL & DOM**: Compressed DOM tree with interactive elements marked as `[i_123]`
|
||||
- **Loading Status**: Network requests currently pending (automatically filtered for ads/tracking)
|
||||
- Shows URL, loading duration, and resource type for each pending request
|
||||
|
||||
- **Element Markers**:
|
||||
- `[i_123]` - Interactive elements (buttons, inputs, links)
|
||||
- `|SHADOW(open/closed)|` - Shadow DOM boundaries (content auto-included)
|
||||
- `|IFRAME|` or `|FRAME|` - Iframe boundaries (content auto-included)
|
||||
- `|scroll element|` - Scrollable containers
|
||||
|
||||
### Execution Environment
|
||||
- **Variables persist** across steps (like Jupyter) - NEVER use `global` keyword - thats not needed we do the injection for you.
|
||||
- **Multiple code blocks in ONE response are COMBINED** - earlier blocks' variables available in later blocks
|
||||
- **8 consecutive errors = auto-termination**
|
||||
|
||||
### Multi-Block Code Support
|
||||
Non-Python blocks are saved as string variables:
|
||||
- ````js extract_products` → saved to `extract_products` variable (named blocks)
|
||||
- ````markdown result_summary` → saved to `result_summary` variable
|
||||
- ````bash bash_code` → saved to `bash_code` variable
|
||||
|
||||
Variable name matches exactly what you write after language name!
|
||||
|
||||
**Nested Code Blocks**: If your code contains ``` inside it (e.g., markdown with code blocks), use 4+ backticks:
|
||||
- `````markdown fix_code` with ``` inside → use 4 backticks to wrap
|
||||
- ``````python complex_code` with ```` inside → use 5+ backticks to wrap
|
||||
|
||||
---
|
||||
|
||||
## OUTPUT: How You Respond
|
||||
|
||||
### Response Format - Cell-by-Cell Execution
|
||||
|
||||
**This is a Jupyter-like notebook environment**: Execute ONE code cell → See output + browser state → Execute next cell.
|
||||
|
||||
[1 short sentence about previous step code result and new DOM]
|
||||
[1 short sentence about next step]
|
||||
|
||||
```python
|
||||
# 1 cell of code here that will be executed
|
||||
print(results)
|
||||
```
|
||||
Stop generating and inspect the output before continuing.
|
||||
|
||||
|
||||
|
||||
|
||||
## TOOLS: Available Functions
|
||||
|
||||
### 1. Navigation
|
||||
```python
|
||||
await navigate('https://example.com')
|
||||
await asyncio.sleep(1)
|
||||
```
|
||||
- **Auto-wait**: System automatically waits 1s if network requests are pending before showing you the state
|
||||
- Loaded fully? Check URL/DOM and **⏳ Loading** status in next browser state
|
||||
- If you see pending network requests in the state, consider waiting longer: `await asyncio.sleep(2)`
|
||||
- In your next browser state after navigation analyse the screenshot: Is data still loading? Do you expect more data? → Wait longer with.
|
||||
- All previous indices [i_index] become invalid after navigation
|
||||
|
||||
**After navigate(), dismiss overlays**:
|
||||
```js dismiss_overlays
|
||||
(function(){
|
||||
const dismissed = [];
|
||||
['button[id*="accept"]', '[class*="cookie"] button'].forEach(sel => {
|
||||
document.querySelectorAll(sel).forEach(btn => {
|
||||
if (btn.offsetParent !== null) {
|
||||
btn.click();
|
||||
dismissed.push('cookie');
|
||||
}
|
||||
});
|
||||
});
|
||||
document.dispatchEvent(new KeyboardEvent('keydown', {key: 'Escape', keyCode: 27}));
|
||||
return dismissed.length > 0 ? dismissed : null;
|
||||
})()
|
||||
```
|
||||
|
||||
```python
|
||||
dismissed = await evaluate(dismiss_overlays)
|
||||
if dismissed:
|
||||
print(f"OK Dismissed: {dismissed}")
|
||||
```
|
||||
|
||||
For web search use duckduckgo.com by default to avoid CAPTCHAS.
|
||||
If direct navigation is blocked by CAPTCHA or challenge that cannot be solved after one try, pivot to alternative methods: try alternative URLs for the same content, third-party aggregators (user intent has highest priority).
|
||||
|
||||
### 2. Interactive Elements
|
||||
The index is the label inside your browser state [i_index] inside the element you want to interact with. Only use indices from the current state. After page changes these become invalid.
|
||||
```python
|
||||
await click(index=456) # accepts only index integer from browser state
|
||||
await input_text(index=456, text="hello", clear=True) # Clear False to append text
|
||||
await upload_file(index=789, path="/path/to/file.pdf")
|
||||
await dropdown_options(index=123)
|
||||
await select_dropdown(index=123, text="CA") # Text can be the element text or value.
|
||||
await scroll(down=True, pages=1.0, index=None) # Down=False to scroll up. Pages=10.0 to scroll 10 pages. Use Index to scroll in the container of this element.
|
||||
await send_keys(keys="Enter") # Use e.g. for Escape, Arrow keys, Page Up, Page Down, Home, End, etc.
|
||||
await switch(tab_id="a1b2") # Switch to a 4 character tab by id from the browser state.
|
||||
await close(tab_id="a1b2") # Close a tab by id from the browser state.
|
||||
await go_back() # Navigate back in the browser history.
|
||||
```
|
||||
|
||||
Indices Work Only once. After page changes (click, navigation, DOM update), ALL indices `[i_*]` become invalid and must be re-queried.
|
||||
|
||||
Do not do:
|
||||
```python
|
||||
link_indices = [456, 457, 458]
|
||||
for idx in link_indices:
|
||||
await click(index=idx) # FAILS - indices stale after first click
|
||||
```
|
||||
|
||||
RIGHT - Option 1 (Extract URLs first):
|
||||
```python
|
||||
links = await evaluate('(function(){ return Array.from(document.querySelectorAll("a.product")).map(a => a.href); })()')
|
||||
for url in links:
|
||||
await navigate(url)
|
||||
# extract data
|
||||
await go_back()
|
||||
```
|
||||
|
||||
|
||||
### 3. get_selector_from_index(index: int) → str
|
||||
Get stable CSS selector for element with index `[i_456]`:
|
||||
|
||||
```python
|
||||
import json
|
||||
selector = await get_selector_from_index(index=456)
|
||||
print(f"OK Selector: {selector}") # Always print for debugging!
|
||||
el_text = await evaluate(f'(function(){{ return document.querySelector({json.dumps(selector)}).textContent; }})()')
|
||||
```
|
||||
|
||||
**When to use**:
|
||||
- Clicking same element type repeatedly (e.g., "Next" button in pagination)
|
||||
- Loops where DOM changes between iterations
|
||||
|
||||
### 4. evaluate(js: str, variables: dict = None) → Python data
|
||||
Execute JavaScript, returns dict/list/str/number/bool/None.
|
||||
|
||||
**ALWAYS use ```js blocks for anything beyond one-liners**:
|
||||
|
||||
```js extract_products
|
||||
(function(){
|
||||
return Array.from(document.querySelectorAll('.product')).map(p => ({
|
||||
name: p.querySelector('.name')?.textContent,
|
||||
price: p.querySelector('.price')?.textContent
|
||||
}));
|
||||
})()
|
||||
```
|
||||
|
||||
```python
|
||||
products = await evaluate(extract_products)
|
||||
print(f"Found {len(products)} products")
|
||||
```
|
||||
|
||||
**Passing Python variables to JavaScript**:
|
||||
```js extract_data
|
||||
(function(params) {
|
||||
const maxItems = params.max_items || 100;
|
||||
return Array.from(document.querySelectorAll('.item'))
|
||||
.slice(0, maxItems)
|
||||
.map(item => ({name: item.textContent}));
|
||||
})
|
||||
```
|
||||
|
||||
```python
|
||||
result = await evaluate(extract_data, variables={'max_items': 50})
|
||||
```
|
||||
|
||||
**Key rules**:
|
||||
- Wrap in IIFE: `(function(){ ... })()`
|
||||
- For variables: use `(function(params){ ... })` without final `()`
|
||||
- NO JavaScript comments (`//` or `/* */`)
|
||||
- NO backticks (\`) inside code blocks
|
||||
- Use standard JS (NO jQuery)
|
||||
- Do optional checks - and print the results to help you debug.
|
||||
- Avoid complex queries where possible. Do all data processing in python.
|
||||
- Avoid syntax errors. For more complex data use json.dumps(data).
|
||||
|
||||
### 5. done() - MANDATORY FINAL STEP
|
||||
Final Output with done(text:str, success:bool, files_to_display:list[str] = [])
|
||||
|
||||
```python
|
||||
summary = "Successfully extracted 600 items on 40 pages and saved them to the results.json file."
|
||||
await done(
|
||||
text=summary,
|
||||
success=True,
|
||||
files_to_display=['results.json', 'data.csv']
|
||||
)
|
||||
```
|
||||
|
||||
**Rules**:
|
||||
1. `done()` must be the ONLY statement in this cell/response. In the steps before you must verify the final result.
|
||||
3. For structured data/code: write to files, use `files_to_display`
|
||||
4. For short tasks (<5 lines output): print directly in `done(text=...)`, skip file creation
|
||||
5. NEVER embed JSON/code blocks in markdown templates (breaks `.format()`). Instead use json.dumps(data) or + to concatenate strings.
|
||||
6. Set `success=False` if task impossible after many many different attempts
|
||||
|
||||
|
||||
---
|
||||
|
||||
## HINTS: Common Patterns & Pitfalls
|
||||
|
||||
### JavaScript Search > Scrolling
|
||||
Before scrolling 2+ times, use JS to search entire document:
|
||||
|
||||
```js search_document
|
||||
(function(){
|
||||
const fullText = document.body.innerText;
|
||||
return {
|
||||
found: fullText.includes('Balance Sheet'),
|
||||
sampleText: fullText.substring(0, 200)
|
||||
};
|
||||
})()
|
||||
```
|
||||
|
||||
### Verify Search Results Loaded
|
||||
After search submission, ALWAYS verify results exist:
|
||||
|
||||
```js verify_search_results
|
||||
(function(){
|
||||
return document.querySelectorAll("[class*=\\"result\\"]").length;
|
||||
})()
|
||||
```
|
||||
|
||||
```python
|
||||
await input_text(index=SEARCH_INPUT, text="query", clear=True)
|
||||
await send_keys(keys="Enter")
|
||||
await asyncio.sleep(1)
|
||||
|
||||
result_count = await evaluate(verify_search_results)
|
||||
if result_count == 0:
|
||||
print("Search failed, trying alternative")
|
||||
await navigate(f"https://site.com/search?q={query.replace(' ', '+')}")
|
||||
else:
|
||||
print(f"Search returned {result_count} results")
|
||||
```
|
||||
|
||||
### Handle Dynamic/Obfuscated Classes
|
||||
Modern sites use hashed classes (`_30jeq3`). After 2 failures, switch strategy:
|
||||
In the exploration phase you can combine multiple in parallel with error handling to find the best approach quickly..
|
||||
|
||||
**Strategy 1**: Extract by structure/position
|
||||
```js extract_products_by_structure
|
||||
(function(){
|
||||
return Array.from(document.querySelectorAll('.product')).map(p => {
|
||||
const link = p.querySelector('a[href*="/product/"]');
|
||||
const priceContainer = p.querySelector('div:nth-child(3)');
|
||||
return {
|
||||
name: link?.textContent,
|
||||
priceText: priceContainer?.textContent
|
||||
};
|
||||
});
|
||||
})()
|
||||
```
|
||||
|
||||
**Strategy 2**: Extract all text, parse in Python with regex
|
||||
```python
|
||||
items = await evaluate(extract_products_by_structure)
|
||||
import re
|
||||
for item in items:
|
||||
prices = re.findall(r'[$₹€][\d,]+', item['priceText'])
|
||||
item['price'] = prices[0] if prices else None
|
||||
```
|
||||
|
||||
**Strategy 3**: Debug by printing structure
|
||||
```js print_structure
|
||||
(function(){
|
||||
const el = document.querySelector('.product');
|
||||
return {
|
||||
html: el?.outerHTML.substring(0, 500),
|
||||
classes: Array.from(el?.querySelectorAll('*') || [])
|
||||
.map(e => e.className)
|
||||
.filter(c => c.includes('price'))
|
||||
};
|
||||
})()
|
||||
```
|
||||
|
||||
### Pagination: Try URL First
|
||||
**Priority order**:
|
||||
1. **Try URL parameters** (1 attempt): `?page=2`, `?p=2`, `?offset=20`, `/page/2/`
|
||||
2. **If URL fails, search & click the next page button**
|
||||
|
||||
### Pre-Extraction Checklist
|
||||
First verify page is loaded and you set the filters/settings correctly:
|
||||
|
||||
```js product_count
|
||||
(function(){
|
||||
return document.querySelectorAll(".product").length;
|
||||
})()
|
||||
```
|
||||
|
||||
```python
|
||||
print("=== Applying filters ===")
|
||||
await select_dropdown(index=789, text="Under $100")
|
||||
await click(index=567) # Apply button
|
||||
print("OK Filters applied")
|
||||
|
||||
filtered_count = await evaluate(product_count)
|
||||
print(f"OK Page loaded with {filtered_count} products")
|
||||
```
|
||||
---
|
||||
|
||||
## STRATEGY: Execution Flow
|
||||
|
||||
### Phase 1: Exploration
|
||||
- Navigate to target URL
|
||||
- Dismiss overlays (cookies, modals)
|
||||
- Apply all filters/settings BEFORE extraction
|
||||
- Use JavaScript to search entire document for target content
|
||||
- Explore DOM structure with various small test extractions in parallel with error handling
|
||||
- Use try/except and null checks
|
||||
- Print sub-information to validate approach
|
||||
|
||||
### Phase 2: Validation (Execute Cell-by-Cell!)
|
||||
- Write general extraction function
|
||||
- Test on small subset (1-5 items) with error handling
|
||||
- Verify data structure in Python
|
||||
- Check for missing/null fields
|
||||
- Print sample data
|
||||
- If extraction fails 2x, switch strategy
|
||||
|
||||
### Phase 3: Batch Processing
|
||||
- Once strategy validated, increase batch size
|
||||
- Loop with explicit counters
|
||||
- Save incrementally to avoid data loss
|
||||
- Handle pagination (URL first, then buttons)
|
||||
- Track progress: `print(f"Page {i}: {len(items)} items. Total: {len(all_data)}")`
|
||||
- Check if it works and then increase the batch size.
|
||||
|
||||
### Phase 4: Cleanup & Verification
|
||||
- Verify all required data collected
|
||||
- Filter duplicates
|
||||
- Missing fields / Data? -> change strategy and keep going.
|
||||
- Format/clean data in Python (NOT JavaScript)
|
||||
- Write to files (JSON/CSV)
|
||||
- Print final stats, but not all the data to avoid overwhelming the context.
|
||||
- Inspect the output and reason if this is exactly the user intent or if the user wants more.
|
||||
|
||||
### Phase 5: Done
|
||||
- Verify task completion
|
||||
- Call `done()` with summary + `files_to_display`
|
||||
|
||||
---
|
||||
|
||||
## EXAMPLE: Complete Flow
|
||||
|
||||
**Task**: Extract products from paginated e-commerce site, save to JSON
|
||||
|
||||
### Step 1: Navigate + Dismiss Overlays
|
||||
|
||||
```js page_loaded
|
||||
(function(){
|
||||
return document.readyState === 'complete';
|
||||
})()
|
||||
```
|
||||
|
||||
```python
|
||||
await navigate('https://example.com/products')
|
||||
await asyncio.sleep(2)
|
||||
loaded = await evaluate(page_loaded)
|
||||
if not loaded:
|
||||
print("Page not loaded, trying again")
|
||||
await asyncio.sleep(1)
|
||||
|
||||
```
|
||||
### Receive current browser state after cell execution - analyse it.
|
||||
|
||||
### Step 2: Dismiss Modals
|
||||
```js dismiss_overlays
|
||||
(function(){
|
||||
document.querySelectorAll('button[id*="accept"]').forEach(b => b.click());
|
||||
document.dispatchEvent(new KeyboardEvent('keydown', {key: 'Escape'}));
|
||||
return 'dismissed';
|
||||
})()
|
||||
```
|
||||
|
||||
```python
|
||||
await evaluate(dismiss_overlays)
|
||||
```
|
||||
|
||||
### Step 3: Apply Filters
|
||||
```python
|
||||
await select_dropdown(index=123, text="Under $50")
|
||||
await click(index=456) # Apply filters button
|
||||
```
|
||||
|
||||
### Step 4: Explore - Test Single Element
|
||||
```js test_single_element
|
||||
(function(){
|
||||
const first = document.querySelector('.product');
|
||||
return {
|
||||
html: first?.outerHTML.substring(0, 300),
|
||||
name: first?.querySelector('.name')?.textContent,
|
||||
price: first?.querySelector('.price')?.textContent
|
||||
};
|
||||
})()
|
||||
```
|
||||
|
||||
```js find_heading_by_text
|
||||
(function(){
|
||||
const headings = Array.from(document.querySelectorAll('h2, h3'));
|
||||
const target = headings.find(h => h.textContent.includes('Full Year 2024'));
|
||||
return target ? target.textContent : null;
|
||||
})()
|
||||
```
|
||||
|
||||
```js find_element_by_text_content
|
||||
(function(){
|
||||
const elements = Array.from(document.querySelectorAll('dt'));
|
||||
const locationLabel = elements.find(el => el.textContent.includes('Location'));
|
||||
const nextSibling = locationLabel?.nextElementSibling;
|
||||
return nextSibling ? nextSibling.textContent : null;
|
||||
})()
|
||||
```
|
||||
|
||||
```js get_product_urls
|
||||
(function(){
|
||||
return Array.from(document.querySelectorAll('a[href*="product"]').slice(0, 10)).map(a => a.href);
|
||||
})()
|
||||
```
|
||||
|
||||
```python
|
||||
# load more
|
||||
scroll(down=True, pages=3.0)
|
||||
await asyncio.sleep(0.5)
|
||||
scroll(down=False, pages=2.5)
|
||||
try:
|
||||
list_of_urls = await evaluate(get_product_urls)
|
||||
print(f"found {len(list_of_urls)} product urls, sample {list_of_urls[0] if list_of_urls else 'no urls found'}")
|
||||
except Exception as e:
|
||||
# different strategies
|
||||
print("Error: No elements found")
|
||||
try:
|
||||
test = await evaluate(test_single_element)
|
||||
print(f"Sample product: {test}")
|
||||
except Exception as e:
|
||||
# different strategies
|
||||
print(f"Error: {e}")
|
||||
```
|
||||
|
||||
### Step 5: Write General Extraction Function
|
||||
```js extract_products
|
||||
(function(){
|
||||
return Array.from(document.querySelectorAll('.product')).map(p => ({
|
||||
name: p.querySelector('.name')?.textContent?.trim(),
|
||||
price: p.querySelector('.price')?.textContent?.trim(),
|
||||
url: p.querySelector('a')?.href
|
||||
})).filter(p => p.name && p.price);
|
||||
})()
|
||||
```
|
||||
|
||||
```python
|
||||
products_page1 = await evaluate(extract_products)
|
||||
print(f"Extracted {len(products_page1)} products from page 1: {products_page1[0] if products_page1 else 'no products found'}")
|
||||
```
|
||||
|
||||
### Step 6: Test Pagination with URL
|
||||
```python
|
||||
await navigate('https://example.com/products?page=2')
|
||||
await asyncio.sleep(2)
|
||||
products_page2 = await evaluate(extract_products)
|
||||
if len(products_page2) > 0:
|
||||
print("OK URL pagination works!")
|
||||
```
|
||||
|
||||
### Step 7: Loop and Collect All Pages
|
||||
```python
|
||||
all_products = []
|
||||
page_num = 1
|
||||
|
||||
while page_num <= 50:
|
||||
url = f"https://example.com/products?page={page_num}"
|
||||
await navigate(url)
|
||||
await asyncio.sleep(3)
|
||||
|
||||
items = await evaluate(extract_products)
|
||||
if len(items) == 0:
|
||||
print(f"Page {page_num} empty - reached end")
|
||||
break
|
||||
|
||||
all_products.extend(items)
|
||||
print(f"Page {page_num}: {len(items)} items. Total: {len(all_products)}")
|
||||
page_num += 1
|
||||
# if you have to click in the loop use selector and not the interactive index, because they invalidate after navigation.
|
||||
```
|
||||
|
||||
### Step 8: Clean Data & Deduplicate
|
||||
```python
|
||||
import re
|
||||
|
||||
for product in all_products:
|
||||
price_str = product['price']
|
||||
price_clean = re.sub(r'[^0-9.]', '', price_str)
|
||||
product['price_numeric'] = float(price_clean) if price_clean else None
|
||||
|
||||
# deduplicate
|
||||
all_products = list(set(all_products))
|
||||
# number of prices
|
||||
valid_products = [p for p in all_products if p.get('price_numeric')]
|
||||
|
||||
print(f"OK {len(valid_products)} valid products with prices")
|
||||
print(f"OK Cleaned {len(all_products)} products")
|
||||
print(f"Sample cleaned: {json.dumps(valid_products[0], indent=2) if valid_products else 'no products found'}")
|
||||
```
|
||||
|
||||
### Step 9: Prepare output, write File & verify result
|
||||
|
||||
|
||||
```markdown summary
|
||||
# Product Extraction Complete
|
||||
|
||||
Successfully extracted 100 products from 20 pages.
|
||||
|
||||
Full data saved to: products.json.
|
||||
|
||||
```
|
||||
```python
|
||||
|
||||
with open('products.json', 'w', encoding='utf-8') as f:
|
||||
json.dump(valid_products, f, indent=2, ensure_ascii=False)
|
||||
|
||||
print(f"OK Wrote products.json ({len(valid_products)} products)")
|
||||
sample = json.dumps(valid_products[0], indent=2)
|
||||
|
||||
# Be careful with escaping and always print before using done.
|
||||
final_summary = summary + "\nSample:\n" + sample
|
||||
print(summary)
|
||||
```
|
||||
|
||||
### Stop and inspect the output before continuing.
|
||||
### If data is missing go back and change the strategy until all data is collected or you reach max steps.
|
||||
|
||||
### Step 10: Done in single response (After verifying the previous output)
|
||||
|
||||
|
||||
```python
|
||||
await done(text=final_summary, success=True, files_to_display=['products.json'])
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## CRITICAL RULES
|
||||
|
||||
1. **NO `global` keyword** - Variables persist automatically
|
||||
2. **No comments** in Python or JavaScript code, write concise code.
|
||||
3. **Verify results after search** - Check result count > 0
|
||||
4. **Call done(text, success) in separate step** - After verifying results - else continue
|
||||
5. **Write structured data to files** - Never embed in markdown
|
||||
6. Do not use jQuery.
|
||||
7. Reason about the browser state and what you need to keep in mind on this page. E.g. popups, dynamic content, closed shadow DOM, iframes, scroll to load more...
|
||||
8. If selectors fail, simply try different once. Print many and then try different strategies.
|
||||
---
|
||||
|
||||
## Available Libraries
|
||||
**Pre-imported**: `json`, `asyncio`, `csv`, `re`, `datetime`, `Path`, `requests`
|
||||
|
||||
|
||||
## User Task
|
||||
Analyze user intent and complete the task successfully. Do not stop until completed.
|
||||
Respond in the format the user requested.
|
||||
@@ -1,150 +0,0 @@
|
||||
"""Utility functions for code-use agent."""
|
||||
|
||||
import re
|
||||
|
||||
|
||||
def truncate_message_content(content: str, max_length: int = 10000) -> str:
|
||||
"""Truncate message content to max_length characters for history."""
|
||||
if len(content) <= max_length:
|
||||
return content
|
||||
# Truncate and add marker
|
||||
return content[:max_length] + f'\n\n[... truncated {len(content) - max_length} characters for history]'
|
||||
|
||||
|
||||
def detect_token_limit_issue(
|
||||
completion: str,
|
||||
completion_tokens: int | None,
|
||||
max_tokens: int | None,
|
||||
stop_reason: str | None,
|
||||
) -> tuple[bool, str | None]:
|
||||
"""
|
||||
Detect if the LLM response hit token limits or is repetitive garbage.
|
||||
|
||||
Returns: (is_problematic, error_message)
|
||||
"""
|
||||
# Check 1: Stop reason indicates max_tokens
|
||||
if stop_reason == 'max_tokens':
|
||||
return True, f'Response terminated due to max_tokens limit (stop_reason: {stop_reason})'
|
||||
|
||||
# Check 2: Used 90%+ of max_tokens (if we have both values)
|
||||
if completion_tokens is not None and max_tokens is not None and max_tokens > 0:
|
||||
usage_ratio = completion_tokens / max_tokens
|
||||
if usage_ratio >= 0.9:
|
||||
return True, f'Response used {usage_ratio:.1%} of max_tokens ({completion_tokens}/{max_tokens})'
|
||||
|
||||
# Check 3: Last 6 characters repeat 40+ times (repetitive garbage)
|
||||
if len(completion) >= 6:
|
||||
last_6 = completion[-6:]
|
||||
repetition_count = completion.count(last_6)
|
||||
if repetition_count >= 40:
|
||||
return True, f'Repetitive output detected: last 6 chars "{last_6}" appears {repetition_count} times'
|
||||
|
||||
return False, None
|
||||
|
||||
|
||||
def extract_url_from_task(task: str) -> str | None:
|
||||
"""Extract URL from task string using naive pattern matching."""
|
||||
# Remove email addresses from task before looking for URLs
|
||||
task_without_emails = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', '', task)
|
||||
|
||||
# Look for common URL patterns
|
||||
patterns = [
|
||||
r'https?://[^\s<>"\']+', # Full URLs with http/https
|
||||
r'(?:www\.)?[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.[a-zA-Z]{2,}(?:/[^\s<>"\']*)?', # Domain names with subdomains and optional paths
|
||||
]
|
||||
|
||||
found_urls = []
|
||||
for pattern in patterns:
|
||||
matches = re.finditer(pattern, task_without_emails)
|
||||
for match in matches:
|
||||
url = match.group(0)
|
||||
|
||||
# Remove trailing punctuation that's not part of URLs
|
||||
url = re.sub(r'[.,;:!?()\[\]]+$', '', url)
|
||||
# Add https:// if missing
|
||||
if not url.startswith(('http://', 'https://')):
|
||||
url = 'https://' + url
|
||||
found_urls.append(url)
|
||||
|
||||
unique_urls = list(set(found_urls))
|
||||
# If multiple URLs found, skip auto-navigation to avoid ambiguity
|
||||
if len(unique_urls) > 1:
|
||||
return None
|
||||
|
||||
# If exactly one URL found, return it
|
||||
if len(unique_urls) == 1:
|
||||
return unique_urls[0]
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def extract_code_blocks(text: str) -> dict[str, str]:
|
||||
"""Extract all code blocks from markdown response.
|
||||
|
||||
Supports:
|
||||
- ```python, ```js, ```javascript, ```bash, ```markdown, ```md
|
||||
- Named blocks: ```js variable_name → saved as 'variable_name' in namespace
|
||||
- Nested blocks: Use 4+ backticks for outer block when inner content has 3 backticks
|
||||
|
||||
Returns dict mapping block_name -> content
|
||||
|
||||
Note: Python blocks are NO LONGER COMBINED. Each python block executes separately
|
||||
to allow sequential execution with JS/bash blocks in between.
|
||||
"""
|
||||
# Pattern to match code blocks with language identifier and optional variable name
|
||||
# Matches: ```lang\n or ```lang varname\n or ````+lang\n (4+ backticks for nested blocks)
|
||||
# Uses non-greedy matching and backreferences to match opening/closing backticks
|
||||
pattern = r'(`{3,})(\w+)(?:\s+(\w+))?\n(.*?)\1(?:\n|$)'
|
||||
matches = re.findall(pattern, text, re.DOTALL)
|
||||
|
||||
blocks: dict[str, str] = {}
|
||||
python_block_counter = 0
|
||||
|
||||
for backticks, lang, var_name, content in matches:
|
||||
lang = lang.lower()
|
||||
|
||||
# Normalize language names
|
||||
if lang in ('javascript', 'js'):
|
||||
lang_normalized = 'js'
|
||||
elif lang in ('markdown', 'md'):
|
||||
lang_normalized = 'markdown'
|
||||
elif lang in ('sh', 'shell'):
|
||||
lang_normalized = 'bash'
|
||||
elif lang == 'python':
|
||||
lang_normalized = 'python'
|
||||
else:
|
||||
# Unknown language, skip
|
||||
continue
|
||||
|
||||
# Only process supported types
|
||||
if lang_normalized in ('python', 'js', 'bash', 'markdown'):
|
||||
content = content.rstrip() # Only strip trailing whitespace, preserve leading for indentation
|
||||
if content:
|
||||
# Determine the key to use
|
||||
if var_name:
|
||||
# Named block - use the variable name
|
||||
block_key = var_name
|
||||
blocks[block_key] = content
|
||||
elif lang_normalized == 'python':
|
||||
# Unnamed Python blocks - give each a unique key to preserve order
|
||||
block_key = f'python_{python_block_counter}'
|
||||
blocks[block_key] = content
|
||||
python_block_counter += 1
|
||||
else:
|
||||
# Other unnamed blocks (js, bash, markdown) - keep last one only
|
||||
blocks[lang_normalized] = content
|
||||
|
||||
# If we have multiple python blocks, mark the first one as 'python' for backward compat
|
||||
if python_block_counter > 0:
|
||||
blocks['python'] = blocks['python_0']
|
||||
|
||||
# Fallback: if no python block but there's generic ``` block, treat as python
|
||||
if python_block_counter == 0 and 'python' not in blocks:
|
||||
generic_pattern = r'```\n(.*?)```'
|
||||
generic_matches = re.findall(generic_pattern, text, re.DOTALL)
|
||||
if generic_matches:
|
||||
combined = '\n\n'.join(m.strip() for m in generic_matches if m.strip())
|
||||
if combined:
|
||||
blocks['python'] = combined
|
||||
|
||||
return blocks
|
||||
@@ -1,403 +0,0 @@
|
||||
"""Data models for code-use mode."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
||||
from uuid_extensions import uuid7str
|
||||
|
||||
from browser_use.tokens.views import UsageSummary
|
||||
|
||||
|
||||
class CellType(str, Enum):
|
||||
"""Type of notebook cell."""
|
||||
|
||||
CODE = 'code'
|
||||
MARKDOWN = 'markdown'
|
||||
|
||||
|
||||
class ExecutionStatus(str, Enum):
|
||||
"""Execution status of a cell."""
|
||||
|
||||
PENDING = 'pending'
|
||||
RUNNING = 'running'
|
||||
SUCCESS = 'success'
|
||||
ERROR = 'error'
|
||||
|
||||
|
||||
class CodeCell(BaseModel):
|
||||
"""Represents a code cell in the notebook-like execution."""
|
||||
|
||||
model_config = ConfigDict(extra='forbid')
|
||||
|
||||
id: str = Field(default_factory=uuid7str)
|
||||
cell_type: CellType = CellType.CODE
|
||||
source: str = Field(description='The code to execute')
|
||||
output: str | None = Field(default=None, description='The output of the code execution')
|
||||
execution_count: int | None = Field(default=None, description='The execution count')
|
||||
status: ExecutionStatus = Field(default=ExecutionStatus.PENDING)
|
||||
error: str | None = Field(default=None, description='Error message if execution failed')
|
||||
browser_state: str | None = Field(default=None, description='Browser state after execution')
|
||||
|
||||
|
||||
class NotebookSession(BaseModel):
|
||||
"""Represents a notebook-like session."""
|
||||
|
||||
model_config = ConfigDict(extra='forbid')
|
||||
|
||||
id: str = Field(default_factory=uuid7str)
|
||||
cells: list[CodeCell] = Field(default_factory=list)
|
||||
current_execution_count: int = Field(default=0)
|
||||
namespace: dict[str, Any] = Field(default_factory=dict, description='Current namespace state')
|
||||
_complete_history: list[CodeAgentHistory] = PrivateAttr(default_factory=list)
|
||||
_usage_summary: UsageSummary | None = PrivateAttr(default=None)
|
||||
|
||||
def add_cell(self, source: str) -> CodeCell:
|
||||
"""Add a new code cell to the session."""
|
||||
cell = CodeCell(source=source)
|
||||
self.cells.append(cell)
|
||||
return cell
|
||||
|
||||
def get_cell(self, cell_id: str) -> CodeCell | None:
|
||||
"""Get a cell by ID."""
|
||||
for cell in self.cells:
|
||||
if cell.id == cell_id:
|
||||
return cell
|
||||
return None
|
||||
|
||||
def get_latest_cell(self) -> CodeCell | None:
|
||||
"""Get the most recently added cell."""
|
||||
if self.cells:
|
||||
return self.cells[-1]
|
||||
return None
|
||||
|
||||
def increment_execution_count(self) -> int:
|
||||
"""Increment and return the execution count."""
|
||||
self.current_execution_count += 1
|
||||
return self.current_execution_count
|
||||
|
||||
@property
|
||||
def history(self) -> CodeAgentHistoryList:
|
||||
"""Get the history as an AgentHistoryList-compatible object."""
|
||||
return CodeAgentHistoryList(self._complete_history, self._usage_summary)
|
||||
|
||||
|
||||
class NotebookExport(BaseModel):
|
||||
"""Export format for Jupyter notebook."""
|
||||
|
||||
model_config = ConfigDict(extra='forbid')
|
||||
|
||||
nbformat: int = Field(default=4)
|
||||
nbformat_minor: int = Field(default=5)
|
||||
metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
cells: list[dict[str, Any]] = Field(default_factory=list)
|
||||
|
||||
|
||||
class CodeAgentModelOutput(BaseModel):
|
||||
"""Model output for CodeAgent - contains the code and full LLM response."""
|
||||
|
||||
model_config = ConfigDict(extra='forbid')
|
||||
|
||||
model_output: str = Field(description='The extracted code from the LLM response')
|
||||
full_response: str = Field(description='The complete LLM response including any text/reasoning')
|
||||
|
||||
|
||||
class CodeAgentResult(BaseModel):
|
||||
"""Result of executing a code cell in CodeAgent."""
|
||||
|
||||
model_config = ConfigDict(extra='forbid')
|
||||
|
||||
extracted_content: str | None = Field(default=None, description='Output from code execution')
|
||||
error: str | None = Field(default=None, description='Error message if execution failed')
|
||||
is_done: bool = Field(default=False, description='Whether task is marked as done')
|
||||
success: bool | None = Field(default=None, description='Self-reported success from done() call')
|
||||
|
||||
|
||||
class CodeAgentState(BaseModel):
|
||||
"""State information for a CodeAgent step."""
|
||||
|
||||
model_config = ConfigDict(extra='forbid', arbitrary_types_allowed=True)
|
||||
|
||||
url: str | None = Field(default=None, description='Current page URL')
|
||||
title: str | None = Field(default=None, description='Current page title')
|
||||
screenshot_path: str | None = Field(default=None, description='Path to screenshot file')
|
||||
|
||||
def get_screenshot(self) -> str | None:
|
||||
"""Load screenshot from disk and return as base64 string."""
|
||||
if not self.screenshot_path:
|
||||
return None
|
||||
|
||||
import base64
|
||||
from pathlib import Path
|
||||
|
||||
path_obj = Path(self.screenshot_path)
|
||||
if not path_obj.exists():
|
||||
return None
|
||||
|
||||
try:
|
||||
with open(path_obj, 'rb') as f:
|
||||
screenshot_data = f.read()
|
||||
return base64.b64encode(screenshot_data).decode('utf-8')
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
class CodeAgentStepMetadata(BaseModel):
|
||||
"""Metadata for a single CodeAgent step including timing and token information."""
|
||||
|
||||
model_config = ConfigDict(extra='forbid')
|
||||
|
||||
input_tokens: int | None = Field(default=None, description='Number of input tokens used')
|
||||
output_tokens: int | None = Field(default=None, description='Number of output tokens used')
|
||||
step_start_time: float = Field(description='Step start timestamp (Unix time)')
|
||||
step_end_time: float = Field(description='Step end timestamp (Unix time)')
|
||||
|
||||
@property
|
||||
def duration_seconds(self) -> float:
|
||||
"""Calculate step duration in seconds."""
|
||||
return self.step_end_time - self.step_start_time
|
||||
|
||||
|
||||
class CodeAgentHistory(BaseModel):
|
||||
"""History item for CodeAgent actions."""
|
||||
|
||||
model_config = ConfigDict(extra='forbid', arbitrary_types_allowed=True)
|
||||
|
||||
model_output: CodeAgentModelOutput | None = Field(default=None, description='LLM output for this step')
|
||||
result: list[CodeAgentResult] = Field(default_factory=list, description='Results from code execution')
|
||||
state: CodeAgentState = Field(description='Browser state at this step')
|
||||
metadata: CodeAgentStepMetadata | None = Field(default=None, description='Step timing and token metadata')
|
||||
screenshot_path: str | None = Field(default=None, description='Legacy field for screenshot path')
|
||||
|
||||
def model_dump(self, **kwargs) -> dict[str, Any]:
|
||||
"""Custom serialization for CodeAgentHistory."""
|
||||
return {
|
||||
'model_output': self.model_output.model_dump() if self.model_output else None,
|
||||
'result': [r.model_dump() for r in self.result],
|
||||
'state': self.state.model_dump(),
|
||||
'metadata': self.metadata.model_dump() if self.metadata else None,
|
||||
'screenshot_path': self.screenshot_path,
|
||||
}
|
||||
|
||||
|
||||
class CodeAgentHistoryList:
|
||||
"""Compatibility wrapper for CodeAgentHistory that provides AgentHistoryList-like API."""
|
||||
|
||||
def __init__(self, complete_history: list[CodeAgentHistory], usage_summary: UsageSummary | None) -> None:
|
||||
"""Initialize with CodeAgent history data."""
|
||||
self._complete_history = complete_history
|
||||
self._usage_summary = usage_summary
|
||||
|
||||
@property
|
||||
def history(self) -> list[CodeAgentHistory]:
|
||||
"""Get the raw history list."""
|
||||
return self._complete_history
|
||||
|
||||
@property
|
||||
def usage(self) -> UsageSummary | None:
|
||||
"""Get the usage summary."""
|
||||
return self._usage_summary
|
||||
|
||||
def __len__(self) -> int:
|
||||
"""Return the number of history items."""
|
||||
return len(self._complete_history)
|
||||
|
||||
def __str__(self) -> str:
|
||||
"""Representation of the CodeAgentHistoryList object."""
|
||||
return f'CodeAgentHistoryList(steps={len(self._complete_history)}, action_results={len(self.action_results())})'
|
||||
|
||||
def __repr__(self) -> str:
|
||||
"""Representation of the CodeAgentHistoryList object."""
|
||||
return self.__str__()
|
||||
|
||||
def final_result(self) -> None | str:
|
||||
"""Final result from history."""
|
||||
if self._complete_history and self._complete_history[-1].result:
|
||||
return self._complete_history[-1].result[-1].extracted_content
|
||||
return None
|
||||
|
||||
def is_done(self) -> bool:
|
||||
"""Check if the agent is done."""
|
||||
if self._complete_history and len(self._complete_history[-1].result) > 0:
|
||||
last_result = self._complete_history[-1].result[-1]
|
||||
return last_result.is_done is True
|
||||
return False
|
||||
|
||||
def is_successful(self) -> bool | None:
|
||||
"""Check if the agent completed successfully."""
|
||||
if self._complete_history and len(self._complete_history[-1].result) > 0:
|
||||
last_result = self._complete_history[-1].result[-1]
|
||||
if last_result.is_done is True:
|
||||
return last_result.success
|
||||
return None
|
||||
|
||||
def errors(self) -> list[str | None]:
|
||||
"""Get all errors from history, with None for steps without errors."""
|
||||
errors = []
|
||||
for h in self._complete_history:
|
||||
step_errors = [r.error for r in h.result if r.error]
|
||||
# each step can have only one error
|
||||
errors.append(step_errors[0] if step_errors else None)
|
||||
return errors
|
||||
|
||||
def has_errors(self) -> bool:
|
||||
"""Check if the agent has any non-None errors."""
|
||||
return any(error is not None for error in self.errors())
|
||||
|
||||
def urls(self) -> list[str | None]:
|
||||
"""Get all URLs from history."""
|
||||
return [h.state.url if h.state.url is not None else None for h in self._complete_history]
|
||||
|
||||
def screenshot_paths(self, n_last: int | None = None, return_none_if_not_screenshot: bool = True) -> list[str | None]:
|
||||
"""Get all screenshot paths from history."""
|
||||
if n_last == 0:
|
||||
return []
|
||||
if n_last is None:
|
||||
if return_none_if_not_screenshot:
|
||||
return [h.state.screenshot_path if h.state.screenshot_path is not None else None for h in self._complete_history]
|
||||
else:
|
||||
return [h.state.screenshot_path for h in self._complete_history if h.state.screenshot_path is not None]
|
||||
else:
|
||||
if return_none_if_not_screenshot:
|
||||
return [
|
||||
h.state.screenshot_path if h.state.screenshot_path is not None else None
|
||||
for h in self._complete_history[-n_last:]
|
||||
]
|
||||
else:
|
||||
return [h.state.screenshot_path for h in self._complete_history[-n_last:] if h.state.screenshot_path is not None]
|
||||
|
||||
def screenshots(self, n_last: int | None = None, return_none_if_not_screenshot: bool = True) -> list[str | None]:
|
||||
"""Get all screenshots from history as base64 strings."""
|
||||
if n_last == 0:
|
||||
return []
|
||||
history_items = self._complete_history if n_last is None else self._complete_history[-n_last:]
|
||||
screenshots = []
|
||||
for item in history_items:
|
||||
screenshot_b64 = item.state.get_screenshot()
|
||||
if screenshot_b64:
|
||||
screenshots.append(screenshot_b64)
|
||||
else:
|
||||
if return_none_if_not_screenshot:
|
||||
screenshots.append(None)
|
||||
return screenshots
|
||||
|
||||
def action_results(self) -> list[CodeAgentResult]:
|
||||
"""Get all results from history."""
|
||||
results = []
|
||||
for h in self._complete_history:
|
||||
results.extend([r for r in h.result if r])
|
||||
return results
|
||||
|
||||
def extracted_content(self) -> list[str]:
|
||||
"""Get all extracted content from history."""
|
||||
content = []
|
||||
for h in self._complete_history:
|
||||
content.extend([r.extracted_content for r in h.result if r.extracted_content])
|
||||
return content
|
||||
|
||||
def number_of_steps(self) -> int:
|
||||
"""Get the number of steps in the history."""
|
||||
return len(self._complete_history)
|
||||
|
||||
def total_duration_seconds(self) -> float:
|
||||
"""Get total duration of all steps in seconds."""
|
||||
total = 0.0
|
||||
for h in self._complete_history:
|
||||
if h.metadata:
|
||||
total += h.metadata.duration_seconds
|
||||
return total
|
||||
|
||||
def last_action(self) -> None | dict:
|
||||
"""Last action in history - returns the last code execution."""
|
||||
if self._complete_history and self._complete_history[-1].model_output:
|
||||
return {
|
||||
'execute_code': {
|
||||
'code': self._complete_history[-1].model_output.model_output,
|
||||
'full_response': self._complete_history[-1].model_output.full_response,
|
||||
}
|
||||
}
|
||||
return None
|
||||
|
||||
def action_names(self) -> list[str]:
|
||||
"""Get all action names from history - returns 'execute_code' for each code execution."""
|
||||
action_names = []
|
||||
for action in self.model_actions():
|
||||
actions = list(action.keys())
|
||||
if actions:
|
||||
action_names.append(actions[0])
|
||||
return action_names
|
||||
|
||||
def model_thoughts(self) -> list[Any]:
|
||||
"""Get all thoughts from history - returns model_output for CodeAgent."""
|
||||
return [h.model_output for h in self._complete_history if h.model_output]
|
||||
|
||||
def model_outputs(self) -> list[CodeAgentModelOutput]:
|
||||
"""Get all model outputs from history."""
|
||||
return [h.model_output for h in self._complete_history if h.model_output]
|
||||
|
||||
def model_actions(self) -> list[dict]:
|
||||
"""Get all actions from history - returns code execution actions with their code."""
|
||||
actions = []
|
||||
for h in self._complete_history:
|
||||
if h.model_output:
|
||||
# Create one action dict per result (code execution)
|
||||
for _ in h.result:
|
||||
action_dict = {
|
||||
'execute_code': {
|
||||
'code': h.model_output.model_output,
|
||||
'full_response': h.model_output.full_response,
|
||||
}
|
||||
}
|
||||
actions.append(action_dict)
|
||||
return actions
|
||||
|
||||
def action_history(self) -> list[list[dict]]:
|
||||
"""Get truncated action history grouped by step."""
|
||||
step_outputs = []
|
||||
for h in self._complete_history:
|
||||
step_actions = []
|
||||
if h.model_output:
|
||||
for result in h.result:
|
||||
action_dict = {
|
||||
'execute_code': {
|
||||
'code': h.model_output.model_output,
|
||||
},
|
||||
'result': {
|
||||
'extracted_content': result.extracted_content,
|
||||
'is_done': result.is_done,
|
||||
'success': result.success,
|
||||
'error': result.error,
|
||||
},
|
||||
}
|
||||
step_actions.append(action_dict)
|
||||
step_outputs.append(step_actions)
|
||||
return step_outputs
|
||||
|
||||
def model_actions_filtered(self, include: list[str] | None = None) -> list[dict]:
|
||||
"""Get all model actions from history filtered - returns empty for CodeAgent."""
|
||||
return []
|
||||
|
||||
def add_item(self, history_item: CodeAgentHistory) -> None:
|
||||
"""Add a history item to the list."""
|
||||
self._complete_history.append(history_item)
|
||||
|
||||
def model_dump(self, **kwargs) -> dict[str, Any]:
|
||||
"""Custom serialization for CodeAgentHistoryList."""
|
||||
return {
|
||||
'history': [h.model_dump(**kwargs) for h in self._complete_history],
|
||||
'usage': self._usage_summary.model_dump() if self._usage_summary else None,
|
||||
}
|
||||
|
||||
def save_to_file(self, filepath: str | Path, sensitive_data: dict[str, str | dict[str, str]] | None = None) -> None:
|
||||
"""Save history to JSON file."""
|
||||
try:
|
||||
Path(filepath).parent.mkdir(parents=True, exist_ok=True)
|
||||
data = self.model_dump()
|
||||
with open(filepath, 'w', encoding='utf-8') as f:
|
||||
json.dump(data, f, indent=2)
|
||||
except Exception as e:
|
||||
raise e
|
||||
@@ -76,6 +76,13 @@ class OldConfig:
|
||||
raise AssertionError('BROWSER_USE_CLOUD_UI_URL must be a valid URL if set')
|
||||
return url
|
||||
|
||||
@property
|
||||
def BROWSER_USE_MODEL_PRICING_URL(self) -> str:
|
||||
url = os.getenv('BROWSER_USE_MODEL_PRICING_URL', '')
|
||||
if url and '://' not in url:
|
||||
raise AssertionError('BROWSER_USE_MODEL_PRICING_URL must be a valid URL if set')
|
||||
return url
|
||||
|
||||
# Path configuration
|
||||
@property
|
||||
def XDG_CACHE_HOME(self) -> Path:
|
||||
@@ -195,6 +202,7 @@ class FlatEnvConfig(BaseSettings):
|
||||
BROWSER_USE_CLOUD_SYNC: bool | None = Field(default=None)
|
||||
BROWSER_USE_CLOUD_API_URL: str = Field(default='https://api.browser-use.com')
|
||||
BROWSER_USE_CLOUD_UI_URL: str = Field(default='')
|
||||
BROWSER_USE_MODEL_PRICING_URL: str = Field(default='')
|
||||
|
||||
# Path configuration
|
||||
XDG_CACHE_HOME: str = Field(default='~/.cache')
|
||||
|
||||
@@ -9,7 +9,6 @@ from cdp_use.cdp.domsnapshot.commands import CaptureSnapshotReturns
|
||||
from cdp_use.cdp.domsnapshot.types import (
|
||||
LayoutTreeSnapshot,
|
||||
NodeTreeSnapshot,
|
||||
RareBooleanData,
|
||||
)
|
||||
|
||||
from browser_use.dom.views import DOMRect, EnhancedSnapshotNode
|
||||
@@ -30,9 +29,9 @@ REQUIRED_COMPUTED_STYLES = [
|
||||
]
|
||||
|
||||
|
||||
def _parse_rare_boolean_data(rare_data: RareBooleanData, index: int) -> bool | None:
|
||||
"""Parse rare boolean data from snapshot - returns True if index is in the rare data."""
|
||||
return index in rare_data['index']
|
||||
def _parse_rare_boolean_data(rare_data_set: set[int], index: int) -> bool | None:
|
||||
"""Parse rare boolean data from snapshot - returns True if index is in the rare data set."""
|
||||
return index in rare_data_set
|
||||
|
||||
|
||||
def _parse_computed_styles(strings: list[str], style_indices: list[int]) -> dict[str, str]:
|
||||
@@ -85,11 +84,18 @@ def build_snapshot_lookup(
|
||||
if node_index not in layout_index_map: # Only store first occurrence
|
||||
layout_index_map[node_index] = layout_idx
|
||||
|
||||
# Pre-convert rare boolean data from list to set for O(1) lookups.
|
||||
# The raw CDP data uses List[int] which makes `index in list` O(n).
|
||||
# Called once per node, this was O(n²) total — the #1 bottleneck.
|
||||
# At 20k elements: 5,925ms (list) → 2ms (set) = 3,000x speedup.
|
||||
has_clickable_data = 'isClickable' in nodes
|
||||
is_clickable_set: set[int] = set(nodes['isClickable']['index']) if has_clickable_data else set()
|
||||
|
||||
# Build snapshot lookup for each backend node id
|
||||
for backend_node_id, snapshot_index in backend_node_to_snapshot_index.items():
|
||||
is_clickable = None
|
||||
if 'isClickable' in nodes:
|
||||
is_clickable = _parse_rare_boolean_data(nodes['isClickable'], snapshot_index)
|
||||
if has_clickable_data:
|
||||
is_clickable = _parse_rare_boolean_data(is_clickable_set, snapshot_index)
|
||||
|
||||
# Find corresponding layout node
|
||||
cursor_style = None
|
||||
|
||||
@@ -24,6 +24,7 @@ async def extract_clean_markdown(
|
||||
dom_service: DomService | None = None,
|
||||
target_id: str | None = None,
|
||||
extract_links: bool = False,
|
||||
extract_images: bool = False,
|
||||
) -> tuple[str, dict[str, Any]]:
|
||||
"""Extract clean markdown from browser content using enhanced DOM tree.
|
||||
|
||||
@@ -35,6 +36,7 @@ async def extract_clean_markdown(
|
||||
dom_service: DOM service instance (page actor path)
|
||||
target_id: Target ID for the page (required when using dom_service)
|
||||
extract_links: Whether to preserve links in markdown
|
||||
extract_images: Whether to preserve inline image src URLs in markdown
|
||||
|
||||
Returns:
|
||||
tuple: (clean_markdown_content, content_statistics)
|
||||
@@ -68,6 +70,9 @@ async def extract_clean_markdown(
|
||||
# Use markdownify for clean markdown conversion
|
||||
from markdownify import markdownify as md
|
||||
|
||||
# 'td', 'th', and headings are the only elements where markdownify sets the _inline context,
|
||||
# which causes img elements to be stripped to just alt text when keep_inline_images_in=[]
|
||||
_keep_inline_images_in = ['td', 'th', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'] if extract_images else []
|
||||
content = md(
|
||||
page_html,
|
||||
heading_style='ATX', # Use # style headings
|
||||
@@ -79,7 +84,7 @@ async def extract_clean_markdown(
|
||||
escape_misc=False, # Don't escape other characters (cleaner output)
|
||||
autolinks=False, # Don't convert URLs to <> format
|
||||
default_title=False, # Don't add default title attributes
|
||||
keep_inline_images_in=[], # Don't keep inline images in any tags (we already filter base64 in HTML)
|
||||
keep_inline_images_in=_keep_inline_images_in, # Include image src URLs when extract_images=True
|
||||
)
|
||||
|
||||
initial_markdown_length = len(content)
|
||||
|
||||
@@ -1,287 +0,0 @@
|
||||
# @file purpose: Ultra-compact serializer optimized for code-use agents
|
||||
# Focuses on minimal token usage while preserving essential interactive context
|
||||
|
||||
from browser_use.dom.utils import cap_text_length
|
||||
from browser_use.dom.views import (
|
||||
EnhancedDOMTreeNode,
|
||||
NodeType,
|
||||
SimplifiedNode,
|
||||
)
|
||||
|
||||
# Minimal but sufficient attribute list for code agents
|
||||
CODE_USE_KEY_ATTRIBUTES = [
|
||||
'id', # Essential for element selection
|
||||
'name', # For form inputs
|
||||
'type', # For input types
|
||||
'placeholder', # For empty inputs
|
||||
'aria-label', # For buttons without text
|
||||
'value', # Current values
|
||||
'alt', # For images
|
||||
'class', # Keep top 2 classes for common selectors
|
||||
]
|
||||
|
||||
# Interactive elements agent can use
|
||||
INTERACTIVE_ELEMENTS = {
|
||||
'a',
|
||||
'button',
|
||||
'input',
|
||||
'textarea',
|
||||
'select',
|
||||
'form',
|
||||
}
|
||||
|
||||
# Semantic structure elements - expanded to include more content containers
|
||||
SEMANTIC_STRUCTURE = {
|
||||
'h1',
|
||||
'h2',
|
||||
'h3',
|
||||
'h4',
|
||||
'h5',
|
||||
'h6',
|
||||
'nav',
|
||||
'main',
|
||||
'header',
|
||||
'footer',
|
||||
'article',
|
||||
'section',
|
||||
'p', # Paragraphs often contain prices and product info
|
||||
'span', # Spans often contain prices and labels
|
||||
'div', # Divs with useful attributes (id/class) should be shown
|
||||
'ul',
|
||||
'ol',
|
||||
'li',
|
||||
'label',
|
||||
'img',
|
||||
}
|
||||
|
||||
|
||||
class DOMCodeAgentSerializer:
|
||||
"""Optimized DOM serializer for code-use agents - balances token efficiency with context."""
|
||||
|
||||
@staticmethod
|
||||
def serialize_tree(node: SimplifiedNode | None, include_attributes: list[str], depth: int = 0) -> str:
|
||||
"""
|
||||
Serialize DOM tree with smart token optimization.
|
||||
|
||||
Strategy:
|
||||
- Keep top 2 CSS classes for querySelector compatibility
|
||||
- Show div/span/p elements with useful attributes or text
|
||||
- Show all interactive + semantic elements
|
||||
- Inline text up to 80 chars for better context
|
||||
"""
|
||||
if not node:
|
||||
return ''
|
||||
|
||||
# Skip excluded/hidden nodes
|
||||
if hasattr(node, 'excluded_by_parent') and node.excluded_by_parent:
|
||||
return DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth)
|
||||
|
||||
if not node.should_display:
|
||||
return DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth)
|
||||
|
||||
formatted_text = []
|
||||
depth_str = ' ' * depth # Use 2 spaces instead of tabs for compactness
|
||||
|
||||
if node.original_node.node_type == NodeType.ELEMENT_NODE:
|
||||
tag = node.original_node.tag_name.lower()
|
||||
is_visible = node.original_node.snapshot_node and node.original_node.is_visible
|
||||
|
||||
# Skip invisible (except iframes)
|
||||
if not is_visible and tag not in ['iframe', 'frame']:
|
||||
return DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth)
|
||||
|
||||
# Special handling for iframes
|
||||
if tag in ['iframe', 'frame']:
|
||||
return DOMCodeAgentSerializer._serialize_iframe(node, include_attributes, depth)
|
||||
|
||||
# Build minimal attributes
|
||||
attributes_str = DOMCodeAgentSerializer._build_minimal_attributes(node.original_node)
|
||||
|
||||
# Decide if element should be shown
|
||||
is_interactive = tag in INTERACTIVE_ELEMENTS
|
||||
is_semantic = tag in SEMANTIC_STRUCTURE
|
||||
has_useful_attrs = bool(attributes_str)
|
||||
has_text = DOMCodeAgentSerializer._has_direct_text(node)
|
||||
|
||||
# Skip non-semantic, non-interactive containers without attributes
|
||||
if not is_interactive and not is_semantic and not has_useful_attrs and not has_text:
|
||||
return DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth)
|
||||
|
||||
# Collapse pointless wrappers
|
||||
if tag in {'div', 'span'} and not has_useful_attrs and not has_text and len(node.children) == 1:
|
||||
return DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth)
|
||||
|
||||
# Build element
|
||||
line = f'{depth_str}<{tag}'
|
||||
|
||||
if attributes_str:
|
||||
line += f' {attributes_str}'
|
||||
|
||||
# Inline text
|
||||
inline_text = DOMCodeAgentSerializer._get_inline_text(node)
|
||||
if inline_text:
|
||||
line += f'>{inline_text}'
|
||||
else:
|
||||
line += '>'
|
||||
|
||||
formatted_text.append(line)
|
||||
|
||||
# Children (only if no inline text)
|
||||
if node.children and not inline_text:
|
||||
children_text = DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth + 1)
|
||||
if children_text:
|
||||
formatted_text.append(children_text)
|
||||
|
||||
elif node.original_node.node_type == NodeType.TEXT_NODE:
|
||||
# Handled inline with parent
|
||||
pass
|
||||
|
||||
elif node.original_node.node_type == NodeType.DOCUMENT_FRAGMENT_NODE:
|
||||
# Shadow DOM - minimal marker
|
||||
if node.children:
|
||||
formatted_text.append(f'{depth_str}#shadow')
|
||||
children_text = DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth + 1)
|
||||
if children_text:
|
||||
formatted_text.append(children_text)
|
||||
|
||||
return '\n'.join(formatted_text)
|
||||
|
||||
@staticmethod
|
||||
def _serialize_children(node: SimplifiedNode, include_attributes: list[str], depth: int) -> str:
|
||||
"""Serialize children."""
|
||||
children_output = []
|
||||
for child in node.children:
|
||||
child_text = DOMCodeAgentSerializer.serialize_tree(child, include_attributes, depth)
|
||||
if child_text:
|
||||
children_output.append(child_text)
|
||||
return '\n'.join(children_output)
|
||||
|
||||
@staticmethod
|
||||
def _build_minimal_attributes(node: EnhancedDOMTreeNode) -> str:
|
||||
"""Build minimal but useful attributes - keep top 2 classes for selectors."""
|
||||
attrs = []
|
||||
|
||||
if node.attributes:
|
||||
for attr in CODE_USE_KEY_ATTRIBUTES:
|
||||
if attr in node.attributes:
|
||||
value = str(node.attributes[attr]).strip()
|
||||
if value:
|
||||
# Special handling for class - keep only first 2 classes
|
||||
if attr == 'class':
|
||||
classes = value.split()[:2]
|
||||
value = ' '.join(classes)
|
||||
# Cap at 25 chars
|
||||
value = cap_text_length(value, 25)
|
||||
attrs.append(f'{attr}="{value}"')
|
||||
|
||||
return ' '.join(attrs)
|
||||
|
||||
@staticmethod
|
||||
def _has_direct_text(node: SimplifiedNode) -> bool:
|
||||
"""Check if node has direct text children."""
|
||||
for child in node.children:
|
||||
if child.original_node.node_type == NodeType.TEXT_NODE:
|
||||
text = child.original_node.node_value.strip() if child.original_node.node_value else ''
|
||||
if len(text) > 1:
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _get_inline_text(node: SimplifiedNode) -> str:
|
||||
"""Get inline text (max 80 chars for better context)."""
|
||||
text_parts = []
|
||||
for child in node.children:
|
||||
if child.original_node.node_type == NodeType.TEXT_NODE:
|
||||
text = child.original_node.node_value.strip() if child.original_node.node_value else ''
|
||||
if text and len(text) > 1:
|
||||
text_parts.append(text)
|
||||
|
||||
if not text_parts:
|
||||
return ''
|
||||
|
||||
combined = ' '.join(text_parts)
|
||||
return cap_text_length(combined, 40)
|
||||
|
||||
@staticmethod
|
||||
def _serialize_iframe(node: SimplifiedNode, include_attributes: list[str], depth: int) -> str:
|
||||
"""Handle iframe minimally."""
|
||||
formatted_text = []
|
||||
depth_str = ' ' * depth
|
||||
tag = node.original_node.tag_name.lower()
|
||||
|
||||
# Minimal iframe marker
|
||||
attributes_str = DOMCodeAgentSerializer._build_minimal_attributes(node.original_node)
|
||||
line = f'{depth_str}<{tag}'
|
||||
if attributes_str:
|
||||
line += f' {attributes_str}'
|
||||
line += '>'
|
||||
formatted_text.append(line)
|
||||
|
||||
# Iframe content
|
||||
if node.original_node.content_document:
|
||||
formatted_text.append(f'{depth_str} #iframe-content')
|
||||
|
||||
# Find and serialize body content only
|
||||
for child_node in node.original_node.content_document.children_nodes or []:
|
||||
if child_node.tag_name.lower() == 'html':
|
||||
for html_child in child_node.children:
|
||||
if html_child.tag_name.lower() == 'body':
|
||||
for body_child in html_child.children:
|
||||
DOMCodeAgentSerializer._serialize_document_node(
|
||||
body_child, formatted_text, include_attributes, depth + 2
|
||||
)
|
||||
break
|
||||
|
||||
return '\n'.join(formatted_text)
|
||||
|
||||
@staticmethod
|
||||
def _serialize_document_node(
|
||||
dom_node: EnhancedDOMTreeNode, output: list[str], include_attributes: list[str], depth: int
|
||||
) -> None:
|
||||
"""Serialize document node without SimplifiedNode wrapper."""
|
||||
depth_str = ' ' * depth
|
||||
|
||||
if dom_node.node_type == NodeType.ELEMENT_NODE:
|
||||
tag = dom_node.tag_name.lower()
|
||||
|
||||
# Skip invisible
|
||||
is_visible = dom_node.snapshot_node and dom_node.is_visible
|
||||
if not is_visible:
|
||||
return
|
||||
|
||||
# Check if worth showing
|
||||
is_interactive = tag in INTERACTIVE_ELEMENTS
|
||||
is_semantic = tag in SEMANTIC_STRUCTURE
|
||||
attributes_str = DOMCodeAgentSerializer._build_minimal_attributes(dom_node)
|
||||
|
||||
if not is_interactive and not is_semantic and not attributes_str:
|
||||
# Skip but process children
|
||||
for child in dom_node.children:
|
||||
DOMCodeAgentSerializer._serialize_document_node(child, output, include_attributes, depth)
|
||||
return
|
||||
|
||||
# Build element
|
||||
line = f'{depth_str}<{tag}'
|
||||
if attributes_str:
|
||||
line += f' {attributes_str}'
|
||||
|
||||
# Get text
|
||||
text_parts = []
|
||||
for child in dom_node.children:
|
||||
if child.node_type == NodeType.TEXT_NODE and child.node_value:
|
||||
text = child.node_value.strip()
|
||||
if text and len(text) > 1:
|
||||
text_parts.append(text)
|
||||
|
||||
if text_parts:
|
||||
combined = ' '.join(text_parts)
|
||||
line += f'>{cap_text_length(combined, 25)}'
|
||||
else:
|
||||
line += '>'
|
||||
|
||||
output.append(line)
|
||||
|
||||
# Process non-text children
|
||||
for child in dom_node.children:
|
||||
if child.node_type != NodeType.TEXT_NODE:
|
||||
DOMCodeAgentSerializer._serialize_document_node(child, output, include_attributes, depth + 1)
|
||||
@@ -36,10 +36,21 @@ class RectUnionPure:
|
||||
"""
|
||||
Maintains a *disjoint* set of rectangles.
|
||||
No external dependencies - fine for a few thousand rectangles.
|
||||
|
||||
A safety cap (_MAX_RECTS) prevents exponential explosion on pages with
|
||||
many overlapping translucent layers. Once the cap is hit, contains()
|
||||
conservatively returns False (i.e. nothing is hidden), preserving
|
||||
correctness at the cost of less aggressive paint-order filtering.
|
||||
"""
|
||||
|
||||
__slots__ = ('_rects',)
|
||||
|
||||
# Safety cap: with complex overlapping layers, each add() can fragment
|
||||
# existing rects into up to 4 pieces each. On heavy pages (20k+ elements)
|
||||
# this can cause exponential growth. 5000 is generous enough for normal
|
||||
# pages but prevents runaway memory/CPU.
|
||||
_MAX_RECTS = 5000
|
||||
|
||||
def __init__(self):
|
||||
self._rects: list[Rect] = []
|
||||
|
||||
@@ -101,6 +112,10 @@ class RectUnionPure:
|
||||
Insert r unless it is already covered.
|
||||
Returns True if the union grew.
|
||||
"""
|
||||
# Safety cap: stop accepting new rects to prevent exponential explosion
|
||||
if len(self._rects) >= self._MAX_RECTS:
|
||||
return False
|
||||
|
||||
if self.contains(r):
|
||||
return False
|
||||
|
||||
|
||||
@@ -1175,11 +1175,24 @@ class DOMTreeSerializer:
|
||||
attributes_to_include['placeholder'] = 'mm/dd/yyyy'
|
||||
attributes_to_include['format'] = 'mm/dd/yyyy'
|
||||
|
||||
# Never include values from password fields - they contain secrets that must not
|
||||
# leak into DOM snapshots sent to the LLM, where prompt injection could exfiltrate them.
|
||||
is_password_field = (
|
||||
node.tag_name
|
||||
and node.tag_name.lower() == 'input'
|
||||
and node.attributes
|
||||
and node.attributes.get('type', '').lower() == 'password'
|
||||
)
|
||||
|
||||
# Include accessibility properties
|
||||
if node.ax_node and node.ax_node.properties:
|
||||
# Properties that carry field values - must be excluded for password fields
|
||||
value_properties = {'value', 'valuetext'}
|
||||
for prop in node.ax_node.properties:
|
||||
try:
|
||||
if prop.name in include_attributes and prop.value is not None:
|
||||
if is_password_field and prop.name in value_properties:
|
||||
continue
|
||||
# Convert boolean to lowercase string, keep others as-is
|
||||
if isinstance(prop.value, bool):
|
||||
attributes_to_include[prop.name] = str(prop.value).lower()
|
||||
@@ -1193,8 +1206,10 @@ class DOMTreeSerializer:
|
||||
# Special handling for form elements - ensure current value is shown
|
||||
# For text inputs, textareas, and selects, prioritize showing the current value from AX tree
|
||||
if node.tag_name and node.tag_name.lower() in ['input', 'textarea', 'select']:
|
||||
if is_password_field:
|
||||
attributes_to_include.pop('value', None)
|
||||
# ALWAYS check AX tree - it reflects actual typed value, DOM attribute may not update
|
||||
if node.ax_node and node.ax_node.properties:
|
||||
elif node.ax_node and node.ax_node.properties:
|
||||
for prop in node.ax_node.properties:
|
||||
# Try valuetext first (human-readable display value)
|
||||
if prop.name == 'valuetext' and prop.value:
|
||||
|
||||
@@ -427,6 +427,10 @@ class DomService:
|
||||
iframe_scroll_ms = (time.time() - start_iframe_scroll) * 1000
|
||||
|
||||
# Detect elements with JavaScript click event listeners (without mutating DOM)
|
||||
# On heavy pages (>10k elements) the querySelectorAll('*') + getEventListeners()
|
||||
# loop plus per-element DOM.describeNode CDP calls can take 10s+.
|
||||
# The JS expression below bails out early if the page is too heavy.
|
||||
# Elements are still detected via the accessibility tree and ClickableElementDetector.
|
||||
start_js_listener_detection = time.time()
|
||||
js_click_listener_backend_ids: set[int] = set()
|
||||
try:
|
||||
@@ -440,9 +444,15 @@ class DomService:
|
||||
return null;
|
||||
}
|
||||
|
||||
const elementsWithListeners = [];
|
||||
const allElements = document.querySelectorAll('*');
|
||||
|
||||
// Skip on heavy pages — listener detection is too expensive
|
||||
if (allElements.length > 10000) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const elementsWithListeners = [];
|
||||
|
||||
for (const el of allElements) {
|
||||
try {
|
||||
const listeners = getEventListeners(el);
|
||||
@@ -936,38 +946,57 @@ class DomService:
|
||||
|
||||
# Use pre-fetched all_frames to find the iframe's target (no redundant CDP call)
|
||||
frame_id = node.get('frameId', None)
|
||||
|
||||
# Fallback: if frameId is missing or not in all_frames, try URL matching via
|
||||
# the src attribute. This handles dynamically-injected iframes (e.g. HubSpot
|
||||
# popups, chat widgets) where Chrome hasn't yet registered the frameId in the
|
||||
# frame tree at DOM-snapshot time.
|
||||
if (not frame_id or frame_id not in all_frames) and attributes:
|
||||
src = attributes.get('src', '')
|
||||
if src:
|
||||
src_base = src.split('?')[0].rstrip('/')
|
||||
for fid, finfo in all_frames.items():
|
||||
frame_url = finfo.get('url', '').split('?')[0].rstrip('/')
|
||||
if frame_url and frame_url == src_base:
|
||||
frame_id = fid
|
||||
self.logger.debug(f'Matched cross-origin iframe by src URL: {src!r} -> frameId={fid}')
|
||||
break
|
||||
|
||||
iframe_document_target = None
|
||||
if frame_id:
|
||||
frame_info = all_frames.get(frame_id)
|
||||
iframe_document_target = None
|
||||
if frame_info and frame_info.get('frameTargetId'):
|
||||
iframe_target_id = frame_info['frameTargetId']
|
||||
# Use frameTargetId directly from all_frames — get_all_frames() already
|
||||
# validated connectivity. Do NOT gate on session_manager.get_target():
|
||||
# there is a race where _target_sessions is set (inside the lock in
|
||||
# _handle_target_attached) before _targets is populated (outside the
|
||||
# lock), so get_target() can transiently return None for a live target.
|
||||
iframe_target = self.browser_session.session_manager.get_target(iframe_target_id)
|
||||
if iframe_target:
|
||||
iframe_document_target = {
|
||||
'targetId': iframe_target.target_id,
|
||||
'url': iframe_target.url,
|
||||
'title': iframe_target.title,
|
||||
'type': iframe_target.target_type,
|
||||
}
|
||||
else:
|
||||
iframe_document_target = None
|
||||
iframe_document_target = {
|
||||
'targetId': iframe_target_id,
|
||||
'url': iframe_target.url if iframe_target else frame_info.get('url', ''),
|
||||
'title': iframe_target.title if iframe_target else frame_info.get('title', ''),
|
||||
'type': iframe_target.target_type if iframe_target else 'iframe',
|
||||
}
|
||||
|
||||
# if target actually exists in one of the frames, just recursively build the dom tree for it
|
||||
if iframe_document_target:
|
||||
self.logger.debug(
|
||||
f'Getting content document for iframe {node.get("frameId", None)} at depth {iframe_depth + 1}'
|
||||
)
|
||||
content_document, _ = await self.get_dom_tree(
|
||||
target_id=iframe_document_target['targetId'],
|
||||
all_frames=all_frames,
|
||||
# TODO: experiment with this values -> not sure whether the whole cross origin iframe should be ALWAYS included as soon as some part of it is visible or not.
|
||||
# Current config: if the cross origin iframe is AT ALL visible, then just include everything inside of it!
|
||||
# initial_html_frames=updated_html_frames,
|
||||
initial_total_frame_offset=total_frame_offset,
|
||||
iframe_depth=iframe_depth + 1,
|
||||
)
|
||||
|
||||
dom_tree_node.content_document = content_document
|
||||
dom_tree_node.content_document.parent_node = dom_tree_node
|
||||
try:
|
||||
content_document, _ = await self.get_dom_tree(
|
||||
target_id=iframe_document_target['targetId'],
|
||||
all_frames=all_frames,
|
||||
# Current config: if the cross origin iframe is AT ALL visible, include everything inside it
|
||||
initial_total_frame_offset=total_frame_offset,
|
||||
iframe_depth=iframe_depth + 1,
|
||||
)
|
||||
dom_tree_node.content_document = content_document
|
||||
dom_tree_node.content_document.parent_node = dom_tree_node
|
||||
except Exception as e:
|
||||
self.logger.debug(f'Failed to get DOM tree for cross-origin iframe {frame_id}: {e}')
|
||||
|
||||
return dom_tree_node
|
||||
|
||||
@@ -1075,10 +1104,12 @@ class DomService:
|
||||
pagination_buttons: list[dict[str, str | int | bool]] = []
|
||||
|
||||
# Common pagination patterns to look for
|
||||
# `«` and `»` are ambiguous across sites, so treat them only as prev/next
|
||||
# fallback symbols and let word-based first/last signals win
|
||||
next_patterns = ['next', '>', '»', '→', 'siguiente', 'suivant', 'weiter', 'volgende']
|
||||
prev_patterns = ['prev', 'previous', '<', '«', '←', 'anterior', 'précédent', 'zurück', 'vorige']
|
||||
first_patterns = ['first', '⇤', '«', 'primera', 'première', 'erste', 'eerste']
|
||||
last_patterns = ['last', '⇥', '»', 'última', 'dernier', 'letzte', 'laatste']
|
||||
first_patterns = ['first', '⇤', 'primera', 'première', 'erste', 'eerste']
|
||||
last_patterns = ['last', '⇥', 'última', 'dernier', 'letzte', 'laatste']
|
||||
|
||||
for index, node in selector_map.items():
|
||||
# Skip non-clickable elements
|
||||
@@ -1104,18 +1135,18 @@ class DomService:
|
||||
|
||||
button_type: str | None = None
|
||||
|
||||
# Check for next button
|
||||
if any(pattern in all_text for pattern in next_patterns):
|
||||
button_type = 'next'
|
||||
# Check for previous button
|
||||
elif any(pattern in all_text for pattern in prev_patterns):
|
||||
button_type = 'prev'
|
||||
# Check for first button
|
||||
elif any(pattern in all_text for pattern in first_patterns):
|
||||
# Match specific first/last semantics before generic prev/next fallbacks.
|
||||
if any(pattern in all_text for pattern in first_patterns):
|
||||
button_type = 'first'
|
||||
# Check for last button
|
||||
elif any(pattern in all_text for pattern in last_patterns):
|
||||
button_type = 'last'
|
||||
# Check for next button
|
||||
elif any(pattern in all_text for pattern in next_patterns):
|
||||
button_type = 'next'
|
||||
# Check for previous button
|
||||
elif any(pattern in all_text for pattern in prev_patterns):
|
||||
button_type = 'prev'
|
||||
# Check for numeric page buttons (single or double digit)
|
||||
elif text.isdigit() and len(text) <= 2 and role in ['button', 'link', '']:
|
||||
button_type = 'page_number'
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import asyncio
|
||||
import base64
|
||||
import csv
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
@@ -164,12 +166,68 @@ class JsonFile(BaseFile):
|
||||
|
||||
|
||||
class CsvFile(BaseFile):
|
||||
"""CSV file implementation"""
|
||||
"""CSV file implementation with automatic RFC 4180 normalization.
|
||||
|
||||
LLMs frequently produce malformed CSV (missing quotes around fields with commas,
|
||||
inconsistent empty fields, unescaped internal quotes). This class parses the raw
|
||||
content through Python's csv module on every write to guarantee well-formed output.
|
||||
"""
|
||||
|
||||
@property
|
||||
def extension(self) -> str:
|
||||
return 'csv'
|
||||
|
||||
@staticmethod
|
||||
def _normalize_csv(raw: str) -> str:
|
||||
"""Parse and re-serialize CSV content to fix quoting, empty fields, and escaping.
|
||||
|
||||
Handles common LLM mistakes: unquoted fields containing commas,
|
||||
unescaped quotes inside fields, inconsistent empty fields,
|
||||
trailing/leading blank lines, and double-escaped JSON output
|
||||
(literal backslash-n and backslash-quote instead of real newlines/quotes).
|
||||
"""
|
||||
stripped = raw.strip('\n\r')
|
||||
if not stripped:
|
||||
return raw
|
||||
|
||||
# Detect double-escaped LLM tool call output: if the content has no real
|
||||
# newlines but contains literal \n sequences, the entire string is likely
|
||||
# double-escaped JSON. Unescape \" → " first, then \n → newline.
|
||||
if '\n' not in stripped and '\\n' in stripped:
|
||||
stripped = stripped.replace('\\"', '"')
|
||||
stripped = stripped.replace('\\n', '\n')
|
||||
|
||||
reader = csv.reader(io.StringIO(stripped))
|
||||
rows: list[list[str]] = []
|
||||
for row in reader:
|
||||
# Skip completely empty rows (artifacts of blank lines)
|
||||
if row:
|
||||
rows.append(row)
|
||||
|
||||
if not rows:
|
||||
return raw
|
||||
|
||||
out = io.StringIO()
|
||||
writer = csv.writer(out, lineterminator='\n')
|
||||
writer.writerows(rows)
|
||||
# Strip trailing newline so callers (write_file action) control line endings
|
||||
return out.getvalue().rstrip('\n')
|
||||
|
||||
def write_file_content(self, content: str) -> None:
|
||||
"""Normalize CSV content before storing."""
|
||||
self.update_content(self._normalize_csv(content))
|
||||
|
||||
def append_file_content(self, content: str) -> None:
|
||||
"""Normalize the appended CSV rows and merge with existing content."""
|
||||
normalized_new = self._normalize_csv(content)
|
||||
if not normalized_new.strip('\n\r'):
|
||||
return
|
||||
existing = self.content
|
||||
if existing and not existing.endswith('\n'):
|
||||
existing += '\n'
|
||||
combined = existing + normalized_new
|
||||
self.update_content(self._normalize_csv(combined))
|
||||
|
||||
|
||||
class JsonlFile(BaseFile):
|
||||
"""JSONL (JSON Lines) file implementation"""
|
||||
@@ -590,7 +648,7 @@ class FileSystem:
|
||||
truncation_note = (
|
||||
f'\n\n[Showing {len(pages_included)} of {num_pages} pages. '
|
||||
f'Skipped pages: {skipped[:10]}{"..." if len(skipped) > 10 else ""}. '
|
||||
f'Use read_long_content with a specific goal to find relevant sections.]'
|
||||
f'Use extract with start_from_char to read further into the file.]'
|
||||
)
|
||||
else:
|
||||
truncation_note = ''
|
||||
|
||||
@@ -428,7 +428,7 @@ def main(
|
||||
next_steps.append('4. Set up your API key in .env file or environment:\n', style='bold')
|
||||
next_steps.append(' BROWSER_USE_API_KEY=your-key\n', style='dim')
|
||||
next_steps.append(
|
||||
' (Get your key at https://cloud.browser-use.com/dashboard/settings?tab=api-keys&new)\n\n',
|
||||
' (Get your key at https://cloud.browser-use.com/dashboard/settings?tab=api-keys&new&utm_source=oss&utm_medium=cli)\n\n',
|
||||
style='dim italic',
|
||||
)
|
||||
next_steps.append('5. Run your script:\n', style='bold')
|
||||
|
||||
@@ -223,15 +223,29 @@ class ChatAnthropic(BaseChatModel):
|
||||
stop_reason=response.stop_reason,
|
||||
)
|
||||
except Exception as e:
|
||||
# If validation fails, try to parse it as JSON first
|
||||
if isinstance(content_block.input, str):
|
||||
data = json.loads(content_block.input)
|
||||
return ChatInvokeCompletion(
|
||||
completion=output_format.model_validate(data),
|
||||
usage=usage,
|
||||
stop_reason=response.stop_reason,
|
||||
)
|
||||
raise e
|
||||
# If validation fails, try to fix common model output issues
|
||||
_input = content_block.input
|
||||
if isinstance(_input, str):
|
||||
_input = json.loads(_input)
|
||||
elif isinstance(_input, dict):
|
||||
# Model sometimes double-serializes fields
|
||||
for key, value in _input.items():
|
||||
if isinstance(value, str) and value.startswith(('[', '{')):
|
||||
try:
|
||||
_input[key] = json.loads(value)
|
||||
except json.JSONDecodeError:
|
||||
cleaned = value.replace('\n', '\\n').replace('\r', '\\r').replace('\t', '\\t')
|
||||
try:
|
||||
_input[key] = json.loads(cleaned)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
else:
|
||||
raise
|
||||
return ChatInvokeCompletion(
|
||||
completion=output_format.model_validate(_input),
|
||||
usage=usage,
|
||||
stop_reason=response.stop_reason,
|
||||
)
|
||||
|
||||
# If no tool use block found, raise an error
|
||||
raise ValueError('Expected tool use in response but none found')
|
||||
|
||||
@@ -222,14 +222,28 @@ class ChatAnthropicBedrock(ChatAWSBedrock):
|
||||
try:
|
||||
return ChatInvokeCompletion(completion=output_format.model_validate(content_block.input), usage=usage)
|
||||
except Exception as e:
|
||||
# If validation fails, try to parse it as JSON first
|
||||
if isinstance(content_block.input, str):
|
||||
data = json.loads(content_block.input)
|
||||
return ChatInvokeCompletion(
|
||||
completion=output_format.model_validate(data),
|
||||
usage=usage,
|
||||
)
|
||||
raise e
|
||||
# If validation fails, try to fix common model output issues
|
||||
_input = content_block.input
|
||||
if isinstance(_input, str):
|
||||
_input = json.loads(_input)
|
||||
elif isinstance(_input, dict):
|
||||
# Model sometimes double-serializes fields
|
||||
for key, value in _input.items():
|
||||
if isinstance(value, str) and value.startswith(('[', '{')):
|
||||
try:
|
||||
_input[key] = json.loads(value)
|
||||
except json.JSONDecodeError:
|
||||
cleaned = value.replace('\n', '\\n').replace('\r', '\\r').replace('\t', '\\t')
|
||||
try:
|
||||
_input[key] = json.loads(cleaned)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
else:
|
||||
raise
|
||||
return ChatInvokeCompletion(
|
||||
completion=output_format.model_validate(_input),
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
# If no tool use block found, raise an error
|
||||
raise ValueError('Expected tool use in response but none found')
|
||||
|
||||
@@ -9,6 +9,7 @@ from browser_use.llm.aws.serializer import AWSBedrockMessageSerializer
|
||||
from browser_use.llm.base import BaseChatModel
|
||||
from browser_use.llm.exceptions import ModelProviderError, ModelRateLimitError
|
||||
from browser_use.llm.messages import BaseMessage
|
||||
from browser_use.llm.schema import SchemaOptimizer
|
||||
from browser_use.llm.views import ChatInvokeCompletion, ChatInvokeUsage
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -116,27 +117,14 @@ class ChatAWSBedrock(BaseChatModel):
|
||||
|
||||
def _format_tools_for_request(self, output_format: type[BaseModel]) -> list[dict[str, Any]]:
|
||||
"""Format a Pydantic model as a tool for structured output."""
|
||||
schema = output_format.model_json_schema()
|
||||
|
||||
# Convert Pydantic schema to Bedrock tool format
|
||||
properties = {}
|
||||
required = []
|
||||
|
||||
for prop_name, prop_info in schema.get('properties', {}).items():
|
||||
properties[prop_name] = {
|
||||
'type': prop_info.get('type', 'string'),
|
||||
'description': prop_info.get('description', ''),
|
||||
}
|
||||
|
||||
# Add required fields
|
||||
required = schema.get('required', [])
|
||||
schema = SchemaOptimizer.create_optimized_json_schema(output_format)
|
||||
|
||||
return [
|
||||
{
|
||||
'toolSpec': {
|
||||
'name': f'extract_{output_format.__name__.lower()}',
|
||||
'description': f'Extract information in the format of {output_format.__name__}',
|
||||
'inputSchema': {'json': {'type': 'object', 'properties': properties, 'required': required}},
|
||||
'inputSchema': {'json': schema},
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@@ -90,8 +90,8 @@ class ChatBrowserUse(BaseChatModel):
|
||||
|
||||
if not self.api_key:
|
||||
raise ValueError(
|
||||
'You need to set the BROWSER_USE_API_KEY environment variable. '
|
||||
'Get your key at https://cloud.browser-use.com/new-api-key'
|
||||
'BROWSER_USE_API_KEY is not set. To use ChatBrowserUse, get a key at:\n'
|
||||
'https://cloud.browser-use.com/new-api-key?utm_source=oss&utm_medium=chat_browser_use'
|
||||
)
|
||||
|
||||
@property
|
||||
@@ -275,9 +275,17 @@ class ChatBrowserUse(BaseChatModel):
|
||||
status_code = e.response.status_code
|
||||
|
||||
if status_code == 401:
|
||||
raise ModelProviderError(message=f'Invalid API key. {error_detail}', status_code=401, model=self.name)
|
||||
raise ModelProviderError(
|
||||
message=f'BROWSER_USE_API_KEY is invalid. Get a new key at:\nhttps://cloud.browser-use.com/new-api-key?utm_source=oss&utm_medium=chat_browser_use\n{error_detail}',
|
||||
status_code=401,
|
||||
model=self.name,
|
||||
)
|
||||
elif status_code == 402:
|
||||
raise ModelProviderError(message=f'Insufficient credits. {error_detail}', status_code=402, model=self.name)
|
||||
raise ModelProviderError(
|
||||
message=f'Browser Use credits exhausted. Add more at:\nhttps://cloud.browser-use.com/billing?utm_source=oss&utm_medium=chat_browser_use\n{error_detail}',
|
||||
status_code=402,
|
||||
model=self.name,
|
||||
)
|
||||
elif status_code == 429:
|
||||
raise ModelRateLimitError(message=f'Rate limit exceeded. {error_detail}', status_code=429, model=self.name)
|
||||
elif status_code in {500, 502, 503, 504}:
|
||||
|
||||
@@ -85,7 +85,7 @@ class ChatGoogle(BaseChatModel):
|
||||
|
||||
# Model configuration
|
||||
model: VerifiedGeminiModels | str
|
||||
temperature: float | None = 0.5
|
||||
temperature: float | None = None
|
||||
top_p: float | None = None
|
||||
seed: int | None = None
|
||||
thinking_budget: int | None = None # for Gemini 2.5: -1 for dynamic (default), 0 disables, or token count
|
||||
@@ -222,6 +222,8 @@ class ChatGoogle(BaseChatModel):
|
||||
# Apply model-specific configuration (these can override config)
|
||||
if self.temperature is not None:
|
||||
config['temperature'] = self.temperature
|
||||
else:
|
||||
config['temperature'] = 1.0 if 'gemini-3' in self.model else 0.5
|
||||
|
||||
# Add system instruction if present
|
||||
if system_instruction:
|
||||
|
||||
3
browser_use/llm/litellm/__init__.py
Normal file
3
browser_use/llm/litellm/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from browser_use.llm.litellm.chat import ChatLiteLLM
|
||||
|
||||
__all__ = ['ChatLiteLLM']
|
||||
227
browser_use/llm/litellm/chat.py
Normal file
227
browser_use/llm/litellm/chat.py
Normal file
@@ -0,0 +1,227 @@
|
||||
"""
|
||||
ChatLiteLLM - LiteLLM chat model wrapper.
|
||||
|
||||
Requires the `litellm` package to be installed separately:
|
||||
pip install litellm
|
||||
|
||||
Note: litellm is NOT included as a dependency of browser-use.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, TypeVar, overload
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from browser_use.llm.base import BaseChatModel
|
||||
from browser_use.llm.exceptions import ModelProviderError, ModelRateLimitError
|
||||
from browser_use.llm.messages import BaseMessage
|
||||
from browser_use.llm.schema import SchemaOptimizer
|
||||
from browser_use.llm.views import ChatInvokeCompletion, ChatInvokeUsage
|
||||
|
||||
from .serializer import LiteLLMMessageSerializer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
T = TypeVar('T', bound=BaseModel)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ChatLiteLLM(BaseChatModel):
|
||||
model: str
|
||||
api_key: str | None = None
|
||||
api_base: str | None = None
|
||||
temperature: float | None = 0.0
|
||||
max_tokens: int | None = 4096
|
||||
max_retries: int = 3
|
||||
metadata: dict[str, Any] | None = None
|
||||
|
||||
_provider_name: str = field(default='', init=False, repr=False)
|
||||
_clean_model: str = field(default='', init=False, repr=False)
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
"""Resolve provider info from the model string via litellm."""
|
||||
try:
|
||||
from litellm import get_llm_provider # type: ignore[reportMissingImports]
|
||||
|
||||
self._clean_model, self._provider_name, _, _ = get_llm_provider(self.model)
|
||||
except Exception:
|
||||
if '/' in self.model:
|
||||
self._provider_name, self._clean_model = self.model.split('/', 1)
|
||||
else:
|
||||
self._provider_name = 'openai'
|
||||
self._clean_model = self.model
|
||||
|
||||
logger.debug(
|
||||
'ChatLiteLLM initialized: model=%s, provider=%s, clean=%s, api_base=%s',
|
||||
self.model,
|
||||
self._provider_name,
|
||||
self._clean_model,
|
||||
self.api_base or '(default)',
|
||||
)
|
||||
|
||||
@property
|
||||
def provider(self) -> str:
|
||||
return self._provider_name or 'litellm'
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return self._clean_model or self.model
|
||||
|
||||
@staticmethod
|
||||
def _parse_usage(response: Any) -> ChatInvokeUsage | None:
|
||||
"""Extract token usage from a litellm response."""
|
||||
usage = getattr(response, 'usage', None)
|
||||
if usage is None:
|
||||
return None
|
||||
|
||||
prompt_tokens = getattr(usage, 'prompt_tokens', 0) or 0
|
||||
completion_tokens = getattr(usage, 'completion_tokens', 0) or 0
|
||||
|
||||
prompt_cached = getattr(usage, 'cache_read_input_tokens', None)
|
||||
cache_creation = getattr(usage, 'cache_creation_input_tokens', None)
|
||||
|
||||
if prompt_cached is None:
|
||||
details = getattr(usage, 'prompt_tokens_details', None)
|
||||
if details:
|
||||
prompt_cached = getattr(details, 'cached_tokens', None)
|
||||
|
||||
return ChatInvokeUsage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
prompt_cached_tokens=int(prompt_cached) if prompt_cached is not None else None,
|
||||
prompt_cache_creation_tokens=int(cache_creation) if cache_creation is not None else None,
|
||||
prompt_image_tokens=None,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
)
|
||||
|
||||
@overload
|
||||
async def ainvoke(
|
||||
self,
|
||||
messages: list[BaseMessage],
|
||||
output_format: None = None,
|
||||
**kwargs: Any,
|
||||
) -> ChatInvokeCompletion[str]: ...
|
||||
|
||||
@overload
|
||||
async def ainvoke(
|
||||
self,
|
||||
messages: list[BaseMessage],
|
||||
output_format: type[T],
|
||||
**kwargs: Any,
|
||||
) -> ChatInvokeCompletion[T]: ...
|
||||
|
||||
async def ainvoke(
|
||||
self,
|
||||
messages: list[BaseMessage],
|
||||
output_format: type[T] | None = None,
|
||||
**kwargs: Any,
|
||||
) -> ChatInvokeCompletion[T] | ChatInvokeCompletion[str]:
|
||||
from litellm import acompletion # type: ignore[reportMissingImports]
|
||||
from litellm.exceptions import APIConnectionError, APIError, RateLimitError, Timeout # type: ignore[reportMissingImports]
|
||||
from litellm.types.utils import ModelResponse # type: ignore[reportMissingImports]
|
||||
|
||||
litellm_messages = LiteLLMMessageSerializer.serialize(messages)
|
||||
|
||||
params: dict[str, Any] = {
|
||||
'model': self.model,
|
||||
'messages': litellm_messages,
|
||||
'num_retries': self.max_retries,
|
||||
}
|
||||
|
||||
if self.temperature is not None:
|
||||
params['temperature'] = self.temperature
|
||||
if self.max_tokens is not None:
|
||||
params['max_tokens'] = self.max_tokens
|
||||
if self.api_key:
|
||||
params['api_key'] = self.api_key
|
||||
if self.api_base:
|
||||
params['api_base'] = self.api_base
|
||||
if self.metadata:
|
||||
params['metadata'] = self.metadata
|
||||
|
||||
if output_format is not None:
|
||||
schema = SchemaOptimizer.create_optimized_json_schema(output_format)
|
||||
params['response_format'] = {
|
||||
'type': 'json_schema',
|
||||
'json_schema': {
|
||||
'name': 'agent_output',
|
||||
'strict': True,
|
||||
'schema': schema,
|
||||
},
|
||||
}
|
||||
|
||||
try:
|
||||
raw_response = await acompletion(**params)
|
||||
except RateLimitError as e:
|
||||
raise ModelRateLimitError(
|
||||
message=str(e),
|
||||
model=self.name,
|
||||
) from e
|
||||
except Timeout as e:
|
||||
raise ModelProviderError(
|
||||
message=f'Request timed out: {e}',
|
||||
model=self.name,
|
||||
) from e
|
||||
except APIConnectionError as e:
|
||||
raise ModelProviderError(
|
||||
message=str(e),
|
||||
model=self.name,
|
||||
) from e
|
||||
except APIError as e:
|
||||
status = getattr(e, 'status_code', 502) or 502
|
||||
raise ModelProviderError(
|
||||
message=str(e),
|
||||
status_code=status,
|
||||
model=self.name,
|
||||
) from e
|
||||
except ModelProviderError:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise ModelProviderError(
|
||||
message=str(e),
|
||||
model=self.name,
|
||||
) from e
|
||||
|
||||
assert isinstance(raw_response, ModelResponse), f'Expected ModelResponse, got {type(raw_response)}'
|
||||
response: ModelResponse = raw_response
|
||||
|
||||
choice = response.choices[0] if response.choices else None
|
||||
if choice is None:
|
||||
raise ModelProviderError(
|
||||
message='Empty response: no choices returned by the model',
|
||||
status_code=502,
|
||||
model=self.name,
|
||||
)
|
||||
|
||||
content = choice.message.content or ''
|
||||
usage = self._parse_usage(response)
|
||||
stop_reason = choice.finish_reason
|
||||
|
||||
thinking: str | None = None
|
||||
msg_obj = choice.message
|
||||
reasoning = getattr(msg_obj, 'reasoning_content', None)
|
||||
if reasoning:
|
||||
thinking = str(reasoning)
|
||||
|
||||
if output_format is not None:
|
||||
if not content:
|
||||
raise ModelProviderError(
|
||||
message='Model returned empty content for structured output request',
|
||||
status_code=500,
|
||||
model=self.name,
|
||||
)
|
||||
parsed = output_format.model_validate_json(content)
|
||||
return ChatInvokeCompletion(
|
||||
completion=parsed,
|
||||
thinking=thinking,
|
||||
usage=usage,
|
||||
stop_reason=stop_reason,
|
||||
)
|
||||
|
||||
return ChatInvokeCompletion(
|
||||
completion=content,
|
||||
thinking=thinking,
|
||||
usage=usage,
|
||||
stop_reason=stop_reason,
|
||||
)
|
||||
120
browser_use/llm/litellm/serializer.py
Normal file
120
browser_use/llm/litellm/serializer.py
Normal file
@@ -0,0 +1,120 @@
|
||||
from typing import Any
|
||||
|
||||
from browser_use.llm.messages import (
|
||||
AssistantMessage,
|
||||
BaseMessage,
|
||||
ContentPartImageParam,
|
||||
ContentPartTextParam,
|
||||
SystemMessage,
|
||||
UserMessage,
|
||||
)
|
||||
|
||||
|
||||
class LiteLLMMessageSerializer:
|
||||
@staticmethod
|
||||
def _serialize_user_content(
|
||||
content: str | list[ContentPartTextParam | ContentPartImageParam],
|
||||
) -> str | list[dict[str, Any]]:
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
|
||||
parts: list[dict[str, Any]] = []
|
||||
for part in content:
|
||||
if part.type == 'text':
|
||||
parts.append(
|
||||
{
|
||||
'type': 'text',
|
||||
'text': part.text,
|
||||
}
|
||||
)
|
||||
elif part.type == 'image_url':
|
||||
parts.append(
|
||||
{
|
||||
'type': 'image_url',
|
||||
'image_url': {
|
||||
'url': part.image_url.url,
|
||||
'detail': part.image_url.detail,
|
||||
},
|
||||
}
|
||||
)
|
||||
return parts
|
||||
|
||||
@staticmethod
|
||||
def _serialize_system_content(
|
||||
content: str | list[ContentPartTextParam],
|
||||
) -> str | list[dict[str, Any]]:
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
|
||||
return [
|
||||
{
|
||||
'type': 'text',
|
||||
'text': p.text,
|
||||
}
|
||||
for p in content
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _serialize_assistant_content(
|
||||
content: str | list[Any] | None,
|
||||
) -> str | list[dict[str, Any]] | None:
|
||||
if content is None:
|
||||
return None
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
|
||||
parts = []
|
||||
for part in content:
|
||||
if part.type == 'text':
|
||||
parts.append(
|
||||
{
|
||||
'type': 'text',
|
||||
'text': part.text,
|
||||
}
|
||||
)
|
||||
elif part.type == 'refusal':
|
||||
parts.append(
|
||||
{
|
||||
'type': 'text',
|
||||
'text': f'[Refusal] {part.refusal}',
|
||||
}
|
||||
)
|
||||
return parts
|
||||
|
||||
@staticmethod
|
||||
def serialize(messages: list[BaseMessage]) -> list[dict[str, Any]]:
|
||||
result: list[dict[str, Any]] = []
|
||||
for msg in messages:
|
||||
if isinstance(msg, UserMessage):
|
||||
d: dict[str, Any] = {'role': 'user'}
|
||||
d['content'] = LiteLLMMessageSerializer._serialize_user_content(msg.content)
|
||||
if msg.name is not None:
|
||||
d['name'] = msg.name
|
||||
result.append(d)
|
||||
|
||||
elif isinstance(msg, SystemMessage):
|
||||
d = {'role': 'system'}
|
||||
d['content'] = LiteLLMMessageSerializer._serialize_system_content(msg.content)
|
||||
if msg.name is not None:
|
||||
d['name'] = msg.name
|
||||
result.append(d)
|
||||
|
||||
elif isinstance(msg, AssistantMessage):
|
||||
d = {'role': 'assistant'}
|
||||
d['content'] = LiteLLMMessageSerializer._serialize_assistant_content(msg.content)
|
||||
if msg.name is not None:
|
||||
d['name'] = msg.name
|
||||
if msg.tool_calls:
|
||||
d['tool_calls'] = [
|
||||
{
|
||||
'id': tc.id,
|
||||
'type': 'function',
|
||||
'function': {
|
||||
'name': tc.function.name,
|
||||
'arguments': tc.function.arguments,
|
||||
},
|
||||
}
|
||||
for tc in msg.tool_calls
|
||||
]
|
||||
result.append(d)
|
||||
return result
|
||||
@@ -1,4 +1,5 @@
|
||||
import json
|
||||
import os
|
||||
from collections.abc import Mapping
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Literal, TypeAlias, TypeVar, overload
|
||||
@@ -26,15 +27,30 @@ ChatVercelModel: TypeAlias = Literal[
|
||||
'alibaba/qwen-3-235b',
|
||||
'alibaba/qwen-3-30b',
|
||||
'alibaba/qwen-3-32b',
|
||||
'alibaba/qwen3-235b-a22b-thinking',
|
||||
'alibaba/qwen3-coder',
|
||||
'alibaba/qwen3-coder-30b-a3b',
|
||||
'alibaba/qwen3-coder-next',
|
||||
'alibaba/qwen3-coder-plus',
|
||||
'alibaba/qwen3-embedding-0.6b',
|
||||
'alibaba/qwen3-embedding-4b',
|
||||
'alibaba/qwen3-embedding-8b',
|
||||
'alibaba/qwen3-max',
|
||||
'alibaba/qwen3-max-preview',
|
||||
'alibaba/qwen3-max-thinking',
|
||||
'alibaba/qwen3-next-80b-a3b-instruct',
|
||||
'alibaba/qwen3-next-80b-a3b-thinking',
|
||||
'alibaba/qwen3-vl-instruct',
|
||||
'alibaba/qwen3-vl-thinking',
|
||||
'alibaba/qwen3.5-flash',
|
||||
'alibaba/qwen3.5-plus',
|
||||
'alibaba/wan-v2.5-t2v-preview',
|
||||
'alibaba/wan-v2.6-i2v',
|
||||
'alibaba/wan-v2.6-i2v-flash',
|
||||
'alibaba/wan-v2.6-r2v',
|
||||
'alibaba/wan-v2.6-r2v-flash',
|
||||
'alibaba/wan-v2.6-t2v',
|
||||
'amazon/nova-2-lite',
|
||||
'amazon/nova-lite',
|
||||
'amazon/nova-micro',
|
||||
'amazon/nova-pro',
|
||||
@@ -48,38 +64,69 @@ ChatVercelModel: TypeAlias = Literal[
|
||||
'anthropic/claude-haiku-4.5',
|
||||
'anthropic/claude-opus-4',
|
||||
'anthropic/claude-opus-4.1',
|
||||
'anthropic/claude-opus-4.5',
|
||||
'anthropic/claude-opus-4.6',
|
||||
'anthropic/claude-sonnet-4',
|
||||
'anthropic/claude-sonnet-4.5',
|
||||
'anthropic/claude-sonnet-4.6',
|
||||
'arcee-ai/trinity-large-preview',
|
||||
'arcee-ai/trinity-mini',
|
||||
'bfl/flux-kontext-max',
|
||||
'bfl/flux-kontext-pro',
|
||||
'bfl/flux-pro-1.0-fill',
|
||||
'bfl/flux-pro-1.1',
|
||||
'bfl/flux-pro-1.1-ultra',
|
||||
'bytedance/seed-1.6',
|
||||
'bytedance/seed-1.8',
|
||||
'bytedance/seedance-v1.0-lite-i2v',
|
||||
'bytedance/seedance-v1.0-lite-t2v',
|
||||
'bytedance/seedance-v1.0-pro',
|
||||
'bytedance/seedance-v1.0-pro-fast',
|
||||
'bytedance/seedance-v1.5-pro',
|
||||
'cohere/command-a',
|
||||
'cohere/command-r',
|
||||
'cohere/command-r-plus',
|
||||
'cohere/embed-v4.0',
|
||||
'deepseek/deepseek-r1',
|
||||
'deepseek/deepseek-r1-distill-llama-70b',
|
||||
'deepseek/deepseek-v3',
|
||||
'deepseek/deepseek-v3.1',
|
||||
'deepseek/deepseek-v3.1-base',
|
||||
'deepseek/deepseek-v3.1-terminus',
|
||||
'deepseek/deepseek-v3.2-exp',
|
||||
'deepseek/deepseek-v3.2-exp-thinking',
|
||||
'deepseek/deepseek-v3.2',
|
||||
'deepseek/deepseek-v3.2-thinking',
|
||||
'google/gemini-2.0-flash',
|
||||
'google/gemini-2.0-flash-lite',
|
||||
'google/gemini-2.5-flash',
|
||||
'google/gemini-2.5-flash-image',
|
||||
'google/gemini-2.5-flash-image-preview',
|
||||
'google/gemini-2.5-flash-lite',
|
||||
'google/gemini-2.5-flash-lite-preview-09-2025',
|
||||
'google/gemini-2.5-flash-preview-09-2025',
|
||||
'google/gemini-2.5-pro',
|
||||
'google/gemini-3-flash',
|
||||
'google/gemini-3-pro-image',
|
||||
'google/gemini-3-pro-preview',
|
||||
'google/gemini-3.1-flash-image-preview',
|
||||
'google/gemini-3.1-flash-lite-preview',
|
||||
'google/gemini-3.1-pro-preview',
|
||||
'google/gemini-embedding-001',
|
||||
'google/gemma-2-9b',
|
||||
'google/imagen-4.0-fast-generate-001',
|
||||
'google/imagen-4.0-generate-001',
|
||||
'google/imagen-4.0-ultra-generate-001',
|
||||
'google/text-embedding-005',
|
||||
'google/text-multilingual-embedding-002',
|
||||
'google/veo-3.0-fast-generate-001',
|
||||
'google/veo-3.0-generate-001',
|
||||
'google/veo-3.1-fast-generate-001',
|
||||
'google/veo-3.1-generate-001',
|
||||
'inception/mercury-2',
|
||||
'inception/mercury-coder-small',
|
||||
'klingai/kling-v2.5-turbo-i2v',
|
||||
'klingai/kling-v2.5-turbo-t2v',
|
||||
'klingai/kling-v2.6-i2v',
|
||||
'klingai/kling-v2.6-motion-control',
|
||||
'klingai/kling-v2.6-t2v',
|
||||
'klingai/kling-v3.0-i2v',
|
||||
'klingai/kling-v3.0-t2v',
|
||||
'kwaipilot/kat-coder-pro-v1',
|
||||
'meituan/longcat-flash-chat',
|
||||
'meituan/longcat-flash-thinking',
|
||||
'meta/llama-3-70b',
|
||||
'meta/llama-3-8b',
|
||||
'meta/llama-3.1-70b',
|
||||
'meta/llama-3.1-8b',
|
||||
'meta/llama-3.2-11b',
|
||||
@@ -89,27 +136,40 @@ ChatVercelModel: TypeAlias = Literal[
|
||||
'meta/llama-3.3-70b',
|
||||
'meta/llama-4-maverick',
|
||||
'meta/llama-4-scout',
|
||||
'minimax/minimax-m2',
|
||||
'minimax/minimax-m2.1',
|
||||
'minimax/minimax-m2.1-lightning',
|
||||
'minimax/minimax-m2.5',
|
||||
'minimax/minimax-m2.5-highspeed',
|
||||
'mistral/codestral',
|
||||
'mistral/codestral-embed',
|
||||
'mistral/devstral-2',
|
||||
'mistral/devstral-small',
|
||||
'mistral/devstral-small-2',
|
||||
'mistral/magistral-medium',
|
||||
'mistral/magistral-medium-2506',
|
||||
'mistral/magistral-small',
|
||||
'mistral/magistral-small-2506',
|
||||
'mistral/ministral-14b',
|
||||
'mistral/ministral-3b',
|
||||
'mistral/ministral-8b',
|
||||
'mistral/mistral-embed',
|
||||
'mistral/mistral-large',
|
||||
'mistral/mistral-large-3',
|
||||
'mistral/mistral-medium',
|
||||
'mistral/mistral-nemo',
|
||||
'mistral/mistral-small',
|
||||
'mistral/mixtral-8x22b-instruct',
|
||||
'mistral/pixtral-12b',
|
||||
'mistral/pixtral-large',
|
||||
'moonshotai/kimi-k2',
|
||||
'moonshotai/kimi-k2-0905',
|
||||
'moonshotai/kimi-k2-thinking',
|
||||
'moonshotai/kimi-k2-thinking-turbo',
|
||||
'moonshotai/kimi-k2-turbo',
|
||||
'moonshotai/kimi-k2.5',
|
||||
'morph/morph-v3-fast',
|
||||
'morph/morph-v3-large',
|
||||
'nvidia/nemotron-3-nano-30b-a3b',
|
||||
'nvidia/nemotron-nano-12b-v2-vl',
|
||||
'nvidia/nemotron-nano-9b-v2',
|
||||
'openai/gpt-3.5-turbo',
|
||||
'openai/gpt-3.5-turbo-instruct',
|
||||
'openai/gpt-4-turbo',
|
||||
@@ -118,16 +178,37 @@ ChatVercelModel: TypeAlias = Literal[
|
||||
'openai/gpt-4.1-nano',
|
||||
'openai/gpt-4o',
|
||||
'openai/gpt-4o-mini',
|
||||
'openai/gpt-4o-mini-search-preview',
|
||||
'openai/gpt-5',
|
||||
'openai/gpt-5-chat',
|
||||
'openai/gpt-5-codex',
|
||||
'openai/gpt-5-mini',
|
||||
'openai/gpt-5-nano',
|
||||
'openai/gpt-5-pro',
|
||||
'openai/gpt-5.1-codex',
|
||||
'openai/gpt-5.1-codex-max',
|
||||
'openai/gpt-5.1-codex-mini',
|
||||
'openai/gpt-5.1-instant',
|
||||
'openai/gpt-5.1-thinking',
|
||||
'openai/gpt-5.2',
|
||||
'openai/gpt-5.2-chat',
|
||||
'openai/gpt-5.2-codex',
|
||||
'openai/gpt-5.2-pro',
|
||||
'openai/gpt-5.3-chat',
|
||||
'openai/gpt-5.3-codex',
|
||||
'openai/gpt-5.4',
|
||||
'openai/gpt-5.4-pro',
|
||||
'openai/gpt-image-1',
|
||||
'openai/gpt-image-1-mini',
|
||||
'openai/gpt-image-1.5',
|
||||
'openai/gpt-oss-120b',
|
||||
'openai/gpt-oss-20b',
|
||||
'openai/gpt-oss-safeguard-20b',
|
||||
'openai/o1',
|
||||
'openai/o3',
|
||||
'openai/o3-deep-research',
|
||||
'openai/o3-mini',
|
||||
'openai/o3-pro',
|
||||
'openai/o4-mini',
|
||||
'openai/text-embedding-3-large',
|
||||
'openai/text-embedding-3-small',
|
||||
@@ -136,6 +217,11 @@ ChatVercelModel: TypeAlias = Literal[
|
||||
'perplexity/sonar-pro',
|
||||
'perplexity/sonar-reasoning',
|
||||
'perplexity/sonar-reasoning-pro',
|
||||
'prime-intellect/intellect-3',
|
||||
'recraft/recraft-v2',
|
||||
'recraft/recraft-v3',
|
||||
'recraft/recraft-v4',
|
||||
'recraft/recraft-v4-pro',
|
||||
'stealth/sonoma-dusk-alpha',
|
||||
'stealth/sonoma-sky-alpha',
|
||||
'vercel/v0-1.0-md',
|
||||
@@ -143,11 +229,13 @@ ChatVercelModel: TypeAlias = Literal[
|
||||
'voyage/voyage-3-large',
|
||||
'voyage/voyage-3.5',
|
||||
'voyage/voyage-3.5-lite',
|
||||
'voyage/voyage-4',
|
||||
'voyage/voyage-4-large',
|
||||
'voyage/voyage-4-lite',
|
||||
'voyage/voyage-code-2',
|
||||
'voyage/voyage-code-3',
|
||||
'voyage/voyage-finance-2',
|
||||
'voyage/voyage-law-2',
|
||||
'xai/grok-2',
|
||||
'xai/grok-2-vision',
|
||||
'xai/grok-3',
|
||||
'xai/grok-3-fast',
|
||||
@@ -156,11 +244,25 @@ ChatVercelModel: TypeAlias = Literal[
|
||||
'xai/grok-4',
|
||||
'xai/grok-4-fast-non-reasoning',
|
||||
'xai/grok-4-fast-reasoning',
|
||||
'xai/grok-4.1-fast-non-reasoning',
|
||||
'xai/grok-4.1-fast-reasoning',
|
||||
'xai/grok-4.20-multi-agent-beta',
|
||||
'xai/grok-4.20-non-reasoning-beta',
|
||||
'xai/grok-4.20-reasoning-beta',
|
||||
'xai/grok-code-fast-1',
|
||||
'xai/grok-imagine-image',
|
||||
'xai/grok-imagine-image-pro',
|
||||
'xai/grok-imagine-video',
|
||||
'xiaomi/mimo-v2-flash',
|
||||
'zai/glm-4.5',
|
||||
'zai/glm-4.5-air',
|
||||
'zai/glm-4.5v',
|
||||
'zai/glm-4.6',
|
||||
'zai/glm-4.6v',
|
||||
'zai/glm-4.6v-flash',
|
||||
'zai/glm-4.7',
|
||||
'zai/glm-4.7-flashx',
|
||||
'zai/glm-5',
|
||||
]
|
||||
|
||||
|
||||
@@ -181,7 +283,8 @@ class ChatVercel(BaseChatModel):
|
||||
|
||||
Args:
|
||||
model: The model identifier
|
||||
api_key: Your Vercel API key
|
||||
api_key: Your Vercel AI Gateway API key. If not provided, falls back to
|
||||
AI_GATEWAY_API_KEY or VERCEL_OIDC_TOKEN environment variables.
|
||||
base_url: The Vercel AI Gateway endpoint (defaults to https://ai-gateway.vercel.sh/v1)
|
||||
temperature: Sampling temperature (0-2)
|
||||
max_tokens: Maximum tokens to generate
|
||||
@@ -191,6 +294,14 @@ class ChatVercel(BaseChatModel):
|
||||
max_retries: Maximum number of retries for failed requests
|
||||
provider_options: Provider routing options for the gateway. Use this to control which
|
||||
providers are used and in what order. Example: {'gateway': {'order': ['vertex', 'anthropic']}}
|
||||
reasoning: Optional provider-specific reasoning configuration. Merged into
|
||||
providerOptions under the appropriate provider key. Example for Anthropic:
|
||||
{'anthropic': {'thinking': {'type': 'adaptive'}}}. Example for OpenAI:
|
||||
{'openai': {'reasoningEffort': 'high', 'reasoningSummary': 'detailed'}}.
|
||||
model_fallbacks: Optional list of fallback model IDs tried in order if the primary
|
||||
model fails. Passed as providerOptions.gateway.models.
|
||||
caching: Optional caching mode for the gateway. Currently supports 'auto', which
|
||||
enables provider-specific prompt caching via providerOptions.gateway.caching.
|
||||
"""
|
||||
|
||||
# Model configuration
|
||||
@@ -206,8 +317,11 @@ class ChatVercel(BaseChatModel):
|
||||
'o3',
|
||||
'o4',
|
||||
'gpt-oss',
|
||||
'gpt-5.2-pro',
|
||||
'gpt-5.4-pro',
|
||||
'deepseek-r1',
|
||||
'qwen3-next-80b-a3b-thinking',
|
||||
'-thinking',
|
||||
'perplexity/sonar-reasoning',
|
||||
]
|
||||
)
|
||||
|
||||
@@ -221,6 +335,9 @@ class ChatVercel(BaseChatModel):
|
||||
http_client: httpx.AsyncClient | None = None
|
||||
_strict_response_validation: bool = False
|
||||
provider_options: dict[str, Any] | None = None
|
||||
reasoning: dict[str, dict[str, Any]] | None = None
|
||||
model_fallbacks: list[str] | None = None
|
||||
caching: Literal['auto'] | None = None
|
||||
|
||||
# Static
|
||||
@property
|
||||
@@ -229,8 +346,10 @@ class ChatVercel(BaseChatModel):
|
||||
|
||||
def _get_client_params(self) -> dict[str, Any]:
|
||||
"""Prepare client parameters dictionary."""
|
||||
api_key = self.api_key or os.getenv('AI_GATEWAY_API_KEY') or os.getenv('VERCEL_OIDC_TOKEN')
|
||||
|
||||
base_params = {
|
||||
'api_key': self.api_key,
|
||||
'api_key': api_key,
|
||||
'base_url': self.base_url,
|
||||
'timeout': self.timeout,
|
||||
'max_retries': self.max_retries,
|
||||
@@ -387,8 +506,36 @@ class ChatVercel(BaseChatModel):
|
||||
model_params['max_tokens'] = self.max_tokens
|
||||
if self.top_p is not None:
|
||||
model_params['top_p'] = self.top_p
|
||||
|
||||
extra_body: dict[str, Any] = {}
|
||||
|
||||
provider_opts: dict[str, Any] = {}
|
||||
if self.provider_options:
|
||||
model_params['extra_body'] = {'providerOptions': self.provider_options}
|
||||
provider_opts.update(self.provider_options)
|
||||
|
||||
if self.reasoning:
|
||||
# Merge provider-specific reasoning options (ex: {'anthropic': {'thinking': ...}})
|
||||
for provider_name, opts in self.reasoning.items():
|
||||
existing = provider_opts.get(provider_name, {})
|
||||
existing.update(opts)
|
||||
provider_opts[provider_name] = existing
|
||||
|
||||
gateway_opts: dict[str, Any] = provider_opts.get('gateway', {})
|
||||
|
||||
if self.model_fallbacks:
|
||||
gateway_opts['models'] = self.model_fallbacks
|
||||
|
||||
if self.caching:
|
||||
gateway_opts['caching'] = self.caching
|
||||
|
||||
if gateway_opts:
|
||||
provider_opts['gateway'] = gateway_opts
|
||||
|
||||
if provider_opts:
|
||||
extra_body['providerOptions'] = provider_opts
|
||||
|
||||
if extra_body:
|
||||
model_params['extra_body'] = extra_body
|
||||
|
||||
if output_format is None:
|
||||
# Return string response
|
||||
@@ -439,14 +586,10 @@ class ChatVercel(BaseChatModel):
|
||||
|
||||
vercel_messages = VercelMessageSerializer.serialize_messages(modified_messages)
|
||||
|
||||
request_params = model_params.copy()
|
||||
if self.provider_options:
|
||||
request_params['extra_body'] = {'providerOptions': self.provider_options}
|
||||
|
||||
response = await self.get_client().chat.completions.create(
|
||||
model=self.model,
|
||||
messages=vercel_messages,
|
||||
**request_params,
|
||||
**model_params,
|
||||
)
|
||||
|
||||
content = response.choices[0].message.content if response.choices else None
|
||||
@@ -491,10 +634,6 @@ class ChatVercel(BaseChatModel):
|
||||
'schema': schema,
|
||||
}
|
||||
|
||||
request_params = model_params.copy()
|
||||
if self.provider_options:
|
||||
request_params['extra_body'] = {'providerOptions': self.provider_options}
|
||||
|
||||
response = await self.get_client().chat.completions.create(
|
||||
model=self.model,
|
||||
messages=vercel_messages,
|
||||
@@ -502,7 +641,7 @@ class ChatVercel(BaseChatModel):
|
||||
json_schema=response_format_schema,
|
||||
type='json_schema',
|
||||
),
|
||||
**request_params,
|
||||
**model_params,
|
||||
)
|
||||
|
||||
content = response.choices[0].message.content if response.choices else None
|
||||
|
||||
@@ -223,9 +223,7 @@ def setup_logging(stream=None, log_level=None, force_setup=False, debug_log_file
|
||||
'trafilatura.htmlprocessing',
|
||||
'trafilatura',
|
||||
'groq',
|
||||
'portalocker',
|
||||
'google_genai',
|
||||
'portalocker.utils',
|
||||
'websockets', # General websockets (but not websockets.client which we need)
|
||||
]
|
||||
for logger_name in third_party_loggers:
|
||||
|
||||
@@ -329,6 +329,7 @@ class MCPClient:
|
||||
return ActionResult(
|
||||
extracted_content=extracted_content,
|
||||
long_term_memory=f"Used MCP tool '{tool.name}' from {self.server_name}",
|
||||
include_extracted_content_only_once=True,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
@@ -372,6 +373,7 @@ class MCPClient:
|
||||
return ActionResult(
|
||||
extracted_content=extracted_content,
|
||||
long_term_memory=f"Used MCP tool '{tool.name}' from {self.server_name}",
|
||||
include_extracted_content_only_once=True,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
@@ -232,13 +232,21 @@ class BrowserUseServer:
|
||||
),
|
||||
types.Tool(
|
||||
name='browser_click',
|
||||
description='Click an element on the page by its index',
|
||||
description='Click an element by index or at specific viewport coordinates. Use index for elements from browser_get_state, or coordinate_x/coordinate_y for pixel-precise clicking.',
|
||||
inputSchema={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'index': {
|
||||
'type': 'integer',
|
||||
'description': 'The index of the link or element to click (from browser_get_state)',
|
||||
'description': 'The index of the element to click (from browser_get_state). Provide this OR coordinate_x+coordinate_y.',
|
||||
},
|
||||
'coordinate_x': {
|
||||
'type': 'integer',
|
||||
'description': 'X coordinate in pixels from the left edge of the viewport. Must be used together with coordinate_y. Provide this OR index.',
|
||||
},
|
||||
'coordinate_y': {
|
||||
'type': 'integer',
|
||||
'description': 'Y coordinate in pixels from the top edge of the viewport. Must be used together with coordinate_x. Provide this OR index.',
|
||||
},
|
||||
'new_tab': {
|
||||
'type': 'boolean',
|
||||
@@ -246,12 +254,11 @@ class BrowserUseServer:
|
||||
'default': False,
|
||||
},
|
||||
},
|
||||
'required': ['index'],
|
||||
},
|
||||
),
|
||||
types.Tool(
|
||||
name='browser_type',
|
||||
description='Type text into an input field',
|
||||
description='Type text into an input field. Clears existing text by default; pass text="" to clear only.',
|
||||
inputSchema={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
@@ -259,7 +266,10 @@ class BrowserUseServer:
|
||||
'type': 'integer',
|
||||
'description': 'The index of the input element (from browser_get_state)',
|
||||
},
|
||||
'text': {'type': 'string', 'description': 'The text to type'},
|
||||
'text': {
|
||||
'type': 'string',
|
||||
'description': 'The text to type. Pass an empty string ("") to clear the field without typing.',
|
||||
},
|
||||
},
|
||||
'required': ['index', 'text'],
|
||||
},
|
||||
@@ -294,6 +304,33 @@ class BrowserUseServer:
|
||||
'required': ['query'],
|
||||
},
|
||||
),
|
||||
types.Tool(
|
||||
name='browser_get_html',
|
||||
description='Get the raw HTML of the current page or a specific element by CSS selector',
|
||||
inputSchema={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'selector': {
|
||||
'type': 'string',
|
||||
'description': 'Optional CSS selector to get HTML of a specific element. If omitted, returns full page HTML.',
|
||||
},
|
||||
},
|
||||
},
|
||||
),
|
||||
types.Tool(
|
||||
name='browser_screenshot',
|
||||
description='Take a screenshot of the current page. Returns viewport metadata as text and the screenshot as an image.',
|
||||
inputSchema={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'full_page': {
|
||||
'type': 'boolean',
|
||||
'description': 'Whether to capture the full scrollable page or just the visible viewport',
|
||||
'default': False,
|
||||
},
|
||||
},
|
||||
},
|
||||
),
|
||||
types.Tool(
|
||||
name='browser_scroll',
|
||||
description='Scroll the page',
|
||||
@@ -361,8 +398,7 @@ class BrowserUseServer:
|
||||
},
|
||||
'model': {
|
||||
'type': 'string',
|
||||
'description': 'LLM model to use (e.g., gpt-4o, claude-3-opus-20240229)',
|
||||
'default': 'gpt-4o',
|
||||
'description': 'LLM model to use (e.g., gpt-4o, claude-3-opus-20240229). Defaults to the configured model.',
|
||||
},
|
||||
'allowed_domains': {
|
||||
'type': 'array',
|
||||
@@ -417,12 +453,14 @@ class BrowserUseServer:
|
||||
return []
|
||||
|
||||
@self.server.call_tool()
|
||||
async def handle_call_tool(name: str, arguments: dict[str, Any] | None) -> list[types.TextContent]:
|
||||
async def handle_call_tool(name: str, arguments: dict[str, Any] | None) -> list[types.TextContent | types.ImageContent]:
|
||||
"""Handle tool execution."""
|
||||
start_time = time.time()
|
||||
error_msg = None
|
||||
try:
|
||||
result = await self._execute_tool(name, arguments or {})
|
||||
if isinstance(result, list):
|
||||
return result
|
||||
return [types.TextContent(type='text', text=result)]
|
||||
except Exception as e:
|
||||
error_msg = str(e)
|
||||
@@ -441,15 +479,17 @@ class BrowserUseServer:
|
||||
)
|
||||
)
|
||||
|
||||
async def _execute_tool(self, tool_name: str, arguments: dict[str, Any]) -> str:
|
||||
"""Execute a browser-use tool."""
|
||||
async def _execute_tool(
|
||||
self, tool_name: str, arguments: dict[str, Any]
|
||||
) -> str | list[types.TextContent | types.ImageContent]:
|
||||
"""Execute a browser-use tool. Returns str for most tools, or a content list for tools with image output."""
|
||||
|
||||
# Agent-based tools
|
||||
if tool_name == 'retry_with_browser_use_agent':
|
||||
return await self._retry_with_browser_use_agent(
|
||||
task=arguments['task'],
|
||||
max_steps=arguments.get('max_steps', 100),
|
||||
model=arguments.get('model', 'gpt-4o'),
|
||||
model=arguments.get('model'),
|
||||
allowed_domains=arguments.get('allowed_domains', []),
|
||||
use_vision=arguments.get('use_vision', True),
|
||||
)
|
||||
@@ -474,13 +514,32 @@ class BrowserUseServer:
|
||||
return await self._navigate(arguments['url'], arguments.get('new_tab', False))
|
||||
|
||||
elif tool_name == 'browser_click':
|
||||
return await self._click(arguments['index'], arguments.get('new_tab', False))
|
||||
return await self._click(
|
||||
index=arguments.get('index'),
|
||||
coordinate_x=arguments.get('coordinate_x'),
|
||||
coordinate_y=arguments.get('coordinate_y'),
|
||||
new_tab=arguments.get('new_tab', False),
|
||||
)
|
||||
|
||||
elif tool_name == 'browser_type':
|
||||
return await self._type_text(arguments['index'], arguments['text'])
|
||||
|
||||
elif tool_name == 'browser_get_state':
|
||||
return await self._get_browser_state(arguments.get('include_screenshot', False))
|
||||
state_json, screenshot_b64 = await self._get_browser_state(arguments.get('include_screenshot', False))
|
||||
content: list[types.TextContent | types.ImageContent] = [types.TextContent(type='text', text=state_json)]
|
||||
if screenshot_b64:
|
||||
content.append(types.ImageContent(type='image', data=screenshot_b64, mimeType='image/png'))
|
||||
return content
|
||||
|
||||
elif tool_name == 'browser_get_html':
|
||||
return await self._get_html(arguments.get('selector'))
|
||||
|
||||
elif tool_name == 'browser_screenshot':
|
||||
meta_json, screenshot_b64 = await self._screenshot(arguments.get('full_page', False))
|
||||
content: list[types.TextContent | types.ImageContent] = [types.TextContent(type='text', text=meta_json)]
|
||||
if screenshot_b64:
|
||||
content.append(types.ImageContent(type='image', data=screenshot_b64, mimeType='image/png'))
|
||||
return content
|
||||
|
||||
elif tool_name == 'browser_extract_content':
|
||||
return await self._extract_content(arguments['query'], arguments.get('extract_links', False))
|
||||
@@ -575,7 +634,7 @@ class BrowserUseServer:
|
||||
self,
|
||||
task: str,
|
||||
max_steps: int = 100,
|
||||
model: str = 'gpt-4o',
|
||||
model: str | None = None,
|
||||
allowed_domains: list[str] | None = None,
|
||||
use_vision: bool = True,
|
||||
) -> str:
|
||||
@@ -588,27 +647,25 @@ class BrowserUseServer:
|
||||
# Get LLM provider
|
||||
model_provider = llm_config.get('model_provider') or os.getenv('MODEL_PROVIDER')
|
||||
|
||||
# 如果model_provider不等于空,且等Bedrock
|
||||
# Get Bedrock-specific config
|
||||
if model_provider and model_provider.lower() == 'bedrock':
|
||||
llm_model = llm_config.get('model') or os.getenv('MODEL') or 'us.anthropic.claude-sonnet-4-20250514-v1:0'
|
||||
aws_region = llm_config.get('region') or os.getenv('REGION')
|
||||
if not aws_region:
|
||||
aws_region = 'us-east-1'
|
||||
aws_sso_auth = llm_config.get('aws_sso_auth', False)
|
||||
llm = ChatAWSBedrock(
|
||||
model=llm_model, # or any Bedrock model
|
||||
aws_region=aws_region,
|
||||
aws_sso_auth=True,
|
||||
aws_sso_auth=aws_sso_auth,
|
||||
)
|
||||
else:
|
||||
api_key = llm_config.get('api_key') or os.getenv('OPENAI_API_KEY')
|
||||
if not api_key:
|
||||
return 'Error: OPENAI_API_KEY not set in config or environment'
|
||||
|
||||
# Override model if provided in tool call
|
||||
if model != llm_config.get('model', 'gpt-4o'):
|
||||
llm_model = model
|
||||
else:
|
||||
llm_model = llm_config.get('model', 'gpt-4o')
|
||||
# Use explicit model from tool call, otherwise fall back to configured default
|
||||
llm_model = model or llm_config.get('model', 'gpt-4o')
|
||||
|
||||
base_url = llm_config.get('base_url', None)
|
||||
kwargs = {}
|
||||
@@ -693,14 +750,34 @@ class BrowserUseServer:
|
||||
await event
|
||||
return f'Navigated to: {url}'
|
||||
|
||||
async def _click(self, index: int, new_tab: bool = False) -> str:
|
||||
"""Click an element by index."""
|
||||
async def _click(
|
||||
self,
|
||||
index: int | None = None,
|
||||
coordinate_x: int | None = None,
|
||||
coordinate_y: int | None = None,
|
||||
new_tab: bool = False,
|
||||
) -> str:
|
||||
"""Click an element by index or at viewport coordinates."""
|
||||
if not self.browser_session:
|
||||
return 'Error: No browser session active'
|
||||
|
||||
# Update session activity
|
||||
self._update_session_activity(self.browser_session.id)
|
||||
|
||||
# Coordinate-based clicking
|
||||
if coordinate_x is not None and coordinate_y is not None:
|
||||
from browser_use.browser.events import ClickCoordinateEvent
|
||||
|
||||
event = self.browser_session.event_bus.dispatch(
|
||||
ClickCoordinateEvent(coordinate_x=coordinate_x, coordinate_y=coordinate_y)
|
||||
)
|
||||
await event
|
||||
return f'Clicked at coordinates ({coordinate_x}, {coordinate_y})'
|
||||
|
||||
# Index-based clicking
|
||||
if index is None:
|
||||
return 'Error: Provide either index or both coordinate_x and coordinate_y'
|
||||
|
||||
# Get the element
|
||||
element = await self.browser_session.get_dom_element_by_index(index)
|
||||
if not element:
|
||||
@@ -730,7 +807,6 @@ class BrowserUseServer:
|
||||
return f'Clicked element {index} and opened in new tab {full_url[:20]}...'
|
||||
else:
|
||||
# For non-link elements, just do a normal click
|
||||
# Opening in new tab without href is not reliably supported
|
||||
from browser_use.browser.events import ClickElementEvent
|
||||
|
||||
event = self.browser_session.event_bus.dispatch(ClickElementEvent(node=element))
|
||||
@@ -790,23 +866,39 @@ class BrowserUseServer:
|
||||
else:
|
||||
return f"Typed '{text}' into element {index}"
|
||||
|
||||
async def _get_browser_state(self, include_screenshot: bool = False) -> str:
|
||||
"""Get current browser state."""
|
||||
async def _get_browser_state(self, include_screenshot: bool = False) -> tuple[str, str | None]:
|
||||
"""Get current browser state. Returns (state_json, screenshot_b64 | None)."""
|
||||
if not self.browser_session:
|
||||
return 'Error: No browser session active'
|
||||
return 'Error: No browser session active', None
|
||||
|
||||
state = await self.browser_session.get_browser_state_summary()
|
||||
|
||||
result = {
|
||||
result: dict[str, Any] = {
|
||||
'url': state.url,
|
||||
'title': state.title,
|
||||
'tabs': [{'url': tab.url, 'title': tab.title} for tab in state.tabs],
|
||||
'interactive_elements': [],
|
||||
}
|
||||
|
||||
# Add viewport info so the LLM knows the coordinate space
|
||||
if state.page_info:
|
||||
pi = state.page_info
|
||||
result['viewport'] = {
|
||||
'width': pi.viewport_width,
|
||||
'height': pi.viewport_height,
|
||||
}
|
||||
result['page'] = {
|
||||
'width': pi.page_width,
|
||||
'height': pi.page_height,
|
||||
}
|
||||
result['scroll'] = {
|
||||
'x': pi.scroll_x,
|
||||
'y': pi.scroll_y,
|
||||
}
|
||||
|
||||
# Add interactive elements with their indices
|
||||
for index, element in state.dom_state.selector_map.items():
|
||||
elem_info = {
|
||||
elem_info: dict[str, Any] = {
|
||||
'index': index,
|
||||
'tag': element.tag_name,
|
||||
'text': element.get_all_children_text(max_depth=2)[:100],
|
||||
@@ -817,10 +909,69 @@ class BrowserUseServer:
|
||||
elem_info['href'] = element.attributes['href']
|
||||
result['interactive_elements'].append(elem_info)
|
||||
|
||||
# Return screenshot separately as ImageContent instead of embedding base64 in JSON
|
||||
screenshot_b64 = None
|
||||
if include_screenshot and state.screenshot:
|
||||
result['screenshot'] = state.screenshot
|
||||
screenshot_b64 = state.screenshot
|
||||
# Include viewport dimensions in JSON so LLM can map pixels to coordinates
|
||||
if state.page_info:
|
||||
result['screenshot_dimensions'] = {
|
||||
'width': state.page_info.viewport_width,
|
||||
'height': state.page_info.viewport_height,
|
||||
}
|
||||
|
||||
return json.dumps(result, indent=2)
|
||||
return json.dumps(result, indent=2), screenshot_b64
|
||||
|
||||
async def _get_html(self, selector: str | None = None) -> str:
|
||||
"""Get raw HTML of the page or a specific element."""
|
||||
if not self.browser_session:
|
||||
return 'Error: No browser session active'
|
||||
|
||||
self._update_session_activity(self.browser_session.id)
|
||||
|
||||
cdp_session = await self.browser_session.get_or_create_cdp_session(target_id=None, focus=False)
|
||||
if not cdp_session:
|
||||
return 'Error: No active CDP session'
|
||||
|
||||
if selector:
|
||||
js = (
|
||||
f'(function(){{ const el = document.querySelector({json.dumps(selector)}); return el ? el.outerHTML : null; }})()'
|
||||
)
|
||||
else:
|
||||
js = 'document.documentElement.outerHTML'
|
||||
|
||||
result = await cdp_session.cdp_client.send.Runtime.evaluate(
|
||||
params={'expression': js, 'returnByValue': True},
|
||||
session_id=cdp_session.session_id,
|
||||
)
|
||||
html = result.get('result', {}).get('value')
|
||||
if html is None:
|
||||
return f'No element found for selector: {selector}' if selector else 'Error: Could not get page HTML'
|
||||
return html
|
||||
|
||||
async def _screenshot(self, full_page: bool = False) -> tuple[str, str | None]:
|
||||
"""Take a screenshot. Returns (metadata_json, screenshot_b64 | None)."""
|
||||
if not self.browser_session:
|
||||
return 'Error: No browser session active', None
|
||||
|
||||
import base64
|
||||
|
||||
self._update_session_activity(self.browser_session.id)
|
||||
|
||||
data = await self.browser_session.take_screenshot(full_page=full_page)
|
||||
b64 = base64.b64encode(data).decode()
|
||||
|
||||
# Return screenshot separately as ImageContent instead of embedding base64 in JSON
|
||||
state = await self.browser_session.get_browser_state_summary()
|
||||
result: dict[str, Any] = {
|
||||
'size_bytes': len(data),
|
||||
}
|
||||
if state.page_info:
|
||||
result['viewport'] = {
|
||||
'width': state.page_info.viewport_width,
|
||||
'height': state.page_info.viewport_height,
|
||||
}
|
||||
return json.dumps(result), b64
|
||||
|
||||
async def _extract_content(self, query: str, extract_links: bool = False) -> str:
|
||||
"""Extract content from current page."""
|
||||
@@ -1075,19 +1226,25 @@ class BrowserUseServer:
|
||||
# Start the cleanup task
|
||||
await self._start_cleanup_task()
|
||||
|
||||
if sys.stdin is None:
|
||||
raise RuntimeError('MCP stdio transport requires stdin, but this process was launched without one.')
|
||||
|
||||
async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
|
||||
await self.server.run(
|
||||
read_stream,
|
||||
write_stream,
|
||||
InitializationOptions(
|
||||
server_name='browser-use',
|
||||
server_version='0.1.0',
|
||||
capabilities=self.server.get_capabilities(
|
||||
notification_options=NotificationOptions(),
|
||||
experimental_capabilities={},
|
||||
try:
|
||||
await self.server.run(
|
||||
read_stream,
|
||||
write_stream,
|
||||
InitializationOptions(
|
||||
server_name='browser-use',
|
||||
server_version='0.1.0',
|
||||
capabilities=self.server.get_capabilities(
|
||||
notification_options=NotificationOptions(),
|
||||
experimental_capabilities={},
|
||||
),
|
||||
),
|
||||
),
|
||||
)
|
||||
)
|
||||
except BrokenPipeError:
|
||||
logger.warning('MCP client disconnected while writing to stdio; shutting down server cleanly.')
|
||||
|
||||
|
||||
async def main(session_timeout_minutes: int = 10):
|
||||
|
||||
@@ -24,20 +24,10 @@ curl -fsSL https://browser-use.com/cli/install.sh | bash
|
||||
& "C:\Program Files\Git\bin\bash.exe" -c 'curl -fsSL https://browser-use.com/cli/install.sh | bash'
|
||||
```
|
||||
|
||||
### Installation Modes
|
||||
```bash
|
||||
curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- --full # All modes
|
||||
curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- --local-only # Local browser only
|
||||
curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- --remote-only # Cloud browser only
|
||||
curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- --api-key bu_xxx # With API key
|
||||
```
|
||||
|
||||
### Post-Install
|
||||
```bash
|
||||
browser-use doctor # Validate installation
|
||||
browser-use setup # Run setup wizard (optional)
|
||||
browser-use setup --mode local|remote|full # Non-interactive setup
|
||||
browser-use setup --api-key bu_xxx --yes # With API key, skip prompts
|
||||
```
|
||||
|
||||
### Generate Templates
|
||||
@@ -62,13 +52,10 @@ If you prefer not to use the one-line installer:
|
||||
# 1. Install the package
|
||||
uv pip install browser-use
|
||||
|
||||
# 2. Install Chromium (for local browser mode)
|
||||
# 2. Install Chromium
|
||||
browser-use install
|
||||
|
||||
# 3. Configure API key (for remote mode)
|
||||
export BROWSER_USE_API_KEY=your_key # or $env:BROWSER_USE_API_KEY on Windows
|
||||
|
||||
# 4. Validate
|
||||
# 3. Validate
|
||||
browser-use doctor
|
||||
```
|
||||
|
||||
@@ -106,11 +93,20 @@ browser-use open https://example.com
|
||||
# Visible browser window
|
||||
browser-use --headed open https://example.com
|
||||
|
||||
# Use your real Chrome (with existing logins/cookies)
|
||||
browser-use --browser real open https://gmail.com
|
||||
# Use your real Chrome with Default profile (with existing logins/cookies)
|
||||
browser-use --profile "Default" open https://gmail.com
|
||||
|
||||
# Cloud browser (requires BROWSER_USE_API_KEY)
|
||||
browser-use --browser remote open https://example.com
|
||||
# Use a specific Chrome profile
|
||||
browser-use --profile "Profile 1" open https://gmail.com
|
||||
|
||||
# Auto-discover and connect to running Chrome
|
||||
browser-use --connect open https://example.com
|
||||
|
||||
# Connect to an existing browser via CDP URL
|
||||
browser-use --cdp-url http://localhost:9222 open https://example.com
|
||||
|
||||
# WebSocket CDP URL also works
|
||||
browser-use --cdp-url ws://localhost:9222/devtools/browser/... state
|
||||
```
|
||||
|
||||
## All Commands
|
||||
@@ -135,11 +131,13 @@ browser-use --browser remote open https://example.com
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `click <index>` | Click element by index |
|
||||
| `click <x> <y>` | Click at pixel coordinates |
|
||||
| `type "text"` | Type into focused element |
|
||||
| `input <index> "text"` | Click element, then type |
|
||||
| `keys "Enter"` | Send keyboard keys |
|
||||
| `keys "Control+a"` | Send key combination |
|
||||
| `select <index> "value"` | Select dropdown option |
|
||||
| `upload <index> <path>` | Upload file to file input element |
|
||||
| `hover <index>` | Hover over element |
|
||||
| `dblclick <index>` | Double-click element |
|
||||
| `rightclick <index>` | Right-click element |
|
||||
@@ -147,9 +145,10 @@ browser-use --browser remote open https://example.com
|
||||
### Tabs
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `switch <tab>` | Switch to tab by index |
|
||||
| `close-tab` | Close current tab |
|
||||
| `close-tab <tab>` | Close specific tab |
|
||||
| `tab list` | List all tabs |
|
||||
| `tab new [url]` | Open new tab |
|
||||
| `tab switch <index>` | Switch to tab by index |
|
||||
| `tab close [index...]` | Close tab(s) (current if no index) |
|
||||
|
||||
### Cookies
|
||||
| Command | Description |
|
||||
@@ -188,7 +187,7 @@ browser-use --browser remote open https://example.com
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `eval "js code"` | Execute JavaScript |
|
||||
| `extract "query"` | Extract data with LLM |
|
||||
| `extract "query"` | Extract data with LLM (not yet implemented) |
|
||||
|
||||
### Python (Persistent Session)
|
||||
```bash
|
||||
@@ -200,88 +199,45 @@ browser-use python --reset # Clear namespace
|
||||
browser-use python --file script.py # Run Python file
|
||||
```
|
||||
|
||||
## Agent Tasks
|
||||
## Cloud API
|
||||
|
||||
Run AI-powered browser automation tasks.
|
||||
|
||||
### Local Mode
|
||||
```bash
|
||||
browser-use run "Fill the contact form with test data"
|
||||
browser-use run "Extract all product prices" --max-steps 50
|
||||
browser-use run "task" --llm gpt-4o # Specify LLM model
|
||||
```
|
||||
|
||||
Requires an LLM API key (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.).
|
||||
|
||||
### Remote Mode (Cloud)
|
||||
```bash
|
||||
browser-use -b remote run "Search for AI news" # US proxy default
|
||||
browser-use -b remote run "task" --llm gpt-4o # Specify LLM
|
||||
browser-use -b remote run "task" --proxy-country gb # UK proxy
|
||||
browser-use -b remote run "task" --session-id <id> # Reuse session
|
||||
browser-use -b remote run "task" --no-wait # Async (returns task ID)
|
||||
browser-use -b remote run "task" --wait # Wait for completion
|
||||
browser-use -b remote run "task" --stream # Stream output
|
||||
browser-use -b remote run "task" --flash # Fast mode
|
||||
browser-use -b remote run "task" --keep-alive # Keep session alive
|
||||
browser-use -b remote run "task" --thinking # Extended reasoning
|
||||
browser-use -b remote run "task" --vision # Enable vision (default)
|
||||
browser-use -b remote run "task" --no-vision # Disable vision
|
||||
browser-use -b remote run "task" --profile <id> # Use cloud profile
|
||||
|
||||
# Task configuration
|
||||
browser-use -b remote run "task" --start-url https://example.com # Start from URL
|
||||
browser-use -b remote run "task" --allowed-domain example.com # Restrict navigation (repeatable)
|
||||
browser-use -b remote run "task" --metadata key=value # Task metadata (repeatable)
|
||||
browser-use -b remote run "task" --secret API_KEY=xxx # Task secrets (repeatable)
|
||||
browser-use -b remote run "task" --skill-id skill-123 # Enable skills (repeatable)
|
||||
|
||||
# Structured output and evaluation
|
||||
browser-use -b remote run "task" --structured-output '{"type":"object"}' # JSON schema
|
||||
browser-use -b remote run "task" --judge # Enable judge mode
|
||||
browser-use -b remote run "task" --judge-ground-truth "answer" # Expected answer
|
||||
```
|
||||
|
||||
Requires `BROWSER_USE_API_KEY`.
|
||||
|
||||
## Task Management (Remote Mode)
|
||||
|
||||
Manage cloud tasks when using `--browser remote`.
|
||||
Generic REST passthrough to the Browser-Use Cloud API, plus cloud browser provisioning.
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `task list` | List recent tasks |
|
||||
| `task list --status running` | Filter by status |
|
||||
| `task list --session <id>` | Filter by session ID |
|
||||
| `task status <id>` | Get task status (latest step only) |
|
||||
| `task status <id> -c` | Compact: all steps with reasoning |
|
||||
| `task status <id> -v` | Verbose: full details |
|
||||
| `task status <id> --last 5` | Show last 5 steps |
|
||||
| `task status <id> --step 3` | Show specific step number |
|
||||
| `task status <id> --reverse` | Show steps newest first |
|
||||
| `task stop <id>` | Stop running task |
|
||||
| `task logs <id>` | Get execution logs |
|
||||
| `cloud connect` | Provision cloud browser and connect (zero-config, auto-manages profile) |
|
||||
| `cloud login <api-key>` | Save API key |
|
||||
| `cloud logout` | Remove API key |
|
||||
| `cloud v2 GET <path>` | GET request to API v2 |
|
||||
| `cloud v2 POST <path> '<json>'` | POST request to API v2 |
|
||||
| `cloud v3 POST <path> '<json>'` | POST request to API v3 |
|
||||
| `cloud v2 poll <task-id>` | Poll task until done |
|
||||
| `cloud v2 --help` | Show API v2 endpoints (from OpenAPI spec) |
|
||||
| `cloud v3 --help` | Show API v3 endpoints |
|
||||
|
||||
## Cloud Sessions (Remote Mode)
|
||||
```bash
|
||||
# Save API key to ~/.browser-use/config.json
|
||||
browser-use cloud login sk-abc123...
|
||||
|
||||
Manage cloud browser sessions.
|
||||
# Provision a cloud browser and connect
|
||||
browser-use cloud connect
|
||||
browser-use state # works normally
|
||||
browser-use close # disconnects AND stops cloud browser
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `session list` | List cloud sessions |
|
||||
| `session list --status active` | Filter by status |
|
||||
| `session get <id>` | Get session details + live URL |
|
||||
| `session stop <id>` | Stop session |
|
||||
| `session stop --all` | Stop all active sessions |
|
||||
| `session create` | Create new session |
|
||||
| `session create --profile <id>` | With cloud profile |
|
||||
| `session create --proxy-country gb` | With geographic proxy |
|
||||
| `session create --start-url <url>` | Start at specific URL |
|
||||
| `session create --screen-size 1920x1080` | Custom screen size |
|
||||
| `session create --keep-alive` | Keep session alive |
|
||||
| `session create --persist-memory` | Persist memory between tasks |
|
||||
| `session share <id>` | Create public share URL |
|
||||
| `session share <id> --delete` | Delete public share |
|
||||
# List browsers
|
||||
browser-use cloud v2 GET /browsers
|
||||
|
||||
# Create a task
|
||||
browser-use cloud v2 POST /tasks '{"task":"Search for AI news","url":"https://google.com"}'
|
||||
|
||||
# Poll until done
|
||||
browser-use cloud v2 poll <task-id>
|
||||
|
||||
# Remove API key
|
||||
browser-use cloud logout
|
||||
```
|
||||
|
||||
API key stored in `~/.browser-use/config.json` with `0600` permissions.
|
||||
|
||||
## Tunnels
|
||||
|
||||
@@ -298,55 +254,70 @@ Expose local dev servers to cloud browsers via Cloudflare tunnels.
|
||||
# Example: Test local dev server with cloud browser
|
||||
npm run dev & # localhost:3000
|
||||
browser-use tunnel 3000 # → https://abc.trycloudflare.com
|
||||
browser-use -b remote open https://abc.trycloudflare.com
|
||||
browser-use cloud connect # Provision cloud browser
|
||||
browser-use open https://abc.trycloudflare.com
|
||||
```
|
||||
|
||||
## Profile Management
|
||||
|
||||
### Local Profiles (`-b real`)
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `profile list` | List Chrome profiles |
|
||||
| `profile cookies <name>` | Show cookies by domain |
|
||||
| `profile sync --from <name>` | Sync local profile to cloud |
|
||||
| `profile sync --from Default --domain youtube.com` | Sync specific domain only |
|
||||
The `profile` subcommand delegates to the [profile-use](https://github.com/browser-use/profile-use) Go binary, which syncs local browser cookies to Browser-Use cloud.
|
||||
|
||||
### Cloud Profiles (`-b remote`)
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `profile list` | List cloud profiles |
|
||||
| `profile list --page 2 --page-size 50` | Pagination |
|
||||
| `profile get <id>` | Get profile details |
|
||||
| `profile create` | Create profile |
|
||||
| `profile create --name "My Profile"` | Create with name |
|
||||
| `profile update <id> --name <name>` | Rename profile |
|
||||
| `profile delete <id>` | Delete profile |
|
||||
|
||||
## Local Session Management
|
||||
The binary is managed at `~/.browser-use/bin/profile-use` and auto-downloaded on first use.
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `sessions` | List active sessions |
|
||||
| `close` | Close browser session |
|
||||
| `profile` | Interactive sync wizard |
|
||||
| `profile list` | List detected browsers and profiles |
|
||||
| `profile sync --all` | Sync all profiles to cloud |
|
||||
| `profile sync --browser "Google Chrome" --profile "Default"` | Sync specific profile |
|
||||
| `profile auth --apikey <key>` | Set API key (shared with `cloud login`) |
|
||||
| `profile inspect --browser "Google Chrome" --profile "Default"` | Inspect cookies locally |
|
||||
| `profile update` | Download/update the profile-use binary |
|
||||
|
||||
## Session Management
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `sessions` | List active browser sessions |
|
||||
| `close` | Close current session's browser and daemon |
|
||||
| `close --all` | Close all sessions |
|
||||
| `server status` | Check if server is running |
|
||||
| `server stop` | Stop server |
|
||||
| `server logs` | View server logs |
|
||||
| `--session NAME` | Target a named session (default: "default") |
|
||||
|
||||
```bash
|
||||
# Default behavior unchanged
|
||||
browser-use open https://example.com # uses session 'default'
|
||||
browser-use state # talks to 'default' daemon
|
||||
|
||||
# Named sessions
|
||||
browser-use --session work open https://example.com
|
||||
browser-use --session work state
|
||||
browser-use --session cloud cloud connect
|
||||
|
||||
# List active sessions
|
||||
browser-use sessions
|
||||
|
||||
# Close specific session
|
||||
browser-use --session work close
|
||||
|
||||
# Close all sessions
|
||||
browser-use close --all
|
||||
|
||||
# Env var fallback
|
||||
BROWSER_USE_SESSION=work browser-use state
|
||||
```
|
||||
|
||||
## Global Options
|
||||
|
||||
| Option | Description |
|
||||
|--------|-------------|
|
||||
| `--session NAME` | Use named session (default: "default") |
|
||||
| `--browser MODE` | Browser mode: chromium, real, remote |
|
||||
| `--headed` | Show browser window |
|
||||
| `--profile NAME` | Browser profile (local name or cloud ID) |
|
||||
| `--profile [NAME]` | Use real Chrome (bare `--profile` uses "Default") |
|
||||
| `--connect` | Auto-discover and connect to running Chrome via CDP |
|
||||
| `--cdp-url <url>` | Connect to existing browser via CDP URL (`http://` or `ws://`) |
|
||||
| `--session NAME` | Target a named session (default: "default", env: `BROWSER_USE_SESSION`) |
|
||||
| `--json` | Output as JSON |
|
||||
| `--api-key KEY` | Override API key |
|
||||
| `--mcp` | Run as MCP server via stdin/stdout |
|
||||
|
||||
**Session behavior**: All commands without `--session` use the same "default" session. The browser stays open and is reused across commands. Use `--session NAME` to run multiple browsers in parallel.
|
||||
|
||||
## Examples
|
||||
|
||||
### Fill a Form
|
||||
@@ -365,15 +336,6 @@ browser-use open https://news.ycombinator.com
|
||||
browser-use eval "Array.from(document.querySelectorAll('.titleline a')).slice(0,5).map(a => a.textContent)"
|
||||
```
|
||||
|
||||
### Multi-Session Workflow
|
||||
```bash
|
||||
browser-use --session work open https://work.example.com
|
||||
browser-use --session personal open https://personal.example.com
|
||||
browser-use --session work state
|
||||
browser-use --session personal state
|
||||
browser-use close --all
|
||||
```
|
||||
|
||||
### Python Automation
|
||||
```bash
|
||||
browser-use open https://example.com
|
||||
@@ -385,19 +347,6 @@ browser.screenshot('scrolled.png')
|
||||
"
|
||||
```
|
||||
|
||||
### Cloud Agent with Session Reuse
|
||||
```bash
|
||||
# Start task, keep session alive
|
||||
browser-use -b remote run "Log into example.com" --keep-alive --no-wait
|
||||
# → task_id: task-123, session_id: sess-456
|
||||
|
||||
# Check task status
|
||||
browser-use task status task-123
|
||||
|
||||
# Run another task in same session (preserves login)
|
||||
browser-use -b remote run "Go to settings" --session-id sess-456
|
||||
```
|
||||
|
||||
## Claude Code Skill
|
||||
|
||||
For [Claude Code](https://claude.ai/code), a skill provides richer context for browser automation:
|
||||
@@ -410,15 +359,34 @@ curl -o ~/.claude/skills/browser-use/SKILL.md \
|
||||
|
||||
## How It Works
|
||||
|
||||
The CLI uses a session server architecture:
|
||||
The CLI uses a multi-session daemon architecture:
|
||||
|
||||
1. First command starts a background server (browser stays open)
|
||||
1. First command starts a background daemon for that session (browser stays open)
|
||||
2. Subsequent commands communicate via Unix socket (or TCP on Windows)
|
||||
3. Browser persists across commands for fast interaction
|
||||
4. Server auto-starts when needed, stops with `browser-use server stop`
|
||||
4. Each `--session` gets its own daemon, socket, and PID file in `~/.browser-use/`
|
||||
5. Daemon auto-starts when needed, auto-exits when browser dies, or stops with `browser-use close`
|
||||
|
||||
This gives you ~50ms command latency instead of waiting for browser startup each time.
|
||||
|
||||
### File Layout
|
||||
|
||||
All CLI-managed files live under `~/.browser-use/` (override with `BROWSER_USE_HOME`):
|
||||
|
||||
```
|
||||
~/.browser-use/
|
||||
├── config.json # API key, settings (shared with profile-use)
|
||||
├── bin/
|
||||
│ └── profile-use # Managed Go binary (auto-downloaded)
|
||||
├── tunnels/
|
||||
│ ├── {port}.json # Tunnel metadata
|
||||
│ └── {port}.log # Tunnel logs
|
||||
├── default.state.json # Daemon lifecycle state (phase, PID, config)
|
||||
├── default.sock # Daemon socket (ephemeral)
|
||||
├── default.pid # Daemon PID (ephemeral)
|
||||
└── cli.log # Daemon log
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary>Windows Troubleshooting</summary>
|
||||
|
||||
@@ -444,11 +412,11 @@ echo $env:PATH
|
||||
& "C:\Program Files\Git\bin\bash.exe" -c 'browser-use --help'
|
||||
```
|
||||
|
||||
### "Failed to start session server" error
|
||||
### "Failed to start daemon" error
|
||||
Kill zombie processes:
|
||||
```powershell
|
||||
# Find process on port
|
||||
netstat -ano | findstr 49698
|
||||
# Find browser-use Python processes
|
||||
tasklist | findstr python
|
||||
|
||||
# Kill by PID
|
||||
taskkill /PID <pid> /F
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
"""Browser-use CLI package.
|
||||
|
||||
This package provides a fast command-line interface for browser automation.
|
||||
The CLI uses a session server architecture for persistent browser sessions.
|
||||
The CLI uses a daemon architecture for persistent browser sessions.
|
||||
|
||||
Usage:
|
||||
browser-use open https://example.com
|
||||
browser-use click 5
|
||||
browser-use type "Hello World"
|
||||
browser-use python "print(browser.url)"
|
||||
browser-use run "Fill the contact form"
|
||||
browser-use close
|
||||
"""
|
||||
|
||||
|
||||
201
browser_use/skill_cli/actions.py
Normal file
201
browser_use/skill_cli/actions.py
Normal file
@@ -0,0 +1,201 @@
|
||||
"""Direct action execution for CLI daemon — no event bus dispatch.
|
||||
|
||||
Wraps DefaultActionWatchdog methods and DomService for direct calling.
|
||||
The watchdog instance is NOT registered on the event bus — it's just
|
||||
used as a library of action implementations.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from bubus import EventBus
|
||||
|
||||
from browser_use.browser.events import (
|
||||
GoBackEvent,
|
||||
SelectDropdownOptionEvent,
|
||||
SendKeysEvent,
|
||||
TypeTextEvent,
|
||||
UploadFileEvent,
|
||||
)
|
||||
from browser_use.browser.watchdogs.default_action_watchdog import DefaultActionWatchdog
|
||||
from browser_use.dom.service import DomService
|
||||
from browser_use.dom.views import EnhancedDOMTreeNode, SerializedDOMState
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from browser_use.browser.session import BrowserSession
|
||||
from browser_use.browser.views import BrowserStateSummary, PageInfo
|
||||
|
||||
logger = logging.getLogger('browser_use.skill_cli.actions')
|
||||
|
||||
|
||||
class ActionHandler:
|
||||
"""Execute browser actions directly without the event bus.
|
||||
|
||||
Uses DefaultActionWatchdog methods for complex actions (click, type, keys, etc.)
|
||||
and DomService for DOM snapshots. All other actions use direct CDP calls.
|
||||
"""
|
||||
|
||||
def __init__(self, browser_session: BrowserSession) -> None:
|
||||
self.bs = browser_session
|
||||
# Create watchdog instance — NOT registered on event bus
|
||||
self._watchdog = DefaultActionWatchdog(
|
||||
event_bus=EventBus(), # dummy, never dispatched to
|
||||
browser_session=browser_session,
|
||||
)
|
||||
self._dom_service: DomService | None = None
|
||||
|
||||
async def navigate(self, url: str) -> None:
|
||||
"""Navigate the focused tab to a URL."""
|
||||
assert self.bs.agent_focus_target_id is not None, 'No focused tab'
|
||||
await self.bs._navigate_and_wait(url, self.bs.agent_focus_target_id)
|
||||
|
||||
async def click_element(self, node: EnhancedDOMTreeNode) -> dict[str, Any] | None:
|
||||
"""Click an element using the watchdog's full implementation (with fallbacks)."""
|
||||
return await self._watchdog._click_element_node_impl(node)
|
||||
|
||||
async def click_coordinate(self, x: int, y: int) -> dict[str, Any] | None:
|
||||
"""Click at coordinates."""
|
||||
from browser_use.browser.events import ClickCoordinateEvent
|
||||
|
||||
event = ClickCoordinateEvent(coordinate_x=x, coordinate_y=y)
|
||||
return await self._watchdog.on_ClickCoordinateEvent(event)
|
||||
|
||||
async def type_text(self, node: EnhancedDOMTreeNode, text: str) -> dict[str, Any] | None:
|
||||
"""Type text into an element."""
|
||||
event = TypeTextEvent(node=node, text=text)
|
||||
return await self._watchdog.on_TypeTextEvent(event)
|
||||
|
||||
async def scroll(self, direction: str, amount: int) -> None:
|
||||
"""Scroll the page using JS (CDP gesture doesn't work in --connect mode)."""
|
||||
if direction in ('down', 'up'):
|
||||
x, y = 0, (amount if direction == 'down' else -amount)
|
||||
else:
|
||||
x, y = (amount if direction == 'right' else -amount), 0
|
||||
cdp_session = await self.bs.get_or_create_cdp_session()
|
||||
assert cdp_session is not None, 'No CDP session for scroll'
|
||||
await cdp_session.cdp_client.send.Runtime.evaluate(
|
||||
params={'expression': f'window.scrollBy({x}, {y})', 'awaitPromise': False},
|
||||
session_id=cdp_session.session_id,
|
||||
)
|
||||
|
||||
async def go_back(self) -> None:
|
||||
"""Go back in history."""
|
||||
event = GoBackEvent()
|
||||
await self._watchdog.on_GoBackEvent(event)
|
||||
|
||||
async def send_keys(self, keys: str) -> None:
|
||||
"""Send keyboard keys."""
|
||||
event = SendKeysEvent(keys=keys)
|
||||
await self._watchdog.on_SendKeysEvent(event)
|
||||
|
||||
async def select_dropdown(self, node: EnhancedDOMTreeNode, text: str) -> dict[str, str]:
|
||||
"""Select a dropdown option."""
|
||||
event = SelectDropdownOptionEvent(node=node, text=text)
|
||||
return await self._watchdog.on_SelectDropdownOptionEvent(event)
|
||||
|
||||
async def upload_file(self, node: EnhancedDOMTreeNode, file_path: str) -> None:
|
||||
"""Upload a file to a file input element."""
|
||||
event = UploadFileEvent(node=node, file_path=file_path)
|
||||
await self._watchdog.on_UploadFileEvent(event)
|
||||
|
||||
async def get_state(self) -> BrowserStateSummary:
|
||||
"""Build DOM via DomService directly (no DOMWatchdog, no event bus)."""
|
||||
from browser_use.browser.views import BrowserStateSummary, PageInfo
|
||||
|
||||
if self._dom_service is None:
|
||||
self._dom_service = DomService(browser_session=self.bs)
|
||||
|
||||
page_url = await self.bs.get_current_page_url()
|
||||
|
||||
# Fast path for non-http pages
|
||||
if page_url.lower().split(':', 1)[0] not in ('http', 'https'):
|
||||
return BrowserStateSummary(
|
||||
dom_state=SerializedDOMState(_root=None, selector_map={}),
|
||||
url=page_url,
|
||||
title='Empty Tab',
|
||||
tabs=await self.bs.get_tabs(),
|
||||
screenshot=None,
|
||||
page_info=None,
|
||||
)
|
||||
|
||||
# Build DOM and take screenshot in parallel
|
||||
import asyncio
|
||||
|
||||
dom_task = asyncio.create_task(self._dom_service.get_serialized_dom_tree())
|
||||
screenshot_task = asyncio.create_task(self.bs.take_screenshot())
|
||||
|
||||
dom_state: SerializedDOMState | None = None
|
||||
screenshot_b64: str | None = None
|
||||
|
||||
try:
|
||||
dom_state, _tree, _timing = await dom_task
|
||||
except Exception as e:
|
||||
logger.warning(f'DOM build failed: {e}')
|
||||
dom_state = SerializedDOMState(_root=None, selector_map={})
|
||||
|
||||
try:
|
||||
screenshot_bytes = await screenshot_task
|
||||
import base64
|
||||
|
||||
screenshot_b64 = base64.b64encode(screenshot_bytes).decode() if screenshot_bytes else None
|
||||
except Exception as e:
|
||||
logger.warning(f'Screenshot failed: {e}')
|
||||
|
||||
# Update cached selector map for element lookups
|
||||
if dom_state and dom_state.selector_map:
|
||||
self.bs.update_cached_selector_map(dom_state.selector_map)
|
||||
|
||||
# Get page info
|
||||
page_info: PageInfo | None = None
|
||||
try:
|
||||
cdp_session = await self.bs.get_or_create_cdp_session(target_id=None, focus=False)
|
||||
if cdp_session:
|
||||
metrics = await cdp_session.cdp_client.send.Page.getLayoutMetrics(session_id=cdp_session.session_id)
|
||||
css_metrics = metrics.get('cssLayoutViewport', {})
|
||||
content_size = metrics.get('cssContentSize', metrics.get('contentSize', {}))
|
||||
visual_viewport = metrics.get('cssVisualViewport', metrics.get('visualViewport', {}))
|
||||
page_info = PageInfo(
|
||||
viewport_width=int(css_metrics.get('clientWidth', 0)),
|
||||
viewport_height=int(css_metrics.get('clientHeight', 0)),
|
||||
page_width=int(content_size.get('width', 0)),
|
||||
page_height=int(content_size.get('height', 0)),
|
||||
scroll_x=int(visual_viewport.get('pageX', 0)),
|
||||
scroll_y=int(visual_viewport.get('pageY', 0)),
|
||||
pixels_above=int(visual_viewport.get('pageY', 0)),
|
||||
pixels_below=max(
|
||||
0,
|
||||
int(content_size.get('height', 0))
|
||||
- int(css_metrics.get('clientHeight', 0))
|
||||
- int(visual_viewport.get('pageY', 0)),
|
||||
),
|
||||
pixels_left=0,
|
||||
pixels_right=0,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(f'Failed to get page info: {e}')
|
||||
|
||||
tabs = await self.bs.get_tabs()
|
||||
|
||||
# Use focused tab's title, not tabs[0]
|
||||
title = ''
|
||||
focused_id = self.bs.agent_focus_target_id
|
||||
found_focused = False
|
||||
for tab in tabs:
|
||||
if tab.target_id == focused_id:
|
||||
title = tab.title
|
||||
found_focused = True
|
||||
break
|
||||
if not found_focused and tabs:
|
||||
title = tabs[0].title
|
||||
|
||||
return BrowserStateSummary(
|
||||
dom_state=dom_state,
|
||||
url=page_url,
|
||||
title=title,
|
||||
tabs=tabs,
|
||||
screenshot=screenshot_b64,
|
||||
page_info=page_info,
|
||||
closed_popup_messages=self.bs._closed_popup_messages.copy(),
|
||||
)
|
||||
@@ -1,167 +0,0 @@
|
||||
"""API key management for browser-use CLI."""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class APIKeyRequired(Exception):
|
||||
"""Raised when API key is required but not provided."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
def get_config_path() -> Path:
|
||||
"""Get browser-use config file path."""
|
||||
if sys.platform == 'win32':
|
||||
base = Path(os.environ.get('APPDATA', Path.home()))
|
||||
else:
|
||||
base = Path(os.environ.get('XDG_CONFIG_HOME', Path.home() / '.config'))
|
||||
return base / 'browser-use' / 'config.json'
|
||||
|
||||
|
||||
def require_api_key(feature: str = 'this feature') -> str:
|
||||
"""Get API key or raise helpful error.
|
||||
|
||||
Checks in order:
|
||||
1. BROWSER_USE_API_KEY environment variable
|
||||
2. Config file (~/.config/browser-use/config.json)
|
||||
3. Interactive prompt (if TTY)
|
||||
4. Raises APIKeyRequired with helpful message
|
||||
"""
|
||||
# 1. Check environment
|
||||
key = os.environ.get('BROWSER_USE_API_KEY')
|
||||
if key:
|
||||
return key
|
||||
|
||||
# 2. Check config file
|
||||
config_path = get_config_path()
|
||||
if config_path.exists():
|
||||
try:
|
||||
config = json.loads(config_path.read_text())
|
||||
if key := config.get('api_key'):
|
||||
return key
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 3. Interactive prompt (if TTY)
|
||||
if sys.stdin.isatty() and sys.stdout.isatty():
|
||||
return prompt_for_api_key(feature)
|
||||
|
||||
# 4. Error with helpful message
|
||||
raise APIKeyRequired(
|
||||
f"""
|
||||
╭─────────────────────────────────────────────────────────────╮
|
||||
│ 🔑 Browser-Use API Key Required │
|
||||
│ │
|
||||
│ {feature} requires an API key. │
|
||||
│ │
|
||||
│ Get yours at: https://browser-use.com/new-api-key │
|
||||
│ │
|
||||
│ Then set it via: │
|
||||
│ export BROWSER_USE_API_KEY=your_key_here │
|
||||
│ │
|
||||
│ Or add to {config_path}: │
|
||||
│ {{"api_key": "your_key_here"}} │
|
||||
╰─────────────────────────────────────────────────────────────╯
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def prompt_for_api_key(feature: str) -> str:
|
||||
"""Interactive prompt for API key."""
|
||||
print(
|
||||
f"""
|
||||
╭─────────────────────────────────────────────────────────────╮
|
||||
│ 🔑 Browser-Use API Key Required │
|
||||
│ │
|
||||
│ {feature} requires an API key. │
|
||||
│ Get yours at: https://browser-use.com/new-api-key │
|
||||
╰─────────────────────────────────────────────────────────────╯
|
||||
"""
|
||||
)
|
||||
|
||||
try:
|
||||
key = input('Enter API key: ').strip()
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
raise APIKeyRequired('No API key provided')
|
||||
|
||||
if not key:
|
||||
raise APIKeyRequired('No API key provided')
|
||||
|
||||
try:
|
||||
save = input('Save to config? [y/N]: ').strip().lower()
|
||||
if save == 'y':
|
||||
save_api_key(key)
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
pass
|
||||
|
||||
return key
|
||||
|
||||
|
||||
def save_api_key(key: str) -> None:
|
||||
"""Save API key to config file."""
|
||||
config_path = get_config_path()
|
||||
config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
config: dict = {}
|
||||
if config_path.exists():
|
||||
try:
|
||||
config = json.loads(config_path.read_text())
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
config['api_key'] = key
|
||||
config_path.write_text(json.dumps(config, indent=2))
|
||||
# Restrict permissions to owner only (0600)
|
||||
config_path.chmod(0o600)
|
||||
print(f'Saved to {config_path}')
|
||||
|
||||
|
||||
def get_api_key() -> str | None:
|
||||
"""Get API key if available, without raising error."""
|
||||
try:
|
||||
return require_api_key('API key check')
|
||||
except APIKeyRequired:
|
||||
return None
|
||||
|
||||
|
||||
def check_api_key() -> dict[str, bool | str | None]:
|
||||
"""Check API key availability without interactive prompts.
|
||||
|
||||
Returns:
|
||||
Dict with keys:
|
||||
- 'available': bool - whether API key is configured
|
||||
- 'source': str | None - where it came from ('env', 'config', or None)
|
||||
- 'key_prefix': str | None - first 8 chars of key (for display)
|
||||
"""
|
||||
# Check environment
|
||||
key = os.environ.get('BROWSER_USE_API_KEY')
|
||||
if key:
|
||||
return {
|
||||
'available': True,
|
||||
'source': 'env',
|
||||
'key_prefix': key[:8] if len(key) >= 8 else key,
|
||||
}
|
||||
|
||||
# Check config file
|
||||
config_path = get_config_path()
|
||||
if config_path.exists():
|
||||
try:
|
||||
config = json.loads(config_path.read_text())
|
||||
if key := config.get('api_key'):
|
||||
return {
|
||||
'available': True,
|
||||
'source': 'config',
|
||||
'key_prefix': key[:8] if len(key) >= 8 else key,
|
||||
}
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Not available
|
||||
return {
|
||||
'available': False,
|
||||
'source': None,
|
||||
'key_prefix': None,
|
||||
}
|
||||
225
browser_use/skill_cli/browser.py
Normal file
225
browser_use/skill_cli/browser.py
Normal file
@@ -0,0 +1,225 @@
|
||||
"""Lightweight BrowserSession subclass for the CLI daemon.
|
||||
|
||||
Skips watchdogs, event bus handlers, and auto-reconnect for ALL modes.
|
||||
Launches browser if needed, then calls connect() directly.
|
||||
All inherited methods (get_element_by_index, take_screenshot, etc.)
|
||||
work because this IS a BrowserSession.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
import psutil
|
||||
|
||||
from browser_use.browser.session import BrowserSession
|
||||
|
||||
logger = logging.getLogger('browser_use.skill_cli.browser')
|
||||
|
||||
|
||||
class CLIBrowserSession(BrowserSession):
|
||||
"""BrowserSession that skips watchdogs and event bus for all modes.
|
||||
|
||||
For --connect: connects to existing Chrome via CDP URL.
|
||||
For managed Chromium: launches browser, gets CDP URL, connects.
|
||||
For cloud: provisions browser, gets CDP URL, connects.
|
||||
|
||||
All three modes converge at connect() — no watchdogs, no event bus.
|
||||
"""
|
||||
|
||||
_browser_process: psutil.Process | None = None # type: ignore[assignment]
|
||||
|
||||
async def start(self) -> None:
|
||||
"""Launch/provision browser if needed, then connect lightweight."""
|
||||
if self.cdp_url:
|
||||
# --connect or --cdp-url: CDP URL already known
|
||||
pass
|
||||
elif self.browser_profile.use_cloud:
|
||||
# Cloud: provision browser via API
|
||||
await self._provision_cloud_browser()
|
||||
else:
|
||||
# Managed Chromium: launch browser process
|
||||
await self._launch_local_browser()
|
||||
|
||||
# All modes: lightweight CDP connection (no watchdogs)
|
||||
await self.connect()
|
||||
|
||||
# Prevent heavy monitoring on future tabs
|
||||
if self.session_manager:
|
||||
|
||||
async def _noop(cdp_session: object) -> None:
|
||||
pass
|
||||
|
||||
self.session_manager._enable_page_monitoring = _noop # type: ignore[assignment]
|
||||
|
||||
# Disable auto-reconnect — daemon should die when CDP drops
|
||||
self._intentional_stop = True
|
||||
|
||||
# Register popup/dialog handler so JS alerts don't freeze Chrome
|
||||
await self._register_dialog_handler()
|
||||
|
||||
async def _register_dialog_handler(self) -> None:
|
||||
"""Register CDP handler to auto-dismiss JS dialogs (alert, confirm, prompt).
|
||||
|
||||
Without this, any JS dialog freezes all CDP commands until manually dismissed.
|
||||
Messages are stored in _closed_popup_messages for inclusion in state output.
|
||||
"""
|
||||
import asyncio as _asyncio
|
||||
|
||||
if not self._cdp_client_root:
|
||||
return
|
||||
|
||||
async def handle_dialog(event_data: dict, session_id: str | None = None) -> None:
|
||||
try:
|
||||
dialog_type = event_data.get('type', 'alert')
|
||||
message = event_data.get('message', '')
|
||||
if message:
|
||||
self._closed_popup_messages.append(f'[{dialog_type}] {message}')
|
||||
# Accept alerts/confirms/beforeunload, dismiss prompts
|
||||
should_accept = dialog_type in ('alert', 'confirm', 'beforeunload')
|
||||
logger.info(f'Auto-{"accepting" if should_accept else "dismissing"} {dialog_type}: {message[:100]}')
|
||||
if not self._cdp_client_root:
|
||||
return
|
||||
await _asyncio.wait_for(
|
||||
self._cdp_client_root.send.Page.handleJavaScriptDialog(
|
||||
params={'accept': should_accept},
|
||||
session_id=session_id,
|
||||
),
|
||||
timeout=0.5,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Try to enable Page domain on root client (may fail — not all CDP targets support it)
|
||||
try:
|
||||
await self._cdp_client_root.send.Page.enable()
|
||||
except Exception:
|
||||
pass
|
||||
self._cdp_client_root.register.Page.javascriptDialogOpening(handle_dialog) # type: ignore[arg-type]
|
||||
|
||||
async def _launch_local_browser(self) -> None:
|
||||
"""Launch Chromium using LocalBrowserWatchdog's launch logic."""
|
||||
from bubus import EventBus
|
||||
|
||||
from browser_use.browser.watchdogs.local_browser_watchdog import LocalBrowserWatchdog
|
||||
|
||||
# Instantiate watchdog as plain object — NOT registered on event bus
|
||||
launcher = LocalBrowserWatchdog(event_bus=EventBus(), browser_session=self)
|
||||
process, cdp_url = await launcher._launch_browser()
|
||||
self._browser_process = process
|
||||
self.browser_profile.cdp_url = cdp_url
|
||||
logger.info(f'Launched browser (PID {process.pid}), CDP: {cdp_url}')
|
||||
|
||||
async def _provision_cloud_browser(self) -> None:
|
||||
"""Provision a cloud browser and set the CDP URL."""
|
||||
import os
|
||||
|
||||
from browser_use.browser.cloud.views import CreateBrowserRequest
|
||||
|
||||
# Override cloud API base URL if set (CLI injects this into daemon env).
|
||||
# CloudBrowserClient expects the host URL (it appends /api/v2/... internally).
|
||||
cloud_base = os.environ.get('BROWSER_USE_CLOUD_BASE_URL')
|
||||
if cloud_base:
|
||||
self._cloud_browser_client.api_base_url = cloud_base.rstrip('/')
|
||||
|
||||
# Ensure CLI has an API key from config.json before proceeding.
|
||||
from browser_use.skill_cli.config import get_config_value
|
||||
|
||||
if not get_config_value('api_key'):
|
||||
from browser_use.browser.cloud.views import CloudBrowserAuthError
|
||||
|
||||
raise CloudBrowserAuthError(
|
||||
'No API key configured. Run `browser-use cloud login <key>` or `browser-use cloud signup`.'
|
||||
)
|
||||
|
||||
cloud_params = self.browser_profile.cloud_browser_params or CreateBrowserRequest()
|
||||
# Set recording from CLI config (defaults to True)
|
||||
from browser_use.skill_cli.config import get_config_value
|
||||
|
||||
cloud_params.enable_recording = bool(get_config_value('cloud_connect_recording'))
|
||||
|
||||
try:
|
||||
cloud_response = await self._cloud_browser_client.create_browser(cloud_params)
|
||||
except Exception as e:
|
||||
# If profile is invalid, create a new one and retry once
|
||||
if 'profile' in str(e).lower() or '422' in str(e):
|
||||
logger.info('Cloud profile invalid, creating new one and retrying')
|
||||
from browser_use.skill_cli.commands.cloud import _create_cloud_profile_inner
|
||||
|
||||
api_key = get_config_value('api_key')
|
||||
if not api_key:
|
||||
raise
|
||||
new_profile_id = _create_cloud_profile_inner(str(api_key))
|
||||
cloud_params.profile_id = new_profile_id
|
||||
cloud_response = await self._cloud_browser_client.create_browser(cloud_params)
|
||||
else:
|
||||
raise
|
||||
self.browser_profile.cdp_url = cloud_response.cdpUrl
|
||||
self.browser_profile.is_local = False
|
||||
logger.info(f'Cloud browser provisioned, CDP: {cloud_response.cdpUrl}')
|
||||
|
||||
async def stop(self) -> None:
|
||||
"""Disconnect from the browser.
|
||||
|
||||
For --connect/--cdp-url: just close the websocket (we don't own the browser).
|
||||
For cloud: stop the remote browser via API before disconnecting.
|
||||
"""
|
||||
self._intentional_stop = True
|
||||
# Stop cloud browser if we provisioned one
|
||||
if self.browser_profile.use_cloud and self._cloud_browser_client.current_session_id:
|
||||
try:
|
||||
import asyncio as _asyncio
|
||||
|
||||
await _asyncio.wait_for(self._cloud_browser_client.stop_browser(), timeout=5.0)
|
||||
except Exception as e:
|
||||
logger.debug(f'Error stopping cloud browser: {e}')
|
||||
if self._cdp_client_root:
|
||||
try:
|
||||
await self._cdp_client_root.stop()
|
||||
except Exception as e:
|
||||
logger.debug(f'Error closing CDP client: {e}')
|
||||
self._cdp_client_root = None # type: ignore[assignment]
|
||||
if self.session_manager:
|
||||
try:
|
||||
await self.session_manager.clear()
|
||||
except Exception as e:
|
||||
logger.debug(f'Error clearing session manager: {e}')
|
||||
self.session_manager = None
|
||||
self.agent_focus_target_id = None
|
||||
self._cached_selector_map.clear()
|
||||
|
||||
async def kill(self) -> None:
|
||||
"""Send Browser.close to kill the browser, then disconnect.
|
||||
|
||||
For managed Chromium: sends Browser.close CDP command + terminates process.
|
||||
"""
|
||||
if self._cdp_client_root:
|
||||
try:
|
||||
await self._cdp_client_root.send.Browser.close()
|
||||
except Exception:
|
||||
pass
|
||||
await self.stop()
|
||||
# Force kill the process if we launched it and it's still alive
|
||||
if self._browser_process:
|
||||
try:
|
||||
if self._browser_process.is_running():
|
||||
self._browser_process.terminate()
|
||||
self._browser_process.wait(timeout=5)
|
||||
except Exception:
|
||||
try:
|
||||
self._browser_process.kill()
|
||||
except Exception:
|
||||
pass
|
||||
self._browser_process = None
|
||||
|
||||
@property
|
||||
def is_cdp_connected(self) -> bool:
|
||||
"""Check if CDP WebSocket connection is alive."""
|
||||
if self._cdp_client_root is None or self._cdp_client_root.ws is None:
|
||||
return False
|
||||
try:
|
||||
from websockets.protocol import State
|
||||
|
||||
return self._cdp_client_root.ws.state is State.OPEN
|
||||
except Exception:
|
||||
return False
|
||||
@@ -1,23 +1,15 @@
|
||||
"""Command handlers for browser-use CLI."""
|
||||
|
||||
from browser_use.skill_cli.commands import (
|
||||
agent,
|
||||
browser,
|
||||
cloud_session,
|
||||
cloud_task,
|
||||
doctor,
|
||||
python_exec,
|
||||
session,
|
||||
setup,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'agent',
|
||||
'browser',
|
||||
'cloud_session',
|
||||
'cloud_task',
|
||||
'doctor',
|
||||
'python_exec',
|
||||
'session',
|
||||
'setup',
|
||||
]
|
||||
|
||||
@@ -1,335 +0,0 @@
|
||||
"""Agent task command handler."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
from browser_use.skill_cli.api_key import APIKeyRequired, require_api_key
|
||||
from browser_use.skill_cli.sessions import SessionInfo
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Cloud-only flags that only work in remote mode
|
||||
CLOUD_ONLY_FLAGS = [
|
||||
'session_id',
|
||||
'proxy_country',
|
||||
'wait',
|
||||
'stream',
|
||||
'flash',
|
||||
'keep_alive',
|
||||
'thinking',
|
||||
'start_url',
|
||||
'metadata',
|
||||
'secret',
|
||||
'allowed_domain',
|
||||
'skill_id',
|
||||
'structured_output',
|
||||
'judge',
|
||||
'judge_ground_truth',
|
||||
]
|
||||
|
||||
|
||||
async def handle(session: SessionInfo, params: dict[str, Any]) -> Any:
|
||||
"""Handle agent run command.
|
||||
|
||||
Routes based on browser mode:
|
||||
- Remote mode (--browser remote): Uses Cloud API with US proxy by default
|
||||
- Local mode (default): Uses local browser-use agent
|
||||
"""
|
||||
task = params.get('task')
|
||||
if not task:
|
||||
return {'success': False, 'error': 'No task provided'}
|
||||
|
||||
# Route based on browser mode
|
||||
if session.browser_mode == 'remote':
|
||||
# Remote mode requires Browser-Use API key
|
||||
try:
|
||||
require_api_key('Cloud agent tasks')
|
||||
except APIKeyRequired as e:
|
||||
return {'success': False, 'error': str(e)}
|
||||
return await _handle_cloud_task(params)
|
||||
else:
|
||||
# Check if user tried to use cloud-only flags in local mode
|
||||
used_cloud_flags = [f for f in CLOUD_ONLY_FLAGS if params.get(f)]
|
||||
if used_cloud_flags:
|
||||
from browser_use.skill_cli.install_config import is_mode_available
|
||||
|
||||
flags_str = ', '.join(f'--{f.replace("_", "-")}' for f in used_cloud_flags)
|
||||
|
||||
if is_mode_available('remote'):
|
||||
# Remote is available, user just needs to use it
|
||||
return {
|
||||
'success': False,
|
||||
'error': f'Cloud-only flags used in local mode: {flags_str}\nUse --browser remote to enable cloud features.',
|
||||
}
|
||||
else:
|
||||
# Remote not installed (--local-only install)
|
||||
return {
|
||||
'success': False,
|
||||
'error': f'Cloud-only flags require remote mode: {flags_str}\n'
|
||||
f'Remote mode is not installed. Reinstall to enable:\n'
|
||||
f' curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- --remote-only\n'
|
||||
f' curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- --full',
|
||||
}
|
||||
return await _handle_local_task(session, params)
|
||||
|
||||
|
||||
async def _handle_cloud_task(params: dict[str, Any]) -> Any:
|
||||
"""Handle task execution via Cloud API.
|
||||
|
||||
By default uses US proxy for all cloud tasks.
|
||||
"""
|
||||
from browser_use.skill_cli.commands import cloud_session, cloud_task
|
||||
|
||||
task = params['task']
|
||||
|
||||
# Handle vision flag (--vision vs --no-vision)
|
||||
vision: bool | None = None
|
||||
if params.get('vision'):
|
||||
vision = True
|
||||
elif params.get('no_vision'):
|
||||
vision = False
|
||||
|
||||
# Parse key=value list params
|
||||
metadata = _parse_key_value_list(params.get('metadata'))
|
||||
secrets = _parse_key_value_list(params.get('secret'))
|
||||
|
||||
# Build session params - only include what user explicitly set
|
||||
session_id = params.get('session_id')
|
||||
profile_id = params.get('profile')
|
||||
proxy_country = params.get('proxy_country')
|
||||
|
||||
try:
|
||||
logger.info(f'Creating cloud task: {task}')
|
||||
|
||||
# Create session first if profile or proxy specified and no session_id
|
||||
if (profile_id or proxy_country) and not session_id:
|
||||
session = cloud_session.create_session(
|
||||
profile_id=profile_id,
|
||||
proxy_country=proxy_country,
|
||||
keep_alive=params.get('keep_alive'),
|
||||
)
|
||||
session_id = session.id
|
||||
logger.info(f'Created cloud session: {session_id}')
|
||||
|
||||
# Create cloud task - only pass what user explicitly set
|
||||
task_response = cloud_task.create_task(
|
||||
task=task,
|
||||
llm=params.get('llm'),
|
||||
session_id=session_id,
|
||||
max_steps=params.get('max_steps'),
|
||||
flash_mode=params.get('flash'),
|
||||
thinking=params.get('thinking'),
|
||||
vision=vision,
|
||||
start_url=params.get('start_url'),
|
||||
metadata=metadata,
|
||||
secrets=secrets,
|
||||
allowed_domains=params.get('allowed_domain'),
|
||||
skill_ids=params.get('skill_id'),
|
||||
structured_output=params.get('structured_output'),
|
||||
judge=params.get('judge'),
|
||||
judge_ground_truth=params.get('judge_ground_truth'),
|
||||
)
|
||||
|
||||
task_id = task_response.id
|
||||
response_session_id = task_response.session_id
|
||||
|
||||
if not task_id:
|
||||
return {
|
||||
'success': False,
|
||||
'error': 'Cloud API did not return a task ID',
|
||||
'task': task,
|
||||
}
|
||||
|
||||
logger.info(f'Cloud task created: {task_id}')
|
||||
|
||||
# Return immediately unless --wait is specified
|
||||
if not params.get('wait'):
|
||||
return {
|
||||
'success': True,
|
||||
'task_id': task_id,
|
||||
'session_id': response_session_id,
|
||||
'message': 'Task started. Use "browser-use task status <task_id>" to check progress.',
|
||||
}
|
||||
|
||||
# Poll until complete
|
||||
logger.info('Waiting for task completion...')
|
||||
result = await cloud_task.poll_until_complete(task_id, stream=params.get('stream', False))
|
||||
|
||||
return {
|
||||
'success': True,
|
||||
'task': task,
|
||||
'task_id': task_id,
|
||||
'session_id': response_session_id,
|
||||
'status': result.status,
|
||||
'output': result.output,
|
||||
'cost': result.cost,
|
||||
'done': result.status == 'finished',
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(f'Cloud task failed: {e}')
|
||||
return {
|
||||
'success': False,
|
||||
'error': str(e),
|
||||
'task': task,
|
||||
}
|
||||
|
||||
|
||||
def _parse_key_value_list(items: list[str] | None) -> dict[str, str | None] | None:
|
||||
"""Parse a list of 'key=value' strings into a dict."""
|
||||
if not items:
|
||||
return None
|
||||
result: dict[str, str | None] = {}
|
||||
for item in items:
|
||||
if '=' in item:
|
||||
key, value = item.split('=', 1)
|
||||
result[key] = value
|
||||
return result if result else None
|
||||
|
||||
|
||||
async def _handle_local_task(session: SessionInfo, params: dict[str, Any]) -> Any:
|
||||
"""Handle task execution locally with browser-use agent."""
|
||||
task = params['task']
|
||||
max_steps = params.get('max_steps')
|
||||
model = params.get('llm') # Optional model override
|
||||
|
||||
try:
|
||||
# Import agent and LLM
|
||||
from browser_use.agent.service import Agent
|
||||
|
||||
# Try to get LLM from environment (with optional model override)
|
||||
llm = await get_llm(model=model)
|
||||
if llm is None:
|
||||
if model:
|
||||
return {
|
||||
'success': False,
|
||||
'error': f'Could not initialize model "{model}". '
|
||||
f'Make sure the appropriate API key is set (OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY).',
|
||||
}
|
||||
return {
|
||||
'success': False,
|
||||
'error': 'No LLM configured. Set BROWSER_USE_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY',
|
||||
}
|
||||
|
||||
# Create and run agent
|
||||
agent = Agent(
|
||||
task=task,
|
||||
llm=llm,
|
||||
browser_session=session.browser_session,
|
||||
)
|
||||
|
||||
logger.info(f'Running local agent task: {task}')
|
||||
run_kwargs = {}
|
||||
if max_steps is not None:
|
||||
run_kwargs['max_steps'] = max_steps
|
||||
result = await agent.run(**run_kwargs)
|
||||
|
||||
# Extract result info
|
||||
final_result = result.final_result() if result else None
|
||||
|
||||
return {
|
||||
'success': True,
|
||||
'task': task,
|
||||
'steps': len(result) if result else 0,
|
||||
'result': str(final_result) if final_result else None,
|
||||
'done': result.is_done() if result else False,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(f'Local agent task failed: {e}')
|
||||
return {
|
||||
'success': False,
|
||||
'error': str(e),
|
||||
'task': task,
|
||||
}
|
||||
|
||||
|
||||
def _get_verified_models() -> dict[str, set[str]]:
|
||||
"""Extract verified model names from SDK sources of truth."""
|
||||
import typing
|
||||
|
||||
from anthropic.types.model_param import ModelParam
|
||||
from openai.types.shared.chat_model import ChatModel
|
||||
|
||||
from browser_use.llm.google.chat import VerifiedGeminiModels
|
||||
|
||||
# OpenAI: ChatModel is a Literal type
|
||||
openai_models = set(typing.get_args(ChatModel))
|
||||
|
||||
# Anthropic: ModelParam is Union[Literal[...], str] - extract the Literal
|
||||
anthropic_literal = typing.get_args(ModelParam)[0]
|
||||
anthropic_models = set(typing.get_args(anthropic_literal))
|
||||
|
||||
# Google: VerifiedGeminiModels Literal
|
||||
google_models = set(typing.get_args(VerifiedGeminiModels))
|
||||
|
||||
# Browser-Use: cloud models
|
||||
browser_use_models = {'bu-latest', 'bu-1-0', 'bu-2-0'}
|
||||
|
||||
return {
|
||||
'openai': openai_models,
|
||||
'anthropic': anthropic_models,
|
||||
'google': google_models,
|
||||
'browser-use': browser_use_models,
|
||||
}
|
||||
|
||||
|
||||
_VERIFIED_MODELS: dict[str, set[str]] | None = None
|
||||
|
||||
|
||||
def _get_provider_for_model(model: str) -> str | None:
|
||||
"""Determine the provider by checking SDK verified model lists."""
|
||||
global _VERIFIED_MODELS
|
||||
if _VERIFIED_MODELS is None:
|
||||
_VERIFIED_MODELS = _get_verified_models()
|
||||
|
||||
for provider, models in _VERIFIED_MODELS.items():
|
||||
if model in models:
|
||||
return provider
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def get_llm(model: str | None = None) -> Any:
|
||||
"""Get LLM instance from environment configuration.
|
||||
|
||||
Args:
|
||||
model: Optional model name to use. If provided, will instantiate
|
||||
the appropriate provider for that model. If not provided,
|
||||
auto-detects from available API keys.
|
||||
|
||||
Supported providers: OpenAI, Anthropic, Google, Browser-Use.
|
||||
Model names are validated against each SDK's verified model list.
|
||||
"""
|
||||
from browser_use.llm import ChatAnthropic, ChatBrowserUse, ChatGoogle, ChatOpenAI
|
||||
|
||||
if model:
|
||||
provider = _get_provider_for_model(model)
|
||||
|
||||
if provider == 'openai':
|
||||
return ChatOpenAI(model=model)
|
||||
elif provider == 'anthropic':
|
||||
return ChatAnthropic(model=model)
|
||||
elif provider == 'google':
|
||||
return ChatGoogle(model=model)
|
||||
elif provider == 'browser-use':
|
||||
return ChatBrowserUse(model=model)
|
||||
else:
|
||||
logger.warning(f'Unknown model: {model}. Not in any verified model list.')
|
||||
return None
|
||||
|
||||
# No model specified - auto-detect from available API keys
|
||||
if os.environ.get('BROWSER_USE_API_KEY'):
|
||||
return ChatBrowserUse()
|
||||
|
||||
if os.environ.get('OPENAI_API_KEY'):
|
||||
return ChatOpenAI(model='o3')
|
||||
|
||||
if os.environ.get('ANTHROPIC_API_KEY'):
|
||||
return ChatAnthropic(model='claude-sonnet-4-0')
|
||||
|
||||
if os.environ.get('GOOGLE_API_KEY'):
|
||||
return ChatGoogle(model='gemini-flash-latest')
|
||||
|
||||
return None
|
||||
@@ -19,10 +19,10 @@ COMMANDS = {
|
||||
'back',
|
||||
'screenshot',
|
||||
'state',
|
||||
'switch',
|
||||
'close-tab',
|
||||
'tab',
|
||||
'keys',
|
||||
'select',
|
||||
'upload',
|
||||
'eval',
|
||||
'extract',
|
||||
'cookies',
|
||||
@@ -81,18 +81,16 @@ async def _get_element_center(session: SessionInfo, node: Any) -> tuple[float, f
|
||||
async def handle(action: str, session: SessionInfo, params: dict[str, Any]) -> Any:
|
||||
"""Handle browser control command."""
|
||||
bs = session.browser_session
|
||||
actions = session.actions
|
||||
if actions is None:
|
||||
return {'error': 'ActionHandler not initialized'}
|
||||
|
||||
if action == 'open':
|
||||
url = params['url']
|
||||
# Ensure URL has scheme
|
||||
if not url.startswith(('http://', 'https://', 'file://')):
|
||||
url = 'https://' + url
|
||||
|
||||
from browser_use.browser.events import NavigateToUrlEvent
|
||||
|
||||
await bs.event_bus.dispatch(NavigateToUrlEvent(url=url))
|
||||
await actions.navigate(url)
|
||||
result: dict[str, Any] = {'url': url}
|
||||
# Add live preview URL for cloud browsers
|
||||
if bs.browser_profile.use_cloud and bs.cdp_url:
|
||||
from urllib.parse import quote
|
||||
|
||||
@@ -100,18 +98,22 @@ async def handle(action: str, session: SessionInfo, params: dict[str, Any]) -> A
|
||||
return result
|
||||
|
||||
elif action == 'click':
|
||||
from browser_use.browser.events import ClickElementEvent
|
||||
|
||||
index = params['index']
|
||||
# Look up node from selector map
|
||||
node = await bs.get_element_by_index(index)
|
||||
if node is None:
|
||||
return {'error': f'Element index {index} not found - page may have changed'}
|
||||
await bs.event_bus.dispatch(ClickElementEvent(node=node))
|
||||
return {'clicked': index}
|
||||
args = params.get('args', [])
|
||||
if len(args) == 2:
|
||||
x, y = args
|
||||
await actions.click_coordinate(x, y)
|
||||
return {'clicked_coordinate': {'x': x, 'y': y}}
|
||||
elif len(args) == 1:
|
||||
index = args[0]
|
||||
node = await bs.get_element_by_index(index)
|
||||
if node is None:
|
||||
return {'error': f'Element index {index} not found - page may have changed'}
|
||||
await actions.click_element(node)
|
||||
return {'clicked': index}
|
||||
else:
|
||||
return {'error': 'Usage: click <index> or click <x> <y>'}
|
||||
|
||||
elif action == 'type':
|
||||
# Type into currently focused element using CDP directly
|
||||
text = params['text']
|
||||
cdp_session = await bs.get_or_create_cdp_session(target_id=None, focus=False)
|
||||
if not cdp_session:
|
||||
@@ -123,30 +125,23 @@ async def handle(action: str, session: SessionInfo, params: dict[str, Any]) -> A
|
||||
return {'typed': text}
|
||||
|
||||
elif action == 'input':
|
||||
from browser_use.browser.events import ClickElementEvent, TypeTextEvent
|
||||
|
||||
index = params['index']
|
||||
text = params['text']
|
||||
# Look up node from selector map
|
||||
node = await bs.get_element_by_index(index)
|
||||
if node is None:
|
||||
return {'error': f'Element index {index} not found - page may have changed'}
|
||||
await bs.event_bus.dispatch(ClickElementEvent(node=node))
|
||||
await bs.event_bus.dispatch(TypeTextEvent(node=node, text=text))
|
||||
await actions.click_element(node)
|
||||
await actions.type_text(node, text)
|
||||
return {'input': text, 'element': index}
|
||||
|
||||
elif action == 'scroll':
|
||||
from browser_use.browser.events import ScrollEvent
|
||||
|
||||
direction = params.get('direction', 'down')
|
||||
amount = params.get('amount', 500)
|
||||
await bs.event_bus.dispatch(ScrollEvent(direction=direction, amount=amount))
|
||||
await actions.scroll(direction, amount)
|
||||
return {'scrolled': direction, 'amount': amount}
|
||||
|
||||
elif action == 'back':
|
||||
from browser_use.browser.events import GoBackEvent
|
||||
|
||||
await bs.event_bus.dispatch(GoBackEvent())
|
||||
await actions.go_back()
|
||||
return {'back': True}
|
||||
|
||||
elif action == 'screenshot':
|
||||
@@ -161,59 +156,133 @@ async def handle(action: str, session: SessionInfo, params: dict[str, Any]) -> A
|
||||
return {'screenshot': base64.b64encode(data).decode(), 'size': len(data)}
|
||||
|
||||
elif action == 'state':
|
||||
# Return the same LLM representation that browser-use agents see
|
||||
state_text = await bs.get_state_as_text()
|
||||
state = await actions.get_state()
|
||||
assert state.dom_state is not None
|
||||
state_text = state.dom_state.llm_representation()
|
||||
|
||||
# Prepend viewport dimensions
|
||||
if state.page_info:
|
||||
pi = state.page_info
|
||||
viewport_text = f'viewport: {pi.viewport_width}x{pi.viewport_height}\n'
|
||||
viewport_text += f'page: {pi.page_width}x{pi.page_height}\n'
|
||||
viewport_text += f'scroll: ({pi.scroll_x}, {pi.scroll_y})\n'
|
||||
state_text = viewport_text + state_text
|
||||
|
||||
# Append auto-dismissed popup messages
|
||||
if bs._closed_popup_messages:
|
||||
state_text += '\nAuto-closed dialogs:\n'
|
||||
for msg in bs._closed_popup_messages:
|
||||
state_text += f' {msg}\n'
|
||||
bs._closed_popup_messages.clear()
|
||||
|
||||
return {'_raw_text': state_text}
|
||||
|
||||
elif action == 'switch':
|
||||
from browser_use.browser.events import SwitchTabEvent
|
||||
elif action == 'tab':
|
||||
tab_command = params.get('tab_command')
|
||||
|
||||
tab_index = params['tab']
|
||||
# Get target_id from tab index
|
||||
page_targets = bs.session_manager.get_all_page_targets() if bs.session_manager else []
|
||||
if tab_index < 0 or tab_index >= len(page_targets):
|
||||
return {'error': f'Invalid tab index {tab_index}. Available: 0-{len(page_targets) - 1}'}
|
||||
target_id = page_targets[tab_index].target_id
|
||||
await bs.event_bus.dispatch(SwitchTabEvent(target_id=target_id))
|
||||
return {'switched': tab_index}
|
||||
if tab_command == 'list':
|
||||
page_targets = bs.session_manager.get_all_page_targets() if bs.session_manager else []
|
||||
lines = ['TAB URL']
|
||||
for i, t in enumerate(page_targets):
|
||||
lines.append(f'{i:<4} {t.url}')
|
||||
return {'_raw_text': '\n'.join(lines)}
|
||||
|
||||
elif action == 'close-tab':
|
||||
from browser_use.browser.events import CloseTabEvent
|
||||
elif tab_command == 'new':
|
||||
url = params.get('url', 'about:blank')
|
||||
target_id = await bs._cdp_create_new_page(url, background=True)
|
||||
bs.agent_focus_target_id = target_id
|
||||
return {'created': target_id[:8], 'url': url}
|
||||
|
||||
tab_index = params.get('tab')
|
||||
# Get target_id from tab index
|
||||
page_targets = bs.session_manager.get_all_page_targets() if bs.session_manager else []
|
||||
if tab_index is not None:
|
||||
elif tab_command == 'switch':
|
||||
tab_index = params['tab']
|
||||
page_targets = bs.session_manager.get_all_page_targets() if bs.session_manager else []
|
||||
if tab_index < 0 or tab_index >= len(page_targets):
|
||||
return {'error': f'Invalid tab index {tab_index}. Available: 0-{len(page_targets) - 1}'}
|
||||
target_id = page_targets[tab_index].target_id
|
||||
else:
|
||||
# Close current/focused tab
|
||||
target_id = bs.session_manager.get_focused_target().target_id if bs.session_manager else None
|
||||
if not target_id:
|
||||
return {'error': 'No focused tab to close'}
|
||||
await bs.event_bus.dispatch(CloseTabEvent(target_id=target_id))
|
||||
return {'closed': tab_index}
|
||||
bs.agent_focus_target_id = page_targets[tab_index].target_id
|
||||
return {'switched': tab_index}
|
||||
|
||||
elif tab_command == 'close':
|
||||
tab_indices = params.get('tabs', [])
|
||||
|
||||
page_targets = bs.session_manager.get_all_page_targets() if bs.session_manager else []
|
||||
|
||||
async def _close_target(tid: str) -> None:
|
||||
cdp_session = await bs.get_or_create_cdp_session(target_id=None, focus=False)
|
||||
if cdp_session:
|
||||
await cdp_session.cdp_client.send.Target.closeTarget(params={'targetId': tid})
|
||||
|
||||
if not tab_indices:
|
||||
# Use caller's logical focus, not Chrome's global focus
|
||||
target_id = bs.agent_focus_target_id
|
||||
if not target_id:
|
||||
target_id = bs.session_manager.get_focused_target().target_id if bs.session_manager else None
|
||||
if not target_id:
|
||||
return {'error': 'No focused tab to close'}
|
||||
await _close_target(target_id)
|
||||
return {'closed': [0]}
|
||||
|
||||
closed = []
|
||||
errors = []
|
||||
for idx in sorted(tab_indices, reverse=True):
|
||||
if idx < 0 or idx >= len(page_targets):
|
||||
errors.append(f'Tab {idx} out of range')
|
||||
continue
|
||||
try:
|
||||
await _close_target(page_targets[idx].target_id)
|
||||
closed.append(idx)
|
||||
except Exception as e:
|
||||
errors.append(f'Tab {idx}: {e}')
|
||||
result: dict[str, Any] = {'closed': closed}
|
||||
if errors:
|
||||
result['errors'] = errors
|
||||
return result
|
||||
|
||||
return {'error': 'Invalid tab command. Use: list, new, switch, close'}
|
||||
|
||||
elif action == 'keys':
|
||||
from browser_use.browser.events import SendKeysEvent
|
||||
|
||||
keys = params['keys']
|
||||
await bs.event_bus.dispatch(SendKeysEvent(keys=keys))
|
||||
await actions.send_keys(keys)
|
||||
return {'sent': keys}
|
||||
|
||||
elif action == 'select':
|
||||
from browser_use.browser.events import SelectDropdownOptionEvent
|
||||
|
||||
index = params['index']
|
||||
value = params['value']
|
||||
# Look up node from selector map
|
||||
node = await bs.get_element_by_index(index)
|
||||
if node is None:
|
||||
return {'error': f'Element index {index} not found - page may have changed'}
|
||||
await bs.event_bus.dispatch(SelectDropdownOptionEvent(node=node, text=value))
|
||||
await actions.select_dropdown(node, value)
|
||||
return {'selected': value, 'element': index}
|
||||
|
||||
elif action == 'upload':
|
||||
index = params['index']
|
||||
file_path = params['path']
|
||||
|
||||
p = Path(file_path)
|
||||
if not p.exists():
|
||||
return {'error': f'File not found: {file_path}'}
|
||||
if not p.is_file():
|
||||
return {'error': f'Not a file: {file_path}'}
|
||||
if p.stat().st_size == 0:
|
||||
return {'error': f'File is empty (0 bytes): {file_path}'}
|
||||
|
||||
node = await bs.get_element_by_index(index)
|
||||
if node is None:
|
||||
return {'error': f'Element index {index} not found - page may have changed'}
|
||||
|
||||
file_input_node = bs.find_file_input_near_element(node)
|
||||
|
||||
if file_input_node is None:
|
||||
selector_map = await bs.get_selector_map()
|
||||
file_input_indices = [idx for idx, el in selector_map.items() if bs.is_file_input(el)]
|
||||
if file_input_indices:
|
||||
hint = f' File input(s) found at index: {", ".join(map(str, file_input_indices))}'
|
||||
else:
|
||||
hint = ' No file input found on the page.'
|
||||
return {'error': f'Element {index} is not a file input.{hint}'}
|
||||
|
||||
await actions.upload_file(file_input_node, file_path)
|
||||
return {'uploaded': file_path, 'element': index}
|
||||
|
||||
elif action == 'eval':
|
||||
js = params['js']
|
||||
# Execute JavaScript via CDP
|
||||
@@ -224,7 +293,7 @@ async def handle(action: str, session: SessionInfo, params: dict[str, Any]) -> A
|
||||
query = params['query']
|
||||
# This requires LLM integration
|
||||
# For now, return a placeholder
|
||||
return {'query': query, 'error': 'extract requires agent mode - use: browser-use run "extract ..."'}
|
||||
return {'query': query, 'error': 'extract is not yet implemented'}
|
||||
|
||||
elif action == 'hover':
|
||||
index = params['index']
|
||||
@@ -473,7 +542,7 @@ async def handle(action: str, session: SessionInfo, params: dict[str, Any]) -> A
|
||||
]
|
||||
|
||||
file_path = Path(params['file'])
|
||||
file_path.write_text(json.dumps(cookie_list, indent=2))
|
||||
file_path.write_text(json.dumps(cookie_list, indent=2, ensure_ascii=False), encoding='utf-8')
|
||||
return {'exported': len(cookie_list), 'file': str(file_path)}
|
||||
|
||||
elif cookies_command == 'import':
|
||||
|
||||
694
browser_use/skill_cli/commands/cloud.py
Normal file
694
browser_use/skill_cli/commands/cloud.py
Normal file
@@ -0,0 +1,694 @@
|
||||
"""Cloud API command — generic REST passthrough to Browser-Use Cloud.
|
||||
|
||||
Stdlib only. No async, no SDK, no heavy imports.
|
||||
|
||||
Usage:
|
||||
browser-use cloud login <api-key>
|
||||
browser-use cloud logout
|
||||
browser-use cloud v2 GET /browsers
|
||||
browser-use cloud v2 POST /tasks '{"task":"...","url":"https://..."}'
|
||||
browser-use cloud v2 poll <task-id>
|
||||
browser-use cloud v2 --help
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import typing
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_DEFAULT_BASE_URL = 'https://api.browser-use.com'
|
||||
_AUTH_HEADER = 'X-Browser-Use-API-Key'
|
||||
|
||||
|
||||
def _get_base() -> str:
|
||||
"""Get the API host URL. All paths are appended by callers."""
|
||||
return os.environ.get('BROWSER_USE_CLOUD_BASE_URL', _DEFAULT_BASE_URL).rstrip('/')
|
||||
|
||||
|
||||
def _base_url(version: str) -> str:
|
||||
"""Get versioned API URL: {base}/api/{version}"""
|
||||
per_version = os.environ.get(f'BROWSER_USE_CLOUD_BASE_URL_{version.upper()}')
|
||||
if per_version:
|
||||
return per_version
|
||||
return f'{_get_base()}/api/{version}'
|
||||
|
||||
|
||||
def _spec_url(version: str) -> str:
|
||||
per_version = os.environ.get(f'BROWSER_USE_OPENAPI_SPEC_URL_{version.upper()}')
|
||||
if per_version:
|
||||
return per_version
|
||||
return f'{_get_base()}/api/{version}/openapi.json'
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# API key persistence
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _get_config_path() -> Path:
|
||||
from browser_use.skill_cli.utils import get_config_path
|
||||
|
||||
return get_config_path()
|
||||
|
||||
|
||||
def _read_config() -> dict:
|
||||
from browser_use.skill_cli.config import read_config
|
||||
|
||||
return read_config()
|
||||
|
||||
|
||||
def _write_config(data: dict) -> None:
|
||||
from browser_use.skill_cli.config import write_config
|
||||
|
||||
write_config(data)
|
||||
|
||||
|
||||
def _get_api_key_or_none() -> str | None:
|
||||
"""Return API key from CLI config file, or None if not found."""
|
||||
from browser_use.skill_cli.config import get_config_value
|
||||
|
||||
val = get_config_value('api_key')
|
||||
return str(val) if val is not None else None
|
||||
|
||||
|
||||
def _get_api_key() -> str:
|
||||
"""Return API key from config file. Exits with error if missing."""
|
||||
key = _get_api_key_or_none()
|
||||
if key:
|
||||
return key
|
||||
|
||||
print('Error: No API key found.', file=sys.stderr)
|
||||
if os.environ.get('BROWSER_USE_API_KEY'):
|
||||
print(' Note: BROWSER_USE_API_KEY env var is set but not used by the CLI.', file=sys.stderr)
|
||||
print(' Run: browser-use config set api_key "$BROWSER_USE_API_KEY"', file=sys.stderr)
|
||||
else:
|
||||
print(
|
||||
'Already have an account? Get a key at: https://cloud.browser-use.com/settings?tab=api-keys&new=1&utm_source=oss&utm_medium=cli',
|
||||
file=sys.stderr,
|
||||
)
|
||||
print(' Then run: browser-use cloud login <key>', file=sys.stderr)
|
||||
print('No account? Run: browser-use cloud signup', file=sys.stderr)
|
||||
print(' This creates an agent account you can claim later with: browser-use cloud signup --claim', file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _create_cloud_profile_inner(api_key: str) -> str:
|
||||
"""Create a new cloud profile and save to config. Returns profile ID.
|
||||
|
||||
Raises RuntimeError on failure — safe to call from daemon context.
|
||||
"""
|
||||
body = json.dumps({'name': 'Browser Use CLI'}).encode()
|
||||
status, resp = _http_request('POST', f'{_base_url("v2")}/profiles', body, api_key)
|
||||
if status >= 400:
|
||||
raise RuntimeError(f'Error creating cloud profile: HTTP {status} — {resp}')
|
||||
|
||||
try:
|
||||
data = json.loads(resp)
|
||||
new_id = data['id']
|
||||
except (json.JSONDecodeError, KeyError, TypeError):
|
||||
raise RuntimeError(f'Unexpected response from cloud API: {resp}')
|
||||
|
||||
config = _read_config()
|
||||
config['cloud_connect_profile_id'] = new_id
|
||||
_write_config(config)
|
||||
return new_id
|
||||
|
||||
|
||||
def _create_cloud_profile() -> str:
|
||||
"""Create a new cloud profile and save to config. Returns profile ID.
|
||||
|
||||
CLI entry point — exits on error.
|
||||
"""
|
||||
api_key = _get_api_key()
|
||||
try:
|
||||
return _create_cloud_profile_inner(api_key)
|
||||
except RuntimeError as e:
|
||||
print(str(e), file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _get_or_create_cloud_profile() -> str:
|
||||
"""Return cloud profile ID from config, creating one if missing. No validation HTTP call."""
|
||||
config = _read_config()
|
||||
profile_id = config.get('cloud_connect_profile_id')
|
||||
if profile_id:
|
||||
return profile_id
|
||||
return _create_cloud_profile()
|
||||
|
||||
|
||||
def _get_cloud_connect_proxy() -> str | None:
|
||||
"""Return the cloud connect proxy country code from config."""
|
||||
from browser_use.skill_cli.config import get_config_value
|
||||
|
||||
val = get_config_value('cloud_connect_proxy')
|
||||
return str(val) if val is not None else None
|
||||
|
||||
|
||||
def _get_cloud_connect_timeout() -> int | None:
|
||||
"""Return the cloud connect timeout (minutes) from config."""
|
||||
from browser_use.skill_cli.config import get_config_value
|
||||
|
||||
val = get_config_value('cloud_connect_timeout')
|
||||
return int(val) if val is not None else None
|
||||
|
||||
|
||||
def _save_api_key(key: str) -> None:
|
||||
config = _read_config()
|
||||
config['api_key'] = key
|
||||
_write_config(config)
|
||||
|
||||
|
||||
def _remove_api_key() -> bool:
|
||||
config = _read_config()
|
||||
if 'api_key' not in config:
|
||||
return False
|
||||
del config['api_key']
|
||||
path = _get_config_path()
|
||||
if config:
|
||||
_write_config(config)
|
||||
else:
|
||||
path.unlink(missing_ok=True)
|
||||
return True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HTTP helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _http_request(method: str, url: str, body: bytes | None, api_key: str, timeout: float = 30.0) -> tuple[int, bytes]:
|
||||
"""Fire an HTTP request. Returns (status_code, response_body)."""
|
||||
headers = {_AUTH_HEADER: api_key}
|
||||
if body is not None:
|
||||
headers['Content-Type'] = 'application/json'
|
||||
|
||||
req = urllib.request.Request(url, data=body, headers=headers, method=method.upper())
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
return resp.status, resp.read()
|
||||
except urllib.error.HTTPError as e:
|
||||
return e.code, e.read()
|
||||
except urllib.error.URLError as e:
|
||||
print(f'Error: {e.reason}', file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _print_json(data: bytes, file: typing.TextIO | None = None) -> None:
|
||||
"""Pretty-print JSON, raw fallback."""
|
||||
out = file or sys.stdout
|
||||
try:
|
||||
parsed = json.loads(data)
|
||||
print(json.dumps(parsed, indent=2), file=out)
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
buf = out.buffer if hasattr(out, 'buffer') else sys.stdout.buffer
|
||||
buf.write(data)
|
||||
buf.write(b'\n')
|
||||
buf.flush()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OpenAPI help
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _fetch_spec(version: str) -> bytes | None:
|
||||
url = _spec_url(version)
|
||||
try:
|
||||
req = urllib.request.Request(url)
|
||||
with urllib.request.urlopen(req, timeout=5) as resp:
|
||||
return resp.read()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _example_value(prop: dict, schemas: dict) -> object:
|
||||
"""Generate a placeholder value for an OpenAPI property."""
|
||||
if '$ref' in prop:
|
||||
ref_name = prop['$ref'].rsplit('/', 1)[-1]
|
||||
if ref_name in schemas:
|
||||
return _generate_body_example_dict(ref_name, schemas)
|
||||
return {}
|
||||
|
||||
t = prop.get('type', 'string')
|
||||
fmt = prop.get('format', '')
|
||||
enum = prop.get('enum')
|
||||
|
||||
if enum:
|
||||
return enum[0]
|
||||
if t == 'string':
|
||||
if fmt == 'uri' or fmt == 'url':
|
||||
return 'https://example.com'
|
||||
if fmt == 'date-time':
|
||||
return '2025-01-01T00:00:00Z'
|
||||
if 'email' in fmt:
|
||||
return 'user@example.com'
|
||||
return '...'
|
||||
if t == 'integer':
|
||||
return 0
|
||||
if t == 'number':
|
||||
return 0.0
|
||||
if t == 'boolean':
|
||||
return False
|
||||
if t == 'array':
|
||||
items = prop.get('items', {})
|
||||
return [_example_value(items, schemas)]
|
||||
if t == 'object':
|
||||
props = prop.get('properties', {})
|
||||
return {k: _example_value(v, schemas) for k, v in props.items()}
|
||||
return '...'
|
||||
|
||||
|
||||
def _generate_body_example_dict(ref_name: str, schemas: dict) -> dict:
|
||||
"""Build a compact example dict from a $ref schema."""
|
||||
schema = schemas.get(ref_name, {})
|
||||
props = schema.get('properties', {})
|
||||
required = set(schema.get('required', []))
|
||||
|
||||
result = {}
|
||||
# Required fields first, then sorted optional
|
||||
for key in sorted(props, key=lambda k: (k not in required, k)):
|
||||
result[key] = _example_value(props[key], schemas)
|
||||
return result
|
||||
|
||||
|
||||
def _generate_body_example(ref: str, schemas: dict) -> str:
	"""Return compact JSON string for a $ref."""
	# The last path segment of a '#/components/schemas/Name' ref is the name.
	example = _generate_body_example_dict(ref.rsplit('/', 1)[-1], schemas)
	return json.dumps(example, separators=(',', ':'))
|
||||
|
||||
|
||||
def _find_body_ref(spec: dict, method: str, path: str) -> str | None:
|
||||
"""Find the $ref for request body of a given method+path in spec."""
|
||||
paths = spec.get('paths', {})
|
||||
path_obj = paths.get(path, {})
|
||||
method_obj = path_obj.get(method.lower(), {})
|
||||
body = method_obj.get('requestBody', {})
|
||||
content = body.get('content', {})
|
||||
json_media = content.get('application/json', {})
|
||||
schema = json_media.get('schema', {})
|
||||
return schema.get('$ref')
|
||||
|
||||
|
||||
def _match_path(spec_path: str, req_path: str) -> bool:
|
||||
"""Match an OpenAPI template path against a concrete path.
|
||||
|
||||
E.g. /tasks/{task_id} matches /tasks/abc123
|
||||
"""
|
||||
spec_parts = spec_path.strip('/').split('/')
|
||||
req_parts = req_path.strip('/').split('/')
|
||||
if len(spec_parts) != len(req_parts):
|
||||
return False
|
||||
for sp, rp in zip(spec_parts, req_parts):
|
||||
if sp.startswith('{') and sp.endswith('}'):
|
||||
continue
|
||||
if sp != rp:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _find_body_example(spec: dict, method: str, path: str) -> str | None:
	"""Find a body example for the given method+path, using template matching."""
	schemas = spec.get('components', {}).get('schemas', {})
	for template in spec.get('paths', {}):
		if not _match_path(template, path):
			continue
		# Keep scanning further templates when this match has no body ref.
		ref = _find_body_ref(spec, method, template)
		if ref:
			return _generate_body_example(ref, schemas)
	return None
|
||||
|
||||
|
||||
def _format_openapi_help(spec_data: bytes) -> str:
	"""Parse OpenAPI spec and render grouped endpoints.

	Produces a tag-grouped listing of 'METHOD path' lines, each with an
	optional '# summary', parameter names ('*' marks required), and an
	example JSON body derived from the request-body $ref schema.
	Returns '' when spec_data is not valid JSON so callers can fall back
	to the static help text.
	"""
	try:
		spec = json.loads(spec_data)
	except (json.JSONDecodeError, ValueError):
		return ''

	paths = spec.get('paths', {})
	schemas = spec.get('components', {}).get('schemas', {})
	info = spec.get('info', {})

	lines: list[str] = []
	title = info.get('title', 'API')
	version = info.get('version', '')
	lines.append(f'{title} {version}'.strip())
	lines.append('')

	# Group by tag
	groups: dict[str, list[str]] = {}
	for path, methods in sorted(paths.items()):
		for method, details in sorted(methods.items()):
			# Skip non-operation keys that may appear in an OpenAPI Paths item.
			if method in ('parameters', 'summary', 'description'):
				continue
			tags = details.get('tags', ['Other'])
			tag = tags[0] if tags else 'Other'
			summary = details.get('summary', '')

			# Build endpoint line
			parts = [f'  {method.upper():6s} {path}']
			if summary:
				parts.append(f'    # {summary}')

			# Parameters
			params = details.get('parameters', [])
			param_strs = []
			for p in params:
				name = p.get('name', '')
				required = p.get('required', False)
				marker = '*' if required else ''
				param_strs.append(f'{name}{marker}')
			if param_strs:
				parts.append(f'    params: {", ".join(param_strs)}')

			# Body example
			body_ref = _find_body_ref(spec, method, path)
			if body_ref:
				example = _generate_body_example(body_ref, schemas)
				parts.append(f"    body: '{example}'")

			# Single-line endpoints stay single-line; multi-part ones join with newlines.
			groups.setdefault(tag, []).append('\n'.join(parts) if len(parts) > 1 else parts[0])

	for tag, endpoints in sorted(groups.items()):
		lines.append(f'[{tag}]')
		for ep in endpoints:
			lines.append(ep)
		lines.append('')

	return '\n'.join(lines)
|
||||
|
||||
|
||||
def _static_help(version: str) -> str:
	"""Fallback help when OpenAPI spec is unavailable.

	Returns a static usage/examples text for the versioned REST
	passthrough; used when the live spec could not be fetched.
	"""
	# NOTE: doubled braces ({{ }}) emit literal JSON braces in the f-string.
	return f"""Browser-Use Cloud API {version}

Usage:
  browser-use cloud {version} <METHOD> <path> [body]
  browser-use cloud {version} poll <task-id>

Examples:
  browser-use cloud {version} GET /browsers
  browser-use cloud {version} POST /tasks '{{"task":"Search for AI news","url":"https://google.com"}}'
  browser-use cloud {version} GET /tasks/<task-id>
  browser-use cloud {version} poll <task-id>

(Could not fetch OpenAPI spec for live endpoint listing)
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Command handlers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _cloud_login(argv: list[str]) -> int:
|
||||
if not argv:
|
||||
print('Usage: browser-use cloud login <api-key>', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
key = argv[0]
|
||||
_save_api_key(key)
|
||||
print('API key saved')
|
||||
return 0
|
||||
|
||||
|
||||
def _cloud_logout() -> int:
	"""Remove any stored API key; always returns 0."""
	removed = _remove_api_key()
	print('API key removed' if removed else 'No API key to remove')
	return 0
|
||||
|
||||
|
||||
def _cloud_rest(argv: list[str], version: str) -> int:
	"""Generic REST passthrough.

	Sends <METHOD> <path> [body] to the versioned cloud API and
	pretty-prints the response. Exit codes: 0 on success, 1 on usage or
	server (5xx) error, 2 on client (4xx) error. On a 4xx the function
	additionally tries to print the expected request body derived from
	the live OpenAPI spec.
	"""
	if len(argv) < 2:
		print(f'Usage: browser-use cloud {version} <METHOD> <path> [body]', file=sys.stderr)
		return 1

	method = argv[0].upper()
	path = argv[1]
	body_str = argv[2] if len(argv) > 2 else None

	# Normalize path
	if not path.startswith('/'):
		path = '/' + path

	url = f'{_base_url(version)}{path}'
	api_key = _get_api_key()

	body = body_str.encode() if body_str else None
	status, resp_body = _http_request(method, url, body, api_key)

	if 400 <= status < 500:
		print(f'HTTP {status}', file=sys.stderr)
		_print_json(resp_body, file=sys.stderr)

		# Try to suggest correct body from spec
		spec_data = _fetch_spec(version)
		if spec_data:
			try:
				spec = json.loads(spec_data)
				example = _find_body_example(spec, method, path)
				if example:
					print(f"\nExpected body: '{example}'", file=sys.stderr)
			except (json.JSONDecodeError, ValueError):
				# Suggestion is best-effort only; ignore a malformed spec.
				pass
		return 2

	if status >= 500:
		print(f'HTTP {status}', file=sys.stderr)
		_print_json(resp_body, file=sys.stderr)
		return 1

	_print_json(resp_body)
	return 0
|
||||
|
||||
|
||||
def _cloud_poll(argv: list[str], version: str) -> int:
|
||||
"""Poll GET /tasks/<id> until done."""
|
||||
if not argv:
|
||||
print(f'Usage: browser-use cloud {version} poll <task-id>', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
task_id = argv[0]
|
||||
url = f'{_base_url(version)}/tasks/{task_id}'
|
||||
api_key = _get_api_key()
|
||||
|
||||
while True:
|
||||
status_code, resp_body = _http_request('GET', url, None, api_key)
|
||||
|
||||
if status_code >= 400:
|
||||
print(f'\nHTTP {status_code}', file=sys.stderr)
|
||||
_print_json(resp_body, file=sys.stderr)
|
||||
return 2
|
||||
|
||||
try:
|
||||
data = json.loads(resp_body)
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
print('\nError: invalid JSON response', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
task_status = data.get('status', 'unknown')
|
||||
cost = data.get('cost', 0)
|
||||
print(f'\rstatus: {task_status} cost: ${cost:.4f}', end='', file=sys.stderr, flush=True)
|
||||
|
||||
if task_status == 'finished':
|
||||
print('', file=sys.stderr) # newline
|
||||
_print_json(resp_body)
|
||||
return 0
|
||||
|
||||
if task_status == 'failed':
|
||||
print('', file=sys.stderr)
|
||||
_print_json(resp_body, file=sys.stderr)
|
||||
return 2
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
|
||||
def _cloud_help(version: str) -> int:
	"""Show OpenAPI-driven help for a version, falling back to static text."""
	spec_data = _fetch_spec(version)
	formatted = _format_openapi_help(spec_data) if spec_data else ''
	if formatted:
		print(formatted)
	else:
		# Spec unavailable or unparsable: print the canned help instead.
		print(_static_help(version))
	return 0
|
||||
|
||||
|
||||
def _cloud_versioned(argv: list[str], version: str) -> int:
	"""Route versioned subcommands: poll, help, or REST passthrough."""
	if not argv or argv[0] in ('--help', 'help', '-h'):
		return _cloud_help(version)

	if argv[0] == 'poll':
		return _cloud_poll(argv[1:], version)

	# Anything else is treated as a raw REST call: METHOD path [body]
	return _cloud_rest(argv, version)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Signup (agent self-registration)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _signup_challenge() -> int:
	"""Request a signup challenge.

	Prints the challenge ID/text and verification instructions on
	success. Returns 1 when a key is already configured, the request
	fails, or the response is malformed.
	"""
	if _get_api_key_or_none():
		print('You already have an API key configured.', file=sys.stderr)
		print('Run `browser-use cloud signup --claim` to claim your account.', file=sys.stderr)
		return 1

	body = json.dumps({}).encode()
	status, resp = _http_request('POST', f'{_get_base()}/cloud/signup', body, api_key='')
	if status >= 400:
		print(f'Error: HTTP {status}', file=sys.stderr)
		_print_json(resp, file=sys.stderr)
		return 1

	try:
		data = json.loads(resp)
		# Extract inside the try: KeyError/TypeError mean well-formed JSON
		# with an unexpected shape — previously an unhandled crash.
		challenge_id = data['challenge_id']
		challenge_text = data['challenge_text']
	except (json.JSONDecodeError, ValueError, KeyError, TypeError):
		print('Error: invalid response', file=sys.stderr)
		return 1

	print(f'Challenge ID: {challenge_id}')
	print(f'Challenge: {challenge_text}')
	print()
	print('Verify to create your agent account:')
	print('  browser-use cloud signup --verify <challenge-id> <answer>')
	return 0
|
||||
|
||||
|
||||
def _signup_verify(challenge_id: str, answer: str) -> int:
	"""Verify a signup challenge and save the API key.

	Returns 0 after persisting the returned key; 1 when a key is already
	configured, the request fails, or the response is malformed.
	"""
	if _get_api_key_or_none():
		print('You already have an API key configured.', file=sys.stderr)
		print('Run `browser-use cloud signup --claim` to claim your account.', file=sys.stderr)
		return 1

	body = json.dumps({'challenge_id': challenge_id, 'answer': answer}).encode()
	status, resp = _http_request('POST', f'{_get_base()}/cloud/signup/verify', body, api_key='')
	if status >= 400:
		print(f'Error: HTTP {status}', file=sys.stderr)
		_print_json(resp, file=sys.stderr)
		return 1

	try:
		data = json.loads(resp)
		# KeyError/TypeError cover well-formed JSON with a missing or
		# mis-shaped 'api_key' field — previously an unhandled crash.
		api_key = data['api_key']
	except (json.JSONDecodeError, ValueError, KeyError, TypeError):
		print('Error: invalid response', file=sys.stderr)
		return 1

	_save_api_key(api_key)
	print('API key saved')
	return 0
|
||||
|
||||
|
||||
def _signup_claim() -> int:
	"""Generate a claim URL for the current API key.

	Returns 0 and prints the URL on success; 1 on HTTP failure or a
	malformed response.
	"""
	api_key = _get_api_key()
	status, resp = _http_request('POST', f'{_get_base()}/cloud/signup/claim', None, api_key)
	if status >= 400:
		print(f'Error: HTTP {status}', file=sys.stderr)
		_print_json(resp, file=sys.stderr)
		return 1

	try:
		data = json.loads(resp)
		# KeyError/TypeError cover well-formed JSON that lacks
		# 'claim_url' — previously an unhandled crash.
		claim_url = data['claim_url']
	except (json.JSONDecodeError, ValueError, KeyError, TypeError):
		print('Error: invalid response', file=sys.stderr)
		return 1

	print(f'Claim URL: {claim_url}')
	print('Share this URL with a human to claim ownership of this account.')
	return 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main dispatcher
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def handle_cloud_command(argv: list[str]) -> int:
	"""Main dispatcher for `browser-use cloud ...`."""
	if not argv:
		_print_cloud_usage()
		return 1

	subcmd = argv[0]
	rest = argv[1:]

	if subcmd in ('--help', 'help', '-h'):
		_print_cloud_usage()
		return 0

	if subcmd == 'login':
		return _cloud_login(rest)

	if subcmd == 'logout':
		return _cloud_logout()

	if subcmd in ('v2', 'v3'):
		return _cloud_versioned(rest, subcmd)

	if subcmd == 'signup':
		if '--verify' in argv:
			flag_pos = argv.index('--verify')
			# Two tokens must follow the flag: <challenge-id> <answer>.
			if flag_pos + 2 >= len(argv):
				print('Usage: browser-use cloud signup --verify <challenge-id> <answer>', file=sys.stderr)
				return 1
			return _signup_verify(argv[flag_pos + 1], argv[flag_pos + 2])
		return _signup_claim() if '--claim' in argv else _signup_challenge()

	if subcmd == 'connect':
		# Normally intercepted by main.py before reaching here
		print('Error: cloud connect must be run via the main CLI (browser-use cloud connect)', file=sys.stderr)
		return 1

	print(f'Unknown cloud subcommand: {subcmd}', file=sys.stderr)
	_print_cloud_usage()
	return 1
|
||||
|
||||
|
||||
def _print_cloud_usage() -> None:
	"""Print the top-level `browser-use cloud` usage text to stdout."""
	print('Usage: browser-use cloud <command>')
	print()
	print('Commands:')
	print('  connect                        Provision cloud browser and connect')
	print('  signup                         Create an agent account (challenge-response)')
	print('  signup --verify <id> <answer>  Verify challenge and save API key')
	print('  signup --claim                 Generate URL to claim your agent account')
	print('  login <api-key>                Save API key')
	print('  logout                         Remove API key')
	print('  v2 <METHOD> <path> [body]      REST passthrough (API v2)')
	print('  v3 <METHOD> <path> [body]      REST passthrough (API v3)')
	print('  v2 poll <task-id>              Poll task until done')
	print('  v2 --help                      Show API v2 endpoints')
	print('  v3 --help                      Show API v3 endpoints')
	print()
	print('Examples:')
	print('  browser-use cloud login sk-abc123...')
	print('  browser-use cloud v2 GET /browsers')
	print('  browser-use cloud v2 POST /tasks \'{"task":"...","url":"https://..."}\'')
	print('  browser-use cloud v2 poll <task-id>')
|
||||
@@ -1,423 +0,0 @@
|
||||
"""Cloud session SDK wrappers and CLI handlers.
|
||||
|
||||
This module provides:
|
||||
- SDK wrapper functions for the Browser-Use Cloud Session API
|
||||
- CLI command handlers for `browser-use session <command>`
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from typing import Any
|
||||
|
||||
from browser_use_sdk.types.session_item_view import SessionItemView
|
||||
from browser_use_sdk.types.session_view import SessionView
|
||||
from browser_use_sdk.types.share_view import ShareView
|
||||
|
||||
from browser_use.skill_cli.commands.utils import format_duration, get_sdk_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ============ SDK Wrappers ============
|
||||
|
||||
|
||||
def create_session(**kwargs: Any) -> SessionItemView:
	"""Create a cloud browser session.

	Args:
		profile_id: Cloud profile ID for persistent auth/cookies
		proxy_country: Proxy country code (us, gb, de, etc.)
		keep_alive: Keep session alive after task completes
		persist_memory: Share memory between tasks in session
		start_url: URL to navigate to when session starts
		screen_width: Browser screen width in pixels
		screen_height: Browser screen height in pixels

	Returns:
		SessionItemView with session details
	"""
	# Translate CLI-facing names to SDK names and drop unset (None)
	# values in a single pass.
	sdk_names = {
		'proxy_country': 'proxy_country_code',
		'screen_width': 'browser_screen_width',
		'screen_height': 'browser_screen_height',
	}
	params = {sdk_names.get(name, name): value for name, value in kwargs.items() if value is not None}

	return get_sdk_client().sessions.create_session(**params)
|
||||
|
||||
|
||||
def list_sessions(limit: int = 10, status: str | None = None) -> list[SessionItemView]:
	"""List cloud browser sessions.

	Args:
		limit: Maximum number of sessions to return (capped at 100).
		status: Optional status filter (e.g. 'active', 'stopped').

	Returns:
		List of SessionItemView items; empty list when none match.
	"""
	params: dict[str, Any] = {'page_size': min(limit, 100)}
	# Only send the filter when set: the SDK forwards None as JSON null,
	# which the API rejects (same reason the task module uses _filter_none).
	if status is not None:
		params['filter_by'] = status
	response = get_sdk_client().sessions.list_sessions(**params)
	return list(response.items) if response.items else []
|
||||
|
||||
|
||||
def get_session(session_id: str) -> SessionView:
	"""Get details of a specific session.

	Args:
		session_id: Cloud session ID.

	Returns:
		SessionView with the session's full details from the cloud API.
	"""
	return get_sdk_client().sessions.get_session(session_id)
|
||||
|
||||
|
||||
def stop_session(session_id: str) -> SessionView:
	"""Stop a cloud session.

	Args:
		session_id: Cloud session ID to stop.

	Returns:
		SessionView reflecting the session after the stop action.
	"""
	return get_sdk_client().sessions.update_session(session_id, action='stop')
|
||||
|
||||
|
||||
def delete_session(session_id: str) -> None:
	"""Delete a cloud session and all its tasks.

	Args:
		session_id: Cloud session ID to delete.
	"""
	get_sdk_client().sessions.delete_session(session_id)
|
||||
|
||||
|
||||
def create_public_share(session_id: str) -> ShareView:
	"""Create a public share URL for a session.

	Args:
		session_id: Cloud session ID to share.

	Returns:
		ShareView describing the created public share.
	"""
	return get_sdk_client().sessions.create_session_public_share(session_id)
|
||||
|
||||
|
||||
def delete_public_share(session_id: str) -> None:
	"""Delete the public share for a session.

	Args:
		session_id: Cloud session ID whose public share is removed.
	"""
	get_sdk_client().sessions.delete_session_public_share(session_id)
|
||||
|
||||
|
||||
def stop_sessions_parallel(session_ids: list[str]) -> tuple[list[str], list[dict[str, Any]]]:
	"""Stop multiple cloud sessions in parallel.

	Returns:
		(stopped_ids, errors) — errors is a list of {'id', 'error'} dicts.
	"""
	client = get_sdk_client()

	def _stop(session_id: str) -> tuple[str, str | None]:
		# Worker: (id, None) on success, (id, message) on failure.
		try:
			client.sessions.update_session(session_id, action='stop')
		except Exception as exc:
			return session_id, str(exc)
		return session_id, None

	stopped: list[str] = []
	errors: list[dict[str, Any]] = []
	with ThreadPoolExecutor(max_workers=10) as pool:
		pending = [pool.submit(_stop, session_id) for session_id in session_ids]
		for done in as_completed(pending):
			session_id, error = done.result()
			if error is None:
				stopped.append(session_id)
			else:
				errors.append({'id': session_id, 'error': error})

	return stopped, errors
|
||||
|
||||
|
||||
# ============ CLI Handlers ============
|
||||
|
||||
|
||||
def handle_session_command(args: argparse.Namespace) -> int:
	"""Handle session subcommands.

	Session commands manage cloud sessions and always require the cloud API.

	Args:
		args: Parsed command-line arguments

	Returns:
		Exit code (0 for success, 1 for error)
	"""
	# Imported lazily so module import stays cheap and cycle-free.
	from browser_use.skill_cli.api_key import APIKeyRequired, require_api_key
	from browser_use.skill_cli.install_config import is_mode_available

	# Check if remote mode is available
	if not is_mode_available('remote'):
		print(
			'Error: Session management requires remote mode.\n'
			'Remote mode is not installed. Reinstall to enable:\n'
			'  curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- --remote-only\n'
			'  curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- --full',
			file=sys.stderr,
		)
		return 1

	# Check API key
	try:
		require_api_key('Cloud sessions')
	except APIKeyRequired as e:
		print(f'Error: {e}', file=sys.stderr)
		return 1

	# Dispatch to the per-subcommand handler.
	if args.session_command == 'list':
		return _handle_list(args)
	elif args.session_command == 'get':
		return _handle_get(args)
	elif args.session_command == 'stop':
		return _handle_stop(args)
	elif args.session_command == 'create':
		return _handle_create(args)
	elif args.session_command == 'share':
		return _handle_share(args)
	else:
		print('Usage: browser-use session <command>')
		print('Commands: list, get <id>, stop <id>, create, share <id>')
		return 1
|
||||
|
||||
|
||||
# ============ CLI Helper Functions ============
|
||||
|
||||
|
||||
def _session_to_dict(session: Any) -> dict[str, Any]:
|
||||
"""Convert SDK session object to dict for JSON output."""
|
||||
return {
|
||||
'id': session.id,
|
||||
'status': session.status,
|
||||
'liveUrl': session.live_url,
|
||||
'startedAt': session.started_at.isoformat() if session.started_at else None,
|
||||
'finishedAt': session.finished_at.isoformat() if session.finished_at else None,
|
||||
'keepAlive': session.keep_alive,
|
||||
'persistMemory': getattr(session, 'persist_memory', None),
|
||||
'proxyCost': session.proxy_cost,
|
||||
'publicShareUrl': getattr(session, 'public_share_url', None),
|
||||
}
|
||||
|
||||
|
||||
def _handle_list(args: argparse.Namespace) -> int:
	"""Handle 'session list' command.

	Prints sessions as a JSON array with --json, otherwise as a
	human-readable emoji-decorated listing. Returns 0 on success,
	1 when the API call fails.
	"""
	try:
		status_filter = getattr(args, 'status', None)
		sessions = list_sessions(limit=args.limit, status=status_filter)
	except Exception as e:
		print(f'Error: {e}', file=sys.stderr)
		return 1

	if getattr(args, 'json', False):
		print(json.dumps([_session_to_dict(s) for s in sessions]))
	else:
		if not sessions:
			status_msg = f' with status "{status_filter}"' if status_filter else ''
			print(f'No sessions found{status_msg}')
		else:
			header = f'Sessions ({len(sessions)})'
			if status_filter:
				header = f'{status_filter.capitalize()} sessions ({len(sessions)})'
			print(f'{header}:')
			for s in sessions:
				session_id = s.id or 'unknown'
				status = s.status or 'unknown'
				live_url = s.live_url
				started_at = s.started_at
				finished_at = s.finished_at
				keep_alive = '🔄' if s.keep_alive else ''

				# Status emoji
				status_emoji = {
					'active': '🟢',
					'stopped': '⏹️',
				}.get(status, '❓')

				# Truncate ID for display
				short_id = session_id[:8] + '...' if len(session_id) > 8 else session_id

				# Build line with duration
				duration = format_duration(started_at, finished_at)
				line = f'  {status_emoji} {short_id} [{status}]'
				if duration:
					line += f' {duration}'
				if keep_alive:
					line += f' {keep_alive}'
				# Live URL only makes sense while the session is running.
				if live_url and status == 'active':
					line += f'\n     live: {live_url}'
				print(line)

	return 0
|
||||
|
||||
|
||||
def _handle_get(args: argparse.Namespace) -> int:
	"""Handle 'session get <session_id>' command.

	Prints the session as JSON with --json, otherwise as a compact
	header line plus detail lines. Returns 0 on success, 1 when the
	API call fails.
	"""
	try:
		session = get_session(args.session_id)
	except Exception as e:
		print(f'Error: {e}', file=sys.stderr)
		return 1

	if getattr(args, 'json', False):
		print(json.dumps(_session_to_dict(session)))
	else:
		session_id = session.id or args.session_id
		status = session.status or 'unknown'
		live_url = session.live_url
		started_at = session.started_at
		finished_at = session.finished_at
		keep_alive = session.keep_alive
		proxy_cost = session.proxy_cost
		public_share_url = getattr(session, 'public_share_url', None)

		# Status emoji
		status_emoji = {
			'active': '🟢',
			'stopped': '⏹️',
		}.get(status, '❓')

		# Build header with duration
		duration = format_duration(started_at, finished_at)
		header_parts = [f'{status_emoji} {session_id[:8]}... [{status}]']
		if duration:
			header_parts.append(duration)
		if proxy_cost:
			# Format proxy cost to 2 decimal places
			try:
				cost_val = float(proxy_cost)
				header_parts.append(f'${cost_val:.2f}')
			except (ValueError, TypeError):
				# Non-numeric cost value: show it verbatim.
				header_parts.append(f'${proxy_cost}')
		print(' '.join(header_parts))

		if keep_alive:
			print('  Keep Alive: Yes')
		if live_url:
			print(f'  Live URL: {live_url}')
		if public_share_url:
			print(f'  Public Share: {public_share_url}')

	return 0
|
||||
|
||||
|
||||
def _handle_stop(args: argparse.Namespace) -> int:
	"""Handle 'session stop <session_id>' command."""
	# --all delegates to the bulk parallel-stop path.
	if getattr(args, 'all', False):
		return _handle_stop_all(args)

	try:
		stop_session(args.session_id)
	except Exception as exc:
		print(f'Error: {exc}', file=sys.stderr)
		return 1

	if getattr(args, 'json', False):
		print(json.dumps({'stopped': args.session_id}))
	else:
		print(f'Stopped session: {args.session_id}')
	return 0
|
||||
|
||||
|
||||
def _handle_stop_all(args: argparse.Namespace) -> int:
	"""Handle 'session stop --all' command."""
	try:
		# Only active sessions can be stopped.
		active = list_sessions(limit=100, status='active')
	except Exception as exc:
		print(f'Error listing sessions: {exc}', file=sys.stderr)
		return 1

	# Collect non-empty IDs; nothing to do when none are active.
	session_ids = [session.id for session in active if session.id]
	if not session_ids:
		print('No active sessions to stop')
		return 0

	stopped, errors = stop_sessions_parallel(session_ids)

	if getattr(args, 'json', False):
		print(json.dumps({'stopped': stopped, 'errors': errors}))
	else:
		if stopped:
			print(f'Stopped {len(stopped)} session(s):')
			for sid in stopped:
				print(f'  ✓ {sid[:8]}...')
		if errors:
			print(f'Failed to stop {len(errors)} session(s):')
			for err in errors:
				print(f'  ✗ {err["id"][:8]}...: {err["error"]}')

	return 0 if not errors else 1
|
||||
|
||||
|
||||
def _handle_create(args: argparse.Namespace) -> int:
|
||||
"""Handle 'session create' command."""
|
||||
# Parse screen size if provided
|
||||
screen_width = None
|
||||
screen_height = None
|
||||
if hasattr(args, 'screen_size') and args.screen_size:
|
||||
try:
|
||||
w, h = args.screen_size.lower().split('x')
|
||||
screen_width = int(w)
|
||||
screen_height = int(h)
|
||||
except ValueError:
|
||||
print('Error: Invalid screen size format. Use WxH (e.g., 1920x1080)', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
try:
|
||||
session = create_session(
|
||||
profile_id=getattr(args, 'profile', None),
|
||||
proxy_country=getattr(args, 'proxy_country', None),
|
||||
keep_alive=getattr(args, 'keep_alive', None),
|
||||
persist_memory=getattr(args, 'persist_memory', None),
|
||||
start_url=getattr(args, 'start_url', None),
|
||||
screen_width=screen_width,
|
||||
screen_height=screen_height,
|
||||
)
|
||||
except Exception as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if getattr(args, 'json', False):
|
||||
print(json.dumps(_session_to_dict(session)))
|
||||
else:
|
||||
print(f'Created session: {session.id}')
|
||||
if session.live_url:
|
||||
print(f' Live URL: {session.live_url}')
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def _handle_share(args: argparse.Namespace) -> int:
	"""Handle 'session share <session_id>' command."""
	session_id = args.session_id
	as_json = getattr(args, 'json', False)

	# --delete removes the existing public share instead of creating one.
	if getattr(args, 'delete', False):
		try:
			delete_public_share(session_id)
		except Exception as exc:
			print(f'Error: {exc}', file=sys.stderr)
			return 1

		if as_json:
			print(json.dumps({'deleted': session_id}))
		else:
			print(f'Deleted public share for session: {session_id}')
		return 0

	# Create share
	try:
		share = create_public_share(session_id)
	except Exception as exc:
		print(f'Error: {exc}', file=sys.stderr)
		return 1

	if as_json:
		payload = {
			'sessionId': session_id,
			'url': share.share_url,
			'shareToken': share.share_token,
			'viewCount': share.view_count,
		}
		print(json.dumps(payload))
	else:
		print(f'Public share created for session: {session_id}')
		if share.share_url:
			print(f'  URL: {share.share_url}')

	return 0
|
||||
@@ -1,413 +0,0 @@
|
||||
"""Cloud task SDK wrappers and CLI handlers.
|
||||
|
||||
This module provides:
|
||||
- SDK wrapper functions for the Browser-Use Cloud Task API
|
||||
- CLI command handlers for `browser-use task <command>`
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
from browser_use_sdk.types.task_created_response import TaskCreatedResponse
|
||||
from browser_use_sdk.types.task_item_view import TaskItemView
|
||||
from browser_use_sdk.types.task_log_file_response import TaskLogFileResponse
|
||||
from browser_use_sdk.types.task_view import TaskView
|
||||
|
||||
from browser_use.skill_cli.commands.utils import format_duration, get_sdk_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _filter_none(kwargs: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Filter out None values from kwargs (SDK passes them as null, API rejects)."""
|
||||
return {k: v for k, v in kwargs.items() if v is not None}
|
||||
|
||||
|
||||
# ============ SDK Wrappers ============
|
||||
|
||||
|
||||
def create_task(task: str, **kwargs: Any) -> TaskCreatedResponse:
	"""Create a cloud task via API.

	Args:
		task: Task description for the agent
		llm: LLM model identifier
		session_id: Existing session ID to use
		max_steps: Maximum agent steps
		flash_mode: Enable flash mode for faster execution
		thinking: Enable extended reasoning mode
		vision: Enable/disable vision
		start_url: URL to start the task from
		metadata: Task metadata key-value pairs
		secrets: Task secrets key-value pairs
		allowed_domains: Restrict navigation to these domains
		skill_ids: Enable specific skill IDs
		structured_output: JSON schema for structured output
		judge: Enable judge mode
		judge_ground_truth: Expected answer for judge evaluation

	Returns:
		TaskCreatedResponse with task ID and session ID
	"""
	# Drop unset options so the SDK does not forward them as JSON null.
	params = _filter_none(kwargs)
	# 'task' is required and always sent (set after filtering so it can
	# never be dropped or overridden by a kwargs entry).
	params['task'] = task
	return get_sdk_client().tasks.create_task(**params)
|
||||
|
||||
|
||||
def get_task(task_id: str) -> TaskView:
	"""Get full task details including steps.

	Args:
		task_id: Cloud task ID.

	Returns:
		TaskView with the task's full details from the cloud API.
	"""
	return get_sdk_client().tasks.get_task(task_id)
|
||||
|
||||
|
||||
def list_tasks(
	limit: int = 10,
	status: str | None = None,
	session_id: str | None = None,
) -> list[TaskItemView]:
	"""List recent tasks.

	Args:
		limit: Maximum number of tasks to return (capped at 100, matching
			list_sessions' page-size clamp).
		status: Optional status filter.
		session_id: Optional session filter.

	Returns:
		List of TaskItemView items; empty list when none match.
	"""
	client = get_sdk_client()
	response = client.tasks.list_tasks(
		# Clamp like the sessions module does so oversized limits cannot
		# exceed the API's page-size maximum.
		page_size=min(limit, 100),
		# Optional filters are dropped when None (API rejects nulls).
		**_filter_none({'filter_by': status, 'session_id': session_id}),
	)
	return list(response.items) if response.items else []
|
||||
|
||||
|
||||
def stop_task(task_id: str) -> TaskView:
	"""Stop a running task.

	Args:
		task_id: Cloud task ID to stop.

	Returns:
		TaskView reflecting the task after the stop action.
	"""
	return get_sdk_client().tasks.update_task(task_id, action='stop')
|
||||
|
||||
|
||||
def get_task_logs(task_id: str) -> TaskLogFileResponse:
	"""Get task execution logs.

	Args:
		task_id: Cloud task ID.

	Returns:
		TaskLogFileResponse for the task's log file.
	"""
	return get_sdk_client().tasks.get_task_logs(task_id)
|
||||
|
||||
|
||||
async def poll_until_complete(
	task_id: str,
	stream: bool = False,
	poll_interval: float = 1.0,
) -> TaskView:
	"""Poll task status until it reaches a terminal state.

	Args:
		task_id: Cloud task ID to poll.
		stream: When True, print each status transition.
		poll_interval: Seconds to sleep between polls.

	Returns:
		The final TaskView (status finished/stopped/failed).
	"""
	import asyncio

	client = get_sdk_client()
	previous_status = None

	while True:
		# The SDK call blocks, so run it off the event loop.
		task = await asyncio.to_thread(client.tasks.get_task, task_id)
		status = task.status

		if stream and status != previous_status:
			print(f'Status: {status}')
			previous_status = status

		if status in ('finished', 'stopped', 'failed'):
			return task

		await asyncio.sleep(poll_interval)
|
||||
|
||||
|
||||
# ============ CLI Handlers ============
|
||||
|
||||
|
||||
def handle_task_command(args: argparse.Namespace) -> int:
|
||||
"""Handle task subcommands.
|
||||
|
||||
Task commands manage cloud tasks and always require the cloud API.
|
||||
|
||||
Args:
|
||||
args: Parsed command-line arguments
|
||||
|
||||
Returns:
|
||||
Exit code (0 for success, 1 for error)
|
||||
"""
|
||||
from browser_use.skill_cli.api_key import APIKeyRequired, require_api_key
|
||||
from browser_use.skill_cli.install_config import is_mode_available
|
||||
|
||||
# Check if remote mode is available
|
||||
if not is_mode_available('remote'):
|
||||
print(
|
||||
'Error: Task management requires remote mode.\n'
|
||||
'Remote mode is not installed. Reinstall to enable:\n'
|
||||
' curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- --remote-only\n'
|
||||
' curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- --full',
|
||||
file=sys.stderr,
|
||||
)
|
||||
return 1
|
||||
|
||||
# Check API key
|
||||
try:
|
||||
require_api_key('Cloud tasks')
|
||||
except APIKeyRequired as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if args.task_command == 'list':
|
||||
return _handle_list(args)
|
||||
elif args.task_command == 'status':
|
||||
return _handle_status(args)
|
||||
elif args.task_command == 'stop':
|
||||
return _handle_stop(args)
|
||||
elif args.task_command == 'logs':
|
||||
return _handle_logs(args)
|
||||
else:
|
||||
print('Usage: browser-use task <command>')
|
||||
print('Commands: list, status <task_id>, stop <task_id>, logs <task_id>')
|
||||
return 1
|
||||
|
||||
|
||||
# ============ CLI Helper Functions ============
|
||||
|
||||
|
||||
def _task_item_to_dict(task: Any) -> dict[str, Any]:
|
||||
"""Convert SDK TaskItemView to dict for JSON output."""
|
||||
return {
|
||||
'id': task.id,
|
||||
'status': task.status,
|
||||
'task': task.task,
|
||||
'sessionId': task.session_id,
|
||||
}
|
||||
|
||||
|
||||
def _task_to_dict(task: Any) -> dict[str, Any]:
|
||||
"""Convert SDK TaskView to dict for JSON output."""
|
||||
return {
|
||||
'id': task.id,
|
||||
'status': task.status,
|
||||
'task': task.task,
|
||||
'output': task.output,
|
||||
'cost': task.cost,
|
||||
'sessionId': task.session_id,
|
||||
'startedAt': task.started_at.isoformat() if task.started_at else None,
|
||||
'finishedAt': task.finished_at.isoformat() if task.finished_at else None,
|
||||
'steps': [_step_to_dict(s) for s in (task.steps or [])],
|
||||
}
|
||||
|
||||
|
||||
def _step_to_dict(step: Any) -> dict[str, Any]:
|
||||
"""Convert SDK step to dict for JSON output."""
|
||||
return {
|
||||
'number': step.number,
|
||||
'url': step.url,
|
||||
'memory': step.memory,
|
||||
'actions': step.actions,
|
||||
}
|
||||
|
||||
|
||||
def _handle_list(args: argparse.Namespace) -> int:
|
||||
"""Handle 'task list' command."""
|
||||
try:
|
||||
status_filter = getattr(args, 'status', None)
|
||||
session_filter = getattr(args, 'session', None)
|
||||
tasks = list_tasks(
|
||||
limit=args.limit,
|
||||
status=status_filter,
|
||||
session_id=session_filter,
|
||||
)
|
||||
except Exception as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if getattr(args, 'json', False):
|
||||
print(json.dumps([_task_item_to_dict(t) for t in tasks]))
|
||||
else:
|
||||
if not tasks:
|
||||
status_msg = f' with status "{status_filter}"' if status_filter else ''
|
||||
session_msg = f' in session "{session_filter}"' if session_filter else ''
|
||||
print(f'No tasks found{status_msg}{session_msg}')
|
||||
else:
|
||||
header = f'Tasks ({len(tasks)})'
|
||||
if status_filter:
|
||||
header = f'{status_filter.capitalize()} tasks ({len(tasks)})'
|
||||
print(f'{header}:')
|
||||
for t in tasks:
|
||||
task_id = t.id or 'unknown'
|
||||
status = t.status or 'unknown'
|
||||
task_desc = t.task or ''
|
||||
# Truncate long task descriptions
|
||||
if len(task_desc) > 50:
|
||||
task_desc = task_desc[:47] + '...'
|
||||
|
||||
# Status emoji
|
||||
status_emoji = {
|
||||
'started': '🔄',
|
||||
'running': '🔄',
|
||||
'finished': '✅',
|
||||
'stopped': '⏹️',
|
||||
'failed': '❌',
|
||||
}.get(status, '❓')
|
||||
|
||||
print(f' {status_emoji} {task_id[:8]}... [{status}] {task_desc}')
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def _handle_status(args: argparse.Namespace) -> int:
|
||||
"""Handle 'task status <task_id>' command."""
|
||||
try:
|
||||
# Use get_task() for full details including steps
|
||||
task = get_task(args.task_id)
|
||||
except Exception as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if getattr(args, 'json', False):
|
||||
print(json.dumps(_task_to_dict(task)))
|
||||
else:
|
||||
task_id = task.id or args.task_id
|
||||
task_status = task.status or 'unknown'
|
||||
output = task.output
|
||||
cost = task.cost
|
||||
steps = task.steps or []
|
||||
started_at = task.started_at
|
||||
finished_at = task.finished_at
|
||||
|
||||
compact = getattr(args, 'compact', False)
|
||||
verbose = getattr(args, 'verbose', False)
|
||||
last_n = getattr(args, 'last', None)
|
||||
reverse = getattr(args, 'reverse', False)
|
||||
specific_step = getattr(args, 'step', None)
|
||||
|
||||
# Determine display mode:
|
||||
# - Default: show only latest step
|
||||
# - --compact: show all steps with reasoning
|
||||
# - --verbose: show all steps with full details
|
||||
show_all_steps = compact or verbose
|
||||
|
||||
# Status emoji
|
||||
status_emoji = {
|
||||
'started': '🔄',
|
||||
'running': '🔄',
|
||||
'finished': '✅',
|
||||
'stopped': '⏹️',
|
||||
'failed': '❌',
|
||||
}.get(task_status, '❓')
|
||||
|
||||
# Build header line: status, cost, duration
|
||||
parts = [f'{status_emoji} {task_id[:8]}... [{task_status}]']
|
||||
if cost is not None:
|
||||
parts.append(f'${cost}')
|
||||
duration = format_duration(started_at, finished_at)
|
||||
if duration:
|
||||
parts.append(duration)
|
||||
print(' '.join(parts))
|
||||
|
||||
# Show steps
|
||||
if steps:
|
||||
total_steps = len(steps)
|
||||
|
||||
# Filter to specific step if requested
|
||||
if specific_step is not None:
|
||||
steps = [s for s in steps if s.number == specific_step]
|
||||
if not steps:
|
||||
print(f' Step {specific_step} not found (task has {total_steps} steps)')
|
||||
else:
|
||||
print(f' (showing step {specific_step} of {total_steps})')
|
||||
# Display the specific step
|
||||
for step in steps:
|
||||
_print_step(step, verbose)
|
||||
elif not show_all_steps:
|
||||
# Default mode: show only the latest step
|
||||
latest_step = steps[-1]
|
||||
earlier_count = total_steps - 1
|
||||
if earlier_count > 0:
|
||||
print(f' ... {earlier_count} earlier steps')
|
||||
_print_step(latest_step, verbose=False)
|
||||
else:
|
||||
# --compact or --verbose: show all steps (with optional filters)
|
||||
skipped_earlier = 0
|
||||
if last_n is not None and last_n < total_steps:
|
||||
skipped_earlier = total_steps - last_n
|
||||
steps = steps[-last_n:]
|
||||
|
||||
# Apply --reverse
|
||||
if reverse:
|
||||
steps = list(reversed(steps))
|
||||
|
||||
# Show count info
|
||||
if skipped_earlier > 0:
|
||||
print(f' ... {skipped_earlier} earlier steps')
|
||||
|
||||
# Display steps
|
||||
for step in steps:
|
||||
_print_step(step, verbose)
|
||||
|
||||
if output:
|
||||
print(f'\nOutput: {output}')
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def _print_step(step: Any, verbose: bool) -> None:
|
||||
"""Print a single step in compact or verbose format."""
|
||||
step_num = step.number if step.number is not None else '?'
|
||||
memory = step.memory or ''
|
||||
|
||||
if verbose:
|
||||
url = step.url or ''
|
||||
actions = step.actions or []
|
||||
|
||||
# Truncate URL for display
|
||||
short_url = url[:60] + '...' if len(url) > 60 else url
|
||||
|
||||
print(f' [{step_num}] {short_url}')
|
||||
if memory:
|
||||
# Truncate memory/reasoning for display
|
||||
short_memory = memory[:100] + '...' if len(memory) > 100 else memory
|
||||
print(f' Reasoning: {short_memory}')
|
||||
if actions:
|
||||
for action in actions[:2]: # Show max 2 actions per step
|
||||
# Truncate action for display
|
||||
short_action = action[:70] + '...' if len(action) > 70 else action
|
||||
print(f' Action: {short_action}')
|
||||
if len(actions) > 2:
|
||||
print(f' ... and {len(actions) - 2} more actions')
|
||||
else:
|
||||
# Compact mode: just step number and reasoning
|
||||
if memory:
|
||||
# Truncate reasoning for compact display
|
||||
short_memory = memory[:80] + '...' if len(memory) > 80 else memory
|
||||
print(f' {step_num}. {short_memory}')
|
||||
else:
|
||||
print(f' {step_num}. (no reasoning)')
|
||||
|
||||
|
||||
def _handle_stop(args: argparse.Namespace) -> int:
|
||||
"""Handle 'task stop <task_id>' command."""
|
||||
try:
|
||||
stop_task(args.task_id)
|
||||
except Exception as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if getattr(args, 'json', False):
|
||||
print(json.dumps({'stopped': args.task_id}))
|
||||
else:
|
||||
print(f'Stopped task: {args.task_id}')
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def _handle_logs(args: argparse.Namespace) -> int:
|
||||
"""Handle 'task logs <task_id>' command."""
|
||||
try:
|
||||
result = get_task_logs(args.task_id)
|
||||
except Exception as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if getattr(args, 'json', False):
|
||||
print(json.dumps({'downloadUrl': result.download_url}))
|
||||
else:
|
||||
download_url = result.download_url
|
||||
if download_url:
|
||||
print(f'Download logs: {download_url}')
|
||||
else:
|
||||
print('No logs available for this task')
|
||||
|
||||
return 0
|
||||
@@ -9,8 +9,6 @@ from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
COMMANDS = {'doctor'}
|
||||
|
||||
|
||||
async def handle() -> dict[str, Any]:
|
||||
"""Run health checks and return results."""
|
||||
@@ -22,14 +20,14 @@ async def handle() -> dict[str, Any]:
|
||||
# 2. Browser availability
|
||||
checks['browser'] = _check_browser()
|
||||
|
||||
# 3. API key configuration
|
||||
checks['api_key'] = _check_api_key_config()
|
||||
# 3. Network connectivity (basic check)
|
||||
checks['network'] = await _check_network()
|
||||
|
||||
# 4. Cloudflared availability
|
||||
# 4. Optional: cloudflared (for browser-use tunnel)
|
||||
checks['cloudflared'] = _check_cloudflared()
|
||||
|
||||
# 5. Network connectivity (basic check)
|
||||
checks['network'] = await _check_network()
|
||||
# 5. Optional: profile-use (for browser-use profile)
|
||||
checks['profile_use'] = _check_profile_use()
|
||||
|
||||
# Determine overall status
|
||||
all_ok = all(check.get('status') == 'ok' for check in checks.values())
|
||||
@@ -64,8 +62,7 @@ def _check_browser() -> dict[str, Any]:
|
||||
try:
|
||||
from browser_use.browser.profile import BrowserProfile
|
||||
|
||||
# Just check if we can import and create a profile
|
||||
profile = BrowserProfile(headless=True)
|
||||
BrowserProfile(headless=True) # verify import + constructor work
|
||||
return {
|
||||
'status': 'ok',
|
||||
'message': 'Browser profile available',
|
||||
@@ -78,45 +75,6 @@ def _check_browser() -> dict[str, Any]:
|
||||
}
|
||||
|
||||
|
||||
def _check_api_key_config() -> dict[str, Any]:
|
||||
"""Check if API key is configured."""
|
||||
from browser_use.skill_cli.api_key import check_api_key
|
||||
|
||||
status = check_api_key()
|
||||
if status['available']:
|
||||
return {
|
||||
'status': 'ok',
|
||||
'message': f'API key configured ({status["source"]})',
|
||||
}
|
||||
else:
|
||||
return {
|
||||
'status': 'missing',
|
||||
'message': 'No API key configured',
|
||||
'note': 'Required for remote browser. Get one at https://browser-use.com/new-api-key',
|
||||
}
|
||||
|
||||
|
||||
def _check_cloudflared() -> dict[str, Any]:
|
||||
"""Check if cloudflared is available."""
|
||||
from browser_use.skill_cli.tunnel import get_tunnel_manager
|
||||
|
||||
tunnel_mgr = get_tunnel_manager()
|
||||
status_info = tunnel_mgr.get_status()
|
||||
|
||||
if status_info['available']:
|
||||
return {
|
||||
'status': 'ok',
|
||||
'message': f'Cloudflared available ({status_info["source"]})',
|
||||
'note': status_info.get('note'),
|
||||
}
|
||||
else:
|
||||
return {
|
||||
'status': 'missing',
|
||||
'message': 'Cloudflared not available',
|
||||
'note': 'Will be auto-installed on first tunnel use',
|
||||
}
|
||||
|
||||
|
||||
async def _check_network() -> dict[str, Any]:
|
||||
"""Check basic network connectivity."""
|
||||
try:
|
||||
@@ -140,6 +98,40 @@ async def _check_network() -> dict[str, Any]:
|
||||
}
|
||||
|
||||
|
||||
def _check_cloudflared() -> dict[str, Any]:
|
||||
"""Check if cloudflared is available (needed for browser-use tunnel)."""
|
||||
from browser_use.skill_cli.tunnel import get_tunnel_manager
|
||||
|
||||
status = get_tunnel_manager().get_status()
|
||||
if status['available']:
|
||||
return {
|
||||
'status': 'ok',
|
||||
'message': f'cloudflared installed ({status["path"]})',
|
||||
}
|
||||
return {
|
||||
'status': 'missing',
|
||||
'message': 'cloudflared not installed (needed for browser-use tunnel)',
|
||||
'fix': 'Install cloudflared: https://developers.cloudflare.com/cloudflare-one/connections/connect-networks/downloads/',
|
||||
}
|
||||
|
||||
|
||||
def _check_profile_use() -> dict[str, Any]:
|
||||
"""Check if profile-use binary is available (needed for browser-use profile)."""
|
||||
from browser_use.skill_cli.profile_use import get_profile_use_binary
|
||||
|
||||
binary = get_profile_use_binary()
|
||||
if binary:
|
||||
return {
|
||||
'status': 'ok',
|
||||
'message': f'profile-use installed ({binary})',
|
||||
}
|
||||
return {
|
||||
'status': 'missing',
|
||||
'message': 'profile-use not installed (needed for browser-use profile)',
|
||||
'fix': 'browser-use profile update',
|
||||
}
|
||||
|
||||
|
||||
def _summarize_checks(checks: dict[str, dict[str, Any]]) -> str:
|
||||
"""Generate a summary of check results."""
|
||||
ok = sum(1 for c in checks.values() if c.get('status') == 'ok')
|
||||
|
||||
@@ -1,703 +0,0 @@
|
||||
"""Profile management command handlers.
|
||||
|
||||
Unified profile management that works with both local Chrome profiles and cloud profiles.
|
||||
The behavior is determined by the browser mode (-b real or -b remote).
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Any, Literal
|
||||
|
||||
from browser_use.skill_cli.commands.utils import get_sdk_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
ProfileMode = Literal['real', 'remote']
|
||||
|
||||
|
||||
class ProfileModeError(Exception):
|
||||
"""Raised when profile mode cannot be determined or is invalid."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
def get_profile_mode(args: argparse.Namespace) -> ProfileMode:
|
||||
"""Determine profile mode from -b flag or install config.
|
||||
|
||||
Args:
|
||||
args: Parsed command-line arguments with browser attribute
|
||||
|
||||
Returns:
|
||||
'real' for local Chrome profiles, 'remote' for cloud profiles
|
||||
|
||||
Raises:
|
||||
ProfileModeError: If mode cannot be determined or chromium mode is used
|
||||
"""
|
||||
from browser_use.skill_cli.install_config import is_mode_available
|
||||
|
||||
browser_mode = getattr(args, 'browser', None)
|
||||
|
||||
# Explicit mode specified
|
||||
if browser_mode == 'real':
|
||||
return 'real'
|
||||
elif browser_mode == 'remote':
|
||||
return 'remote'
|
||||
elif browser_mode == 'chromium':
|
||||
raise ProfileModeError(
|
||||
'Profile commands are not supported in chromium mode.\n'
|
||||
'Use -b real for local Chrome profiles or -b remote for cloud profiles.'
|
||||
)
|
||||
|
||||
# No explicit mode - try to infer from install config
|
||||
local_available = is_mode_available('real')
|
||||
remote_available = is_mode_available('remote')
|
||||
|
||||
if local_available and not remote_available:
|
||||
return 'real'
|
||||
elif remote_available and not local_available:
|
||||
return 'remote'
|
||||
elif local_available and remote_available:
|
||||
raise ProfileModeError(
|
||||
'Both local and remote modes are available.\n'
|
||||
'Specify -b real for local Chrome profiles or -b remote for cloud profiles.'
|
||||
)
|
||||
else:
|
||||
raise ProfileModeError('No profile modes available. Run browser-use setup first.')
|
||||
|
||||
|
||||
def handle_profile_command(args: argparse.Namespace) -> int:
|
||||
"""Handle profile subcommands.
|
||||
|
||||
Routes to local or cloud implementation based on browser mode.
|
||||
"""
|
||||
command = args.profile_command
|
||||
|
||||
# Commands that don't need mode inference
|
||||
if command is None:
|
||||
_print_usage()
|
||||
return 1
|
||||
|
||||
# For sync command, we need special handling (local → cloud)
|
||||
if command == 'sync':
|
||||
return _handle_sync(args)
|
||||
|
||||
# Get profile mode for all other commands
|
||||
try:
|
||||
mode = get_profile_mode(args)
|
||||
except ProfileModeError as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Route to appropriate handler
|
||||
if command == 'list':
|
||||
return _handle_list(args, mode)
|
||||
elif command == 'get':
|
||||
return _handle_get(args, mode)
|
||||
elif command == 'create':
|
||||
return _handle_create(args, mode)
|
||||
elif command == 'update':
|
||||
return _handle_update(args, mode)
|
||||
elif command == 'delete':
|
||||
return _handle_delete(args, mode)
|
||||
elif command == 'cookies':
|
||||
return _handle_cookies(args, mode)
|
||||
else:
|
||||
_print_usage()
|
||||
return 1
|
||||
|
||||
|
||||
def _print_usage() -> None:
|
||||
"""Print profile command usage."""
|
||||
print('Usage: browser-use [-b real|remote] profile <command>')
|
||||
print()
|
||||
print('Commands:')
|
||||
print(' list List profiles')
|
||||
print(' get <id> Get profile details')
|
||||
print(' create Create a new profile (remote only)')
|
||||
print(' update <id> Update profile')
|
||||
print(' delete <id> Delete profile')
|
||||
print(' cookies <id> Show cookies by domain (real only)')
|
||||
print(' sync Sync local profile to cloud')
|
||||
print()
|
||||
print('The -b flag determines which profile system to use:')
|
||||
print(' -b real Local Chrome profiles')
|
||||
print(' -b remote Cloud profiles (requires API key)')
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# List profiles
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _handle_list(args: argparse.Namespace, mode: ProfileMode) -> int:
|
||||
"""Handle 'profile list' command."""
|
||||
if mode == 'real':
|
||||
return _list_local_profiles(args)
|
||||
else:
|
||||
return _list_cloud_profiles(args)
|
||||
|
||||
|
||||
def _list_local_profiles(args: argparse.Namespace) -> int:
|
||||
"""List local Chrome profiles."""
|
||||
profiles = list_local_chrome_profiles()
|
||||
|
||||
if getattr(args, 'json', False):
|
||||
print(json.dumps({'profiles': profiles}))
|
||||
else:
|
||||
if profiles:
|
||||
print('Local Chrome profiles:')
|
||||
for p in profiles:
|
||||
print(f' {p["id"]}: {p["name"]} ({p["email"]})')
|
||||
else:
|
||||
print('No Chrome profiles found')
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def _list_cloud_profiles(args: argparse.Namespace) -> int:
|
||||
"""List cloud profiles."""
|
||||
from browser_use.skill_cli.api_key import APIKeyRequired
|
||||
|
||||
page = getattr(args, 'page', 1)
|
||||
page_size = getattr(args, 'page_size', 20)
|
||||
|
||||
try:
|
||||
client = get_sdk_client()
|
||||
response = client.profiles.list_profiles(page_number=page, page_size=page_size)
|
||||
except APIKeyRequired as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
except Exception as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if getattr(args, 'json', False):
|
||||
# Convert to dict for JSON output
|
||||
data = {
|
||||
'items': [{'id': p.id, 'name': p.name} for p in response.items],
|
||||
'totalItems': response.total_items,
|
||||
'pageNumber': response.page_number,
|
||||
'pageSize': response.page_size,
|
||||
}
|
||||
print(json.dumps(data))
|
||||
else:
|
||||
if response.items:
|
||||
print(f'Cloud profiles ({len(response.items)}/{response.total_items}):')
|
||||
for p in response.items:
|
||||
name = p.name or 'Unnamed'
|
||||
print(f' {p.id}: {name}')
|
||||
else:
|
||||
print('No cloud profiles found')
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Get profile
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _handle_get(args: argparse.Namespace, mode: ProfileMode) -> int:
|
||||
"""Handle 'profile get <id>' command."""
|
||||
if mode == 'real':
|
||||
return _get_local_profile(args)
|
||||
else:
|
||||
return _get_cloud_profile(args)
|
||||
|
||||
|
||||
def _get_local_profile(args: argparse.Namespace) -> int:
|
||||
"""Get local Chrome profile details."""
|
||||
profiles = list_local_chrome_profiles()
|
||||
profile_id = args.id
|
||||
|
||||
for p in profiles:
|
||||
if p['id'] == profile_id or p['name'] == profile_id:
|
||||
if getattr(args, 'json', False):
|
||||
print(json.dumps(p))
|
||||
else:
|
||||
print(f'Profile: {p["id"]}')
|
||||
print(f' Name: {p["name"]}')
|
||||
print(f' Email: {p["email"]}')
|
||||
return 0
|
||||
|
||||
print(f'Error: Profile "{profile_id}" not found', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
|
||||
def _get_cloud_profile(args: argparse.Namespace) -> int:
|
||||
"""Get cloud profile details."""
|
||||
from browser_use.skill_cli.api_key import APIKeyRequired
|
||||
|
||||
try:
|
||||
client = get_sdk_client()
|
||||
profile = client.profiles.get_profile(args.id)
|
||||
except APIKeyRequired as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
except Exception as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if getattr(args, 'json', False):
|
||||
data = {
|
||||
'id': profile.id,
|
||||
'name': profile.name,
|
||||
'createdAt': profile.created_at.isoformat() if profile.created_at else None,
|
||||
'updatedAt': profile.updated_at.isoformat() if profile.updated_at else None,
|
||||
}
|
||||
print(json.dumps(data))
|
||||
else:
|
||||
print(f'Profile: {profile.id}')
|
||||
if profile.name:
|
||||
print(f' Name: {profile.name}')
|
||||
if profile.created_at:
|
||||
print(f' Created: {profile.created_at.isoformat()}')
|
||||
if profile.updated_at:
|
||||
print(f' Updated: {profile.updated_at.isoformat()}')
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Create profile
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _handle_create(args: argparse.Namespace, mode: ProfileMode) -> int:
|
||||
"""Handle 'profile create' command."""
|
||||
if mode == 'real':
|
||||
print('Error: Cannot create local Chrome profiles via CLI.', file=sys.stderr)
|
||||
print('Use Chrome browser to create new profiles.', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
return _create_cloud_profile(args)
|
||||
|
||||
|
||||
def _create_cloud_profile(args: argparse.Namespace) -> int:
|
||||
"""Create a cloud profile."""
|
||||
from browser_use.skill_cli.api_key import APIKeyRequired
|
||||
|
||||
try:
|
||||
client = get_sdk_client()
|
||||
params = {}
|
||||
if args.name:
|
||||
params['name'] = args.name
|
||||
profile = client.profiles.create_profile(**params)
|
||||
except APIKeyRequired as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
except Exception as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if getattr(args, 'json', False):
|
||||
print(json.dumps({'id': profile.id, 'name': profile.name}))
|
||||
else:
|
||||
print(f'Created profile: {profile.id}')
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Update profile
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _handle_update(args: argparse.Namespace, mode: ProfileMode) -> int:
|
||||
"""Handle 'profile update <id>' command."""
|
||||
if mode == 'real':
|
||||
print('Error: Cannot update local Chrome profiles via CLI.', file=sys.stderr)
|
||||
print('Use Chrome browser settings to update profiles.', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
return _update_cloud_profile(args)
|
||||
|
||||
|
||||
def _update_cloud_profile(args: argparse.Namespace) -> int:
|
||||
"""Update a cloud profile."""
|
||||
from browser_use.skill_cli.api_key import APIKeyRequired
|
||||
|
||||
try:
|
||||
client = get_sdk_client()
|
||||
params = {}
|
||||
if args.name:
|
||||
params['name'] = args.name
|
||||
profile = client.profiles.update_profile(args.id, **params)
|
||||
except APIKeyRequired as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
except Exception as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if getattr(args, 'json', False):
|
||||
print(json.dumps({'id': profile.id, 'name': profile.name}))
|
||||
else:
|
||||
print(f'Updated profile: {profile.id}')
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Delete profile
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _handle_delete(args: argparse.Namespace, mode: ProfileMode) -> int:
|
||||
"""Handle 'profile delete <id>' command."""
|
||||
if mode == 'real':
|
||||
print('Error: Cannot delete local Chrome profiles via CLI.', file=sys.stderr)
|
||||
print('Use Chrome browser settings to remove profiles.', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
return _delete_cloud_profile(args)
|
||||
|
||||
|
||||
def _delete_cloud_profile(args: argparse.Namespace) -> int:
|
||||
"""Delete a cloud profile."""
|
||||
from browser_use.skill_cli.api_key import APIKeyRequired
|
||||
|
||||
try:
|
||||
client = get_sdk_client()
|
||||
client.profiles.delete_browser_profile(args.id)
|
||||
except APIKeyRequired as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
except Exception as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if getattr(args, 'json', False):
|
||||
print(json.dumps({'deleted': args.id}))
|
||||
else:
|
||||
print(f'Deleted profile: {args.id}')
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Cookies (local only)
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _handle_cookies(args: argparse.Namespace, mode: ProfileMode) -> int:
|
||||
"""Handle 'profile cookies <id>' command."""
|
||||
if mode == 'remote':
|
||||
print('Error: Cookie listing is only available for local Chrome profiles.', file=sys.stderr)
|
||||
print('Use -b real to access local profile cookies.', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
return _list_profile_cookies(args)
|
||||
|
||||
|
||||
def _list_profile_cookies(args: argparse.Namespace) -> int:
|
||||
"""List cookies by domain in a local Chrome profile."""
|
||||
import asyncio
|
||||
|
||||
from browser_use.skill_cli.sessions import create_browser_session
|
||||
|
||||
# Get local profiles
|
||||
local_profiles = list_local_chrome_profiles()
|
||||
if not local_profiles:
|
||||
print('Error: No local Chrome profiles found', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Find the matching profile
|
||||
profile_arg = args.id
|
||||
selected_profile = None
|
||||
for p in local_profiles:
|
||||
if p['id'] == profile_arg or p['name'] == profile_arg:
|
||||
selected_profile = p
|
||||
break
|
||||
|
||||
if not selected_profile:
|
||||
print(f'Error: Profile "{profile_arg}" not found', file=sys.stderr)
|
||||
print('Available profiles:')
|
||||
for p in local_profiles:
|
||||
print(f' {p["id"]}: {p["name"]}')
|
||||
return 1
|
||||
|
||||
profile_id = selected_profile['id']
|
||||
print(f'Loading cookies from: {selected_profile["name"]} ({selected_profile["email"]})')
|
||||
|
||||
async def get_cookies():
|
||||
local_session = await create_browser_session('real', headed=False, profile=profile_id)
|
||||
await local_session.start()
|
||||
try:
|
||||
cookies = await local_session._cdp_get_cookies()
|
||||
return cookies
|
||||
finally:
|
||||
await local_session.kill()
|
||||
|
||||
try:
|
||||
cookies = asyncio.get_event_loop().run_until_complete(get_cookies())
|
||||
except RuntimeError:
|
||||
cookies = asyncio.run(get_cookies())
|
||||
|
||||
# Group cookies by domain
|
||||
domains: dict[str, int] = {}
|
||||
for cookie in cookies:
|
||||
domain = cookie.get('domain', 'unknown')
|
||||
# Normalize domain (remove leading dot)
|
||||
if domain.startswith('.'):
|
||||
domain = domain[1:]
|
||||
domains[domain] = domains.get(domain, 0) + 1
|
||||
|
||||
# Sort by count descending
|
||||
sorted_domains = sorted(domains.items(), key=lambda x: x[1], reverse=True)
|
||||
|
||||
if getattr(args, 'json', False):
|
||||
print(json.dumps({'domains': dict(sorted_domains), 'total_cookies': len(cookies)}))
|
||||
else:
|
||||
print(f'\nCookies by domain ({len(cookies)} total):')
|
||||
for domain, count in sorted_domains[:20]: # Show top 20
|
||||
print(f' {domain}: {count}')
|
||||
if len(sorted_domains) > 20:
|
||||
print(f' ... and {len(sorted_domains) - 20} more domains')
|
||||
|
||||
print('\nTo sync cookies to cloud:')
|
||||
print(f' browser-use profile sync --from "{profile_id}" --domain <domain>')
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Sync (local → cloud)
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _handle_sync(args: argparse.Namespace) -> int:
|
||||
"""Handle 'profile sync' command - sync local profile to cloud."""
|
||||
import asyncio
|
||||
|
||||
from browser_use.skill_cli.api_key import APIKeyRequired
|
||||
from browser_use.skill_cli.sessions import create_browser_session
|
||||
|
||||
# Get SDK client (validates API key)
|
||||
try:
|
||||
client = get_sdk_client()
|
||||
except APIKeyRequired as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
except Exception as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Get local profiles
|
||||
local_profiles = list_local_chrome_profiles()
|
||||
if not local_profiles:
|
||||
print('Error: No local Chrome profiles found', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Determine which profile to sync
|
||||
from_profile = args.from_profile
|
||||
if not from_profile:
|
||||
# Show available profiles and ask user to specify
|
||||
print('Available local profiles:')
|
||||
for p in local_profiles:
|
||||
print(f' {p["id"]}: {p["name"]} ({p["email"]})')
|
||||
print()
|
||||
print('Use --from to specify a profile:')
|
||||
print(' browser-use profile sync --from "Default"')
|
||||
print(' browser-use profile sync --from "Profile 1"')
|
||||
return 1
|
||||
|
||||
# Find the matching profile
|
||||
selected_profile = None
|
||||
for p in local_profiles:
|
||||
if p['id'] == from_profile or p['name'] == from_profile:
|
||||
selected_profile = p
|
||||
break
|
||||
|
||||
if not selected_profile:
|
||||
print(f'Error: Profile "{from_profile}" not found', file=sys.stderr)
|
||||
print('Available profiles:')
|
||||
for p in local_profiles:
|
||||
print(f' {p["id"]}: {p["name"]}')
|
||||
return 1
|
||||
|
||||
profile_id = selected_profile['id']
|
||||
profile_name = selected_profile['name']
|
||||
domain_filter = getattr(args, 'domain', None)
|
||||
|
||||
# Generate cloud profile name
|
||||
cloud_name = args.name if args.name else None
|
||||
if not cloud_name:
|
||||
if domain_filter:
|
||||
cloud_name = f'Chrome - {profile_name} ({domain_filter})'
|
||||
else:
|
||||
cloud_name = f'Chrome - {profile_name}'
|
||||
|
||||
# Use stderr for progress when JSON output is requested
|
||||
json_output = getattr(args, 'json', False)
|
||||
out = sys.stderr if json_output else sys.stdout
|
||||
|
||||
def log(msg: str) -> None:
|
||||
print(msg, file=out)
|
||||
|
||||
if domain_filter:
|
||||
log(f'Syncing: {profile_name} → {domain_filter} cookies only')
|
||||
else:
|
||||
log(f'Syncing: {profile_name} ({selected_profile["email"]})')
|
||||
|
||||
# Step 1: Create cloud profile
|
||||
log(' Creating cloud profile...')
|
||||
try:
|
||||
cloud_profile = client.profiles.create_profile(name=cloud_name)
|
||||
cloud_profile_id = cloud_profile.id
|
||||
except Exception as e:
|
||||
print(f'Error creating cloud profile: {e}', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
log(f' ✓ Created: {cloud_profile_id}')
|
||||
|
||||
def cleanup_cloud_profile() -> None:
|
||||
"""Delete the cloud profile on failure."""
|
||||
try:
|
||||
client.profiles.delete_browser_profile(cloud_profile_id)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Step 2: Export cookies from local profile
|
||||
async def sync_cookies():
|
||||
log(' Exporting cookies from local profile...')
|
||||
local_session = await create_browser_session('real', headed=False, profile=profile_id)
|
||||
await local_session.start()
|
||||
try:
|
||||
cookies = await local_session._cdp_get_cookies()
|
||||
if not cookies:
|
||||
return 0, 'No cookies found in local profile'
|
||||
|
||||
# Filter by domain if specified
|
||||
if domain_filter:
|
||||
cookies = [c for c in cookies if domain_filter in c.get('domain', '')]
|
||||
|
||||
if not cookies:
|
||||
return 0, f'No cookies found for domain: {domain_filter}'
|
||||
|
||||
log(f' ✓ Found {len(cookies)} cookies')
|
||||
|
||||
# Save to temp file - convert Cookie objects to dicts for JSON serialization
|
||||
cookies_file = Path(tempfile.gettempdir()) / f'browser-use-sync-{cloud_profile_id}.json'
|
||||
cookies_data = [dict(c) if hasattr(c, '__dict__') else c for c in cookies]
|
||||
cookies_file.write_text(json.dumps(cookies_data))
|
||||
|
||||
return len(cookies), str(cookies_file)
|
||||
finally:
|
||||
await local_session.kill()
|
||||
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
if loop.is_running():
|
||||
import concurrent.futures
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
future = executor.submit(asyncio.run, sync_cookies())
|
||||
cookie_count, cookies_file = future.result()
|
||||
else:
|
||||
cookie_count, cookies_file = loop.run_until_complete(sync_cookies())
|
||||
except RuntimeError:
|
||||
cookie_count, cookies_file = asyncio.run(sync_cookies())
|
||||
|
||||
if cookie_count == 0:
|
||||
log(f' ⚠ {cookies_file}') # cookies_file contains error message
|
||||
cleanup_cloud_profile()
|
||||
return 1
|
||||
|
||||
# Step 3: Import cookies to cloud profile
|
||||
async def import_to_cloud():
|
||||
log(' Importing cookies to cloud profile...')
|
||||
remote_session = await create_browser_session('remote', headed=False, profile=cloud_profile_id)
|
||||
await remote_session.start()
|
||||
try:
|
||||
cookies = json.loads(Path(cookies_file).read_text())
|
||||
await remote_session._cdp_set_cookies(cookies)
|
||||
return True
|
||||
finally:
|
||||
await remote_session.kill()
|
||||
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
if loop.is_running():
|
||||
import concurrent.futures
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
future = executor.submit(asyncio.run, import_to_cloud())
|
||||
future.result()
|
||||
else:
|
||||
loop.run_until_complete(import_to_cloud())
|
||||
except RuntimeError:
|
||||
asyncio.run(import_to_cloud())
|
||||
except Exception as e:
|
||||
log(f' ⚠ Failed to import cookies: {e}')
|
||||
cleanup_cloud_profile()
|
||||
return 1
|
||||
|
||||
# Cleanup temp file
|
||||
try:
|
||||
Path(cookies_file).unlink()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
log('✓ Profile synced successfully!')
|
||||
log(f' Cloud profile ID: {cloud_profile_id}')
|
||||
log('')
|
||||
log('To use this profile:')
|
||||
log(f' browser-use -b remote --profile {cloud_profile_id} open <url>')
|
||||
|
||||
if json_output:
|
||||
print(
|
||||
json.dumps(
|
||||
{
|
||||
'success': True,
|
||||
'profile_id': cloud_profile_id,
|
||||
'cookies_synced': cookie_count,
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def list_local_chrome_profiles() -> list[dict[str, Any]]:
|
||||
"""List local Chrome profiles from the Local State file."""
|
||||
import platform
|
||||
|
||||
# Find Chrome Local State file
|
||||
system = platform.system()
|
||||
if system == 'Darwin':
|
||||
local_state = Path.home() / 'Library/Application Support/Google/Chrome/Local State'
|
||||
elif system == 'Windows':
|
||||
local_state = Path.home() / 'AppData/Local/Google/Chrome/User Data/Local State'
|
||||
else:
|
||||
local_state = Path.home() / '.config/google-chrome/Local State'
|
||||
|
||||
if not local_state.exists():
|
||||
return []
|
||||
|
||||
try:
|
||||
data = json.loads(local_state.read_text())
|
||||
profiles_info = data.get('profile', {}).get('info_cache', {})
|
||||
|
||||
profiles = []
|
||||
for profile_id, info in profiles_info.items():
|
||||
profiles.append(
|
||||
{
|
||||
'id': profile_id,
|
||||
'name': info.get('name', profile_id),
|
||||
'email': info.get('user_name', ''),
|
||||
}
|
||||
)
|
||||
return profiles
|
||||
except Exception:
|
||||
return []
|
||||
@@ -49,7 +49,7 @@ async def handle(session: SessionInfo, params: dict[str, Any]) -> Any:
|
||||
|
||||
# Execute code in a thread pool so browser operations can schedule back to the event loop
|
||||
loop = asyncio.get_running_loop()
|
||||
result = await loop.run_in_executor(None, python_session.execute, code, browser_session, loop)
|
||||
result = await loop.run_in_executor(None, python_session.execute, code, browser_session, loop, session.actions)
|
||||
|
||||
if result.success:
|
||||
# Return raw text output for clean display
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
"""Session management command handlers."""
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from browser_use.skill_cli.sessions import SessionRegistry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
COMMANDS = {'sessions', 'close'}
|
||||
|
||||
|
||||
async def handle(action: str, session_name: str, registry: 'SessionRegistry', params: dict[str, Any]) -> Any:
|
||||
"""Handle session management command."""
|
||||
if action == 'sessions':
|
||||
sessions = registry.list_sessions()
|
||||
return {
|
||||
'sessions': sessions,
|
||||
'count': len(sessions),
|
||||
}
|
||||
|
||||
elif action == 'close':
|
||||
if params.get('all'):
|
||||
# Close all sessions and signal shutdown
|
||||
sessions = registry.list_sessions()
|
||||
await registry.close_all()
|
||||
return {
|
||||
'closed': [s['name'] for s in sessions],
|
||||
'count': len(sessions),
|
||||
'_shutdown': True, # Signal to stop server
|
||||
}
|
||||
else:
|
||||
# Close this server's session and shutdown
|
||||
await registry.close_session(session_name)
|
||||
return {'closed': session_name, '_shutdown': True}
|
||||
|
||||
raise ValueError(f'Unknown session action: {action}')
|
||||
@@ -1,330 +1,253 @@
|
||||
"""Setup command - configure browser-use for first-time use.
|
||||
"""Setup command — post-install setup for browser-use CLI.
|
||||
|
||||
Handles dependency installation and configuration with mode-based
|
||||
setup (local/remote/full) and optional automatic fixes.
|
||||
Covers everything install.sh does after the package is installed:
|
||||
home directory, config file, Chromium, profile-use, cloudflared.
|
||||
Interactive by default, --yes for CI.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any, Literal
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
COMMANDS = {'setup'}
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
async def handle(
|
||||
action: str,
|
||||
params: dict[str, Any],
|
||||
) -> dict[str, Any]:
|
||||
"""Handle setup command."""
|
||||
assert action == 'setup'
|
||||
|
||||
mode: Literal['local', 'remote', 'full'] = params.get('mode', 'local')
|
||||
yes: bool = params.get('yes', False)
|
||||
api_key: str | None = params.get('api_key')
|
||||
json_output: bool = params.get('json', False)
|
||||
|
||||
# Validate mode
|
||||
if mode not in ('local', 'remote', 'full'):
|
||||
return {'error': f'Invalid mode: {mode}. Must be local, remote, or full'}
|
||||
|
||||
# Run setup flow
|
||||
def _prompt(message: str, yes: bool) -> bool:
|
||||
"""Prompt user for confirmation. Returns True if --yes or user says yes."""
|
||||
if yes:
|
||||
return True
|
||||
try:
|
||||
checks = await run_checks(mode)
|
||||
|
||||
if not json_output:
|
||||
_log_checks(checks)
|
||||
|
||||
# Plan actions
|
||||
actions = plan_actions(checks, mode, yes, api_key)
|
||||
|
||||
if not json_output:
|
||||
_log_actions(actions)
|
||||
|
||||
# Execute actions
|
||||
await execute_actions(actions, mode, api_key, json_output)
|
||||
|
||||
# Validate
|
||||
validation = await validate_setup(mode)
|
||||
|
||||
if not json_output:
|
||||
_log_validation(validation)
|
||||
|
||||
return {
|
||||
'status': 'success',
|
||||
'mode': mode,
|
||||
'checks': checks,
|
||||
'validation': validation,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(f'Setup failed: {e}')
|
||||
error_msg = str(e)
|
||||
if json_output:
|
||||
return {'error': error_msg}
|
||||
return {'error': error_msg}
|
||||
reply = input(f' {message} [Y/n] ').strip().lower()
|
||||
return reply in ('', 'y', 'yes')
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
print()
|
||||
return False
|
||||
|
||||
|
||||
async def run_checks(mode: Literal['local', 'remote', 'full']) -> dict[str, Any]:
|
||||
"""Run pre-flight checks without making changes.
|
||||
def handle(yes: bool = False) -> dict:
|
||||
"""Run interactive setup."""
|
||||
from browser_use.skill_cli.utils import get_home_dir
|
||||
|
||||
Returns:
|
||||
Dict mapping check names to their status
|
||||
"""
|
||||
checks: dict[str, Any] = {}
|
||||
home_dir = get_home_dir()
|
||||
results: dict = {}
|
||||
step = 0
|
||||
total = 6
|
||||
|
||||
# Package check
|
||||
try:
|
||||
import browser_use
|
||||
print('\nBrowser-Use Setup')
|
||||
print('━━━━━━━━━━━━━━━━━\n')
|
||||
|
||||
checks['browser_use_package'] = {
|
||||
'status': 'ok',
|
||||
'message': f'browser-use {browser_use.__version__}'
|
||||
if hasattr(browser_use, '__version__')
|
||||
else 'browser-use installed',
|
||||
}
|
||||
except ImportError:
|
||||
checks['browser_use_package'] = {
|
||||
'status': 'error',
|
||||
'message': 'browser-use not installed',
|
||||
}
|
||||
# Step 1: Home directory
|
||||
step += 1
|
||||
print(f'Step {step}/{total}: Home directory')
|
||||
if home_dir.exists():
|
||||
print(f' ✓ {home_dir} exists')
|
||||
else:
|
||||
home_dir.mkdir(parents=True, exist_ok=True)
|
||||
print(f' ✓ {home_dir} created')
|
||||
results['home_dir'] = 'ok'
|
||||
|
||||
# Browser check (local and full modes)
|
||||
if mode in ('local', 'full'):
|
||||
checks['browser'] = await _check_browser()
|
||||
|
||||
# API key check (remote and full modes)
|
||||
if mode in ('remote', 'full'):
|
||||
from browser_use.skill_cli.api_key import check_api_key
|
||||
|
||||
api_status = check_api_key()
|
||||
if api_status['available']:
|
||||
checks['api_key'] = {
|
||||
'status': 'ok',
|
||||
'message': f'Configured via {api_status["source"]} ({api_status["key_prefix"]}...)',
|
||||
}
|
||||
else:
|
||||
checks['api_key'] = {
|
||||
'status': 'missing',
|
||||
'message': 'Not configured',
|
||||
}
|
||||
|
||||
# Cloudflared check (remote and full modes)
|
||||
if mode in ('remote', 'full'):
|
||||
from browser_use.skill_cli.tunnel import get_tunnel_manager
|
||||
|
||||
tunnel_mgr = get_tunnel_manager()
|
||||
status = tunnel_mgr.get_status()
|
||||
checks['cloudflared'] = {
|
||||
'status': 'ok' if status['available'] else 'missing',
|
||||
'message': status['note'],
|
||||
}
|
||||
|
||||
return checks
|
||||
|
||||
|
||||
async def _check_browser() -> dict[str, Any]:
|
||||
"""Check if browser is available."""
|
||||
try:
|
||||
from browser_use.browser.profile import BrowserProfile
|
||||
|
||||
profile = BrowserProfile(headless=True)
|
||||
# Just check if we can create a session without actually launching
|
||||
return {
|
||||
'status': 'ok',
|
||||
'message': 'Browser available',
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
'status': 'error',
|
||||
'message': f'Browser check failed: {e}',
|
||||
}
|
||||
|
||||
|
||||
def plan_actions(
|
||||
checks: dict[str, Any],
|
||||
mode: Literal['local', 'remote', 'full'],
|
||||
yes: bool,
|
||||
api_key: str | None,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Plan which actions to take based on checks.
|
||||
|
||||
Returns:
|
||||
List of actions to execute
|
||||
"""
|
||||
actions: list[dict[str, Any]] = []
|
||||
|
||||
# Browser installation (local/full)
|
||||
if mode in ('local', 'full'):
|
||||
browser_check = checks.get('browser', {})
|
||||
if browser_check.get('status') != 'ok':
|
||||
actions.append(
|
||||
{
|
||||
'type': 'install_browser',
|
||||
'description': 'Install browser (Chromium)',
|
||||
'required': True,
|
||||
}
|
||||
)
|
||||
|
||||
# API key configuration (remote/full)
|
||||
if mode in ('remote', 'full'):
|
||||
api_check = checks.get('api_key', {})
|
||||
if api_check.get('status') != 'ok':
|
||||
if api_key:
|
||||
actions.append(
|
||||
{
|
||||
'type': 'configure_api_key',
|
||||
'description': 'Configure API key',
|
||||
'required': True,
|
||||
'api_key': api_key,
|
||||
}
|
||||
)
|
||||
elif not yes:
|
||||
actions.append(
|
||||
{
|
||||
'type': 'prompt_api_key',
|
||||
'description': 'Prompt for API key',
|
||||
'required': False,
|
||||
}
|
||||
)
|
||||
|
||||
# Cloudflared (remote/full)
|
||||
if mode in ('remote', 'full'):
|
||||
cloudflared_check = checks.get('cloudflared', {})
|
||||
if cloudflared_check.get('status') != 'ok':
|
||||
actions.append(
|
||||
{
|
||||
'type': 'install_cloudflared',
|
||||
'description': 'Install cloudflared (for tunneling)',
|
||||
'required': True,
|
||||
}
|
||||
)
|
||||
|
||||
return actions
|
||||
|
||||
|
||||
async def execute_actions(
|
||||
actions: list[dict[str, Any]],
|
||||
mode: Literal['local', 'remote', 'full'],
|
||||
api_key: str | None,
|
||||
json_output: bool,
|
||||
) -> None:
|
||||
"""Execute planned actions.
|
||||
|
||||
Args:
|
||||
actions: List of actions to execute
|
||||
mode: Setup mode (local/remote/full)
|
||||
api_key: Optional API key to configure
|
||||
json_output: Whether to output JSON
|
||||
"""
|
||||
for action in actions:
|
||||
action_type = action['type']
|
||||
|
||||
if action_type == 'install_browser':
|
||||
if not json_output:
|
||||
print('📦 Installing Chromium browser (~300MB)...')
|
||||
# Browser will be installed on first use by Playwright
|
||||
if not json_output:
|
||||
print('✓ Browser available (will be installed on first use)')
|
||||
|
||||
elif action_type == 'configure_api_key':
|
||||
if not json_output:
|
||||
print('🔑 Configuring API key...')
|
||||
from browser_use.skill_cli.api_key import save_api_key
|
||||
|
||||
if api_key:
|
||||
save_api_key(api_key)
|
||||
if not json_output:
|
||||
print('✓ API key configured')
|
||||
|
||||
elif action_type == 'prompt_api_key':
|
||||
if not json_output:
|
||||
print('🔑 API key not configured')
|
||||
print(' Set via: export BROWSER_USE_API_KEY=your_key')
|
||||
print(' Or: browser-use setup --api-key <key>')
|
||||
|
||||
elif action_type == 'install_cloudflared':
|
||||
if not json_output:
|
||||
print('⚠ cloudflared not installed')
|
||||
print(' Install via:')
|
||||
print(' macOS: brew install cloudflared')
|
||||
print(
|
||||
' Linux: curl -L https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -o ~/.local/bin/cloudflared && chmod +x ~/.local/bin/cloudflared'
|
||||
)
|
||||
print(' Windows: winget install Cloudflare.cloudflared')
|
||||
print()
|
||||
print(' Or re-run install.sh which installs cloudflared automatically.')
|
||||
|
||||
|
||||
async def validate_setup(
|
||||
mode: Literal['local', 'remote', 'full'],
|
||||
) -> dict[str, Any]:
|
||||
"""Validate that setup worked.
|
||||
|
||||
Returns:
|
||||
Dict with validation results
|
||||
"""
|
||||
results: dict[str, Any] = {}
|
||||
|
||||
# Check imports
|
||||
try:
|
||||
import browser_use # noqa: F401
|
||||
|
||||
results['browser_use_import'] = 'ok'
|
||||
except ImportError:
|
||||
results['browser_use_import'] = 'failed'
|
||||
|
||||
# Validate mode requirements
|
||||
if mode in ('local', 'full'):
|
||||
# Step 2: Config file
|
||||
step += 1
|
||||
config_path = home_dir / 'config.json'
|
||||
print(f'\nStep {step}/{total}: Config file')
|
||||
if config_path.exists():
|
||||
print(f' ✓ {config_path} exists')
|
||||
else:
|
||||
config_path.write_text('{}\n')
|
||||
try:
|
||||
from browser_use.browser.profile import BrowserProfile
|
||||
config_path.chmod(0o600)
|
||||
except OSError:
|
||||
pass
|
||||
print(f' ✓ {config_path} created')
|
||||
results['config'] = 'ok'
|
||||
|
||||
browser_profile = BrowserProfile(headless=True)
|
||||
results['browser_available'] = 'ok'
|
||||
except Exception as e:
|
||||
results['browser_available'] = f'failed: {e}'
|
||||
# Step 3: Chromium browser
|
||||
step += 1
|
||||
print(f'\nStep {step}/{total}: Chromium browser')
|
||||
chromium_installed = _check_chromium()
|
||||
if chromium_installed:
|
||||
print(' ✓ Chromium already installed')
|
||||
results['chromium'] = 'ok'
|
||||
else:
|
||||
if _prompt('Chromium is not installed (~300MB download). Install now?', yes):
|
||||
print(' ℹ Installing Chromium...')
|
||||
if _install_chromium():
|
||||
print(' ✓ Chromium installed')
|
||||
results['chromium'] = 'ok'
|
||||
else:
|
||||
print(' ✗ Chromium installation failed')
|
||||
results['chromium'] = 'failed'
|
||||
else:
|
||||
print(' ○ Skipped')
|
||||
results['chromium'] = 'skipped'
|
||||
|
||||
if mode in ('remote', 'full'):
|
||||
from browser_use.skill_cli.api_key import check_api_key
|
||||
from browser_use.skill_cli.tunnel import get_tunnel_manager
|
||||
# Step 4: Profile-use binary
|
||||
step += 1
|
||||
print(f'\nStep {step}/{total}: Profile-use binary')
|
||||
from browser_use.skill_cli.profile_use import get_profile_use_binary
|
||||
|
||||
api_check = check_api_key()
|
||||
results['api_key_available'] = api_check['available']
|
||||
if get_profile_use_binary():
|
||||
print(' ✓ profile-use already installed')
|
||||
results['profile_use'] = 'ok'
|
||||
else:
|
||||
if _prompt('profile-use is not installed (needed for browser-use profile). Install now?', yes):
|
||||
print(' ℹ Downloading profile-use...')
|
||||
if _install_profile_use():
|
||||
print(' ✓ profile-use installed')
|
||||
results['profile_use'] = 'ok'
|
||||
else:
|
||||
print(' ✗ profile-use installation failed')
|
||||
results['profile_use'] = 'failed'
|
||||
else:
|
||||
print(' ○ Skipped')
|
||||
results['profile_use'] = 'skipped'
|
||||
|
||||
tunnel_mgr = get_tunnel_manager()
|
||||
results['cloudflared_available'] = tunnel_mgr.is_available()
|
||||
# Step 5: Cloudflared
|
||||
step += 1
|
||||
print(f'\nStep {step}/{total}: Cloudflare tunnel (cloudflared)')
|
||||
if shutil.which('cloudflared'):
|
||||
print(' ✓ cloudflared already installed')
|
||||
results['cloudflared'] = 'ok'
|
||||
else:
|
||||
if _prompt('cloudflared is not installed (needed for browser-use tunnel). Install now?', yes):
|
||||
print(' ℹ Installing cloudflared...')
|
||||
if _install_cloudflared():
|
||||
print(' ✓ cloudflared installed')
|
||||
results['cloudflared'] = 'ok'
|
||||
else:
|
||||
print(' ✗ cloudflared installation failed')
|
||||
results['cloudflared'] = 'failed'
|
||||
else:
|
||||
print(' ○ Skipped')
|
||||
results['cloudflared'] = 'skipped'
|
||||
|
||||
# Step 6: Validation
|
||||
step += 1
|
||||
print(f'\nStep {step}/{total}: Validation')
|
||||
from browser_use.skill_cli.config import CLI_DOCS_URL, get_config_display
|
||||
|
||||
# Quick checks
|
||||
checks = {
|
||||
'package': _check_package(),
|
||||
'browser': 'ok' if _check_chromium() else 'missing',
|
||||
'profile_use': 'ok' if get_profile_use_binary() else 'missing',
|
||||
'cloudflared': 'ok' if shutil.which('cloudflared') else 'missing',
|
||||
}
|
||||
for name, status in checks.items():
|
||||
icon = '✓' if status == 'ok' else '○'
|
||||
print(f' {icon} {name}: {status}')
|
||||
|
||||
# Config display
|
||||
entries = get_config_display()
|
||||
print(f'\nConfig ({config_path}):')
|
||||
for entry in entries:
|
||||
if entry['is_set']:
|
||||
icon = '✓'
|
||||
val = 'set' if entry['sensitive'] else entry['value']
|
||||
else:
|
||||
icon = '○'
|
||||
val = entry['value'] if entry['value'] else 'not set'
|
||||
print(f' {icon} {entry["key"]}: {val}')
|
||||
print(f' Docs: {CLI_DOCS_URL}')
|
||||
|
||||
print('\n━━━━━━━━━━━━━━━━━')
|
||||
print('Setup complete! Next: browser-use open https://example.com\n')
|
||||
|
||||
results['status'] = 'success'
|
||||
return results
|
||||
|
||||
|
||||
def _log_checks(checks: dict[str, Any]) -> None:
|
||||
"""Log check results."""
|
||||
print('\n✓ Running checks...\n')
|
||||
for name, check in checks.items():
|
||||
status = check.get('status', 'unknown')
|
||||
message = check.get('message', '')
|
||||
icon = '✓' if status == 'ok' else '⚠' if status == 'missing' else '✗'
|
||||
print(f' {icon} {name.replace("_", " ")}: {message}')
|
||||
print()
|
||||
def _check_package() -> str:
|
||||
"""Check if browser-use package is importable."""
|
||||
try:
|
||||
import browser_use
|
||||
|
||||
version = getattr(browser_use, '__version__', 'unknown')
|
||||
return f'browser-use {version}'
|
||||
except ImportError:
|
||||
return 'not installed'
|
||||
|
||||
|
||||
def _log_actions(actions: list[dict[str, Any]]) -> None:
|
||||
"""Log planned actions."""
|
||||
if not actions:
|
||||
print('✓ No additional setup needed!\n')
|
||||
return
|
||||
def _check_chromium() -> bool:
|
||||
"""Check if playwright chromium is installed."""
|
||||
try:
|
||||
from browser_use.browser.profile import BrowserProfile
|
||||
|
||||
print('\n📋 Setup actions:\n')
|
||||
for i, action in enumerate(actions, 1):
|
||||
required = '(required)' if action.get('required') else '(optional)'
|
||||
print(f' {i}. {action["description"]} {required}')
|
||||
print()
|
||||
BrowserProfile(headless=True)
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _log_validation(validation: dict[str, Any]) -> None:
|
||||
"""Log validation results."""
|
||||
print('\n✓ Validation:\n')
|
||||
for name, result in validation.items():
|
||||
icon = '✓' if result == 'ok' else '✗'
|
||||
print(f' {icon} {name.replace("_", " ")}: {result}')
|
||||
print()
|
||||
def _install_chromium() -> bool:
|
||||
"""Install Chromium via playwright."""
|
||||
try:
|
||||
cmd = [sys.executable, '-m', 'playwright', 'install', 'chromium']
|
||||
if sys.platform == 'linux':
|
||||
cmd.append('--with-deps')
|
||||
result = subprocess.run(cmd, timeout=300)
|
||||
return result.returncode == 0
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _install_profile_use() -> bool:
|
||||
"""Download profile-use binary."""
|
||||
try:
|
||||
from browser_use.skill_cli.profile_use import download_profile_use
|
||||
|
||||
download_profile_use()
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _install_cloudflared() -> bool:
|
||||
"""Install cloudflared."""
|
||||
try:
|
||||
if sys.platform == 'darwin':
|
||||
result = subprocess.run(['brew', 'install', 'cloudflared'], timeout=120)
|
||||
return result.returncode == 0
|
||||
elif sys.platform == 'win32':
|
||||
result = subprocess.run(['winget', 'install', 'Cloudflare.cloudflared'], timeout=120)
|
||||
return result.returncode == 0
|
||||
else:
|
||||
# Linux: download binary + verify SHA256 checksum before installing
|
||||
import hashlib
|
||||
import platform
|
||||
import shutil
|
||||
import tempfile
|
||||
import urllib.request
|
||||
|
||||
arch = 'arm64' if platform.machine() in ('aarch64', 'arm64') else 'amd64'
|
||||
base_url = f'https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-{arch}'
|
||||
|
||||
# Download to a temp file so we can verify before installing
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.tmp') as tmp:
|
||||
tmp_path = Path(tmp.name)
|
||||
try:
|
||||
urllib.request.urlretrieve(base_url, tmp_path)
|
||||
|
||||
# Fetch checksum file published alongside the binary
|
||||
with urllib.request.urlopen(f'{base_url}.sha256sum') as resp:
|
||||
expected_sha256 = resp.read().decode().split()[0]
|
||||
|
||||
# Verify integrity before touching the install destination
|
||||
actual_sha256 = hashlib.sha256(tmp_path.read_bytes()).hexdigest()
|
||||
if actual_sha256 != expected_sha256:
|
||||
raise RuntimeError(
|
||||
f'cloudflared checksum mismatch — expected {expected_sha256}, got {actual_sha256}. '
|
||||
'The download may be corrupt or tampered with.'
|
||||
)
|
||||
|
||||
dest = Path('/usr/local/bin/cloudflared')
|
||||
if not os.access('/usr/local/bin', os.W_OK):
|
||||
dest = Path.home() / '.local' / 'bin' / 'cloudflared'
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
shutil.move(str(tmp_path), dest)
|
||||
dest.chmod(0o755)
|
||||
finally:
|
||||
tmp_path.unlink(missing_ok=True)
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
"""Shared utilities for CLI command handlers."""
|
||||
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from browser_use_sdk import BrowserUse
|
||||
|
||||
_client: BrowserUse | None = None
|
||||
|
||||
|
||||
def get_sdk_client() -> BrowserUse:
|
||||
"""Get authenticated SDK client (singleton)."""
|
||||
global _client
|
||||
if _client is None:
|
||||
from browser_use.skill_cli.api_key import require_api_key
|
||||
|
||||
api_key = require_api_key('Cloud API')
|
||||
_client = BrowserUse(api_key=api_key)
|
||||
return _client
|
||||
|
||||
|
||||
def format_duration(started_at: datetime | None, finished_at: datetime | None) -> str:
|
||||
"""Format duration between two timestamps, or elapsed time if still running."""
|
||||
if not started_at:
|
||||
return ''
|
||||
|
||||
try:
|
||||
if finished_at:
|
||||
end = finished_at
|
||||
else:
|
||||
end = datetime.now(timezone.utc)
|
||||
|
||||
delta = end - started_at
|
||||
total_seconds = int(delta.total_seconds())
|
||||
|
||||
if total_seconds < 60:
|
||||
return f'{total_seconds}s'
|
||||
elif total_seconds < 3600:
|
||||
minutes = total_seconds // 60
|
||||
seconds = total_seconds % 60
|
||||
return f'{minutes}m {seconds}s'
|
||||
else:
|
||||
hours = total_seconds // 3600
|
||||
minutes = (total_seconds % 3600) // 60
|
||||
return f'{hours}h {minutes}m'
|
||||
except Exception:
|
||||
return ''
|
||||
171
browser_use/skill_cli/config.py
Normal file
171
browser_use/skill_cli/config.py
Normal file
@@ -0,0 +1,171 @@
|
||||
"""CLI configuration schema and helpers.
|
||||
|
||||
Single source of truth for all CLI config keys. Doctor, setup, and
|
||||
getter functions all reference CONFIG_KEYS.
|
||||
"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
CLI_DOCS_URL = 'https://docs.browser-use.com/open-source/browser-use-cli'
|
||||
|
||||
CONFIG_KEYS: dict = {
|
||||
'api_key': {
|
||||
'type': str,
|
||||
'sensitive': True,
|
||||
'description': 'Browser Use Cloud API key',
|
||||
},
|
||||
'cloud_connect_profile_id': {
|
||||
'type': str,
|
||||
'description': 'Cloud browser profile ID (auto-created)',
|
||||
},
|
||||
'cloud_connect_proxy': {
|
||||
'type': str,
|
||||
'default': 'us',
|
||||
'description': 'Cloud proxy country code',
|
||||
},
|
||||
'cloud_connect_timeout': {
|
||||
'type': int,
|
||||
'description': 'Cloud browser timeout (minutes)',
|
||||
},
|
||||
'cloud_connect_recording': {
|
||||
'type': bool,
|
||||
'default': True,
|
||||
'description': 'Enable session recording in cloud browser',
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _get_config_path() -> Path:
|
||||
from browser_use.skill_cli.utils import get_config_path
|
||||
|
||||
return get_config_path()
|
||||
|
||||
|
||||
def read_config() -> dict:
|
||||
"""Read CLI config file. Returns empty dict if missing or corrupt."""
|
||||
path = _get_config_path()
|
||||
if path.exists():
|
||||
try:
|
||||
return json.loads(path.read_text())
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return {}
|
||||
return {}
|
||||
|
||||
|
||||
def write_config(data: dict) -> None:
|
||||
"""Write CLI config file with 0o600 permissions, atomically via tmp+rename.
|
||||
|
||||
Writing directly to config.json risks truncation if the process is killed
|
||||
mid-write, which read_config() would silently treat as {} (empty config),
|
||||
wiping the API key and all other settings.
|
||||
"""
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
path = _get_config_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
content = json.dumps(data, indent=2) + '\n'
|
||||
|
||||
# Write to a temp file in the same directory so os.replace() is atomic
|
||||
# (same filesystem guaranteed — cross-device rename raises OSError).
|
||||
fd, tmp_str = tempfile.mkstemp(dir=path.parent, prefix='.config_tmp_')
|
||||
tmp_path = Path(tmp_str)
|
||||
try:
|
||||
with os.fdopen(fd, 'w') as f:
|
||||
f.write(content)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
try:
|
||||
tmp_path.chmod(0o600)
|
||||
except OSError:
|
||||
pass
|
||||
os.replace(tmp_path, path)
|
||||
except Exception:
|
||||
tmp_path.unlink(missing_ok=True)
|
||||
raise
|
||||
|
||||
|
||||
def get_config_value(key: str) -> str | int | None:
	"""Read a config value, applying schema defaults.

	Priority: config file → schema default → None.
	"""
	spec = CONFIG_KEYS.get(key)
	if spec is None:
		# Unknown keys have no schema, hence no default either.
		return None

	stored = read_config().get(key)
	return stored if stored is not None else spec.get('default')
|
||||
|
||||
|
||||
def set_config_value(key: str, value: str) -> None:
	"""Set a config value. Validates key and coerces type.

	Args:
		key: Config key; must exist in CONFIG_KEYS.
		value: Raw string from the CLI; coerced to the schema's type.

	Raises:
		ValueError: Unknown key, or value not coercible to the schema type.
	"""
	schema = CONFIG_KEYS.get(key)
	if schema is None:
		raise ValueError(f'Unknown config key: {key}. Valid keys: {", ".join(CONFIG_KEYS)}')

	expected_type = schema.get('type', str)
	coerced: str | int | bool

	# Fix: the original wrapped all coercion in one try/except (ValueError, TypeError)
	# that also caught its own specific 'expected true/false' ValueError and replaced
	# it with the generic 'expected bool' message. Each branch now raises its own
	# precise error, with exception chaining where a coercion actually failed.
	if expected_type is bool:
		lowered = value.lower()
		if lowered in ('true', '1', 'yes'):
			coerced = True
		elif lowered in ('false', '0', 'no'):
			coerced = False
		else:
			raise ValueError(f'Invalid value for {key}: expected true/false, got {value!r}')
	elif expected_type is int:
		try:
			coerced = int(value)
		except (ValueError, TypeError) as exc:
			raise ValueError(f'Invalid value for {key}: expected int, got {value!r}') from exc
	else:
		coerced = str(value)

	config = read_config()
	config[key] = coerced
	write_config(config)
|
||||
|
||||
|
||||
def unset_config_value(key: str) -> None:
	"""Remove a config key from the file."""
	if CONFIG_KEYS.get(key) is None:
		raise ValueError(f'Unknown config key: {key}. Valid keys: {", ".join(CONFIG_KEYS)}')

	config = read_config()
	# Only rewrite the file when the key was actually present.
	if key in config:
		config.pop(key)
		write_config(config)
|
||||
|
||||
|
||||
def get_config_display() -> list[dict]:
	"""Return config state for display (doctor, setup).

	Each entry: {key, value, is_set, sensitive, description}
	"""
	config = read_config()

	def _row(name: str, spec: dict) -> dict:
		raw = config.get(name)
		present = raw is not None
		# Unset keys with a schema default render as "<default> (default)".
		shown = f'{spec["default"]} (default)' if (not present and 'default' in spec) else raw
		return {
			'key': name,
			'value': shown,
			'is_set': present,
			'sensitive': spec.get('sensitive', False),
			'description': spec.get('description', ''),
		}

	return [_row(name, spec) for name, spec in CONFIG_KEYS.items()]
|
||||
537
browser_use/skill_cli/daemon.py
Normal file
537
browser_use/skill_cli/daemon.py
Normal file
@@ -0,0 +1,537 @@
|
||||
"""Background daemon - keeps a single BrowserSession alive.
|
||||
|
||||
Each daemon owns one session, identified by a session name (default: 'default').
|
||||
Isolation is per-session: each gets its own socket and PID file.
|
||||
Auto-exits when browser dies (polls is_cdp_connected).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from browser_use.skill_cli.sessions import SessionInfo
|
||||
|
||||
# Configure logging before imports
# (basicConfig applies this format to the root logger; StreamHandler with no
# argument writes to stderr, keeping stdout free for command output)
logging.basicConfig(
	level=logging.INFO,
	format='%(asctime)s [%(levelname)s] %(name)s: %(message)s',
	handlers=[logging.StreamHandler()],
)
# Module-level logger used by the Daemon class and main().
logger = logging.getLogger('browser_use.skill_cli.daemon')
|
||||
|
||||
|
||||
class Daemon:
	"""Single-session daemon that manages a browser and handles CLI commands."""

	def __init__(
		self,
		headed: bool,
		profile: str | None,
		cdp_url: str | None = None,
		use_cloud: bool = False,
		cloud_profile_id: str | None = None,
		cloud_proxy_country_code: str | None = None,
		cloud_timeout: int | None = None,
		session: str = 'default',
	) -> None:
		"""Store session configuration; the browser itself is created lazily.

		Args:
			headed: Show a visible browser window.
			profile: Chrome profile (None → default profile behavior).
			cdp_url: Existing CDP endpoint to attach to instead of launching.
			use_cloud: Run against a cloud browser instead of a local one.
			cloud_profile_id: Cloud browser profile ID.
			cloud_proxy_country_code: Proxy country code for the cloud browser.
			cloud_timeout: Cloud browser timeout in minutes.
			session: Session name; validated and used to derive socket/PID/state paths.

		Raises:
			Whatever validate_session_name raises for an invalid session name.
		"""
		from browser_use.skill_cli.utils import validate_session_name

		validate_session_name(session)
		self.session = session
		self.headed = headed
		self.profile = profile
		self.cdp_url = cdp_url
		self.use_cloud = use_cloud
		self.cloud_profile_id = cloud_profile_id
		self.cloud_proxy_country_code = cloud_proxy_country_code
		self.cloud_timeout = cloud_timeout
		self.running = True
		self._server: asyncio.Server | None = None
		self._shutdown_event = asyncio.Event()
		self._session: SessionInfo | None = None
		self._shutdown_task: asyncio.Task | None = None
		self._browser_watchdog_task: asyncio.Task | None = None
		# Guards lazy session creation against concurrent first commands.
		self._session_lock = asyncio.Lock()
		self._last_command_time: float = 0.0
		self._idle_timeout: float = 30 * 60.0  # 30 minutes
		self._idle_watchdog_task: asyncio.Task | None = None
		self._is_shutting_down: bool = False
		# Per-session auth token; generated in run() and persisted for clients.
		self._auth_token: str = ''

	def _write_state(self, phase: str) -> None:
		"""Atomically write session state file for CLI observability.

		Best-effort: OSError is logged at debug level, never raised — state
		reporting must not take the daemon down.
		"""
		import time

		from browser_use.skill_cli.utils import get_home_dir

		state = {
			'phase': phase,
			'pid': os.getpid(),
			'updated_at': time.time(),
			'config': {
				'headed': self.headed,
				'profile': self.profile,
				'cdp_url': self.cdp_url,
				'use_cloud': self.use_cloud,
			},
		}
		state_path = get_home_dir() / f'{self.session}.state.json'
		tmp_path = state_path.with_suffix('.state.json.tmp')
		try:
			# tmp write + fsync + os.replace = atomic, same pattern as write_config.
			with open(tmp_path, 'w') as f:
				json.dump(state, f)
				f.flush()
				os.fsync(f.fileno())
			os.replace(tmp_path, state_path)
		except OSError as e:
			logger.debug(f'Failed to write state file: {e}')

	def _request_shutdown(self) -> None:
		"""Request shutdown exactly once. Safe from any context."""
		if self._is_shutting_down:
			return
		self._is_shutting_down = True
		self._shutdown_task = asyncio.create_task(self._shutdown())

	async def _get_or_create_session(self) -> SessionInfo:
		"""Lazy-create the single session on first command.

		Returns the cached SessionInfo on subsequent calls. On startup failure,
		rolls back browser resources and re-raises.
		"""
		if self._session is not None:
			return self._session

		async with self._session_lock:
			# Double-check after acquiring lock
			if self._session is not None:
				return self._session

			from browser_use.skill_cli.sessions import SessionInfo, create_browser_session

			logger.info(
				f'Creating session (headed={self.headed}, profile={self.profile}, cdp_url={self.cdp_url}, use_cloud={self.use_cloud})'
			)

			self._write_state('starting')

			bs = await create_browser_session(
				self.headed,
				self.profile,
				self.cdp_url,
				use_cloud=self.use_cloud,
				cloud_profile_id=self.cloud_profile_id,
				cloud_proxy_country_code=self.cloud_proxy_country_code,
				cloud_timeout=self.cloud_timeout,
			)

			try:
				await bs.start()
				self._write_state('starting')  # refresh updated_at after bs.start() returns

				# Wait for Chrome to stabilize after CDP setup before accepting commands
				try:
					await bs.get_browser_state_summary()
				except Exception:
					pass

				# Create action handler for direct command execution (no event bus)
				from browser_use.skill_cli.actions import ActionHandler

				actions = ActionHandler(bs)

				self._session = SessionInfo(
					name=self.session,
					headed=self.headed,
					profile=self.profile,
					cdp_url=self.cdp_url,
					browser_session=bs,
					actions=actions,
					use_cloud=self.use_cloud,
				)
				self._browser_watchdog_task = asyncio.create_task(self._watch_browser())

				# Start idle timeout watchdog
				self._idle_watchdog_task = asyncio.create_task(self._watch_idle())

			except Exception:
				# Startup failed — rollback browser resources
				logger.exception('Session startup failed, rolling back')
				self._write_state('failed')
				try:
					# Cloud sessions need their remote browser stopped; locally-launched
					# browsers are killed; external CDP connections are just disconnected.
					if self.use_cloud and hasattr(bs, '_cloud_browser_client') and bs._cloud_browser_client.current_session_id:
						await asyncio.wait_for(bs._cloud_browser_client.stop_browser(), timeout=10.0)
					elif not self.cdp_url and not self.use_cloud:
						await asyncio.wait_for(bs.kill(), timeout=10.0)
					else:
						await asyncio.wait_for(bs.stop(), timeout=10.0)
				except Exception as cleanup_err:
					logger.debug(f'Rollback cleanup error: {cleanup_err}')
				raise

			self._write_state('running')
			return self._session

	async def _watch_browser(self) -> None:
		"""Poll BrowserSession.is_cdp_connected every 2s. Shutdown when browser dies.

		Skips checks while the BrowserSession is reconnecting. If reconnection fails,
		next poll will see is_cdp_connected=False and trigger shutdown.
		"""
		while self.running:
			await asyncio.sleep(2.0)
			if not self._session:
				continue
			bs = self._session.browser_session
			# Don't shut down while a reconnection attempt is in progress
			if bs.is_reconnecting:
				continue
			if not bs.is_cdp_connected:
				logger.info('Browser disconnected, shutting down daemon')
				self._request_shutdown()
				return

	async def _watch_idle(self) -> None:
		"""Shutdown daemon after idle_timeout seconds of no commands.

		_last_command_time stays 0.0 until the first dispatch, so a daemon
		that never received a command is not idle-reaped by this check.
		"""
		while self.running:
			await asyncio.sleep(60.0)
			if self._last_command_time > 0:
				import time

				idle = time.monotonic() - self._last_command_time
				if idle >= self._idle_timeout:
					logger.info(f'Daemon idle for {idle:.0f}s, shutting down')
					self._request_shutdown()
					return

	async def handle_connection(
		self,
		reader: asyncio.StreamReader,
		writer: asyncio.StreamWriter,
	) -> None:
		"""Handle a single client request (one command per connection).

		Protocol: one JSON object per line in, one JSON response line out.
		The connection is always closed in the finally block.
		"""
		try:
			line = await asyncio.wait_for(reader.readline(), timeout=300)
			if not line:
				return

			request = {}
			try:
				import hmac

				request = json.loads(line.decode())
				req_id = request.get('id', '')
				# Reject requests that don't carry the correct auth token.
				# Use hmac.compare_digest to prevent timing-oracle attacks.
				if self._auth_token and not hmac.compare_digest(
					request.get('token', ''),
					self._auth_token,
				):
					response = {'id': req_id, 'success': False, 'error': 'Unauthorized'}
				else:
					response = await self.dispatch(request)
			except json.JSONDecodeError as e:
				response = {'id': '', 'success': False, 'error': f'Invalid JSON: {e}'}
			except Exception as e:
				logger.exception(f'Error handling request: {e}')
				response = {'id': '', 'success': False, 'error': str(e)}

			writer.write((json.dumps(response) + '\n').encode())
			await writer.drain()

			# Shutdown is requested only after the response has been flushed,
			# so the client sees its acknowledgement.
			if response.get('success') and request.get('action') == 'shutdown':
				self._request_shutdown()

		except TimeoutError:
			logger.debug('Connection timeout')
		except Exception as e:
			logger.exception(f'Connection error: {e}')
		finally:
			writer.close()
			try:
				await writer.wait_closed()
			except Exception:
				pass

	async def dispatch(self, request: dict) -> dict:
		"""Route to command handlers.

		Returns a response dict: {'id', 'success', 'data'} or {'id', 'success', 'error'}.
		Also refreshes the idle-timeout clock on every call.
		"""
		import time

		self._last_command_time = time.monotonic()

		action = request.get('action', '')
		params = request.get('params', {})
		req_id = request.get('id', '')

		logger.info(f'Dispatch: {action} (id={req_id})')

		try:
			# Handle shutdown
			if action == 'shutdown':
				return {'id': req_id, 'success': True, 'data': {'shutdown': True}}

			# Handle ping — returns daemon config for mismatch detection
			if action == 'ping':
				# Return live CDP URL (may differ from constructor arg for cloud sessions)
				live_cdp_url = self.cdp_url
				if self._session and self._session.browser_session.cdp_url:
					live_cdp_url = self._session.browser_session.cdp_url
				return {
					'id': req_id,
					'success': True,
					'data': {
						'session': self.session,
						'pid': os.getpid(),
						'headed': self.headed,
						'profile': self.profile,
						'cdp_url': live_cdp_url,
						'use_cloud': self.use_cloud,
					},
				}

			# Handle connect — forces immediate session creation (used by cloud connect)
			if action == 'connect':
				session = await self._get_or_create_session()
				bs = session.browser_session
				result_data: dict = {'status': 'connected'}
				if bs.cdp_url:
					result_data['cdp_url'] = bs.cdp_url
				if self.use_cloud and bs.cdp_url:
					from urllib.parse import quote

					result_data['live_url'] = f'https://live.browser-use.com/?wss={quote(bs.cdp_url, safe="")}'
				return {'id': req_id, 'success': True, 'data': result_data}

			from browser_use.skill_cli.commands import browser, python_exec

			# Get or create the single session
			session = await self._get_or_create_session()

			# Dispatch to handler
			if action in browser.COMMANDS:
				result = await browser.handle(action, session, params)
			elif action == 'python':
				result = await python_exec.handle(session, params)
			else:
				return {'id': req_id, 'success': False, 'error': f'Unknown action: {action}'}

			return {'id': req_id, 'success': True, 'data': result}

		except Exception as e:
			logger.exception(f'Error dispatching {action}: {e}')
			return {'id': req_id, 'success': False, 'error': str(e)}

	async def run(self) -> None:
		"""Listen on Unix socket (or TCP on Windows) with PID file.

		Note: we do NOT unlink the socket in our finally block. If a replacement
		daemon was spawned during our shutdown, it already bound a new socket at
		the same path — unlinking here would delete *its* socket, orphaning it.
		Stale sockets are cleaned up by is_daemon_alive() and by the next
		daemon's startup (unlink before bind).
		"""
		import secrets

		from browser_use.skill_cli.utils import get_auth_token_path, get_pid_path, get_socket_path

		self._write_state('initializing')

		# Generate and persist a per-session auth token.
		# The client reads this file to authenticate its requests, preventing
		# any other local process from sending commands to the daemon socket.
		# Create the temp file with 0o600 at open() time to avoid a permission
		# race window where the file exists but is not yet restricted.
		# Raise on failure — running without a readable token file leaves the
		# daemon permanently unauthorized for all clients.
		self._auth_token = secrets.token_hex(32)
		token_path = get_auth_token_path(self.session)
		tmp_token = token_path.with_suffix('.token.tmp')
		fd = os.open(str(tmp_token), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
		try:
			with os.fdopen(fd, 'w') as f:
				f.write(self._auth_token)
		except OSError:
			try:
				tmp_token.unlink(missing_ok=True)
			except OSError:
				pass
			raise
		os.replace(tmp_token, token_path)

		# Setup signal handlers
		loop = asyncio.get_running_loop()

		def signal_handler():
			self._request_shutdown()

		for sig in (signal.SIGINT, signal.SIGTERM):
			try:
				loop.add_signal_handler(sig, signal_handler)
			except NotImplementedError:
				pass  # Windows doesn't support add_signal_handler

		if hasattr(signal, 'SIGHUP'):
			try:
				loop.add_signal_handler(signal.SIGHUP, signal_handler)
			except NotImplementedError:
				pass

		sock_path = get_socket_path(self.session)
		pid_path = get_pid_path(self.session)
		logger.info(f'Session: {self.session}, Socket: {sock_path}')

		if sock_path.startswith('tcp://'):
			# Windows: TCP server
			_, hostport = sock_path.split('://', 1)
			host, port = hostport.split(':')
			self._server = await asyncio.start_server(
				self.handle_connection,
				host,
				int(port),
				reuse_address=True,
			)
			logger.info(f'Listening on TCP {host}:{port}')
		else:
			# Unix: socket server
			Path(sock_path).unlink(missing_ok=True)
			self._server = await asyncio.start_unix_server(
				self.handle_connection,
				sock_path,
			)
			logger.info(f'Listening on Unix socket {sock_path}')

		# Write PID file after server is bound
		my_pid = str(os.getpid())
		pid_path.write_text(my_pid)
		self._write_state('ready')

		try:
			async with self._server:
				await self._shutdown_event.wait()
				# Wait for shutdown to finish browser cleanup before exiting
				if self._shutdown_task:
					await self._shutdown_task
		except asyncio.CancelledError:
			pass
		finally:
			# Conditionally delete PID file only if it still contains our PID
			try:
				if pid_path.read_text().strip() == my_pid:
					pid_path.unlink(missing_ok=True)
			except (OSError, ValueError):
				pass
			logger.info('Daemon stopped')

	async def _shutdown(self) -> None:
		"""Graceful shutdown. Only called via _request_shutdown().

		Order matters: close the server first to release the socket/port
		immediately, so a replacement daemon can bind without waiting for
		browser cleanup. Then kill the browser session.
		"""
		logger.info('Shutting down daemon...')
		self._write_state('shutting_down')
		self.running = False
		self._shutdown_event.set()

		if self._browser_watchdog_task:
			self._browser_watchdog_task.cancel()

		if self._idle_watchdog_task:
			self._idle_watchdog_task.cancel()

		if self._server:
			self._server.close()

		if self._session:
			try:
				# Only kill the browser if the daemon launched it.
				# For external connections (--connect, --cdp-url, cloud), just disconnect.
				# Timeout ensures daemon exits even if CDP calls hang on a dead connection
				if self.cdp_url or self.use_cloud:
					await asyncio.wait_for(self._session.browser_session.stop(), timeout=10.0)
				else:
					await asyncio.wait_for(self._session.browser_session.kill(), timeout=10.0)
			except TimeoutError:
				logger.warning('Browser cleanup timed out after 10s, forcing exit')
			except Exception as e:
				logger.warning(f'Error closing session: {e}')
			self._session = None

		# Delete PID and auth token files last, right before exit.
		import os

		from browser_use.skill_cli.utils import get_auth_token_path, get_pid_path

		pid_path = get_pid_path(self.session)
		try:
			if pid_path.exists() and pid_path.read_text().strip() == str(os.getpid()):
				pid_path.unlink(missing_ok=True)
		except (OSError, ValueError):
			pass

		get_auth_token_path(self.session).unlink(missing_ok=True)

		self._write_state('stopped')

		# Force exit — the asyncio server's __aexit__ hangs waiting for the
		# handle_connection() call that triggered this shutdown to return.
		logger.info('Daemon process exiting')
		os._exit(0)
|
||||
|
||||
|
||||
def main() -> None:
	"""Main entry point for daemon process."""

	def _parse_cli() -> argparse.Namespace:
		# Flag definitions mirror the Daemon constructor arguments.
		parser = argparse.ArgumentParser(description='Browser-use daemon')
		parser.add_argument('--session', default='default', help='Session name (default: "default")')
		parser.add_argument('--headed', action='store_true', help='Show browser window')
		parser.add_argument('--profile', help='Chrome profile (triggers real Chrome mode)')
		parser.add_argument('--cdp-url', help='CDP URL to connect to')
		parser.add_argument('--use-cloud', action='store_true', help='Use cloud browser')
		parser.add_argument('--cloud-profile-id', help='Cloud browser profile ID')
		parser.add_argument('--cloud-proxy-country', help='Cloud browser proxy country code')
		parser.add_argument('--cloud-timeout', type=int, help='Cloud browser timeout in minutes')
		return parser.parse_args()

	opts = _parse_cli()

	logger.info(
		f'Starting daemon: session={opts.session}, headed={opts.headed}, profile={opts.profile}, cdp_url={opts.cdp_url}, use_cloud={opts.use_cloud}'
	)

	daemon = Daemon(
		headed=opts.headed,
		profile=opts.profile,
		cdp_url=opts.cdp_url,
		use_cloud=opts.use_cloud,
		cloud_profile_id=opts.cloud_profile_id,
		cloud_proxy_country_code=opts.cloud_proxy_country,
		cloud_timeout=opts.cloud_timeout,
		session=opts.session,
	)

	exit_code = 0
	try:
		asyncio.run(daemon.run())
	except KeyboardInterrupt:
		logger.info('Interrupted')
	except Exception as e:
		logger.exception(f'Daemon error: {e}')
		exit_code = 1
	finally:
		# Write failed state if we crashed without a clean shutdown
		if not daemon._is_shutting_down:
			try:
				daemon._write_state('failed')
			except Exception:
				pass
		# asyncio.run() may hang trying to cancel lingering tasks
		# Force-exit to prevent the daemon from becoming an orphan
		logger.info('Daemon process exiting')
		os._exit(exit_code)
|
||||
|
||||
|
||||
# Entry point when executed directly (e.g. `python -m browser_use.skill_cli.daemon`).
if __name__ == '__main__':
	main()
|
||||
@@ -2,17 +2,8 @@
|
||||
# Browser-Use Bootstrap Installer
|
||||
#
|
||||
# Usage:
|
||||
# # Interactive install (shows mode selection TUI)
|
||||
# curl -fsSL https://browser-use.com/cli/install.sh | bash
|
||||
#
|
||||
# # Non-interactive install with flags
|
||||
# curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- --full
|
||||
# curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- --remote-only
|
||||
# curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- --local-only
|
||||
#
|
||||
# # With API key
|
||||
# curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- --remote-only --api-key bu_xxx
|
||||
#
|
||||
# For development testing:
|
||||
# curl -fsSL <raw-url> | BROWSER_USE_BRANCH=<branch-name> bash
|
||||
#
|
||||
@@ -24,7 +15,7 @@
|
||||
# winget install Git.Git
|
||||
#
|
||||
# Then run from PowerShell:
|
||||
# & "C:\Program Files\Git\bin\bash.exe" -c 'curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- --full'
|
||||
# & "C:\Program Files\Git\bin\bash.exe" -c 'curl -fsSL https://browser-use.com/cli/install.sh | bash'
|
||||
#
|
||||
# KNOWN ISSUES AND SOLUTIONS:
|
||||
#
|
||||
@@ -76,10 +67,10 @@
|
||||
# - Always kill stale processes before retrying
|
||||
# - Or kill all Python: taskkill /IM python.exe /F
|
||||
#
|
||||
# 7. Debugging server issues
|
||||
# To see actual error messages instead of "Failed to start session server":
|
||||
# & "$env:USERPROFILE\.browser-use-env\Scripts\python.exe" -m browser_use.skill_cli.server --session default --browser chromium
|
||||
# This runs the server in foreground and shows all errors.
|
||||
# 7. Debugging daemon issues
|
||||
# To see actual error messages instead of "Failed to start daemon":
|
||||
# & "$env:USERPROFILE\.browser-use-env\Scripts\python.exe" -m browser_use.skill_cli.daemon
|
||||
# This runs the daemon in foreground and shows all errors.
|
||||
#
|
||||
# =============================================================================
|
||||
|
||||
@@ -89,12 +80,6 @@ set -e
|
||||
# Configuration
|
||||
# =============================================================================
|
||||
|
||||
# Mode flags (set by parse_args or TUI)
|
||||
INSTALL_LOCAL=false
|
||||
INSTALL_REMOTE=false
|
||||
SKIP_INTERACTIVE=false
|
||||
API_KEY=""
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
@@ -130,43 +115,15 @@ log_error() {
|
||||
parse_args() {
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
--full|--all)
|
||||
INSTALL_LOCAL=true
|
||||
INSTALL_REMOTE=true
|
||||
SKIP_INTERACTIVE=true
|
||||
shift
|
||||
;;
|
||||
--remote-only)
|
||||
INSTALL_REMOTE=true
|
||||
SKIP_INTERACTIVE=true
|
||||
shift
|
||||
;;
|
||||
--local-only)
|
||||
INSTALL_LOCAL=true
|
||||
SKIP_INTERACTIVE=true
|
||||
shift
|
||||
;;
|
||||
--api-key)
|
||||
if [ -z "$2" ] || [[ "$2" == --* ]]; then
|
||||
log_error "--api-key requires a value"
|
||||
exit 1
|
||||
fi
|
||||
API_KEY="$2"
|
||||
shift 2
|
||||
;;
|
||||
--help|-h)
|
||||
echo "Browser-Use Installer"
|
||||
echo ""
|
||||
echo "Usage: install.sh [OPTIONS]"
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " --full, --all Install all modes (local + remote)"
|
||||
echo " --remote-only Install remote mode only (no Chromium)"
|
||||
echo " --local-only Install local modes only (no cloudflared)"
|
||||
echo " --api-key KEY Set Browser-Use API key"
|
||||
echo " --help, -h Show this help"
|
||||
echo ""
|
||||
echo "Without options, shows interactive mode selection."
|
||||
echo "Installs Python 3.11+ (if needed), uv, browser-use, and Chromium."
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
@@ -331,6 +288,10 @@ install_python() {
|
||||
install_uv() {
|
||||
log_info "Installing uv package manager..."
|
||||
|
||||
# Add common uv install locations to PATH for current session
|
||||
# (covers both curl-based and Homebrew installs)
|
||||
export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"
|
||||
|
||||
if command -v uv &> /dev/null; then
|
||||
log_success "uv already installed"
|
||||
return 0
|
||||
@@ -339,9 +300,6 @@ install_uv() {
|
||||
# Use official uv installer
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
|
||||
# Add common uv install locations to PATH for current session
|
||||
export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"
|
||||
|
||||
if command -v uv &> /dev/null; then
|
||||
log_success "uv installed successfully"
|
||||
else
|
||||
@@ -350,121 +308,6 @@ install_uv() {
|
||||
fi
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Gum TUI installation
|
||||
# =============================================================================
|
||||
|
||||
install_gum() {
    # Install gum for beautiful TUI - silent and fast
    # Downloads a pinned gum release into ~/.local/bin for the current
    # platform/arch. Returns non-zero if gum can't be made available, in
    # which case the caller falls back to the plain-bash menu.
    # NOTE(review): $PLATFORM is set by earlier platform detection
    # (macos/linux/windows) — not visible in this hunk; confirm upstream.
    if command -v gum &> /dev/null; then
        return 0
    fi

    local arch=$(uname -m)
    local gum_version="0.14.5"
    local gum_dir=""

    mkdir -p "$HOME/.local/bin"
    export PATH="$HOME/.local/bin:$PATH"

    case "$PLATFORM" in
        macos)
            if [ "$arch" = "arm64" ]; then
                gum_dir="gum_${gum_version}_Darwin_arm64"
                curl -sL "https://github.com/charmbracelet/gum/releases/download/v${gum_version}/gum_${gum_version}_Darwin_arm64.tar.gz" | tar -xz -C /tmp
            else
                gum_dir="gum_${gum_version}_Darwin_x86_64"
                curl -sL "https://github.com/charmbracelet/gum/releases/download/v${gum_version}/gum_${gum_version}_Darwin_x86_64.tar.gz" | tar -xz -C /tmp
            fi
            # mv failures (e.g. download produced nothing) abort with non-zero.
            mv "/tmp/${gum_dir}/gum" "$HOME/.local/bin/" 2>/dev/null || return 1
            rm -rf "/tmp/${gum_dir}" 2>/dev/null
            ;;
        linux)
            if [ "$arch" = "aarch64" ] || [ "$arch" = "arm64" ]; then
                gum_dir="gum_${gum_version}_Linux_arm64"
                curl -sL "https://github.com/charmbracelet/gum/releases/download/v${gum_version}/gum_${gum_version}_Linux_arm64.tar.gz" | tar -xz -C /tmp
            else
                gum_dir="gum_${gum_version}_Linux_x86_64"
                curl -sL "https://github.com/charmbracelet/gum/releases/download/v${gum_version}/gum_${gum_version}_Linux_x86_64.tar.gz" | tar -xz -C /tmp
            fi
            mv "/tmp/${gum_dir}/gum" "$HOME/.local/bin/" 2>/dev/null || return 1
            rm -rf "/tmp/${gum_dir}" 2>/dev/null
            ;;
        windows)
            # Download and extract Windows binary
            curl -sL "https://github.com/charmbracelet/gum/releases/download/v${gum_version}/gum_${gum_version}_Windows_x86_64.zip" -o /tmp/gum.zip
            unzip -q /tmp/gum.zip -d /tmp/gum_windows 2>/dev/null || return 1
            # Binary is inside a subdirectory: gum_x.x.x_Windows_x86_64/gum.exe
            mv "/tmp/gum_windows/gum_${gum_version}_Windows_x86_64/gum.exe" "$HOME/.local/bin/" 2>/dev/null || return 1
            rm -rf /tmp/gum.zip /tmp/gum_windows 2>/dev/null
            ;;
        *)
            return 1
            ;;
    esac

    # Final check doubles as the function's return status.
    command -v gum &> /dev/null
}
|
||||
|
||||
# =============================================================================
|
||||
# Interactive mode selection TUI
|
||||
# =============================================================================
|
||||
|
||||
show_mode_menu() {
    # Try to install gum for nice TUI; fall back to the plain-bash
    # menu when install_gum returns non-zero.
    if install_gum; then
        show_gum_menu
    else
        show_bash_menu
    fi
}
|
||||
|
||||
show_gum_menu() {
    # gum-based multi-select menu; sets the global INSTALL_LOCAL /
    # INSTALL_REMOTE flags based on the user's selection.
    echo ""

    # Styled header
    gum style --foreground 212 --bold "Select browser modes to install"
    gum style --foreground 240 "Use arrow keys to navigate, space to select, enter to confirm"
    echo ""

    # Checkbox selection with gum choose
    # set +e around gum: presumably guards against a non-zero exit when the
    # user cancels, which would abort the installer under `set -e` — confirm.
    # stdin comes from /dev/tty so this works when the script is piped.
    set +e
    SELECTED=$(gum choose --no-limit --height 10 \
        --cursor-prefix "[ ] " --selected-prefix "[✓] " --unselected-prefix "[ ] " \
        --header "" \
        --cursor.foreground 212 \
        --selected.foreground 212 \
        "Local browser (chromium/real - requires Chromium)" \
        "Remote browser (cloud - requires API key)" < /dev/tty)
    set -e

    # Parse selections
    # Substring match on the selected lines drives the mode flags.
    if [[ "$SELECTED" == *"Local"* ]]; then INSTALL_LOCAL=true; fi
    if [[ "$SELECTED" == *"Remote"* ]]; then INSTALL_REMOTE=true; fi
}
|
||||
|
||||
show_bash_menu() {
    # Plain-bash fallback menu used when gum is unavailable; sets the
    # global INSTALL_LOCAL / INSTALL_REMOTE flags.
    echo ""
    echo "Select browser modes to install (space-separated numbers):"
    echo ""
    echo " 1) Local browser (chromium/real - requires Chromium download)"
    echo " 2) Remote browser (cloud - requires API key)"
    echo ""
    echo "Press Enter for default [1]"
    echo ""
    echo -n "> "

    # Read from /dev/tty to work even when script is piped
    # Keep set +e for the whole function to avoid issues with pattern matching
    set +e
    read -r choices < /dev/tty
    # Empty input (plain Enter) defaults to option 1.
    choices=${choices:-1}

    if [[ "$choices" == *"1"* ]]; then INSTALL_LOCAL=true; fi
    if [[ "$choices" == *"2"* ]]; then INSTALL_REMOTE=true; fi
    set -e
}
|
||||
|
||||
# =============================================================================
|
||||
# Browser-Use installation
|
||||
# =============================================================================
|
||||
@@ -515,119 +358,19 @@ install_chromium() {
|
||||
log_success "Chromium installed"
|
||||
}
|
||||
|
||||
install_cloudflared() {
|
||||
log_info "Installing cloudflared..."
|
||||
install_profile_use() {
|
||||
log_info "Installing profile-use..."
|
||||
|
||||
if command -v cloudflared &> /dev/null; then
|
||||
log_success "cloudflared already installed"
|
||||
return 0
|
||||
fi
|
||||
mkdir -p "$HOME/.browser-use/bin"
|
||||
curl -fsSL https://browser-use.com/profile/cli/install.sh | PROFILE_USE_VERSION=v1.0.2 INSTALL_DIR="$HOME/.browser-use/bin" sh
|
||||
|
||||
local arch=$(uname -m)
|
||||
|
||||
case "$PLATFORM" in
|
||||
macos)
|
||||
if command -v brew &> /dev/null; then
|
||||
brew install cloudflared
|
||||
else
|
||||
# Direct download for macOS without Homebrew
|
||||
mkdir -p "$HOME/.local/bin"
|
||||
if [ "$arch" = "arm64" ]; then
|
||||
curl -L https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-darwin-arm64.tgz -o /tmp/cloudflared.tgz
|
||||
else
|
||||
curl -L https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-darwin-amd64.tgz -o /tmp/cloudflared.tgz
|
||||
fi
|
||||
tar -xzf /tmp/cloudflared.tgz -C "$HOME/.local/bin/"
|
||||
rm /tmp/cloudflared.tgz
|
||||
fi
|
||||
;;
|
||||
linux)
|
||||
mkdir -p "$HOME/.local/bin"
|
||||
if [ "$arch" = "aarch64" ] || [ "$arch" = "arm64" ]; then
|
||||
curl -L https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-arm64 -o "$HOME/.local/bin/cloudflared"
|
||||
else
|
||||
curl -L https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -o "$HOME/.local/bin/cloudflared"
|
||||
fi
|
||||
chmod +x "$HOME/.local/bin/cloudflared"
|
||||
;;
|
||||
windows)
|
||||
# Auto-install via winget (comes pre-installed on Windows 10/11)
|
||||
if command -v winget.exe &> /dev/null; then
|
||||
winget.exe install --id Cloudflare.cloudflared --accept-source-agreements --accept-package-agreements --silent
|
||||
else
|
||||
log_warn "winget not found. Install cloudflared manually:"
|
||||
log_warn " Download from: https://github.com/cloudflare/cloudflared/releases"
|
||||
return 0
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
# Add ~/.local/bin to PATH for current session
|
||||
export PATH="$HOME/.local/bin:$PATH"
|
||||
|
||||
if command -v cloudflared &> /dev/null; then
|
||||
log_success "cloudflared installed successfully"
|
||||
if [ -x "$HOME/.browser-use/bin/profile-use" ]; then
|
||||
log_success "profile-use installed"
|
||||
else
|
||||
log_warn "cloudflared installation failed. You can install it manually later."
|
||||
log_warn "profile-use installation failed (will auto-download on first use)"
|
||||
fi
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Install dependencies based on selected modes
|
||||
# =============================================================================
|
||||
|
||||
install_dependencies() {
|
||||
# Install base package (always needed)
|
||||
install_browser_use
|
||||
|
||||
# Install Chromium only if local mode selected
|
||||
if [ "$INSTALL_LOCAL" = true ]; then
|
||||
install_chromium
|
||||
else
|
||||
log_info "Skipping Chromium (remote-only mode)"
|
||||
fi
|
||||
|
||||
# Install cloudflared only if remote mode selected
|
||||
if [ "$INSTALL_REMOTE" = true ]; then
|
||||
install_cloudflared
|
||||
else
|
||||
log_info "Skipping cloudflared (local-only mode)"
|
||||
fi
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Write install configuration
|
||||
# =============================================================================
|
||||
|
||||
write_install_config() {
|
||||
# Determine installed modes and default
|
||||
local modes=""
|
||||
local default_mode=""
|
||||
|
||||
if [ "$INSTALL_LOCAL" = true ] && [ "$INSTALL_REMOTE" = true ]; then
|
||||
modes='["chromium", "real", "remote"]'
|
||||
default_mode="chromium"
|
||||
elif [ "$INSTALL_REMOTE" = true ]; then
|
||||
modes='["remote"]'
|
||||
default_mode="remote"
|
||||
else
|
||||
modes='["chromium", "real"]'
|
||||
default_mode="chromium"
|
||||
fi
|
||||
|
||||
# Write config file
|
||||
mkdir -p "$HOME/.browser-use"
|
||||
cat > "$HOME/.browser-use/install-config.json" << EOF
|
||||
{
|
||||
"installed_modes": $modes,
|
||||
"default_mode": "$default_mode"
|
||||
}
|
||||
EOF
|
||||
|
||||
local mode_names=$(echo $modes | tr -d '[]"' | tr ',' ' ')
|
||||
log_success "Configured: $mode_names"
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# PATH configuration
|
||||
# =============================================================================
|
||||
@@ -637,20 +380,19 @@ configure_path() {
|
||||
local bin_path=$(get_venv_bin_dir)
|
||||
local local_bin="$HOME/.local/bin"
|
||||
|
||||
# Detect shell
|
||||
if [ -n "$BASH_VERSION" ]; then
|
||||
shell_rc="$HOME/.bashrc"
|
||||
elif [ -n "$ZSH_VERSION" ]; then
|
||||
shell_rc="$HOME/.zshrc"
|
||||
else
|
||||
shell_rc="$HOME/.profile"
|
||||
fi
|
||||
# Detect user's login shell (not the running shell, since this script
|
||||
# is typically executed via "curl ... | bash" which always sets BASH_VERSION)
|
||||
case "$(basename "$SHELL")" in
|
||||
zsh) shell_rc="$HOME/.zshrc" ;;
|
||||
bash) shell_rc="$HOME/.bashrc" ;;
|
||||
*) shell_rc="$HOME/.profile" ;;
|
||||
esac
|
||||
|
||||
# Check if already in PATH (browser-use-env matches both /bin and /Scripts)
|
||||
if grep -q "browser-use-env" "$shell_rc" 2>/dev/null; then
|
||||
log_info "PATH already configured in $shell_rc"
|
||||
else
|
||||
# Add to shell config (includes ~/.local/bin for cloudflared)
|
||||
# Add to shell config (includes ~/.local/bin for tools)
|
||||
echo "" >> "$shell_rc"
|
||||
echo "# Browser-Use" >> "$shell_rc"
|
||||
echo "export PATH=\"$bin_path:$local_bin:\$PATH\"" >> "$shell_rc"
|
||||
@@ -689,32 +431,6 @@ configure_powershell_path() {
|
||||
fi
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Setup wizard
|
||||
# =============================================================================
|
||||
|
||||
run_setup() {
|
||||
log_info "Running setup wizard..."
|
||||
|
||||
# Activate venv
|
||||
activate_venv
|
||||
|
||||
# Determine profile based on mode selections
|
||||
local profile="local"
|
||||
if [ "$INSTALL_REMOTE" = true ] && [ "$INSTALL_LOCAL" = true ]; then
|
||||
profile="full"
|
||||
elif [ "$INSTALL_REMOTE" = true ]; then
|
||||
profile="remote"
|
||||
fi
|
||||
|
||||
# Run setup with API key if provided
|
||||
if [ -n "$API_KEY" ]; then
|
||||
browser-use setup --mode "$profile" --api-key "$API_KEY" --yes
|
||||
else
|
||||
browser-use setup --mode "$profile" --yes
|
||||
fi
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Validation
|
||||
# =============================================================================
|
||||
@@ -738,34 +454,18 @@ validate() {
|
||||
# =============================================================================
|
||||
|
||||
print_next_steps() {
|
||||
# Detect shell for source command
|
||||
local shell_rc=".bashrc"
|
||||
if [ -n "$ZSH_VERSION" ]; then
|
||||
shell_rc=".zshrc"
|
||||
fi
|
||||
# Detect shell for source command (must match configure_path logic)
|
||||
case "$(basename "$SHELL")" in
|
||||
zsh) local shell_rc=".zshrc" ;;
|
||||
bash) local shell_rc=".bashrc" ;;
|
||||
*) local shell_rc=".profile" ;;
|
||||
esac
|
||||
|
||||
echo ""
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo ""
|
||||
log_success "Browser-Use installed successfully!"
|
||||
echo ""
|
||||
echo "Installed modes:"
|
||||
[ "$INSTALL_LOCAL" = true ] && echo " ✓ Local (chromium, real)"
|
||||
[ "$INSTALL_REMOTE" = true ] && echo " ✓ Remote (cloud)"
|
||||
echo ""
|
||||
|
||||
# Show API key instructions if remote selected but no key provided
|
||||
if [ "$INSTALL_REMOTE" = true ] && [ -z "$API_KEY" ]; then
|
||||
echo "⚠ API key required for remote mode:"
|
||||
if [ "$PLATFORM" = "windows" ]; then
|
||||
echo " \$env:BROWSER_USE_API_KEY=\"<your-api-key>\""
|
||||
else
|
||||
echo " export BROWSER_USE_API_KEY=<your-api-key>"
|
||||
fi
|
||||
echo ""
|
||||
echo " Get your API key at: https://browser-use.com"
|
||||
echo ""
|
||||
fi
|
||||
|
||||
echo "Next steps:"
|
||||
if [ "$PLATFORM" = "windows" ]; then
|
||||
@@ -773,13 +473,7 @@ print_next_steps() {
|
||||
else
|
||||
echo " 1. Restart your shell or run: source ~/$shell_rc"
|
||||
fi
|
||||
|
||||
if [ "$INSTALL_REMOTE" = true ] && [ -z "$API_KEY" ]; then
|
||||
echo " 2. Set your API key (see above)"
|
||||
echo " 3. Try: browser-use open https://example.com"
|
||||
else
|
||||
echo " 2. Try: browser-use open https://example.com"
|
||||
fi
|
||||
echo " 2. Try: browser-use open https://example.com"
|
||||
|
||||
echo ""
|
||||
echo "Documentation: https://docs.browser-use.com"
|
||||
@@ -801,25 +495,13 @@ main() {
|
||||
# Parse command-line flags
|
||||
parse_args "$@"
|
||||
|
||||
# Show install mode if flags provided
|
||||
if [ "$SKIP_INTERACTIVE" = true ]; then
|
||||
if [ "$INSTALL_LOCAL" = true ] && [ "$INSTALL_REMOTE" = true ]; then
|
||||
log_info "Install mode: full (local + remote)"
|
||||
elif [ "$INSTALL_REMOTE" = true ]; then
|
||||
log_info "Install mode: remote-only"
|
||||
else
|
||||
log_info "Install mode: local-only"
|
||||
fi
|
||||
echo ""
|
||||
fi
|
||||
|
||||
# Step 1: Detect platform
|
||||
detect_platform
|
||||
|
||||
# Step 2: Check/install Python
|
||||
if ! check_python; then
|
||||
# In CI or non-interactive mode (no tty), auto-install Python
|
||||
if [ ! -t 0 ] || [ "$SKIP_INTERACTIVE" = true ]; then
|
||||
if [ ! -t 0 ]; then
|
||||
log_info "Python 3.11+ not found. Installing automatically..."
|
||||
install_python
|
||||
else
|
||||
@@ -837,35 +519,29 @@ main() {
|
||||
# Step 3: Install uv
|
||||
install_uv
|
||||
|
||||
# Step 4: Show mode selection TUI (unless skipped via flags)
|
||||
if [ "$SKIP_INTERACTIVE" = false ]; then
|
||||
show_mode_menu
|
||||
# Step 4: Install browser-use package
|
||||
install_browser_use
|
||||
|
||||
# Step 5: Install Chromium
|
||||
install_chromium
|
||||
|
||||
# Step 6: Install profile-use
|
||||
install_profile_use
|
||||
|
||||
# Step 6.5: Create config.json if it doesn't exist
|
||||
config_file="$HOME/.browser-use/config.json"
|
||||
if [ ! -f "$config_file" ]; then
|
||||
echo '{}' > "$config_file"
|
||||
chmod 600 "$config_file"
|
||||
fi
|
||||
|
||||
# Default to local-only if nothing selected
|
||||
if [ "$INSTALL_LOCAL" = false ] && [ "$INSTALL_REMOTE" = false ]; then
|
||||
log_warn "No modes selected, defaulting to local"
|
||||
INSTALL_LOCAL=true
|
||||
fi
|
||||
|
||||
echo ""
|
||||
|
||||
# Step 5: Install dependencies
|
||||
install_dependencies
|
||||
|
||||
# Step 6: Write install config
|
||||
write_install_config
|
||||
|
||||
# Step 7: Configure PATH
|
||||
configure_path
|
||||
|
||||
# Step 8: Run setup wizard
|
||||
run_setup
|
||||
|
||||
# Step 9: Validate
|
||||
# Step 8: Validate
|
||||
validate
|
||||
|
||||
# Step 10: Show next steps
|
||||
# Step 9: Print next steps
|
||||
print_next_steps
|
||||
}
|
||||
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
"""Install configuration - tracks which browser modes are available.
|
||||
|
||||
This module manages the installation configuration that determines which browser
|
||||
modes (chromium, real, remote) are available based on how browser-use was installed.
|
||||
|
||||
Config file: ~/.browser-use/install-config.json
|
||||
|
||||
When no config file exists (e.g., pip install users), all modes are available by default.
|
||||
"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
|
||||
CONFIG_PATH = Path.home() / '.browser-use' / 'install-config.json'
|
||||
|
||||
ModeType = Literal['chromium', 'real', 'remote']
|
||||
|
||||
# Local modes (both require Chromium to be installed)
|
||||
LOCAL_MODES: set[str] = {'chromium', 'real'}
|
||||
|
||||
|
||||
def get_config() -> dict:
|
||||
"""Read install config. Returns default if not found.
|
||||
|
||||
Default config enables all modes (for pip install users).
|
||||
"""
|
||||
if not CONFIG_PATH.exists():
|
||||
return {
|
||||
'installed_modes': ['chromium', 'real', 'remote'],
|
||||
'default_mode': 'chromium',
|
||||
}
|
||||
|
||||
try:
|
||||
return json.loads(CONFIG_PATH.read_text())
|
||||
except (json.JSONDecodeError, OSError):
|
||||
# Config file corrupt, return default
|
||||
return {
|
||||
'installed_modes': ['chromium', 'real', 'remote'],
|
||||
'default_mode': 'chromium',
|
||||
}
|
||||
|
||||
|
||||
def save_config(installed_modes: list[str], default_mode: str) -> None:
|
||||
"""Save install config."""
|
||||
CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
CONFIG_PATH.write_text(
|
||||
json.dumps(
|
||||
{
|
||||
'installed_modes': installed_modes,
|
||||
'default_mode': default_mode,
|
||||
},
|
||||
indent=2,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def is_mode_available(mode: str) -> bool:
|
||||
"""Check if a browser mode is available based on installation config.
|
||||
|
||||
Args:
|
||||
mode: The browser mode to check ('chromium', 'real', or 'remote')
|
||||
|
||||
Returns:
|
||||
True if the mode is available, False otherwise
|
||||
"""
|
||||
config = get_config()
|
||||
installed = config.get('installed_modes', [])
|
||||
|
||||
# Map 'real' to same category as 'chromium' (both are local)
|
||||
# If either local mode is installed, both are available
|
||||
if mode in LOCAL_MODES:
|
||||
return bool(LOCAL_MODES & set(installed))
|
||||
|
||||
return mode in installed
|
||||
|
||||
|
||||
def get_default_mode() -> str:
|
||||
"""Get the default browser mode based on installation config."""
|
||||
return get_config().get('default_mode', 'chromium')
|
||||
|
||||
|
||||
def get_available_modes() -> list[str]:
|
||||
"""Get list of available browser modes."""
|
||||
return get_config().get('installed_modes', ['chromium', 'real', 'remote'])
|
||||
|
||||
|
||||
def get_mode_unavailable_error(mode: str) -> str:
|
||||
"""Generate a helpful error message when a mode is not available.
|
||||
|
||||
Args:
|
||||
mode: The unavailable mode that was requested
|
||||
|
||||
Returns:
|
||||
A formatted error message with instructions for reinstalling
|
||||
"""
|
||||
available = get_available_modes()
|
||||
|
||||
if mode in LOCAL_MODES:
|
||||
install_flag = '--full'
|
||||
mode_desc = 'Local browser mode'
|
||||
else:
|
||||
install_flag = '--full'
|
||||
mode_desc = 'Remote browser mode'
|
||||
|
||||
return (
|
||||
f"Error: {mode_desc} '{mode}' not installed.\n"
|
||||
f'Available modes: {", ".join(available)}\n\n'
|
||||
f'To install all modes, reinstall with:\n'
|
||||
f' curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- {install_flag}'
|
||||
)
|
||||
510
browser_use/skill_cli/install_lite.sh
Executable file
510
browser_use/skill_cli/install_lite.sh
Executable file
@@ -0,0 +1,510 @@
|
||||
#!/usr/bin/env bash
|
||||
# Browser-Use Lightweight CLI Installer
|
||||
#
|
||||
# Installs only the minimal dependencies needed for the CLI (~10 packages
|
||||
# instead of ~50). Use this if you only need the browser-use CLI commands
|
||||
# and don't need the Python library (Agent, LLM integrations, etc.).
|
||||
#
|
||||
# Usage:
|
||||
# curl -fsSL <url>/install_lite.sh | bash
|
||||
#
|
||||
# For development testing:
|
||||
# curl -fsSL <raw-url> | BROWSER_USE_BRANCH=<branch-name> bash
|
||||
#
|
||||
# To install the full library instead, use install.sh.
|
||||
#
|
||||
# =============================================================================
|
||||
|
||||
set -e
|
||||
|
||||
# =============================================================================
|
||||
# Prerequisites
|
||||
# =============================================================================
|
||||
|
||||
if ! command -v curl &> /dev/null; then
|
||||
echo "Error: curl is required but not installed."
|
||||
echo "Install it and try again."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# =============================================================================
|
||||
# Configuration
|
||||
# =============================================================================
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
BOLD='\033[1m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# =============================================================================
|
||||
# Logging functions
|
||||
# =============================================================================
|
||||
|
||||
log_info() {
|
||||
echo -e "${BLUE}ℹ${NC} $1"
|
||||
}
|
||||
|
||||
log_success() {
|
||||
echo -e "${GREEN}✓${NC} $1"
|
||||
}
|
||||
|
||||
log_warn() {
|
||||
echo -e "${YELLOW}⚠${NC} $1"
|
||||
}
|
||||
|
||||
log_error() {
|
||||
echo -e "${RED}✗${NC} $1"
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Argument parsing
|
||||
# =============================================================================
|
||||
|
||||
parse_args() {
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
--help|-h)
|
||||
echo "Browser-Use Lightweight CLI Installer"
|
||||
echo ""
|
||||
echo "Usage: install_lite.sh [OPTIONS]"
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " --help, -h Show this help"
|
||||
echo ""
|
||||
echo "Installs Python 3.11+ (if needed), uv, browser-use CLI (minimal deps), and Chromium."
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
log_warn "Unknown argument: $1 (ignored)"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
done
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Platform detection
|
||||
# =============================================================================
|
||||
|
||||
detect_platform() {
|
||||
local os=$(uname -s | tr '[:upper:]' '[:lower:]')
|
||||
local arch=$(uname -m)
|
||||
|
||||
case "$os" in
|
||||
linux*)
|
||||
PLATFORM="linux"
|
||||
;;
|
||||
darwin*)
|
||||
PLATFORM="macos"
|
||||
;;
|
||||
msys*|mingw*|cygwin*)
|
||||
PLATFORM="windows"
|
||||
;;
|
||||
*)
|
||||
log_error "Unsupported OS: $os"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
log_info "Detected platform: $PLATFORM ($arch)"
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Virtual environment helpers
|
||||
# =============================================================================
|
||||
|
||||
# Get the correct venv bin directory (Scripts on Windows, bin on Unix)
|
||||
get_venv_bin_dir() {
|
||||
if [ "$PLATFORM" = "windows" ]; then
|
||||
echo "$HOME/.browser-use-env/Scripts"
|
||||
else
|
||||
echo "$HOME/.browser-use-env/bin"
|
||||
fi
|
||||
}
|
||||
|
||||
# Activate the virtual environment (handles Windows vs Unix paths)
|
||||
activate_venv() {
|
||||
local venv_bin=$(get_venv_bin_dir)
|
||||
if [ -f "$venv_bin/activate" ]; then
|
||||
source "$venv_bin/activate"
|
||||
else
|
||||
log_error "Virtual environment not found at $venv_bin"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Python management
|
||||
# =============================================================================
|
||||
|
||||
check_python() {
|
||||
log_info "Checking Python installation..."
|
||||
|
||||
# Check versioned python commands first (python3.13, python3.12, python3.11)
|
||||
# This handles Ubuntu/Debian where python3 may point to older version
|
||||
# Also check common install locations directly in case PATH isn't updated
|
||||
local py_candidates="python3.13 python3.12 python3.11 python3 python"
|
||||
local py_paths="/usr/bin/python3.11 /usr/local/bin/python3.11"
|
||||
|
||||
for py_cmd in $py_candidates; do
|
||||
if command -v "$py_cmd" &> /dev/null; then
|
||||
local version=$($py_cmd --version 2>&1 | awk '{print $2}')
|
||||
local major=$(echo $version | cut -d. -f1)
|
||||
local minor=$(echo $version | cut -d. -f2)
|
||||
|
||||
if [ "$major" -ge 3 ] && [ "$minor" -ge 11 ]; then
|
||||
PYTHON_CMD="$py_cmd"
|
||||
log_success "Python $version found ($py_cmd)"
|
||||
return 0
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
# Also check common paths directly (in case command -v doesn't find them)
|
||||
for py_path in $py_paths; do
|
||||
if [ -x "$py_path" ]; then
|
||||
local version=$($py_path --version 2>&1 | awk '{print $2}')
|
||||
local major=$(echo $version | cut -d. -f1)
|
||||
local minor=$(echo $version | cut -d. -f2)
|
||||
|
||||
if [ "$major" -ge 3 ] && [ "$minor" -ge 11 ]; then
|
||||
PYTHON_CMD="$py_path"
|
||||
log_success "Python $version found ($py_path)"
|
||||
return 0
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
# No suitable Python found
|
||||
if command -v python3 &> /dev/null; then
|
||||
local version=$(python3 --version 2>&1 | awk '{print $2}')
|
||||
log_warn "Python $version found, but 3.11+ required"
|
||||
else
|
||||
log_warn "Python not found"
|
||||
fi
|
||||
return 1
|
||||
}
|
||||
|
||||
install_python() {
|
||||
log_info "Installing Python 3.11+..."
|
||||
|
||||
# Use sudo only if not root and sudo is available
|
||||
SUDO=""
|
||||
if [ "$(id -u)" -ne 0 ] && command -v sudo &> /dev/null; then
|
||||
SUDO="sudo"
|
||||
fi
|
||||
|
||||
case "$PLATFORM" in
|
||||
macos)
|
||||
if command -v brew &> /dev/null; then
|
||||
brew install python@3.11
|
||||
else
|
||||
log_error "Homebrew not found. Install from: https://brew.sh"
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
linux)
|
||||
if command -v apt-get &> /dev/null; then
|
||||
$SUDO apt-get update
|
||||
$SUDO apt-get install -y python3.11 python3.11-venv python3-pip
|
||||
elif command -v yum &> /dev/null; then
|
||||
$SUDO yum install -y python311 python311-pip
|
||||
else
|
||||
log_error "Unsupported package manager. Install Python 3.11+ manually."
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
windows)
|
||||
log_error "Please install Python 3.11+ from: https://www.python.org/downloads/"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
# Verify installation
|
||||
if check_python; then
|
||||
log_success "Python installed successfully"
|
||||
else
|
||||
log_error "Python installation failed"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# uv package manager
|
||||
# =============================================================================
|
||||
|
||||
install_uv() {
|
||||
log_info "Installing uv package manager..."
|
||||
|
||||
# Add common uv install locations to PATH for current session
|
||||
# (covers both curl-based and Homebrew installs)
|
||||
export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"
|
||||
|
||||
if command -v uv &> /dev/null; then
|
||||
log_success "uv already installed"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Use official uv installer
|
||||
if ! command -v curl &> /dev/null; then
|
||||
log_error "curl is required but not found. Install curl and try again."
|
||||
exit 1
|
||||
fi
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
|
||||
if command -v uv &> /dev/null; then
|
||||
log_success "uv installed successfully"
|
||||
else
|
||||
log_error "uv installation failed. Try restarting your shell and run the installer again."
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Browser-Use installation (lightweight - CLI deps only)
|
||||
# =============================================================================
|
||||
|
||||
install_browser_use() {
|
||||
log_info "Installing browser-use (lightweight CLI)..."
|
||||
|
||||
# Create or use existing virtual environment
|
||||
if [ ! -d "$HOME/.browser-use-env" ]; then
|
||||
# Use discovered Python command (e.g., python3.11) or fall back to version spec
|
||||
if [ -n "$PYTHON_CMD" ]; then
|
||||
uv venv "$HOME/.browser-use-env" --python "$PYTHON_CMD"
|
||||
else
|
||||
uv venv "$HOME/.browser-use-env" --python 3.11
|
||||
fi
|
||||
fi
|
||||
|
||||
# Activate venv and install
|
||||
activate_venv
|
||||
|
||||
# Install from GitHub (main branch by default, or custom branch for testing)
|
||||
BROWSER_USE_BRANCH="${BROWSER_USE_BRANCH:-main}"
|
||||
BROWSER_USE_REPO="${BROWSER_USE_REPO:-browser-use/browser-use}"
|
||||
log_info "Installing from GitHub: $BROWSER_USE_REPO@$BROWSER_USE_BRANCH"
|
||||
# Clone and install the package without its declared dependencies,
|
||||
# then install only the minimal deps the CLI actually needs at runtime.
|
||||
# This avoids pulling ~50 packages (LLM clients, PDF tools, etc.) that
|
||||
# the CLI never imports.
|
||||
local tmp_dir=$(mktemp -d)
|
||||
git clone --depth 1 --branch "$BROWSER_USE_BRANCH" "https://github.com/$BROWSER_USE_REPO.git" "$tmp_dir"
|
||||
uv pip install "$tmp_dir" --no-deps
|
||||
|
||||
# Install only the dependencies the CLI actually needs (~10 packages).
|
||||
# The list lives in requirements-cli.txt so it's discoverable and testable.
|
||||
# Transitive deps (e.g. websockets via cdp-use) are resolved automatically.
|
||||
log_info "Installing minimal CLI dependencies..."
|
||||
uv pip install -r "$tmp_dir/browser_use/skill_cli/requirements-cli.txt"
|
||||
|
||||
rm -rf "$tmp_dir"
|
||||
|
||||
log_success "browser-use CLI installed (lightweight)"
|
||||
}
|
||||
|
||||
install_chromium() {
|
||||
log_info "Installing Chromium browser..."
|
||||
|
||||
activate_venv
|
||||
|
||||
# Build command - only use --with-deps on Linux (it fails on Windows/macOS)
|
||||
local cmd="uvx playwright install chromium"
|
||||
if [ "$PLATFORM" = "linux" ]; then
|
||||
cmd="$cmd --with-deps"
|
||||
fi
|
||||
cmd="$cmd --no-shell"
|
||||
|
||||
eval $cmd
|
||||
|
||||
log_success "Chromium installed"
|
||||
}
|
||||
|
||||
install_profile_use() {
|
||||
log_info "Installing profile-use..."
|
||||
|
||||
mkdir -p "$HOME/.browser-use/bin"
|
||||
curl -fsSL https://browser-use.com/profile/cli/install.sh | PROFILE_USE_VERSION=v1.0.2 INSTALL_DIR="$HOME/.browser-use/bin" sh
|
||||
|
||||
if [ -x "$HOME/.browser-use/bin/profile-use" ]; then
|
||||
log_success "profile-use installed"
|
||||
else
|
||||
log_warn "profile-use installation failed (will auto-download on first use)"
|
||||
fi
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# PATH configuration
|
||||
# =============================================================================
|
||||
|
||||
configure_path() {
|
||||
local shell_rc=""
|
||||
local bin_path=$(get_venv_bin_dir)
|
||||
local local_bin="$HOME/.local/bin"
|
||||
|
||||
# Detect shell
|
||||
if [ -n "$BASH_VERSION" ]; then
|
||||
shell_rc="$HOME/.bashrc"
|
||||
elif [ -n "$ZSH_VERSION" ]; then
|
||||
shell_rc="$HOME/.zshrc"
|
||||
else
|
||||
shell_rc="$HOME/.profile"
|
||||
fi
|
||||
|
||||
# Check if already in PATH (browser-use-env matches both /bin and /Scripts)
|
||||
if grep -q "browser-use-env" "$shell_rc" 2>/dev/null; then
|
||||
log_info "PATH already configured in $shell_rc"
|
||||
else
|
||||
# Add to shell config (includes ~/.local/bin for tools)
|
||||
echo "" >> "$shell_rc"
|
||||
echo "# Browser-Use" >> "$shell_rc"
|
||||
echo "export PATH=\"$bin_path:$local_bin:\$PATH\"" >> "$shell_rc"
|
||||
log_success "Added to PATH in $shell_rc"
|
||||
fi
|
||||
|
||||
# On Windows, also configure PowerShell profile
|
||||
if [ "$PLATFORM" = "windows" ]; then
|
||||
configure_powershell_path
|
||||
fi
|
||||
}
|
||||
|
||||
configure_powershell_path() {
|
||||
# Use PowerShell to modify user PATH in registry (no execution policy needed)
|
||||
# This persists across sessions without requiring profile script execution
|
||||
|
||||
local scripts_path='\\.browser-use-env\\Scripts'
|
||||
local local_bin='\\.local\\bin'
|
||||
|
||||
# Check if already in user PATH
|
||||
local current_path=$(powershell.exe -Command "[Environment]::GetEnvironmentVariable('Path', 'User')" 2>/dev/null | tr -d '\r')
|
||||
|
||||
if echo "$current_path" | grep -q "browser-use-env"; then
|
||||
log_info "PATH already configured"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Append to user PATH via registry (safe, no truncation, no execution policy needed)
|
||||
powershell.exe -Command "[Environment]::SetEnvironmentVariable('Path', [Environment]::GetEnvironmentVariable('Path', 'User') + ';' + \$env:USERPROFILE + '$scripts_path;' + \$env:USERPROFILE + '$local_bin', 'User')" 2>/dev/null
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
log_success "Added to Windows PATH: %USERPROFILE%\\.browser-use-env\\Scripts"
|
||||
else
|
||||
log_warn "Could not update PATH automatically. Add manually:"
|
||||
log_warn " \$env:PATH += \";\$env:USERPROFILE\\.browser-use-env\\Scripts\""
|
||||
fi
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Validation
|
||||
# =============================================================================
|
||||
|
||||
validate() {
|
||||
log_info "Validating installation..."
|
||||
|
||||
activate_venv
|
||||
|
||||
if browser-use doctor; then
|
||||
log_success "Installation validated successfully!"
|
||||
return 0
|
||||
else
|
||||
log_warn "Some checks failed. Run 'browser-use doctor' for details."
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Print completion message
|
||||
# =============================================================================
|
||||
|
||||
print_next_steps() {
|
||||
# Detect shell for source command
|
||||
local shell_rc=".bashrc"
|
||||
if [ -n "$ZSH_VERSION" ]; then
|
||||
shell_rc=".zshrc"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo ""
|
||||
log_success "Browser-Use CLI installed successfully! (lightweight)"
|
||||
echo ""
|
||||
|
||||
echo "Next steps:"
|
||||
if [ "$PLATFORM" = "windows" ]; then
|
||||
echo " 1. Restart PowerShell (PATH is now configured automatically)"
|
||||
else
|
||||
echo " 1. Restart your shell or run: source ~/$shell_rc"
|
||||
fi
|
||||
echo " 2. Try: browser-use open https://example.com"
|
||||
echo ""
|
||||
echo "To install the full library (Agent, LLMs, etc.):"
|
||||
echo " uv pip install browser-use"
|
||||
|
||||
echo ""
|
||||
echo "Documentation: https://docs.browser-use.com"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo ""
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Main installation flow
|
||||
# =============================================================================
|
||||
|
||||
main() {
|
||||
echo ""
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo " Browser-Use Lightweight CLI Installer"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo ""
|
||||
|
||||
# Parse command-line flags
|
||||
parse_args "$@"
|
||||
|
||||
# Step 1: Detect platform
|
||||
detect_platform
|
||||
|
||||
# Step 2: Check/install Python
|
||||
if ! check_python; then
|
||||
# In CI or non-interactive mode (no tty), auto-install Python
|
||||
if [ ! -t 0 ]; then
|
||||
log_info "Python 3.11+ not found. Installing automatically..."
|
||||
install_python
|
||||
else
|
||||
read -p "Python 3.11+ not found. Install now? [y/N] " -n 1 -r < /dev/tty
|
||||
echo
|
||||
if [[ $REPLY =~ ^[Yy]$ ]]; then
|
||||
install_python
|
||||
else
|
||||
log_error "Python 3.11+ required. Exiting."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Step 3: Install uv
|
||||
install_uv
|
||||
|
||||
# Step 4: Install browser-use package (minimal deps only)
|
||||
install_browser_use
|
||||
|
||||
# Step 5: Install Chromium
|
||||
install_chromium
|
||||
|
||||
# Step 6: Install profile-use
|
||||
install_profile_use
|
||||
|
||||
# Step 7: Configure PATH
|
||||
configure_path
|
||||
|
||||
# Step 8: Validate (non-fatal — warnings shouldn't block next-step instructions)
|
||||
validate || true
|
||||
|
||||
# Step 9: Print next steps
|
||||
print_next_steps
|
||||
}
|
||||
|
||||
# Run main function with all arguments
|
||||
main "$@"
|
||||
File diff suppressed because it is too large
Load Diff
104
browser_use/skill_cli/profile_use.py
Normal file
104
browser_use/skill_cli/profile_use.py
Normal file
@@ -0,0 +1,104 @@
|
||||
"""Profile-use Go binary management.
|
||||
|
||||
Downloads, locates, and invokes the profile-use Go binary as a managed
|
||||
subcommand of `browser-use profile`. The binary is always managed at
|
||||
~/.browser-use/bin/profile-use — standalone installs on $PATH are independent.
|
||||
"""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def get_profile_use_binary() -> Path | None:
|
||||
"""Return path to managed profile-use binary, or None if not installed."""
|
||||
from browser_use.skill_cli.utils import get_bin_dir
|
||||
|
||||
binary = get_bin_dir() / ('profile-use.exe' if sys.platform == 'win32' else 'profile-use')
|
||||
if binary.is_file() and os.access(str(binary), os.X_OK):
|
||||
return binary
|
||||
return None
|
||||
|
||||
|
||||
def download_profile_use() -> Path:
|
||||
"""Download profile-use binary via the official install script.
|
||||
|
||||
Runs: curl -fsSL https://browser-use.com/profile/cli/install.sh | sh
|
||||
with INSTALL_DIR set to ~/.browser-use/bin/
|
||||
|
||||
Raises RuntimeError if download fails.
|
||||
"""
|
||||
from browser_use.skill_cli.utils import get_bin_dir
|
||||
|
||||
if not shutil.which('curl'):
|
||||
raise RuntimeError(
|
||||
'curl is required to download profile-use.\n'
|
||||
'Install curl and try again, or install profile-use manually:\n'
|
||||
' curl -fsSL https://browser-use.com/profile/cli/install.sh | sh'
|
||||
)
|
||||
|
||||
bin_dir = get_bin_dir()
|
||||
env = {**os.environ, 'INSTALL_DIR': str(bin_dir)}
|
||||
|
||||
result = subprocess.run(
|
||||
['sh', '-c', 'curl -fsSL https://browser-use.com/profile/cli/install.sh | sh'],
|
||||
env=env,
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
raise RuntimeError(
|
||||
'Failed to download profile-use. Try installing manually:\n curl -fsSL https://browser-use.com/profile/cli/install.sh | sh'
|
||||
)
|
||||
|
||||
binary = get_profile_use_binary()
|
||||
if binary is None:
|
||||
raise RuntimeError('Download appeared to succeed but binary not found at expected location.')
|
||||
|
||||
return binary
|
||||
|
||||
|
||||
def ensure_profile_use() -> Path:
|
||||
"""Return path to profile-use binary, downloading if not present."""
|
||||
binary = get_profile_use_binary()
|
||||
if binary is not None:
|
||||
return binary
|
||||
|
||||
print('profile-use not found, downloading...', file=sys.stderr)
|
||||
return download_profile_use()
|
||||
|
||||
|
||||
def run_profile_use(args: list[str]) -> int:
|
||||
"""Execute profile-use with the given arguments.
|
||||
|
||||
Handles the 'update' subcommand specially by re-running the install script.
|
||||
Passes BROWSER_USE_CONFIG_DIR so profile-use shares config with browser-use.
|
||||
"""
|
||||
# Handle 'update' subcommand — re-download latest binary
|
||||
if args and args[0] == 'update':
|
||||
try:
|
||||
download_profile_use()
|
||||
print('profile-use updated successfully')
|
||||
return 0
|
||||
except RuntimeError as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
try:
|
||||
binary = ensure_profile_use()
|
||||
except RuntimeError as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
return 1
|
||||
|
||||
from browser_use.skill_cli.utils import get_home_dir
|
||||
|
||||
env = {**os.environ, 'BROWSER_USE_CONFIG_DIR': str(get_home_dir())}
|
||||
# Forward API key from config.json for profile-use binary
|
||||
from browser_use.skill_cli.config import get_config_value
|
||||
|
||||
api_key = get_config_value('api_key')
|
||||
if api_key:
|
||||
env['BROWSER_USE_API_KEY'] = str(api_key)
|
||||
|
||||
return subprocess.call([str(binary)] + args, env=env)
|
||||
@@ -1,54 +0,0 @@
|
||||
"""Wire protocol for CLI↔Server communication.
|
||||
|
||||
Uses JSON over Unix sockets (or TCP on Windows) with newline-delimited messages.
|
||||
"""
|
||||
|
||||
import json
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass
|
||||
class Request:
|
||||
"""Command request from CLI to server."""
|
||||
|
||||
id: str
|
||||
action: str
|
||||
session: str
|
||||
params: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def to_json(self) -> str:
|
||||
return json.dumps(asdict(self))
|
||||
|
||||
@classmethod
|
||||
def from_json(cls, data: str) -> 'Request':
|
||||
d = json.loads(data)
|
||||
return cls(
|
||||
id=d['id'],
|
||||
action=d['action'],
|
||||
session=d['session'],
|
||||
params=d.get('params', {}),
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Response:
|
||||
"""Response from server to CLI."""
|
||||
|
||||
id: str
|
||||
success: bool
|
||||
data: Any = None
|
||||
error: str | None = None
|
||||
|
||||
def to_json(self) -> str:
|
||||
return json.dumps(asdict(self))
|
||||
|
||||
@classmethod
|
||||
def from_json(cls, data: str) -> 'Response':
|
||||
d = json.loads(data)
|
||||
return cls(
|
||||
id=d['id'],
|
||||
success=d['success'],
|
||||
data=d.get('data'),
|
||||
error=d.get('error'),
|
||||
)
|
||||
@@ -10,6 +10,7 @@ from typing import TYPE_CHECKING, Any, Literal
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from browser_use.browser.session import BrowserSession
|
||||
from browser_use.skill_cli.actions import ActionHandler
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -48,7 +49,11 @@ class PythonSession:
|
||||
)
|
||||
|
||||
def execute(
|
||||
self, code: str, browser_session: 'BrowserSession', loop: asyncio.AbstractEventLoop | None = None
|
||||
self,
|
||||
code: str,
|
||||
browser_session: 'BrowserSession',
|
||||
loop: asyncio.AbstractEventLoop | None = None,
|
||||
actions: 'ActionHandler | None' = None,
|
||||
) -> ExecutionResult:
|
||||
"""Execute code in persistent namespace.
|
||||
|
||||
@@ -59,10 +64,11 @@ class PythonSession:
|
||||
code: Python code to execute
|
||||
browser_session: The browser session for browser operations
|
||||
loop: The event loop for async operations (required for browser access)
|
||||
actions: Optional ActionHandler for direct execution (no event bus)
|
||||
"""
|
||||
# Inject browser wrapper with the event loop for async operations
|
||||
if loop is not None:
|
||||
self.namespace['browser'] = BrowserWrapper(browser_session, loop)
|
||||
if loop is not None and actions is not None:
|
||||
self.namespace['browser'] = BrowserWrapper(browser_session, loop, actions)
|
||||
self.execution_count += 1
|
||||
|
||||
stdout = io.StringIO()
|
||||
@@ -115,9 +121,10 @@ class BrowserWrapper:
|
||||
Runs coroutines on the server's event loop using run_coroutine_threadsafe.
|
||||
"""
|
||||
|
||||
def __init__(self, session: 'BrowserSession', loop: asyncio.AbstractEventLoop) -> None:
|
||||
def __init__(self, session: 'BrowserSession', loop: asyncio.AbstractEventLoop, actions: 'ActionHandler') -> None:
|
||||
self._session = session
|
||||
self._loop = loop
|
||||
self._actions = actions
|
||||
|
||||
def _run(self, coro: Any) -> Any:
|
||||
"""Run coroutine on the server's event loop."""
|
||||
@@ -147,21 +154,17 @@ class BrowserWrapper:
|
||||
self._run(self._goto_async(url))
|
||||
|
||||
async def _goto_async(self, url: str) -> None:
|
||||
from browser_use.browser.events import NavigateToUrlEvent
|
||||
|
||||
await self._session.event_bus.dispatch(NavigateToUrlEvent(url=url))
|
||||
await self._actions.navigate(url)
|
||||
|
||||
def click(self, index: int) -> None:
|
||||
"""Click element by index."""
|
||||
self._run(self._click_async(index))
|
||||
|
||||
async def _click_async(self, index: int) -> None:
|
||||
from browser_use.browser.events import ClickElementEvent
|
||||
|
||||
node = await self._session.get_element_by_index(index)
|
||||
if node is None:
|
||||
raise ValueError(f'Element index {index} not found')
|
||||
await self._session.event_bus.dispatch(ClickElementEvent(node=node))
|
||||
await self._actions.click_element(node)
|
||||
|
||||
def type(self, text: str) -> None:
|
||||
"""Type text into focused element."""
|
||||
@@ -181,22 +184,44 @@ class BrowserWrapper:
|
||||
self._run(self._input_async(index, text))
|
||||
|
||||
async def _input_async(self, index: int, text: str) -> None:
|
||||
from browser_use.browser.events import ClickElementEvent, TypeTextEvent
|
||||
node = await self._session.get_element_by_index(index)
|
||||
if node is None:
|
||||
raise ValueError(f'Element index {index} not found')
|
||||
await self._actions.click_element(node)
|
||||
await self._actions.type_text(node, text)
|
||||
|
||||
def upload(self, index: int, path: str) -> None:
|
||||
"""Upload a file to a file input element."""
|
||||
self._run(self._upload_async(index, path))
|
||||
|
||||
async def _upload_async(self, index: int, path: str) -> None:
|
||||
from pathlib import Path as P
|
||||
|
||||
file_path = str(P(path).expanduser().resolve())
|
||||
p = P(file_path)
|
||||
if not p.exists():
|
||||
raise FileNotFoundError(f'File not found: {file_path}')
|
||||
if not p.is_file():
|
||||
raise ValueError(f'Not a file: {file_path}')
|
||||
if p.stat().st_size == 0:
|
||||
raise ValueError(f'File is empty (0 bytes): {file_path}')
|
||||
|
||||
node = await self._session.get_element_by_index(index)
|
||||
if node is None:
|
||||
raise ValueError(f'Element index {index} not found')
|
||||
await self._session.event_bus.dispatch(ClickElementEvent(node=node))
|
||||
await self._session.event_bus.dispatch(TypeTextEvent(node=node, text=text))
|
||||
|
||||
file_input_node = self._session.find_file_input_near_element(node)
|
||||
if file_input_node is None:
|
||||
raise ValueError(f'Element {index} is not a file input and no file input found nearby')
|
||||
|
||||
await self._actions.upload_file(file_input_node, file_path)
|
||||
|
||||
def scroll(self, direction: Literal['up', 'down', 'left', 'right'] = 'down', amount: int = 500) -> None:
|
||||
"""Scroll the page."""
|
||||
self._run(self._scroll_async(direction, amount))
|
||||
|
||||
async def _scroll_async(self, direction: Literal['up', 'down', 'left', 'right'], amount: int) -> None:
|
||||
from browser_use.browser.events import ScrollEvent
|
||||
|
||||
await self._session.event_bus.dispatch(ScrollEvent(direction=direction, amount=amount))
|
||||
await self._actions.scroll(direction, amount)
|
||||
|
||||
def screenshot(self, path: str | None = None) -> bytes:
|
||||
"""Take screenshot, optionally save to file."""
|
||||
@@ -233,18 +258,14 @@ class BrowserWrapper:
|
||||
self._run(self._keys_async(keys))
|
||||
|
||||
async def _keys_async(self, keys: str) -> None:
|
||||
from browser_use.browser.events import SendKeysEvent
|
||||
|
||||
await self._session.event_bus.dispatch(SendKeysEvent(keys=keys))
|
||||
await self._actions.send_keys(keys)
|
||||
|
||||
def back(self) -> None:
|
||||
"""Go back in history."""
|
||||
self._run(self._back_async())
|
||||
|
||||
async def _back_async(self) -> None:
|
||||
from browser_use.browser.events import GoBackEvent
|
||||
|
||||
await self._session.event_bus.dispatch(GoBackEvent())
|
||||
await self._actions.go_back()
|
||||
|
||||
def wait(self, seconds: float) -> None:
|
||||
"""Wait for specified seconds."""
|
||||
|
||||
12
browser_use/skill_cli/requirements-cli.txt
Normal file
12
browser_use/skill_cli/requirements-cli.txt
Normal file
@@ -0,0 +1,12 @@
|
||||
# Minimal dependencies for the browser-use CLI.
|
||||
# Used by install_lite.sh — update this file if the CLI's import chain changes.
|
||||
aiohttp==3.13.4
|
||||
bubus==1.5.6
|
||||
cdp-use==1.4.5
|
||||
httpx==0.28.1
|
||||
psutil==7.2.2
|
||||
pydantic==2.12.5
|
||||
pydantic-settings==2.12.0
|
||||
python-dotenv==1.2.1
|
||||
typing-extensions==4.15.0
|
||||
uuid7==0.1.0
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user