chore: merge main

This commit is contained in:
reformedot
2025-08-26 11:42:08 +01:00
110 changed files with 2634 additions and 2815 deletions

View File

@@ -1,64 +1,51 @@
# Browser Use Environment Configuration
# Copy this file to .env and configure your API keys and settings
# Browser Use Configuration
# Copy this file to .env and fill in your values
# =============================================================================
# API Keys for Language Models
# =============================================================================
OPENAI_API_KEY=
ANTHROPIC_API_KEY=
GOOGLE_API_KEY=
DEEPSEEK_API_KEY=
GROK_API_KEY=
NOVITA_API_KEY=
# Azure OpenAI Configuration
AZURE_OPENAI_ENDPOINT=
AZURE_OPENAI_KEY=
# =============================================================================
# Logging Configuration
# =============================================================================
# Browser Use logging level (debug, info, warning, error)
# Set the logging level (debug, info, warning, error)
BROWSER_USE_LOGGING_LEVEL=info
# CDP (Chrome DevTools Protocol) logging level for cdp_use library
# Controls logging verbosity of Chrome DevTools Protocol interactions
# Recommended: WARNING to reduce noise (debug, info, warning, error)
# Log file paths (optional)
# Save debug level logs to this file
BROWSER_USE_DEBUG_LOG_FILE=debug.log
# Save info level logs to this file
BROWSER_USE_INFO_LOG_FILE=info.log
# CDP (Chrome DevTools Protocol) logging level
CDP_LOGGING_LEVEL=WARNING
# =============================================================================
# Telemetry and Cloud Configuration
# =============================================================================
# Enable anonymous telemetry collection
# Telemetry and Analytics
# Enable/disable anonymous telemetry
ANONYMIZED_TELEMETRY=true
# Browser Use Cloud Configuration
BROWSER_USE_CLOUD_SYNC=
BROWSER_USE_CLOUD_API_URL=https://api.browser-use.com
BROWSER_USE_CLOUD_UI_URL=
# Browser Use Cloud Configuration (optional)
# Your Browser Use Cloud API key - get it from: https://cloud.browser-use.com/billing
# BROWSER_USE_API_KEY=your_api_key_here
# =============================================================================
# Development and Runtime Configuration
# =============================================================================
# Skip LLM API key verification during initialization
SKIP_LLM_API_KEY_VERIFICATION=false
# Custom API base URL (for enterprise installations)
# BROWSER_USE_CLOUD_API_URL=https://api.browser-use.com
# Runtime environment flags
IN_DOCKER=
IS_IN_EVALS=false
# Cloud sync settings
# BROWSER_USE_CLOUD_SYNC=false
# Path configuration
XDG_CACHE_HOME=~/.cache
XDG_CONFIG_HOME=~/.config
BROWSER_USE_CONFIG_DIR=
# Model Configuration
# API keys for the LLM providers you use (uncomment and fill in)
# OPENAI_API_KEY=your_openai_api_key_here
# ANTHROPIC_API_KEY=your_anthropic_api_key_here
# Windows font directory (Windows only)
WIN_FONT_DIR=C:\Windows\Fonts
# Browser Configuration
# Path to Chrome/Chromium executable (optional)
# BROWSER_USE_EXECUTABLE_PATH=/path/to/chrome
# =============================================================================
# MCP (Model Context Protocol) Configuration
# =============================================================================
BROWSER_USE_CONFIG_PATH=
BROWSER_USE_HEADLESS=
BROWSER_USE_ALLOWED_DOMAINS=
BROWSER_USE_LLM_MODEL=
# Run browser in headless mode
# BROWSER_USE_HEADLESS=false
# User data directory for browser profile
# BROWSER_USE_USER_DATA_DIR=./browser_data
# Proxy Configuration (optional)
# BROWSER_USE_PROXY_SERVER=http://proxy.example.com:8080
# BROWSER_USE_NO_PROXY=localhost,127.0.0.1,*.internal
# BROWSER_USE_PROXY_USERNAME=username
# BROWSER_USE_PROXY_PASSWORD=password

3
.gitignore vendored
View File

@@ -53,3 +53,6 @@ credentials.json
token.json
!docs/docs.json
temp-profile-*

View File

@@ -12,7 +12,7 @@ repos:
- tomli
- repo: https://github.com/asottile/pyupgrade
rev: v3.19.1
rev: v3.20.0
hooks:
- id: pyupgrade
args: [--py311-plus]
@@ -23,19 +23,20 @@ repos:
# - id: add-trailing-comma
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.11.2
rev: v0.12.10
hooks:
- id: ruff
- id: ruff-check
args: [ --fix ]
- id: ruff-format
# see pyproject.toml for more details on ruff config
- repo: https://github.com/RobertCraigie/pyright-python
rev: v1.1.403
rev: v1.1.404
hooks:
- id: pyright
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
rev: v6.0.0
hooks:
# check for basic syntax errors in python and data files
- id: check-ast

View File

@@ -5,7 +5,14 @@ from browser_use.logging_config import setup_logging
# Only set up logging if not in MCP mode or if explicitly requested
if os.environ.get('BROWSER_USE_SETUP_LOGGING', 'true').lower() != 'false':
logger = setup_logging()
from browser_use.config import CONFIG
# Get log file paths from config/environment
debug_log_file = getattr(CONFIG, 'BROWSER_USE_DEBUG_LOG_FILE', None)
info_log_file = getattr(CONFIG, 'BROWSER_USE_INFO_LOG_FILE', None)
# Set up logging with file handlers if specified
logger = setup_logging(debug_log_file=debug_log_file, info_log_file=info_log_file)
else:
import logging
@@ -42,6 +49,7 @@ if TYPE_CHECKING:
from browser_use.agent.service import Agent
from browser_use.agent.views import ActionModel, ActionResult, AgentHistoryList
from browser_use.browser import BrowserProfile, BrowserSession
from browser_use.browser import BrowserSession as Browser
from browser_use.controller.service import Controller
from browser_use.dom.service import DomService
from browser_use.llm.anthropic.chat import ChatAnthropic
@@ -64,6 +72,7 @@ _LAZY_IMPORTS = {
'AgentHistoryList': ('browser_use.agent.views', 'AgentHistoryList'),
# Browser components (heavy due to playwright/patchright)
'BrowserSession': ('browser_use.browser', 'BrowserSession'),
'Browser': ('browser_use.browser', 'BrowserSession'), # Alias for BrowserSession
'BrowserProfile': ('browser_use.browser', 'BrowserProfile'),
# Controller (moderate weight)
'Controller': ('browser_use.controller.service', 'Controller'),
@@ -100,6 +109,7 @@ def __getattr__(name: str):
__all__ = [
'Agent',
'BrowserSession',
'Browser', # Alias for BrowserSession
'BrowserProfile',
'Controller',
'DomService',

View File

@@ -34,6 +34,8 @@ from bubus import EventBus
from pydantic import ValidationError
from uuid_extensions import uuid7str
from browser_use import Browser, BrowserProfile, BrowserSession
# Lazy import for gif to avoid heavy agent.views import at startup
# from browser_use.agent.gif import create_history_gif
from browser_use.agent.message_manager.service import (
@@ -53,7 +55,6 @@ from browser_use.agent.views import (
BrowserStateHistory,
StepMetadata,
)
from browser_use.browser import BrowserProfile, BrowserSession
from browser_use.browser.session import DEFAULT_BROWSER_PROFILE
from browser_use.browser.views import BrowserStateSummary
from browser_use.config import CONFIG
@@ -134,6 +135,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
# Optional parameters
browser_profile: BrowserProfile | None = None,
browser_session: BrowserSession | None = None,
browser: Browser | None = None, # Alias for browser_session (cleaner naming)
controller: Controller[Context] | None = None,
# Initial agent run parameters
sensitive_data: dict[str, str | dict[str, str]] | None = None,
@@ -323,7 +325,6 @@ class Agent(Generic[Context, AgentStructuredOutput]):
logger.debug(
f'{" +vision" if self.settings.use_vision else ""}'
f' extraction_model={self.settings.page_extraction_llm.model if self.settings.page_extraction_llm else "Unknown"}'
# Note: No longer logging planner_model (deprecated)
f'{" +file_system" if self.file_system else ""}'
)
@@ -357,6 +358,11 @@ class Agent(Generic[Context, AgentStructuredOutput]):
browser_profile = browser_profile or DEFAULT_BROWSER_PROFILE
# Handle browser vs browser_session parameter ("browser" is an alias; passing both raises a ValueError)
if browser and browser_session:
raise ValueError('Cannot specify both "browser" and "browser_session" parameters. Use "browser" for the cleaner API.')
browser_session = browser or browser_session
self.browser_session = browser_session or BrowserSession(
browser_profile=browser_profile,
id=uuid7str()[:-4] + self.id[-4:], # re-use the same 4-char suffix so they show up together in logs
@@ -466,13 +472,13 @@ class Agent(Generic[Context, AgentStructuredOutput]):
def logger(self) -> logging.Logger:
"""Get instance-specific logger with task ID in the name"""
_browser_session_id = self.browser_session.id if self.browser_session else self.id
_browser_session_id = self.browser_session.id if self.browser_session else '----'
_current_target_id = (
self.browser_session.agent_focus.target_id[-4:]
self.browser_session.agent_focus.target_id[-2:]
if self.browser_session and self.browser_session.agent_focus and self.browser_session.agent_focus.target_id
else '--'
)
return logging.getLogger(f'browser_use.Agent🅰 {self.task_id[-4:]} on 🆂 {_browser_session_id[-4:]} 🅟 {_current_target_id}')
return logging.getLogger(f'browser_use.Agent🅰 {self.task_id[-4:]} ⇢ 🅑 {_browser_session_id[-4:]} 🅣 {_current_target_id}')
@property
def browser_profile(self) -> BrowserProfile:
@@ -638,6 +644,13 @@ class Agent(Generic[Context, AgentStructuredOutput]):
# The task continues with new instructions, it doesn't end and start a new one
self.task = new_task
self._message_manager.add_new_task(new_task)
# Mark as follow-up task and recreate eventbus (gets shut down after each run)
self.state.follow_up_task = True
self.eventbus = EventBus(name=f'Agent_{str(self.id)[-self.state.n_steps :]}')
# Re-register cloud sync handler if it exists (if not disabled)
if hasattr(self, 'cloud_sync') and self.cloud_sync and self.enable_cloud_sync:
self.eventbus.on('*', self.cloud_sync.handle_event)
@observe_debug(ignore_input=True, ignore_output=True, name='_raise_if_stopped_or_paused')
async def _raise_if_stopped_or_paused(self) -> None:
@@ -1217,22 +1230,33 @@ class Agent(Generic[Context, AgentStructuredOutput]):
r'(?:www\.)?[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.[a-zA-Z]{2,}(?:/[^\s<>"\']*)?', # Domain names with subdomains and optional paths
]
# Email pattern to exclude
email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
found_urls = []
for pattern in patterns:
match = re.search(pattern, task)
if match:
matches = re.finditer(pattern, task)
for match in matches:
url = match.group(0)
# Skip if this looks like an email address
if re.search(email_pattern, url):
continue
# Remove trailing punctuation that's not part of URLs
url = re.sub(r'[.,;:!?()\[\]]+$', '', url)
# Add https:// if missing
if not url.startswith(('http://', 'https://')):
url = 'https://' + url
return url
found_urls.append(url)
# If no URL found, check if task mentions Google or search
task_lower = task.lower()
if 'google' in task_lower or 'search' in task_lower:
self.logger.debug('📍 Task mentions "google" or "search", defaulting to https://google.com')
return 'https://google.com'
unique_urls = list(set(found_urls))
# If multiple URLs found, skip preloading
if len(unique_urls) > 1:
self.logger.debug(f'📍 Multiple URLs found ({len(found_urls)}), skipping preload to avoid ambiguity')
return None
# If exactly one URL found, return it
if len(unique_urls) == 1:
return unique_urls[0]
return None
@@ -1274,7 +1298,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
self._log_agent_run()
self.logger.debug(
f'🔧 Agent setup: Task ID {self.task_id[-4:]}, Session ID {self.session_id[-4:]}, Browser Session ID {self.browser_session.id[-4:] if self.browser_session else "None"}'
f'🔧 Agent setup: Agent Session ID {self.session_id[-4:]}, Task ID {self.task_id[-4:]}, Browser Session ID {self.browser_session.id[-4:] if self.browser_session else "None"} {"(connecting via CDP)" if (self.browser_session and self.browser_session.cdp_url) else "(launching local browser)"}'
)
# Initialize timing for session and task
@@ -1304,7 +1328,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
self.logger.debug('🔧 Browser session started with watchdogs attached')
# Check if task contains a URL and add it as an initial action (only if preload is enabled)
if self.preload:
if self.preload and not self.state.follow_up_task:
initial_url = self._extract_url_from_task(self.task)
if initial_url:
self.logger.info(f'🔗 Found URL in task: {initial_url}, adding as initial action...')
@@ -1337,7 +1361,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
self.logger.debug(f'✅ Added navigation to {initial_url} as initial action')
# Execute initial actions if provided
if self.initial_actions:
if self.initial_actions and not self.state.follow_up_task:
self.logger.debug(f'⚡ Executing {len(self.initial_actions)} initial actions...')
result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
self.state.last_result = result
@@ -1499,7 +1523,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
# Stop the event bus gracefully, waiting for all events to be processed
# Use longer timeout to avoid deadlocks in tests with multiple agents
await self.eventbus.stop(timeout=10.0)
await self.eventbus.stop(timeout=3.0)
await self.close()

View File

@@ -70,6 +70,7 @@ class AgentState(BaseModel):
paused: bool = False
stopped: bool = False
session_initialized: bool = False # Track if session events have been dispatched
follow_up_task: bool = False # Track if the agent is a follow-up task
message_manager_state: MessageManagerState = Field(default_factory=MessageManagerState)
file_system_state: FileSystemState | None = None

View File

@@ -5,6 +5,7 @@ if TYPE_CHECKING:
from .profile import BrowserProfile, ProxySettings
from .session import BrowserSession
# Lazy imports mapping for heavy browser components
_LAZY_IMPORTS = {
'ProxySettings': ('.profile', 'ProxySettings'),

View File

@@ -41,6 +41,7 @@ class ElementSelectedEvent(BaseEvent[T_EventResultType]):
is_visible=data.is_visible,
absolute_position=data.absolute_position,
# override the circular reference fields in EnhancedDOMTreeNode as they can't be serialized and aren't needed by event handlers
# they are only used internally by the DOM service during the DOM tree building process and are not intended for public API use
content_document=None,
shadow_root_type=None,
shadow_roots=[],
@@ -86,7 +87,7 @@ class NavigateToUrlEvent(BaseEvent[None]):
)
# existing_tab: PageHandle | None = None # TODO
# limit enforced by bubus, not exposed to LLM:
# time limits enforced by bubus, not exposed to LLM:
event_timeout: float | None = 15.0 # seconds

View File

@@ -9,7 +9,6 @@ from typing import Annotated, Any, Literal, Self
from urllib.parse import urlparse
from pydantic import AfterValidator, AliasChoices, BaseModel, ConfigDict, Field, field_validator, model_validator
from uuid_extensions import uuid7str
from browser_use.config import CONFIG
from browser_use.observability import observe_debug
@@ -596,8 +595,9 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
# ... extends options defined in:
# BrowserLaunchPersistentContextArgs, BrowserLaunchArgs, BrowserNewContextArgs, BrowserConnectArgs
# Unique identifier for this browser profile
id: str = Field(default_factory=uuid7str)
# Session/connection configuration
cdp_url: str | None = Field(default=None, description='CDP URL for connecting to existing browser instance')
is_local: bool = Field(default=True, description='Whether this is a local browser instance')
# label: str = 'default'
# custom options we provide that aren't native playwright kwargs
@@ -673,10 +673,10 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
def __repr__(self) -> str:
short_dir = _log_pretty_path(self.user_data_dir) if self.user_data_dir else '<incognito>'
return f'BrowserProfile#{self.id[-4:]}(user_data_dir= {short_dir}, headless={self.headless})'
return f'BrowserProfile(user_data_dir= {short_dir}, headless={self.headless})'
def __str__(self) -> str:
return f'BrowserProfile#{self.id[-4:]}'
return 'BrowserProfile'
@model_validator(mode='after')
def copy_old_config_names_to_new(self) -> Self:

View File

@@ -2,7 +2,9 @@
import asyncio
import logging
from typing import Any, Self, cast
from functools import cached_property
from pathlib import Path
from typing import Any, Literal, Self, cast
import httpx
from bubus import EventBus
@@ -34,7 +36,7 @@ from browser_use.browser.events import (
TabClosedEvent,
TabCreatedEvent,
)
from browser_use.browser.profile import BrowserProfile
from browser_use.browser.profile import BrowserProfile, ProxySettings
from browser_use.browser.views import BrowserStateSummary, TabInfo
from browser_use.dom.views import EnhancedDOMTreeNode, TargetInfo
from browser_use.utils import _log_pretty_url, is_new_tab_page
@@ -44,6 +46,10 @@ DEFAULT_BROWSER_PROFILE = BrowserProfile()
MAX_SCREENSHOT_HEIGHT = 2000
MAX_SCREENSHOT_WIDTH = 1920
_LOGGED_UNIQUE_SESSION_IDS = set() # track unique session IDs that have been logged to make sure we always assign a unique enough id to new sessions and avoid ambiguity in logs
red = '\033[91m'
reset = '\033[0m'
class CDPSession(BaseModel):
"""Info about a single CDP session bound to a specific target.
@@ -88,7 +94,7 @@ class CDPSession(BaseModel):
import logging
logger = logging.getLogger(f'browser_use.CDPSession.{target_id[-4:]}')
logger.debug(f'🔌 Creating dedicated WebSocket connection for target {target_id}')
logger.debug(f'🔌 Creating new dedicated WebSocket connection for target 🅣 {target_id}')
target_cdp_client = CDPClient(cdp_url)
await target_cdp_client.start()
@@ -148,7 +154,7 @@ class CDPSession(BaseModel):
# if 'Debugger' not in domains:
# await self.cdp_client.send.Debugger.disable()
# await cdp_session.cdp_client.send.EventBreakpoints.disable(session_id=cdp_session.session_id)
except Exception as e:
except Exception:
# self.logger.warning(f'Failed to disable page JS breakpoints: {e}')
pass
@@ -186,6 +192,19 @@ class BrowserSession(BaseModel):
- Direct CDP/Playwright calls for browser operations
Supports both event-driven and imperative calling styles.
Browser configuration is stored in the browser_profile, session identity in direct fields:
```python
# Direct settings (recommended for most users)
session = BrowserSession(headless=True, user_data_dir='./profile')
# Or use a profile (for advanced use cases)
session = BrowserSession(browser_profile=BrowserProfile(...))
# Access session fields directly, browser settings via profile or property
print(session.id) # Session field
print(session.browser_profile.stealth) # Direct browser_profile access
```
"""
model_config = ConfigDict(
@@ -195,16 +214,124 @@ class BrowserSession(BaseModel):
revalidate_instances='never', # resets private attrs on every model rebuild
)
# Core configuration
id: str = Field(default_factory=lambda: str(uuid7str()))
def __init__(
self,
# Core configuration
id: str | None = None,
cdp_url: str | None = None,
is_local: bool = True,
browser_profile: BrowserProfile | None = None,
# BrowserProfile fields that can be passed directly
# From BrowserConnectArgs
headers: dict[str, str] | None = None,
slow_mo: float | None = None,
timeout: float | None = None,
# From BrowserLaunchArgs
env: dict[str, str | float | bool] | None = None,
executable_path: str | Path | None = None,
headless: bool | None = None,
args: list[str] | None = None,
ignore_default_args: list[str] | Literal[True] | None = None,
channel: str | None = None,
chromium_sandbox: bool | None = None,
devtools: bool | None = None,
downloads_path: str | Path | None = None,
traces_dir: str | Path | None = None,
handle_sighup: bool | None = None,
handle_sigint: bool | None = None,
handle_sigterm: bool | None = None,
# From BrowserContextArgs
accept_downloads: bool | None = None,
offline: bool | None = None,
strict_selectors: bool | None = None,
permissions: list[str] | None = None,
bypass_csp: bool | None = None,
extra_http_headers: dict[str, str] | None = None,
ignore_https_errors: bool | None = None,
java_script_enabled: bool | None = None,
base_url: str | None = None,
service_workers: str | None = None,
user_agent: str | None = None,
screen: dict | None = None,
viewport: dict | None = None,
no_viewport: bool | None = None,
device_scale_factor: float | None = None,
is_mobile: bool | None = None,
has_touch: bool | None = None,
locale: str | None = None,
timezone_id: str | None = None,
color_scheme: str | None = None,
contrast: str | None = None,
reduced_motion: str | None = None,
forced_colors: str | None = None,
record_har_content: str | None = None,
record_har_mode: str | None = None,
record_har_omit_content: bool | None = None,
record_har_path: str | Path | None = None,
record_har_url_filter: str | None = None,
record_video_dir: str | Path | None = None,
record_video_size: dict | None = None,
# From BrowserLaunchPersistentContextArgs
user_data_dir: str | Path | None = None,
# From BrowserNewContextArgs
storage_state: str | Path | dict[str, Any] | None = None,
# BrowserProfile specific fields
stealth: bool | None = None,
disable_security: bool | None = None,
deterministic_rendering: bool | None = None,
allowed_domains: list[str] | None = None,
keep_alive: bool | None = None,
proxy: ProxySettings | None = None,
enable_default_extensions: bool | None = None,
window_size: dict | None = None,
window_position: dict | None = None,
cross_origin_iframes: bool | None = None,
default_navigation_timeout: float | None = None,
default_timeout: float | None = None,
minimum_wait_page_load_time: float | None = None,
wait_for_network_idle_page_load_time: float | None = None,
maximum_wait_page_load_time: float | None = None,
wait_between_actions: float | None = None,
include_dynamic_attributes: bool | None = None,
highlight_elements: bool | None = None,
viewport_expansion: int | None = None,
auto_download_pdfs: bool | None = None,
profile_directory: str | None = None,
cookies_file: Path | None = None,
):
# Following the same pattern as AgentSettings in service.py
# Only pass non-None values to avoid validation errors
profile_kwargs = {k: v for k, v in locals().items() if k not in ['self', 'browser_profile', 'id'] and v is not None}
cdp_url: str | None = None
is_local: bool = Field(default=True)
# Create browser profile from direct parameters or use provided one
resolved_browser_profile = browser_profile or BrowserProfile(**profile_kwargs)
# Initialize the Pydantic model
super().__init__(
id=id or str(uuid7str()),
browser_profile=resolved_browser_profile,
)
# Session configuration (session identity only)
id: str = Field(default_factory=lambda: str(uuid7str()), description='Unique identifier for this browser session')
# Browser configuration (reusable profile)
browser_profile: BrowserProfile = Field(
default_factory=lambda: DEFAULT_BROWSER_PROFILE,
description='BrowserProfile() options to use for the session, otherwise a default profile will be used',
)
# Convenience properties for common browser settings
@property
def cdp_url(self) -> str | None:
"""CDP URL from browser profile.

Returns self.browser_profile.cdp_url unchanged (None when no URL is set).
Only a getter is defined here; the session code updates the value by
assigning to self.browser_profile.cdp_url.
"""
return self.browser_profile.cdp_url
@property
def is_local(self) -> bool:
"""Whether this is a local browser instance from browser profile.

Returns self.browser_profile.is_local unchanged. Only a getter is defined here.
"""
return self.browser_profile.is_local
# Main shared event bus for all browser session + all watchdogs
event_bus: EventBus = Field(default_factory=EventBus)
@@ -240,14 +367,28 @@ class BrowserSession(BaseModel):
# self._logger = logging.getLogger(f'browser_use.{self}')
return logging.getLogger(f'browser_use.{self}')
@cached_property
def _id_for_logs(self) -> str:
"""Get human-friendly semi-unique identifier for differentiating different BrowserSession instances in logs.

Returns the CDP port number when it is all digits, does not start with '922',
and has not already been recorded in _LOGGED_UNIQUE_SESSION_IDS; otherwise
returns the last 4 characters of self.id.

Decorated with cached_property, so the value is computed on first access and
reused afterwards.
NOTE(review): if this is first accessed before cdp_url is set, the uuid suffix
is what gets cached — confirm that is intended.
"""
str_id = self.id[-4:] # default to last 4 chars of truly random uuid, less helpful than cdp port but always unique enough
# take the text after the last ':' and cut it at the first '/',
# e.g. 'ws://host:1234/devtools/...' -> '1234'; a missing cdp_url yields 'no-cdp'
port_number = (self.cdp_url or 'no-cdp').rsplit(':', 1)[-1].split('/', 1)[0].strip()
# ports starting with '922' are treated as not random
# (presumably the conventional 9222-style debugging ports — TODO confirm)
port_is_random = not port_number.startswith('922')
port_is_unique_enough = port_number not in _LOGGED_UNIQUE_SESSION_IDS
if port_number and port_number.isdigit() and port_is_random and port_is_unique_enough:
# if cdp port is random/unique enough to identify this session, use it as our id in logs
# record the port so a later session on the same port falls back to its uuid suffix
_LOGGED_UNIQUE_SESSION_IDS.add(port_number)
str_id = port_number
return str_id
@property
def _tab_id_for_logs(self) -> str:
"""Short tab identifier for log output.

Returns the last 2 characters of self.agent_focus.target_id, or '--' wrapped in
the module-level `red`/`reset` escape strings when there is no agent_focus or
its target_id is empty.
"""
return self.agent_focus.target_id[-2:] if self.agent_focus and self.agent_focus.target_id else f'{red}--{reset}'
def __repr__(self) -> str:
port_number = (self.cdp_url or 'no-cdp').rsplit(':', 1)[-1].split('/', 1)[0]
return f'BrowserSession🆂 {self.id[-4:]}:{port_number} #{str(id(self))[-2:]} (cdp_url={self.cdp_url}, profile={self.browser_profile})'
return f'BrowserSession🅑 {self._id_for_logs} 🅣 {self._tab_id_for_logs} (cdp_url={self.cdp_url}, profile={self.browser_profile})'
def __str__(self) -> str:
# Note: _original_browser_session tracking moved to Agent class
port_number = (self.cdp_url or 'no-cdp').rsplit(':', 1)[-1].split('/', 1)[0]
return f'BrowserSession🆂 {self.id[-4:]}:{port_number} #{str(id(self))[-2:]}' # ' 🅟 {str(id(self.cdp_session.target_id))[-2:]}'
return f'BrowserSession🅑 {self._id_for_logs} 🅣 {self._tab_id_for_logs}'
async def reset(self) -> None:
"""Clear all cached CDP sessions with proper cleanup."""
@@ -269,7 +410,7 @@ class BrowserSession(BaseModel):
self.agent_focus = None
if self.is_local:
self.cdp_url = None
self.browser_profile.cdp_url = None
self._crash_watchdog = None
self._downloads_watchdog = None
@@ -374,7 +515,7 @@ class BrowserSession(BaseModel):
launch_result: BrowserLaunchResult = cast(
BrowserLaunchResult, await launch_event.event_result(raise_if_none=True, raise_if_any=True)
)
self.cdp_url = launch_result.cdp_url
self.browser_profile.cdp_url = launch_result.cdp_url
else:
raise ValueError('Got BrowserSession(is_local=False) but no cdp_url was provided to connect to!')
@@ -646,7 +787,7 @@ class BrowserSession(BaseModel):
# Reset state
if self.is_local:
self.cdp_url = None
self.browser_profile.cdp_url = None
# Notify stop and wait for all handlers to complete
# LocalBrowserWatchdog listens for BrowserStopEvent and dispatches BrowserKillEvent
@@ -795,17 +936,17 @@ class BrowserSession(BaseModel):
self.logger.debug('Watchdogs already attached, skipping duplicate attachment')
return
from browser_use.browser.aboutblank_watchdog import AboutBlankWatchdog
from browser_use.browser.watchdogs.aboutblank_watchdog import AboutBlankWatchdog
# from browser_use.browser.crash_watchdog import CrashWatchdog
from browser_use.browser.default_action_watchdog import DefaultActionWatchdog
from browser_use.browser.dom_watchdog import DOMWatchdog
from browser_use.browser.downloads_watchdog import DownloadsWatchdog
from browser_use.browser.local_browser_watchdog import LocalBrowserWatchdog
from browser_use.browser.permissions_watchdog import PermissionsWatchdog
from browser_use.browser.popups_watchdog import PopupsWatchdog
from browser_use.browser.screenshot_watchdog import ScreenshotWatchdog
from browser_use.browser.security_watchdog import SecurityWatchdog
from browser_use.browser.watchdogs.default_action_watchdog import DefaultActionWatchdog
from browser_use.browser.watchdogs.dom_watchdog import DOMWatchdog
from browser_use.browser.watchdogs.downloads_watchdog import DownloadsWatchdog
from browser_use.browser.watchdogs.local_browser_watchdog import LocalBrowserWatchdog
from browser_use.browser.watchdogs.permissions_watchdog import PermissionsWatchdog
from browser_use.browser.watchdogs.popups_watchdog import PopupsWatchdog
from browser_use.browser.watchdogs.screenshot_watchdog import ScreenshotWatchdog
from browser_use.browser.watchdogs.security_watchdog import SecurityWatchdog
# from browser_use.browser.storage_state_watchdog import StorageStateWatchdog
# Initialize CrashWatchdog
@@ -912,7 +1053,7 @@ class BrowserSession(BaseModel):
This MUST succeed or the browser is unusable. Fails hard on any error.
"""
self.cdp_url = cdp_url or self.cdp_url
self.browser_profile.cdp_url = cdp_url or self.cdp_url
if not self.cdp_url:
raise RuntimeError('Cannot setup CDP connection without CDP URL')
@@ -925,7 +1066,7 @@ class BrowserSession(BaseModel):
# Run a tiny HTTP client to query for the WebSocket URL from the /json/version endpoint
async with httpx.AsyncClient() as client:
version_info = await client.get(url)
self.cdp_url = version_info.json()['webSocketDebuggerUrl']
self.browser_profile.cdp_url = version_info.json()['webSocketDebuggerUrl']
assert self.cdp_url is not None
@@ -1940,29 +2081,3 @@ class BrowserSession(BaseModel):
self.logger.debug(f'Failed to get CDP client for target {node.target_id}: {e}, using main session')
return await self.get_or_create_cdp_session()
# # Fix Pydantic circular dependency for all watchdogs
# # This must be called after BrowserSession class is fully defined
# _watchdog_modules = [
# 'browser_use.browser.crash_watchdog.CrashWatchdog',
# 'browser_use.browser.downloads_watchdog.DownloadsWatchdog',
# 'browser_use.browser.local_browser_watchdog.LocalBrowserWatchdog',
# 'browser_use.browser.storage_state_watchdog.StorageStateWatchdog',
# 'browser_use.browser.security_watchdog.SecurityWatchdog',
# 'browser_use.browser.aboutblank_watchdog.AboutBlankWatchdog',
# 'browser_use.browser.popups_watchdog.PopupsWatchdog',
# 'browser_use.browser.permissions_watchdog.PermissionsWatchdog',
# 'browser_use.browser.default_action_watchdog.DefaultActionWatchdog',
# 'browser_use.browser.dom_watchdog.DOMWatchdog',
# 'browser_use.browser.screenshot_watchdog.ScreenshotWatchdog',
# ]
# for module_path in _watchdog_modules:
# try:
# module_name, class_name = module_path.rsplit('.', 1)
# module = __import__(module_name, fromlist=[class_name])
# watchdog_class = getattr(module, class_name)
# watchdog_class.model_rebuild()
# except Exception:
# pass # Ignore if watchdog can't be imported or rebuilt

View File

@@ -495,7 +495,7 @@ class DefaultActionWatchdog(BaseWatchdog):
self.logger.debug('🖱️ Clicked successfully using x,y coordinates')
# Return coordinates as dict for metadata
return {"click_x": center_x, "click_y": center_y}
return {'click_x': center_x, 'click_y': center_y}
except Exception as e:
self.logger.warning(f'CDP click failed: {type(e).__name__}: {e}')
@@ -673,7 +673,7 @@ class DefaultActionWatchdog(BaseWatchdog):
# Get element info
backend_node_id = element_node.backend_node_id
# Track coordinates for metadata
input_coordinates = None
@@ -707,7 +707,7 @@ class DefaultActionWatchdog(BaseWatchdog):
if bounds.get('width', 0) > 0 and bounds.get('height', 0) > 0:
center_x = bounds['x'] + bounds['width'] / 2
center_y = bounds['y'] + bounds['height'] / 2
input_coordinates = {"input_x": center_x, "input_y": center_y}
input_coordinates = {'input_x': center_x, 'input_y': center_y}
self.logger.debug(f'📍 Input coordinates: x={center_x:.1f}, y={center_y:.1f}')
# Provide helpful warnings for common issues
@@ -837,7 +837,7 @@ class DefaultActionWatchdog(BaseWatchdog):
)
# Small delay between characters
await asyncio.sleep(0.01)
# Return coordinates metadata if available
return input_coordinates
@@ -1293,6 +1293,9 @@ class DefaultActionWatchdog(BaseWatchdog):
async def on_ScrollToTextEvent(self, event: ScrollToTextEvent) -> None:
"""Handle scroll to text request with CDP. Raises exception if text not found."""
# TODO: handle looking for text inside cross-origin iframes as well
# Get CDP client and session
cdp_client = self.browser_session.cdp_client
if self.browser_session.agent_focus is None:

View File

@@ -411,7 +411,11 @@ class DOMWatchdog(BaseWatchdog):
# Create or reuse DOM service
if self._dom_service is None:
# self.logger.debug('🔍 DOMWatchdog._build_dom_tree: Creating DomService...')
self._dom_service = DomService(browser_session=self.browser_session, logger=self.logger)
self._dom_service = DomService(
browser_session=self.browser_session,
logger=self.logger,
cross_origin_iframes=self.browser_session.browser_profile.cross_origin_iframes,
)
# self.logger.debug('🔍 DOMWatchdog._build_dom_tree: ✅ DomService created')
# else:
# self.logger.debug('🔍 DOMWatchdog._build_dom_tree: Reusing existing DomService')

View File

@@ -269,9 +269,16 @@ class DownloadsWatchdog(BaseWatchdog):
self.browser_session.browser_profile.downloads_path
or f'{tempfile.gettempdir()}/browser_use_downloads.{str(self.browser_session.id)[-4:]}'
)
# Initialize variables that may be used outside try blocks
unique_filename = None
file_size = 0
expected_path = None
download_result = None
download_url = event.get('url', '')
suggested_filename = event.get('suggestedFilename', 'download')
try:
download_url = event.get('url', '')
suggested_filename = event.get('suggestedFilename', 'download')
guid = event.get('guid', '')
self.logger.debug(f'[DownloadsWatchdog] ⬇️ File download starting: {suggested_filename} from {download_url[:100]}...')

View File

@@ -46,17 +46,13 @@ class LocalBrowserWatchdog(BaseWatchdog):
"""Launch a local browser process."""
try:
self.logger.debug(
f'[LocalBrowserWatchdog] Received BrowserLaunchEvent, EventBus ID: {id(self.event_bus)}, launching local browser'
)
self.logger.debug('[LocalBrowserWatchdog] Received BrowserLaunchEvent, launching local browser...')
self.logger.debug('[LocalBrowserWatchdog] Calling _launch_browser...')
# self.logger.debug('[LocalBrowserWatchdog] Calling _launch_browser...')
process, cdp_url = await self._launch_browser()
self.logger.debug(f'[LocalBrowserWatchdog] _launch_browser returned: process={process}, cdp_url={cdp_url}')
self._subprocess = process
# self.logger.debug(f'[LocalBrowserWatchdog] _launch_browser returned: process={process}, cdp_url={cdp_url}')
self.logger.debug(f'[LocalBrowserWatchdog] Browser launched successfully at {cdp_url}, PID: {process.pid}')
return BrowserLaunchResult(cdp_url=cdp_url)
except Exception as e:
self.logger.error(f'[LocalBrowserWatchdog] Exception in on_BrowserLaunchEvent: {e}', exc_info=True)
@@ -145,7 +141,9 @@ class LocalBrowserWatchdog(BaseWatchdog):
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
self.logger.debug(f'[LocalBrowserWatchdog] 🎭 Browser subprocess launched with browser_pid= {subprocess.pid}')
self.logger.debug(
f'[LocalBrowserWatchdog] 🎭 Browser running with browser_pid= {subprocess.pid} 🔗 listening on CDP port :{debug_port}'
)
# Convert to psutil.Process
process = psutil.Process(subprocess.pid)

View File

@@ -181,6 +181,8 @@ class FlatEnvConfig(BaseSettings):
# Logging and telemetry
BROWSER_USE_LOGGING_LEVEL: str = Field(default='info')
CDP_LOGGING_LEVEL: str = Field(default='WARNING')
BROWSER_USE_DEBUG_LOG_FILE: str | None = Field(default=None)
BROWSER_USE_INFO_LOG_FILE: str | None = Field(default=None)
ANONYMIZED_TELEMETRY: bool = Field(default=True)
BROWSER_USE_CLOUD_SYNC: bool | None = Field(default=None)
BROWSER_USE_CLOUD_API_URL: str = Field(default='https://api.browser-use.com')
@@ -459,9 +461,7 @@ class Config:
proxy_dict['server'] = env_config.BROWSER_USE_PROXY_URL
if env_config.BROWSER_USE_NO_PROXY:
# store bypass as comma-separated string to match Chrome flag
proxy_dict['bypass'] = ','.join(
[d.strip() for d in env_config.BROWSER_USE_NO_PROXY.split(',') if d.strip()]
)
proxy_dict['bypass'] = ','.join([d.strip() for d in env_config.BROWSER_USE_NO_PROXY.split(',') if d.strip()])
if env_config.BROWSER_USE_PROXY_USERNAME:
proxy_dict['username'] = env_config.BROWSER_USE_PROXY_USERNAME
if env_config.BROWSER_USE_PROXY_PASSWORD:

View File

@@ -28,10 +28,6 @@ if TYPE_CHECKING:
from browser_use.browser.session import BrowserSession
# TODO: enable cross origin iframes -> experimental for now
ENABLE_CROSS_ORIGIN_IFRAMES = False
class DomService:
"""
Service for getting the DOM tree and other DOM-related information.
@@ -43,9 +39,12 @@ class DomService:
logger: logging.Logger
def __init__(self, browser_session: 'BrowserSession', logger: logging.Logger | None = None):
def __init__(
self, browser_session: 'BrowserSession', logger: logging.Logger | None = None, cross_origin_iframes: bool = False
):
self.browser_session = browser_session
self.logger = logger or browser_session.logger
self.cross_origin_iframes = cross_origin_iframes
async def __aenter__(self):
return self
@@ -616,7 +615,7 @@ class DomService:
if (
# TODO: hacky way to disable cross origin iframes for now
ENABLE_CROSS_ORIGIN_IFRAMES and node['nodeName'].upper() == 'IFRAME' and node.get('contentDocument', None) is None
self.cross_origin_iframes and node['nodeName'].upper() == 'IFRAME' and node.get('contentDocument', None) is None
): # None meaning there is no content
# Use get_all_frames to find the iframe's target
frame_id = node.get('frameId', None)

View File

@@ -61,13 +61,15 @@ def addLoggingLevel(levelName, levelNum, methodName=None):
setattr(logging, methodName, logToRoot)
def setup_logging(stream=None, log_level=None, force_setup=False):
def setup_logging(stream=None, log_level=None, force_setup=False, debug_log_file=None, info_log_file=None):
"""Setup logging configuration for browser-use.
Args:
stream: Output stream for logs (default: sys.stdout). Can be sys.stderr for MCP mode.
log_level: Override log level (default: uses CONFIG.BROWSER_USE_LOGGING_LEVEL)
force_setup: Force reconfiguration even if handlers already exist
debug_log_file: Path to a log file that receives logs at DEBUG level and above
info_log_file: Path to a log file that receives logs at INFO level and above
"""
# Try to add RESULT level, but ignore if it already exists
try:
@@ -94,9 +96,9 @@ def setup_logging(stream=None, log_level=None, force_setup=False):
# Only clean up names in INFO mode, keep everything in DEBUG mode
if self.log_level > logging.DEBUG and isinstance(record.name, str) and record.name.startswith('browser_use.'):
# Extract clean component names from logger names
if 'Agent🅰' in record.name:
if 'Agent' in record.name:
record.name = 'Agent'
elif 'BrowserSession🆂' in record.name:
elif 'BrowserSession' in record.name:
record.name = 'BrowserSession'
elif 'controller' in record.name:
record.name = 'controller'
@@ -125,32 +127,57 @@ def setup_logging(stream=None, log_level=None, force_setup=False):
console.setLevel('RESULT')
console.setFormatter(BrowserUseFormatter('%(message)s', log_level))
else:
console.setLevel(log_level) # Keep console at original log level (e.g., INFO)
console.setFormatter(BrowserUseFormatter('%(levelname)-8s [%(name)s] %(message)s', log_level))
# Configure root logger only
root.addHandler(console)
# Configure root logger
root.setLevel(log_level)
# Add file handlers if specified
file_handlers = []
# Create debug log file handler
if debug_log_file:
debug_handler = logging.FileHandler(debug_log_file)
debug_handler.setLevel(logging.DEBUG)
debug_handler.setFormatter(BrowserUseFormatter('%(asctime)s - %(levelname)-8s [%(name)s] %(message)s', logging.DEBUG))
file_handlers.append(debug_handler)
root.addHandler(debug_handler)
# Create info log file handler
if info_log_file:
info_handler = logging.FileHandler(info_log_file)
info_handler.setLevel(logging.INFO)
info_handler.setFormatter(BrowserUseFormatter('%(asctime)s - %(levelname)-8s [%(name)s] %(message)s', logging.INFO))
file_handlers.append(info_handler)
root.addHandler(info_handler)
# Configure root logger - use DEBUG if debug file logging is enabled
effective_log_level = logging.DEBUG if debug_log_file else log_level
root.setLevel(effective_log_level)
# Configure browser_use logger
browser_use_logger = logging.getLogger('browser_use')
browser_use_logger.propagate = False # Don't propagate to root logger
browser_use_logger.addHandler(console)
browser_use_logger.setLevel(log_level)
for handler in file_handlers:
browser_use_logger.addHandler(handler)
browser_use_logger.setLevel(effective_log_level)
# Configure bubus logger to allow INFO level logs
bubus_logger = logging.getLogger('bubus')
bubus_logger.propagate = False # Don't propagate to root logger
bubus_logger.addHandler(console)
bubus_logger.setLevel(logging.INFO if log_type == 'result' else log_level)
for handler in file_handlers:
bubus_logger.addHandler(handler)
bubus_logger.setLevel(logging.INFO if log_type == 'result' else effective_log_level)
# Configure CDP logging using cdp_use's setup function
# This enables the formatted CDP output using CDP_LOGGING_LEVEL environment variable
# Convert CDP_LOGGING_LEVEL string to logging level
cdp_level_str = CONFIG.CDP_LOGGING_LEVEL.upper()
cdp_level = getattr(logging, cdp_level_str, logging.WARNING)
try:
from cdp_use.logging import setup_cdp_logging # type: ignore

View File

@@ -3,6 +3,8 @@ from collections.abc import Sequence
from dataclasses import asdict, dataclass
from typing import Any
from browser_use.config import is_running_in_docker
@dataclass
class BaseTelemetryEvent(ABC):
@@ -13,7 +15,10 @@ class BaseTelemetryEvent(ABC):
@property
def properties(self) -> dict[str, Any]:
return {k: v for k, v in asdict(self).items() if k != 'name'}
props = {k: v for k, v in asdict(self).items() if k != 'name'}
# Add Docker context if running in Docker
props['is_docker'] = is_running_in_docker()
return props
@dataclass

View File

@@ -1,239 +0,0 @@
---
title: "CLI"
description: "Start using the Browser Use CLI"
icon: "terminal"
---
# CLI Usage
The `browser-use` command-line interface provides multiple modes of operation for browser automation.
## Installation
Get started with browser-use immediately using `uvx`:
```bash
uvx 'browser-use[cli]' --help
```
Or install it globally:
```bash
uv tool install 'browser-use[cli]'
```
## Modes of Operation
### 1. Interactive TUI Mode (Default)
Launch an interactive terminal UI where you can chat with the browser automation agent:
```bash
uvx 'browser-use[cli]'
```
This opens a chat interface where you can:
- Type natural language commands to control the browser
- See real-time feedback from the agent
- View browser state and actions being performed
### 2. One-Shot Mode
Execute a single task without entering interactive mode:
```bash
uvx 'browser-use[cli]' -p "Search for OpenAI documentation and take a screenshot"
```
Options:
- `-p, --prompt`: The task to execute
- `--headless`: Run browser in headless mode
- `--model`: Specify LLM model (default: gpt-4o)
### 3. MCP Server Mode
Run browser-use as a Model Context Protocol server:
```bash
uvx 'browser-use[cli]' --mcp # expects MCP JSON RPC over stdio
```
This mode exposes browser automation capabilities as MCP tools that can be used by:
- Claude Desktop
- Other MCP-compatible clients
- Custom applications using the MCP SDK
For MCP integration details, see:
- [MCP Server Documentation](/customize/mcp-server)
- [MCP Client Documentation](/customize/mcp-client)
## Configuration
Browser-use can be configured through environment variables and a configuration file.
### Configuration File Location
The default configuration file is located at:
- `~/.config/browseruse/config.json`
You can override this location with:
- `BROWSER_USE_CONFIG_PATH` environment variable
- `BROWSER_USE_CONFIG_DIR` environment variable (directory containing `config.json`)
### Configuration File Format
The configuration uses a database-style format with UUID entries:
```json
{
"browser_profile": {
"550e8400-e29b-41d4-a716-446655440000": {
"id": "550e8400-e29b-41d4-a716-446655440000",
"default": true,
"created_at": "2024-01-01T00:00:00",
"headless": false,
"user_data_dir": null,
"allowed_domains": ["example.com"],
"downloads_path": "~/Downloads/browser-use"
}
},
"llm": {
"6ba7b810-9dad-11d1-80b4-00c04fd430c8": {
"id": "6ba7b810-9dad-11d1-80b4-00c04fd430c8",
"default": true,
"created_at": "2024-01-01T00:00:00",
"api_key": "your-openai-api-key-here",
"model": "gpt-4o",
"temperature": 0.7
}
},
"agent": {
"6ba7b812-9dad-11d1-80b4-00c04fd430c8": {
"id": "6ba7b812-9dad-11d1-80b4-00c04fd430c8",
"default": true,
"created_at": "2024-01-01T00:00:00",
"max_steps": 100,
"use_vision": true
}
}
}
```
Each configuration type (browser_profile, llm, agent) can have multiple entries, with one marked as `default: true`.
### Environment Variables
Environment variables always override config.json values:
#### General Settings
- `BROWSER_USE_LOGGING_LEVEL`: Logging level (debug, info, warning, error)
- `BROWSER_USE_CONFIG_PATH`: Full path to config.json file
- `BROWSER_USE_CONFIG_DIR`: Directory containing config.json
#### Browser Profile Settings
- `BROWSER_USE_HEADLESS`: Run browser in headless mode (true/false)
- `BROWSER_USE_ALLOWED_DOMAINS`: Comma-separated list of allowed domains
- `BROWSER_USE_USER_DATA_DIR`: Chrome user data directory path
#### LLM Settings
- `OPENAI_API_KEY`: OpenAI API key
- `ANTHROPIC_API_KEY`: Anthropic API key
- `BROWSER_USE_LLM_MODEL`: LLM model to use (e.g., gpt-4o, claude-3-opus)
#### MCP-Specific Settings
When running in MCP mode, these environment variables are particularly useful:
- `BROWSER_USE_HEADLESS`: Control browser visibility
- `OPENAI_API_KEY`: Required for agent-based tools
### Browser Profiles Directory
Browser profiles are stored in:
```
~/.config/browseruse/profiles/
├── default/ # Default browser profile
├── work/ # Custom profile example
└── research/ # Another custom profile
```
Each profile directory contains Chrome user data, allowing you to:
- Maintain separate browser sessions
- Keep cookies and local storage isolated
- Use different extensions per profile
## Examples
### Basic Usage
```bash
# Interactive mode
uvx 'browser-use[cli]'
# One-shot task
uvx 'browser-use[cli]' -p "Go to github.com and search for browser-use"
# Headless one-shot
uvx 'browser-use[cli]' --headless -p "Extract prices from example.com/products"
```
### With Configuration
```bash
# Use specific config file
BROWSER_USE_CONFIG_PATH=~/my-config.json uvx 'browser-use[cli]'
# Override settings via environment
BROWSER_USE_HEADLESS=true OPENAI_API_KEY=sk-... uvx 'browser-use[cli]' -p "Check my email"
# Use different LLM model
BROWSER_USE_LLM_MODEL=gpt-4-turbo uvx 'browser-use[cli]'
```
### MCP Server Usage
```bash
# Start MCP server
uvx 'browser-use[cli]' --mcp
# With custom settings
BROWSER_USE_HEADLESS=false OPENAI_API_KEY=sk-... uvx 'browser-use[cli]' --mcp
```
For Claude Desktop integration, add to your Claude Desktop config:
```json
{
"mcpServers": {
"browser-use": {
"command": "uvx",
"args": ["browser-use[cli]", "--mcp"],
"env": {
"OPENAI_API_KEY": "sk-...",
"BROWSER_USE_HEADLESS": "false"
}
}
}
}
```
## Troubleshooting
### Common Issues
1. **Browser not launching**: Ensure Chrome/Chromium is installed
2. **API key errors**: Set appropriate API key environment variables
3. **Permission errors**: Check file permissions in `~/.config/browseruse/`
### Debug Mode
Enable debug logging for troubleshooting:
```bash
BROWSER_USE_LOGGING_LEVEL=debug uvx 'browser-use[cli]'
```
## See Also
- [Getting Started](/quickstart)
- [MCP Server Documentation](/customize/mcp-server)
- [MCP Client Documentation](/customize/mcp-client)
- [Browser Settings](/customize/browser-settings)

View File

@@ -2,6 +2,7 @@
title: "Authentication"
description: "Learn how to authenticate with the Browser Use Cloud API"
icon: "lock"
mode: "wide"
---
The Browser Use Cloud API uses API keys to authenticate requests. You can obtain an API key from your [Browser Use Cloud dashboard](https://cloud.browser-use.com/settings/api-keys).

View File

@@ -2,6 +2,7 @@
title: "Cloud SDK"
description: "Learn how to set up your own Browser Use Cloud SDK"
icon: "code"
mode: "wide"
---
This guide walks you through setting up your own Browser Use Cloud SDK.

View File

@@ -2,6 +2,7 @@
title: "V1 Implementation"
description: "Learn how to implement the Browser Use API in Python"
icon: "code"
mode: "wide"
---
This guide shows how to implement common API patterns using Python. We'll create a complete example that creates and monitors a browser automation task.

View File

@@ -2,6 +2,7 @@
title: "N8N + Browser Use Cloud"
description: "Learn how to integrate Browser Use Cloud API with n8n using a practical workflow example (competitor research)."
icon: "plug"
mode: "wide"
---
> **TL;DR** In **3 minutes** you can have an n8n workflow that:

View File

@@ -2,6 +2,7 @@
title: "Pricing"
description: "Browser Use Cloud API pricing structure and cost breakdown"
icon: "dollar-sign"
mode: "wide"
---
The Browser Use Cloud API pricing consists of two components:

View File

@@ -2,6 +2,7 @@
title: "Quickstart"
description: "Learn how to get started with the Browser Use Cloud API"
icon: "cloud"
mode: "wide"
---
<img

View File

@@ -2,6 +2,7 @@
title: "Search API"
description: "Get started with Browser Use's search endpoints to extract content from websites"
icon: "magnifying-glass"
mode: "wide"
---
<Warning>

View File

@@ -2,6 +2,7 @@
title: "Webhooks"
description: "Learn how to integrate webhooks with Browser Use Cloud API"
icon: "code"
mode: "wide"
---
Webhooks allow you to receive real-time notifications about events in your Browser Use tasks. This guide will show you how to set up and verify webhook endpoints.

View File

@@ -2,6 +2,7 @@
title: "Node.js"
description: "Get started with Browser Use Cloud API using Node.js"
icon: "node-js"
mode: "wide"
---
<img src="/images/cloud-banner-js.png" alt="Browser Use Node.js" width="full" />
@@ -62,6 +63,7 @@ const TaskOutput = z.object({
const result = await client.tasks.run({
task: "Search for the top 10 Hacker News posts and return the title and url.",
schema: TaskOutput,
});
for (const post of result.parsedOutput.posts) {
@@ -85,6 +87,8 @@ const stream = browseruse.tasks.stream({
for await (const msg of stream) {
switch (msg.status) {
case "started":
console.log(`started: ${msg.data.session.liveUrl}`);
break;
case "paused":
case "stopped":
console.log(`running: ${msg}`);

View File

@@ -2,6 +2,7 @@
title: "Python"
description: "Get started with Browser Use Cloud API using Python"
icon: "python"
mode: "wide"
---
<img

View File

@@ -2,6 +2,7 @@
title: "Quickstart"
description: "Skip the setup with Browser Use Cloud"
icon: "cloud"
mode: "wide"
---
<img
@@ -46,3 +47,33 @@ icon: "cloud"
{/* <br /> */}
> To play around with the API, you can use the [Browser Use Cloud Playground](https://cloud.browser-use.com/playground).
## Examples
Explore quick start examples to see how to use the SDKs.
<CardGroup cols={2}>
<Card
title="Python Examples"
icon="python"
href="https://github.com/browser-use/browser-use-examples/tree/main/python"
>
Explore quick start examples for Python.
</Card>
<Card
title="TypeScript Examples"
icon="js"
href="https://github.com/browser-use/browser-use-examples/tree/main/typescript"
>
Explore quick start examples for TypeScript.
</Card>
<Card
title="NextJS Examples"
icon={<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" width="24" height="24" fill="currentColor" className="remixicon text-basis h-8 w-8 text-primary"><path d="M12 22C6.47715 22 2 17.5228 2 12C2 6.47715 6.47715 2 12 2C17.5228 2 22 6.47715 22 12C22 17.5228 17.5228 22 12 22ZM15.9999 8H14.6499V12H15.9999V8ZM9.34609 9.70937L15.405 17.5379L16.4591 16.7293L9.68281 8H8V15.9969H9.34609V9.70937Z"></path></svg>}
href="https://github.com/browser-use/browser-use-examples/tree/main/typescript/scrapper"
>
Explore quick start examples for NextJS.
</Card>
</CardGroup>

View File

@@ -0,0 +1,27 @@
---
title: "Basics"
description: ""
icon: "play"
mode: "wide"
---
```python
from browser_use import Agent, ChatOpenAI
agent = Agent(
task="Search for latest news about AI",
llm=ChatOpenAI(model="gpt-4.1-mini"),
)
async def main():
history = await agent.run(max_steps=100)
```
- `task`: The task you want to automate.
- `llm`: Your favorite LLM. See <a href="/customize/supported-models">Supported Models</a>.
The agent is executed using the async `run()` method:
- `max_steps` (default: `100`): Maximum number of steps the agent can take

View File

@@ -0,0 +1,45 @@
---
title: "Output Format"
description: ""
icon: "arrow-right-to-bracket"
mode: "wide"
---
## Agent History
The `run()` method returns an `AgentHistoryList` object with the complete execution history:
```python
history = await agent.run()
# Access useful information
history.urls() # List of visited URLs
history.screenshot_paths() # List of screenshot paths
history.screenshots() # List of screenshots as base64 strings
history.action_names() # Names of executed actions
history.extracted_content() # List of extracted content from all actions
history.errors() # List of errors (with None for steps without errors)
history.model_actions() # All actions with their parameters
history.model_outputs() # All model outputs from history
history.last_action() # Last action in history
# Analysis methods
history.final_result() # Get the final extracted content (last step)
history.is_done() # Check if the agent has finished (is marked done)
history.is_successful() # Check if agent completed successfully (returns None if not done)
history.has_errors() # Check if any errors occurred
history.model_thoughts() # Get the agent's reasoning process (AgentBrain objects)
history.action_results() # Get all ActionResult objects from history
history.action_history() # Get truncated action history with essential fields
history.number_of_steps() # Get the number of steps in the history
history.total_duration_seconds() # Get total duration of all steps in seconds
# Structured output (when using output_model_schema)
history.structured_output # Property that returns parsed structured output
```
See all helper methods in the [AgentHistoryList source code](https://github.com/browser-use/browser-use/blob/main/browser_use/agent/views.py#L301).
## Structured Output
For structured output, use the `output_model_schema` parameter with a Pydantic model. [Example](https://github.com/browser-use/browser-use/blob/main/examples/features/custom_output.py).

View File

@@ -0,0 +1,50 @@
---
title: "All Parameters"
description: "Complete reference for all agent configuration options"
icon: "sliders"
mode: "wide"
---
## Available Parameters
### Core Settings
- `controller`: Registry of [our tools](https://github.com/browser-use/browser-use/blob/main/browser_use/controller/service.py) the agent can call. [Example for custom tools](https://github.com/browser-use/browser-use/tree/main/examples/custom-functions)
- `browser`: Browser object where you can specify the browser settings.
- `output_model_schema`: Pydantic model class for structured output validation. [Example](https://github.com/browser-use/browser-use/blob/main/examples/features/custom_output.py)
### Vision & Processing
- `use_vision` (default: `True`): Enable/disable vision capabilities for processing screenshots
- `vision_detail_level` (default: `'auto'`): Screenshot detail level - `'low'`, `'high'`, or `'auto'`
- `page_extraction_llm`: Separate LLM model for page content extraction. You can choose a small & fast model because it only needs to extract text from the page (default: same as `llm`)
### Actions & Behavior
- `initial_actions`: List of actions to run before the main task without LLM. [Example](https://github.com/browser-use/browser-use/blob/main/examples/features/initial_actions.py)
- `max_actions_per_step` (default: `10`): Maximum actions per step, e.g. for form filling the agent can output 10 fields at once. We execute the actions until the page changes.
- `max_failures` (default: `3`): Maximum retries for steps with errors
- `use_thinking` (default: `True`): Controls whether the agent uses its internal "thinking" field for explicit reasoning steps.
- `flash_mode` (default: `False`): Fast mode that skips evaluation, next goal and thinking and only uses memory. If `flash_mode` is enabled, it overrides `use_thinking` and disables the thinking process entirely. [Example](https://github.com/browser-use/browser-use/blob/main/examples/getting_started/05_fast_agent.py)
### System Messages
- `override_system_message`: Completely replace the default system prompt.
- `extend_system_message`: Add additional instructions to the default system prompt. [Example](https://github.com/browser-use/browser-use/blob/main/examples/features/custom_system_prompt.py)
### File & Data Management
- `save_conversation_path`: Path to save complete conversation history
- `save_conversation_path_encoding` (default: `'utf-8'`): Encoding for saved conversations
- `available_file_paths`: List of file paths the agent can access
- `sensitive_data`: Dictionary of sensitive data to handle carefully. [Example](https://github.com/browser-use/browser-use/blob/main/examples/features/sensitive_data.py)
### Visual Output
- `generate_gif` (default: `False`): Generate GIF of agent actions. Set to `True` or string path
- `include_attributes`: List of HTML attributes to include in page analysis
### Performance & Limits
- `max_history_items`: Maximum number of last steps to keep in the LLM memory. If `None`, we keep all steps.
- `llm_timeout` (default: `90`): Timeout in seconds for LLM calls
- `step_timeout` (default: `120`): Timeout in seconds for each step
- `preload` (default: `True`): If a URL is detected in the task, it is opened directly.
### Advanced Options
- `calculate_cost` (default: `False`): Calculate and track API costs
- `display_files_in_done_text` (default: `True`): Show file information in completion messages

View File

@@ -1,201 +0,0 @@
---
title: "Agent Settings"
description: "Learn how to configure the agent"
icon: "gear"
---
## Overview
The `Agent` class is the core component of Browser Use that handles browser automation. Here are the main configuration options you can use when initializing an agent.
## Basic Settings
```python
from browser_use import Agent, ChatOpenAI
agent = Agent(
task="Search for latest news about AI",
llm=ChatOpenAI(model="gpt-4o"),
)
```
### Required Parameters
- `task`: The instruction for the agent to execute
- `llm`: A chat model instance. See <a href="/customize/supported-models">Supported Models</a> for supported models.
## Agent Behavior
Control how the agent operates:
```python
agent = Agent(
task="your task",
llm=llm,
controller=custom_controller, # For custom tool calling
use_vision=True, # Enable vision capabilities
save_conversation_path="logs/conversation" # Save chat logs
)
```
### Behavior Parameters
- `controller`: Registry of functions the agent can call. Defaults to base Controller. See <a href="/customize/custom-functions">Custom Functions</a> for details.
- `use_vision`: Enable/disable vision capabilities. Defaults to `True`.
- When enabled, the model processes visual information from web pages
- Disable to reduce costs or use models without vision support
- For GPT-4o, image processing costs approximately 800-1000 tokens (~$0.002 USD) per image (but this depends on the defined screen size)
- `vision_detail_level`: Controls the detail level of screenshots sent to the vision model. Can be `'low'`, `'high'`, or `'auto'` (default). Using `'low'` can significantly reduce token consumption and cost for simpler visual tasks, while `'high'` provides more detail for complex visual analysis.
- `save_conversation_path`: Path to save the complete conversation history. Useful for debugging.
- `override_system_message`: Completely replace the default system prompt with a custom one.
- `extend_system_message`: Add additional instructions to the default system prompt.
<Note>
Vision capabilities are recommended for better web interaction understanding,
but can be disabled to reduce costs or when using models without vision
support.
</Note>
### Reuse Existing Browser Context
By default browser-use launches its own builtin browser using playwright chromium.
You can also connect to a remote browser or pass any of the following
existing playwright objects to the Agent: `page`, `browser_context`, `browser`, `browser_session`, or `browser_profile`.
These all get passed down to create a `BrowserSession` for the `Agent`:
```python
agent = Agent(
task='book a flight to fiji',
llm=llm,
browser_profile=browser_profile, # use this profile to create a BrowserSession
browser_session=BrowserSession( # use an existing BrowserSession
cdp_url=..., # remote CDP browser to connect to
# or
wss_url=..., # remote wss playwright server provider
# or
browser_pid=... # pid of a locally running browser process to attach to
# or
executable_path=... # provide a custom chrome binary path
# or
channel=... # specify chrome, chromium, ms-edge, etc.
# or
page=page, # use an existing playwright Page object
# or
browser_context=browser_context, # use an existing playwright BrowserContext object
# or
browser=browser, # use an existing playwright Browser object
),
)
```
For example, to connect to an existing browser over CDP you could do:
```python
agent = Agent(
...
browser_session=BrowserSession(cdp_url='http://localhost:9222'),
)
```
For example, to connect to a local running chrome instance you can do:
```python
agent = Agent(
...
browser_session=BrowserSession(browser_pid=1234),
)
```
See <a href="/customize/real-browser">Connect to your Browser</a> for more info.
<Note>
You can reuse the same `BrowserSession` after an agent has completed running.
If you do nothing, the browser will be automatically closed on `run()`
completion only if it was launched by us.
</Note>
## Running the Agent
The agent is executed using the async `run()` method:
- `max_steps` (default: `100`)
Maximum number of steps the agent can take during execution. This prevents infinite loops and helps control execution time.
## Agent History
The method returns an `AgentHistoryList` object containing the complete execution history. This history is invaluable for debugging, analysis, and creating reproducible scripts.
```python
# Example of accessing history
history = await agent.run()
# Access (some) useful information
history.urls() # List of visited URLs
history.screenshot_paths() # List of screenshot paths
history.action_names() # Names of executed actions
history.extracted_content() # Content extracted during execution
history.errors() # Any errors that occurred
history.model_actions() # All actions with their parameters
```
The `AgentHistoryList` provides many helper methods to analyze the execution:
- `final_result()`: Get the final extracted content
- `is_done()`: Check if the agent completed successfully
- `has_errors()`: Check if any errors occurred
- `model_thoughts()`: Get the agent's reasoning process
- `action_results()`: Get results of all actions
<Note>
For a complete list of helper methods and detailed history analysis
capabilities, refer to the [AgentHistoryList source
code](https://github.com/browser-use/browser-use/blob/main/browser_use/agent/views.py#L111).
</Note>
## Run initial actions without LLM
With [this example](https://github.com/browser-use/browser-use/blob/main/examples/features/initial_actions.py) you can run initial actions without the LLM.
Specify the action as a dictionary where the key is the action name and the value is the action parameters. You can find all our actions in the [Controller](https://github.com/browser-use/browser-use/blob/main/browser_use/controller/service.py) source code.
```python
initial_actions = [
{'go_to_url': {'url': 'https://www.google.com', 'new_tab': True}},
{'go_to_url': {'url': 'https://en.wikipedia.org/wiki/Randomness', 'new_tab': True}},
{'scroll_down': {'amount': 1000}},
]
agent = Agent(
task='What theories are displayed on the page?',
initial_actions=initial_actions,
llm=llm,
)
```
### Optional Parameters
- `initial_actions`: List of initial actions to run before the main task.
- `max_actions_per_step`: Maximum number of actions to run in a step. Defaults to `10`.
- `max_failures`: Maximum number of failures before giving up. Defaults to `3`.
- `retry_delay`: Time to wait between retries in seconds when rate limited. Defaults to `10`.
- `generate_gif`: Enable/disable GIF generation. Defaults to `False`. Set to `True` or a string path to save the GIF.
## Memory
Memory management in browser-use has been significantly improved since version 0.3.2. The agent's context handling and state management are now robust enough that the previous memory system (`mem0`) is no longer needed or supported.
The agent maintains its context and task progress through:
- Detailed history tracking of actions and results
- Structured state management
- Clear goal setting and evaluation at each step
The `enable_memory` parameter has been removed as the new system provides better context management by default.
<Note>
If you're upgrading from an older version that used `enable_memory`, simply remove this parameter. The agent will automatically use the improved context management system.
</Note>

View File

@@ -0,0 +1,27 @@
---
title: "Basics"
description: ""
icon: "play"
---
---
```python
from browser_use import Agent, Browser, ChatOpenAI
browser = Browser(
headless=False, # Show browser window
window_size={'width': 1000, 'height': 700}, # Set window size
)
agent = Agent(
task='Search for Browser Use',
browser=browser,
llm=ChatOpenAI(model='gpt-4.1-mini'),
)
async def main():
await agent.run()
```

View File

@@ -0,0 +1,120 @@
---
title: "All Parameters"
description: "Complete reference for all browser configuration options"
icon: "sliders"
mode: "wide"
---
## Core Settings
- `cdp_url`: CDP URL for connecting to existing browser instance (e.g., `"http://localhost:9222"`)
- `is_local` (default: `True`): Whether this is a local browser instance. Set to `False` for remote browsers
## Display & Appearance
- `headless` (default: `None`): Run browser without UI. Auto-detects based on display availability (`True`/`False`/`None`)
- `window_size`: Browser window size for headful mode. Use dict `{'width': 1920, 'height': 1080}` or `ViewportSize` object
- `window_position` (default: `{'width': 0, 'height': 0}`): Window position from top-left corner in pixels
- `viewport`: Content area size, same format as `window_size`. Use `{'width': 1280, 'height': 720}` or `ViewportSize` object
- `no_viewport` (default: `None`): Disable viewport emulation, content fits to window size
- `device_scale_factor`: Device scale factor (DPI). Set to `2.0` or `3.0` for high-resolution screenshots
- `color_scheme` (default: `'light'`): Preferred color scheme (`'light'`, `'dark'`, `'no-preference'`)
- `contrast` (default: `'no-preference'`): High contrast mode (`'no-preference'`, `'more'`)
- `reduced_motion` (default: `'no-preference'`): Motion preference (`'reduce'`, `'no-preference'`)
- `forced_colors` (default: `'none'`): Forced colors mode (`'active'`, `'none'`)
## Browser Behavior
- `stealth` (default: `False`): Use stealth techniques to avoid bot detection
- `keep_alive` (default: `None`): Keep browser running after agent completes
- `allowed_domains`: Restrict navigation to specific domains. Domain pattern formats:
- `'example.com'` - Matches only `https://example.com/*`
- `'*.example.com'` - Matches `https://example.com/*` and any subdomain `https://*.example.com/*`
- `'http*://example.com'` - Matches both `http://` and `https://` protocols
- `'chrome-extension://*'` - Matches any Chrome extension URL
- **Security**: Wildcards in TLD (e.g., `example.*`) are **not allowed** for security
- Use list like `['*.google.com', 'https://example.com', 'chrome-extension://*']`
- `enable_default_extensions` (default: `True`): Load automation extensions (uBlock Origin, cookie handlers, ClearURLs)
- `cross_origin_iframes` (default: `False`): Enable cross-origin iframe support (may cause complexity)
## User Data & Profiles
- `user_data_dir` (default: auto-generated temp): Directory for browser profile data. Use `None` for incognito mode
- `profile_directory` (default: `'Default'`): Chrome profile subdirectory name (`'Profile 1'`, `'Work Profile'`, etc.)
- `storage_state`: Browser storage state (cookies, localStorage). Can be file path string or dict object
- `cookies_file`: **DEPRECATED** - Use `storage_state` instead
## Network & Security
- `proxy`: Proxy configuration using `ProxySettings(server='http://host:8080', bypass='localhost,127.0.0.1', username='user', password='pass')`
- `permissions` (default: `['clipboardReadWrite', 'notifications']`): Browser permissions to grant. Use list like `['camera', 'microphone', 'geolocation']`
- `bypass_csp` (default: `False`): Bypass Content Security Policy (increases bot detection risk)
- `ignore_https_errors` (default: `False`): Ignore HTTPS certificate errors
- `extra_http_headers`: Additional HTTP headers sent with every request. Use dict like `{'Accept-Language': 'en-US', 'Custom-Header': 'value'}`
- `headers`: Additional HTTP headers for connect requests (remote browsers only)
## Browser Launch
- `executable_path`: Path to browser executable for custom installations. Platform examples:
- macOS: `'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'`
- Windows: `'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe'`
- Linux: `'/usr/bin/google-chrome'`
- `channel`: Browser channel (`'chromium'`, `'chrome'`, `'chrome-beta'`, `'msedge'`, etc.)
- `args`: Additional command-line arguments for the browser. Use list format: `['--disable-gpu', '--custom-flag=value', '--another-flag']`
- `env`: Environment variables for browser process. Use dict like `{'DISPLAY': ':0', 'LANG': 'en_US.UTF-8', 'CUSTOM_VAR': 'test'}`
- `chromium_sandbox` (default: `True` except in Docker): Enable Chromium sandboxing for security
- `devtools` (default: `False`): Open DevTools panel automatically (requires `headless=False`)
- `ignore_default_args`: List of default args to disable, or `True` to disable all. Use list like `['--enable-automation', '--disable-extensions']`
## Timing & Performance
- `slow_mo` (default: `0.0`): Slow down actions by this many milliseconds
- `timeout` (default: `30000`): Default timeout for browser operations in milliseconds
- `default_timeout`: Default timeout for playwright calls in milliseconds
- `default_navigation_timeout`: Default timeout for page navigation in milliseconds
- `minimum_wait_page_load_time` (default: `0.25`): Minimum time to wait before capturing page state in seconds
- `wait_for_network_idle_page_load_time` (default: `0.5`): Time to wait for network activity to cease in seconds
- `maximum_wait_page_load_time` (default: `5.0`): Maximum time to wait for page load in seconds
- `wait_between_actions` (default: `0.5`): Time to wait between agent actions in seconds
## AI Integration
- `highlight_elements` (default: `True`): Highlight interactive elements for AI vision
- `viewport_expansion` (default: `500`): Viewport expansion in pixels for AI context
- `include_dynamic_attributes` (default: `True`): Include dynamic attributes in selectors for better element identification
## Downloads & Files
- `accept_downloads` (default: `True`): Automatically accept all downloads
- `downloads_path`: Directory for downloaded files. Use string like `'./downloads'` or `Path` object
- `auto_download_pdfs` (default: `True`): Automatically download PDFs instead of viewing in browser
## Device Emulation
- `user_agent`: Custom user agent string. Example: `'Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X)'`
- `is_mobile` (default: `False`): Enable mobile viewport and touch events
- `has_touch` (default: `False`): Enable touch events for mobile emulation
- `locale`: User locale like `'en-GB'`, `'de-DE'`, `'ja-JP'`
- `timezone_id`: Timezone identifier like `'America/New_York'`, `'Europe/London'`, `'UTC'`
- `screen`: Screen size information, same format as `window_size`
## Recording & Debugging
- `record_video_dir`: Directory to save video recordings as `.webm` files
- `record_har_path`: Path to save network trace files as `.har` format
- `traces_dir`: Directory to save complete Playwright trace files for debugging
- `record_har_content` (default: `'embed'`): HAR content mode (`'omit'`, `'embed'`, `'attach'`)
- `record_har_mode` (default: `'full'`): HAR recording mode (`'full'`, `'minimal'`)
## Advanced Options
- `disable_security` (default: `False`): ⚠️ **NOT RECOMMENDED** - Disables all browser security features
- `deterministic_rendering` (default: `False`): ⚠️ **NOT RECOMMENDED** - Forces consistent rendering but reduces performance
- `java_script_enabled` (default: `True`): Enable/disable JavaScript execution
- `offline` (default: `False`): Start browser in offline mode
- `strict_selectors` (default: `False`): Use strict selector matching
- `base_url`: Base URL for relative navigation
- `service_workers` (default: `'allow'`): Service worker policy (`'allow'`, `'block'`)
---
## Outdated BrowserProfile
For backward compatibility, you can pass all the parameters from above to the `BrowserProfile` and then to the `Browser`.
```python
from browser_use import BrowserProfile
profile = BrowserProfile(headless=False, stealth=True)
browser = Browser(browser_profile=profile)
```
## Browser vs BrowserSession
`Browser` is an alias for `BrowserSession` - they are exactly the same class.
Use `Browser` for cleaner, more intuitive code.

View File

@@ -0,0 +1,57 @@
---
title: "Real Browser"
description: ""
icon: "arrow-right-to-bracket"
---
Connect your existing Chrome browser to preserve authentication.
## Basic Example
```python
from browser_use import Agent, Browser, ChatOpenAI
# Connect to your existing Chrome browser
browser = Browser(
executable_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
user_data_dir='~/Library/Application Support/Google/Chrome',
profile_directory='Default',
)
agent = Agent(
task='Visit https://duckduckgo.com and search for "browser-use founders"',
browser=browser,
llm=ChatOpenAI(model='gpt-4.1-mini'),
)
async def main():
await agent.run()
```
> **Note:** You need to fully close Chrome before running this example.
> **Note:** Google blocks this approach currently so we use DuckDuckGo instead.
## How it Works
1. **`executable_path`** - Path to your Chrome installation
2. **`user_data_dir`** - Your Chrome profile folder (keeps cookies, extensions, bookmarks)
3. **`profile_directory`** - Specific profile name (Default, Profile 1, etc.)
## Platform Paths
```python
# macOS
executable_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
user_data_dir='~/Library/Application Support/Google/Chrome'
# Windows
executable_path='C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe'
user_data_dir='%LOCALAPPDATA%\\Google\\Chrome\\User Data'
# Linux
executable_path='/usr/bin/google-chrome'
user_data_dir='~/.config/google-chrome'
```

View File

@@ -0,0 +1,38 @@
---
title: "Remote Browser"
description: ""
icon: "cloud"
mode: "wide"
---
### CDP URL Connection
Get a CDP URL from your favorite browser provider like AnchorBrowser, HyperBrowser, BrowserBase, Steel.dev, etc.:
```python
from browser_use import Agent, Browser
# Connect to remote browser
browser = Browser(
cdp_url="http://remote-server:9222",
is_local=False # Important: don't try to launch local browser
)
agent = Agent(task="", browser=browser)
```
### Proxy Connection
```python
from browser_use.browser.profile import ProxySettings
browser = Browser(
cdp_url="http://remote-server:9222",
proxy=ProxySettings(
server="http://proxy-server:8080",
username="proxy-user",
password="proxy-pass"
),
is_local=False
)
```

View File

@@ -1,964 +0,0 @@
---
title: "Browser Settings"
description: "Launch or connect to an existing browser and configure it to your needs."
icon: "globe"
---
Browser Use uses [playwright](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context) (or [patchright](https://github.com/Kaliiiiiiiiii-Vinyzu/patchright)) to manage its connection with a real browser.
---
**To launch or connect to a browser**, pass any playwright / browser-use configuration arguments you want to `BrowserSession(...)`:
```python
from browser_use import BrowserSession, Agent
browser_session = BrowserSession(
headless=True,
viewport={'width': 964, 'height': 647},
user_data_dir='~/.config/browseruse/profiles/default',
)
agent = Agent('fill out the form on this page', browser_session=browser_session)
```
<Note>
The new `BrowserSession` & `BrowserProfile` accept all the same arguments that
Playwright's
[`launch_persistent_context(...)`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context)
takes, giving you full control over browser settings at launch. (see below for
the full list)
</Note>
---
## `BrowserSession`
- `BrowserSession(**params)` is Browser Use's object that tracks a connection to a running browser. It sets up:
- the `playwright`, `browser`, `browser_context`, and `page` objects and tracks which tabs the agent/human are focused on
- methods to interact with the browser window, apply config needed by the Agent, and run the `DOMService` for element detection
- it can take a `browser_profile=BrowserProfile(...)` template containing some config defaults, and `**kwargs` session-specific config overrides
### Browser Connection Parameters
Provide any one of these options to connect to an existing browser. These options are session-specific and cannot be stored in a `BrowserProfile(...)` template.
#### `wss_url`
```python
wss_url: str | None = None
```
WSS URL of the playwright-protocol browser server to connect to. See here for [WSS connection instructions](https://docs.browser-use.com/customize/real-browser#method-d%3A-connect-to-remote-playwright-node-js-browser-server-via-wss-url).
#### `cdp_url`
```python
cdp_url: str | None = None
```
CDP URL of the browser to connect to (e.g. `http://localhost:9222`). See here for [CDP connection instructions](https://docs.browser-use.com/customize/real-browser#method-e%3A-connect-to-remote-browser-via-cdp-url).
#### `browser_pid`
```python
browser_pid: int | None = None
```
PID of a running chromium-based browser process to connect to on localhost. See here for [connection via pid](https://docs.browser-use.com/customize/real-browser#method-c%3A-connect-to-local-browser-using-browser-pid) instructions.
<Note>
For web scraping tasks on sites that restrict automated access, we recommend
using [our cloud](https://browser-use.com) or an external browser provider for
better reliability. See the [Connect to your Browser](/customize/real-browser)
guide for detailed connection instructions.
</Note>
### Session-Specific Parameters
#### `browser_profile`
```python
browser_profile: BrowserProfile = BrowserProfile()
```
Optional `BrowserProfile` template containing default config to use for the `BrowserSession`. (see below for more info)
#### `**kwargs`
`BrowserSession` can also accept _all_ of the parameters [below](#browserprofile).
(the parameters _above_ this point are specific to `BrowserSession` and cannot be stored in a `BrowserProfile` template)
Extra `**kwargs` passed to `BrowserSession(...)` act as session-specific overrides to the `BrowserProfile(...)` template.
```python
base_iphone13 = BrowserProfile(
storage_state='/tmp/auth.json', # share cookies between parallel browsers
**playwright.devices['iPhone 13'],
timezone_id='UTC',
)
usa_phone = BrowserSession(
browser_profile=base_iphone13,
timezone_id='America/New_York', # kwargs override values in base_iphone13
)
eu_phone = BrowserSession(
browser_profile=base_iphone13,
timezone_id='Europe/Paris',
)
usa_agent = Agent(task='show me todays schedule...', browser_session=usa_phone)
eu_agent = Agent(task='show me todays schedule...', browser_session=eu_phone)
await asyncio.gather(usa_agent.run(), eu_agent.run())
```
---
## `BrowserProfile`
A `BrowserProfile` is a 📋 config template for a 🎭 `BrowserSession(...)`.
It's basically just a typed + validated version of a `dict` to hold config.
When you find yourself storing or re-using many browser configs, you can upgrade from:
```diff
- config = {key: val, key: val, ...}
- BrowserSession(**config)
```
To this instead:
```diff
+ config = BrowserProfile(key=val, key=val, ...)
+ BrowserSession(browser_profile=config)
```
<Tip>
You don't ever *need* to use a `BrowserProfile`, you can always pass config parameters directly to `BrowserSession`:
```python
session = BrowserSession(headless=True, storage_state='auth.json', viewport={...}, ...)
```
</Tip>
`BrowserProfile` is optional, but it provides a number of benefits over a normal `dict` for holding config:
- has type hints and pydantic field descriptions that show up in your IDE
- validates config at runtime quickly without having to start a browser
- provides helper methods to autodetect screen size, set up local paths, save/load config as json, and more...
<Tip>
`BrowserProfile`s are designed to easily be given 🆔 `uuid`s and put in a database + made editable by users.
`BrowserSession`s get their own 🆔 `uuid`s and can be linked by 🖇 foreign key to whatever `BrowserProfile`s they use.
This cleanly separates the per-connection rows from the bulky re-usable config and avoids wasting space in your db.
This is useful because a user may only have 2 or 3 profiles, but they could have 100k+ sessions within a few months.
</Tip>
`BrowserProfile` and `BrowserSession` can both take any of the:
- [Playwright parameters](#playwright)
- [Browser-Use parameters](#browser-use-parameters) (extra options we provide on top of `playwright`)
The only parameters `BrowserProfile` can NOT take are the session-specific connection parameters and live playwright objects:
`cdp_url`, `wss_url`, `browser_pid`, `page`, `browser`, `browser_context`, `playwright`, etc.
### Basic Example
```python
from browser_use.browser import BrowserProfile
profile = BrowserProfile(
stealth=True,
storage_state='/tmp/google_docs_cookies.json',
allowed_domains=['docs.google.com', 'https://accounts.google.com'],
viewport={'width': 396, 'height': 774},
# ... playwright args / browser-use config args ...
)
phone1 = BrowserSession(browser_profile=profile, device_scale_factor=1)
phone2 = BrowserSession(browser_profile=profile, device_scale_factor=2)
phone3 = BrowserSession(browser_profile=profile, device_scale_factor=3)
```
### Browser-Use Parameters
These parameters control Browser Use-specific features, and are outside the standard playwright set. They can be passed to `BrowserSession(...)` and/or stored in a `BrowserProfile` template.
#### `keep_alive`
```python
keep_alive: bool | None = None
```
If `True` it won't close the browser after the first `agent.run()` ends. Useful for running multiple tasks with the same browser instance. If this is left as `None` and the Agent launched its own browser, the default is to close the browser after the agent completes. If the agent connected to an existing browser then it will leave it open.
#### `stealth`
```python
stealth: bool = False
```
Set to `True` to use [`patchright`](https://github.com/Kaliiiiiiiiii-Vinyzu/patchright) to avoid bot-blocking. (Might cause issues with some sites, requires manual testing.)
<a name="restrict-urls"></a>
#### `allowed_domains`
```python
allowed_domains: list[str] | None = None
```
List of allowed domains for navigation. If None, all domains are allowed.
Example: `['google.com', '*.wikipedia.org']` - Here the agent will only be able to access `google.com` exactly and `wikipedia.org` + `*.wikipedia.org`.
Glob patterns are supported:
- `['example.com']` ✅ will match only `https://example.com/*` exactly, subdomains will not be allowed.
It's always most secure to explicitly list all the domains you want to grant access to, including schemes, e.g.
`['https://google.com', 'http*://www.google.com', 'https://myaccount.google.com', 'https://mail.google.com', 'https://docs.google.com']`
- `['*.example.com']` ⚠️ **CAUTION** this will match `https://example.com` and _all_ its subdomains.
Make sure _all_ the subdomains are safe for the agent! `abc.example.com`, `def.example.com`, ..., `useruploads.example.com`, `admin.example.com`
#### `disable_security`
```python
disable_security: bool = False
```
<Warning>
⚠️ Setting this to `True` is NOT RECOMMENDED. It completely disables all basic
browser security features.
</Warning>
This option is for debugging and interacting across cross-origin iFrames when there are no cookies or sensitive data in use.
It's very INSECURE: under no circumstances should you enable this while using real cookies or sensitive data. Visiting a single untrusted URL in this mode can immediately compromise all the profile cookies. Consider a less nuclear option like `bypass_csp=True` instead.
#### `deterministic_rendering`
```python
deterministic_rendering: bool = False
```
<Warning>
⚠️ Setting this to `True` is NOT RECOMMENDED. It can be glitchy & slow, and it
increases chances of getting blocked by anti-bot systems. It's mostly useful
for QA applications.
</Warning>
It's a shortcut for adding these launch args:
- `--deterministic-mode`
- `--js-flags=--random-seed=1157259159`
- `--force-color-profile=srgb`
- `--font-render-hinting=none`
- `--force-device-scale-factor=2`
- `--enable-webgl`
With these options fonts will look slightly worse than on macOS and slightly different than on Windows, but rendering will be more consistent between OSs and runs. The cost is performance and stability. Software rendering is slower, easier to fingerprint as a bot, and sometimes glitchy. You likely _don't need this option_ unless you're trying to do screenshot diffing.
#### `highlight_elements`
```python
highlight_elements: bool = True
```
Highlight interactive elements on the screen with colorful bounding boxes.
#### `viewport_expansion`
```python
viewport_expansion: int = 500
```
Viewport expansion in pixels. With this you can control how much of the page is included in the context of the LLM:
- `-1`: All elements from the entire page will be included, regardless of visibility (highest token usage but most complete).
- `0`: Only elements which are currently visible in the viewport will be included.
- `500` (default): Elements in the viewport plus an additional 500 pixels in each direction will be included, providing a balance between context and token usage.
#### `include_dynamic_attributes`
```python
include_dynamic_attributes: bool = True
```
Include dynamic attributes in selectors for better element targeting.
#### `minimum_wait_page_load_time`
```python
minimum_wait_page_load_time: float = 0.25
```
Minimum time to wait before capturing page state for LLM input.
#### `wait_for_network_idle_page_load_time`
```python
wait_for_network_idle_page_load_time: float = 0.5
```
Time to wait for network activity to cease. Increase to 3-5s for slower websites. This tracks essential content loading, not dynamic elements like videos.
#### `maximum_wait_page_load_time`
```python
maximum_wait_page_load_time: float = 5.0
```
Maximum time to wait for page load before proceeding.
#### `wait_between_actions`
```python
wait_between_actions: float = 0.5
```
Time to wait between agent actions.
#### `cookies_file`
```python
cookies_file: str | None = None
```
JSON file path to save cookies to.
<Warning>
This option is DEPRECATED. Use [`storage_state`](#storage-state) instead, it's the standard playwright format and also supports `localStorage` and `indexedDB`!
The library will automatically save a new `storage_state.json` next to any `cookies_file` path you provide; just use `storage_state='path/to/storage_state.json'` to switch to the new format:
`cookies_file.json`: `[{cookie}, {cookie}, {cookie}]`
⬇️
`storage_state.json`: `{"cookies": [{cookie}, {cookie}, {cookie}], "origins": {... optional localstorage state ...}}`
Or run `playwright open https://example.com/ --save-storage=storage_state.json` and log into any sites you need to generate a fresh storage state file.
</Warning>
#### `profile_directory`
```python
profile_directory: str = 'Default'
```
Chrome profile subdirectory name inside of your `user_data_dir` (e.g. `Default`, `Profile 1`, `Work`, etc.).
No need to set this unless you have multiple profiles set up in a single `user_data_dir` and need to use a specific one.
#### `window_position`
```python
window_position: dict | None = {"width": 0, "height": 0}
```
Window position from top-left corner.
#### `save_recording_path`
```python
save_recording_path: str | None = None
```
Directory path for saving video recordings.
#### `trace_path`
```python
trace_path: str | None = None
```
Directory path for saving Agent trace files. Files are automatically named as `{trace_path}/{context_id}.zip`.
---
<a name="playwright-parameters"></a>
<a name="playwright"></a>
### Playwright Launch Options
All the parameters below are standard playwright parameters and can be passed to both `BrowserSession` and `BrowserProfile`.
They are defined in `browser_use/browser/profile.py`. See here for the [official Playwright documentation](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context) for all of these options.
#### `headless`
```python
headless: bool | None = None
```
Runs the browser without a visible UI. If None, auto-detects based on display availability. If you set `headless=False` on a server with no monitor attached, the browser will fail to launch (use `xvfb` + vnc to give a headless server a virtual display you can remote control).
`headless=False` is recommended for maximum stealth and is required for human-in-the-loop workflows.
#### `channel`
```python
channel: BrowserChannel = 'chromium'
```
Browser channel: `'chromium'` (default when `stealth=False`), `'chrome'` (default when `stealth=True`), `'chrome-beta'`, `'chrome-dev'`, `'chrome-canary'`, `'msedge'`, `'msedge-beta'`, `'msedge-dev'`, `'msedge-canary'`
Don't worry, other chromium-based browsers not in this list (e.g. `brave`) are still supported if you provide your own [`executable_path`](#executable_path), just set it to `chromium` for those.
#### `executable_path`
```python
executable_path: str | Path | None = None
```
Path to browser executable for custom installations.
#### `user_data_dir`
```python
user_data_dir: str | Path | None = '~/.config/browseruse/profiles/default'
```
Directory for browser profile data. Set to `None` to use an ephemeral temporary profile (aka incognito mode).
Multiple running browsers **cannot share a single `user_data_dir` at the same time**. You must set it to `None` or
provide a unique `user_data_dir` per-session if you plan to run multiple browsers.
The browser version run must always be equal to or greater than the version used to create the `user_data_dir`.
If you see errors like `Failed to parse Extensions` or similar and failures when launching, you're attempting to run an older browser with an incompatible `user_data_dir` that's already been migrated to a newer schema version.
#### `args`
```python
args: list[str] = []
```
Additional command-line arguments to pass to the browser. See here for the [full list of available chrome launch options](https://peter.sh/experiments/chromium-command-line-switches/).
#### `ignore_default_args`
```python
ignore_default_args: list[str] | bool = ['--enable-automation', '--disable-extensions']
```
List of default CLI args to stop playwright from including when launching chrome. Set it to `True` to disable _all_ default options (not recommended).
#### `env`
```python
env: dict[str, str] = {}
```
Extra environment variables to set when launching browser. e.g. `{'DISPLAY': ':1'}` to use a specific X11 display.
#### `chromium_sandbox`
```python
chromium_sandbox: bool = not IN_DOCKER
```
Whether to enable Chromium sandboxing (recommended for security). Should always be `False` when running inside Docker
because Docker provides its own sandboxing, which can conflict with Chrome's.
#### `devtools`
```python
devtools: bool = False
```
Whether to open DevTools panel automatically (only works when `headless=False`).
#### `slow_mo`
```python
slow_mo: float = 0
```
Slow down actions by this many milliseconds.
#### `timeout`
```python
timeout: float = 30000
```
Default timeout in milliseconds for connecting to a remote browser.
#### `accept_downloads`
```python
accept_downloads: bool = True
```
Whether to automatically accept all downloads.
#### `proxy`
```python
proxy: ProxySettings | None = None
```
Proxy settings (typed). Example:
```python
proxy=ProxySettings(server="http://proxy.com:8080", username="user", password="pass")
```
#### `permissions`
```python
permissions: list[str] = ['clipboard-read', 'clipboard-write', 'notifications']
```
Browser permissions to grant. See here for the [full list of available permissions](https://playwright.dev/python/docs/api/class-browsercontext#browser-context-grant-permissions).
#### `storage_state`
```python
storage_state: str | Path | dict | None = None
```
Browser storage state (cookies, localStorage). Can be file path or dict. See here for the [Playwright `storage_state` documentation](https://playwright.dev/python/docs/api/class-browsercontext#browser-context-storage-state) on how to use it.
This option is only applied when launching a new browser using the default builtin playwright chromium and `user_data_dir=None` is set.
```bash
# to create a storage state file, run the following and log into the sites you need once the browser opens:
playwright open https://example.com/ --save-storage=./storage_state.json
# then setup a BrowserSession with storage_state='./storage_state.json' and user_data_dir=None to use it
```
### Playwright Timing Settings
These control how the browser waits for CDP API calls to complete and pages to load.
#### `default_timeout`
```python
default_timeout: float | None = None
```
Default timeout for Playwright operations in milliseconds (e.g. `10000` if you want 10s).
#### `default_navigation_timeout`
```python
default_navigation_timeout: float | None = None
```
Default timeout for page navigation in milliseconds (e.g. `30000` if you want 30s).
### Playwright Viewport Options
Configure browser window size, viewport, and display properties:
#### `user_agent`
```python
user_agent: str | None = None
```
Specific user agent to use in this context. See [`playwright.devices`](https://playwright.dev/python/docs/emulation).
#### `is_mobile`
```python
is_mobile: bool = False
```
Whether the meta viewport tag is taken into account and touch events are enabled.
#### `has_touch`
```python
has_touch: bool = False
```
Specifies if viewport supports touch events.
#### `geolocation`
```python
geolocation: dict | None = None
```
Geolocation coordinates. Example: `{"latitude": 59.95, "longitude": 30.31667}`
#### `locale`
```python
locale: str | None = None
```
Specify user locale, for example `en-GB`, `de-DE`, etc. Locale will affect the `navigator.language` value, `Accept-Language` request header value as well as number and date formatting rules.
#### `timezone_id`
```python
timezone_id: str | None = None
```
Timezone identifier (e.g. `'America/New_York'` or `'UTC'`).
#### `window_size`
```python
window_size: dict | None = None
```
Browser window size for headful mode. Example: `{"width": 1920, "height": 1080}`
#### `viewport`
```python
viewport: dict | None = None
```
Viewport size with `width` and `height`. Example: `{"width": 1280, "height": 720}`
#### `no_viewport`
```python
no_viewport: bool | None = not headless
```
Disable fixed viewport. Content will resize with window.
_Tip:_ don't use this parameter, it's a playwright standard parameter but it's redundant and only serves to override the `viewport` setting above.
A viewport is _always_ used in headless mode regardless of this setting, and is _never_ used in headful mode unless you pass `viewport={width, height}` explicitly.
#### `device_scale_factor`
```python
device_scale_factor: float | None = None
```
Device scale factor (DPI). Useful for high-resolution screenshots (set it to 2 or 3).
#### `screen`
```python
screen: dict | None = None
```
Screen size available to browser. Auto-detected if not specified.
#### `color_scheme`
```python
color_scheme: ColorScheme = 'light'
```
Preferred color scheme: `'light'`, `'dark'`, `'no-preference'`
#### `contrast`
```python
contrast: Contrast = 'no-preference'
```
Contrast preference: `'no-preference'`, `'more'`, `'null'`
#### `reduced_motion`
```python
reduced_motion: ReducedMotion = 'no-preference'
```
Reduced motion preference: `'reduce'`, `'no-preference'`, `'null'`
#### `forced_colors`
```python
forced_colors: ForcedColors = 'none'
```
Forced colors mode: `'active'`, `'none'`, `'null'`
#### `**playwright.devices[...]`
Playwright provides launch & context arg presets to [emulate common device fingerprints](https://playwright.dev/python/docs/emulation).
```python
BrowserProfile(
...
**playwright.devices['iPhone 13'], # playwright = await async_playwright().start()
)
```
Because `BrowserSession` and `BrowserProfile` take all the standard playwright args, we are able to support these device presets as well.
### Playwright Security Options
> See `allowed_domains` above too!
#### `offline`
```python
offline: bool = False
```
Emulate network being offline.
#### `http_credentials`
```python
http_credentials: dict | None = None
```
Credentials for HTTP authentication.
#### `extra_http_headers`
```python
extra_http_headers: dict[str, str] = {}
```
Additional HTTP headers to be sent with every request.
#### `ignore_https_errors`
```python
ignore_https_errors: bool = False
```
Whether to ignore HTTPS errors when sending network requests.
#### `bypass_csp`
```python
bypass_csp: bool = False
```
<Warning>
Enabling this can increase security risk and makes the bot very easy to
fingerprint. (Cloudflare, Datadome, etc. will block you)
</Warning>
Toggles bypassing Content-Security-Policy. Enabling reduces some CSP-related errors that can arise from automation scripts injected into pages with strict policies that forbid inline scripts.
#### `java_script_enabled`
```python
java_script_enabled: bool = True
```
<Warning>
Not recommended, untested with Browser Use and likely breaks things.
</Warning>
Whether or not to enable JavaScript in the context.
#### `service_workers`
```python
service_workers: ServiceWorkers = 'allow'
```
Whether to allow sites to register Service workers: `'allow'`, `'block'`
#### `base_url`
```python
base_url: str | None = None
```
Base URL to be used in `page.goto()` and similar operations.
#### `strict_selectors`
```python
strict_selectors: bool = False
```
If true, selector passed to Playwright methods will throw if more than one element matches.
#### `client_certificates`
```python
client_certificates: list[ClientCertificate] = []
```
Client certificates to be used with requests.
### Playwright Recording Options
Note: Browser Use also provides some of our own recording-related options not listed below (see above).
#### `record_video_dir`
<a name="record-video-dir"></a>
<a name="save-recording-path"></a>
```python
record_video_dir: str | Path | None = None
```
Directory to save `.webm` video recordings. [Playwright Docs: `record_video_dir`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context-option-record-video-dir)
<Note>
This parameter also has an alias `save_recording_path` for backwards
compatibility with past versions, but we recommend using the standard
Playwright name `record_video_dir` going forward.
</Note>
#### `record_video_size`
```python
record_video_size: dict | None = None
```
Video size. Example: `{"width": 1280, "height": 720}`. [Playwright Docs: `record_video_size`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context-option-record-video-size)
#### `record_har_path`
<a name="record-har-path"></a>
<a name="save-har-path"></a>
```python
record_har_path: str | Path | None = None
```
Path to save `.har` network trace files. [Playwright Docs: `record_har_path`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context-option-record-har-path)
<Note>
This parameter also has an alias `save_har_path` for backwards compatibility
with past versions, but we recommend using the standard Playwright name
`record_har_path` going forward.
</Note>
#### `record_har_content`
```python
record_har_content: RecordHarContent = 'embed'
```
How to persist HAR content: `'omit'`, `'embed'`, `'attach'`
#### `record_har_mode`
```python
record_har_mode: RecordHarMode = 'full'
```
HAR recording mode: `'full'`, `'minimal'`
#### `record_har_omit_content`
```python
record_har_omit_content: bool = False
```
Whether to omit request content from the HAR.
#### `record_har_url_filter`
```python
record_har_url_filter: str | Pattern | None = None
```
URL filter for HAR recording.
#### `downloads_path`
```python
downloads_path: str | Path | None = '~/.config/browseruse/downloads'
```
(aliases: `downloads_dir`, `save_downloads_path`)
Local filesystem directory to save browser file downloads to.
#### `traces_dir`
<a name="traces-dir"></a>
<a name="trace-path"></a>
```python
traces_dir: str | Path | None = None
```
Directory to save all-in-one trace files. Files are automatically named as `{traces_dir}/{context_id}.zip`. [Playwright Docs: `traces_dir`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context-option-traces-dir)
<Note>
This parameter also has an alias `trace_path` for backwards compatibility with
past versions, but we recommend using the standard Playwright name
`traces_dir` going forward.
</Note>
#### `handle_sighup`
```python
handle_sighup: bool = True
```
Whether playwright should swallow SIGHUP signals and kill the browser.
#### `handle_sigint`
```python
handle_sigint: bool = False
```
Whether playwright should swallow SIGINT signals and kill the browser.
#### `handle_sigterm`
```python
handle_sigterm: bool = False
```
Whether playwright should swallow SIGTERM signals and kill the browser.
---
## Full Example
```python
from browser_use import BrowserSession, BrowserProfile, Agent
browser_profile = BrowserProfile(
headless=False,
storage_state="path/to/storage_state.json",
wait_for_network_idle_page_load_time=3.0,
viewport={"width": 1280, "height": 1100},
locale='en-US',
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36',
highlight_elements=True,
viewport_expansion=500,
allowed_domains=['*.google.com', 'http*://*.wikipedia.org'],
user_data_dir=None,
)
browser_session = BrowserSession(
browser_profile=browser_profile,
headless=True, # extra kwargs to the session override the defaults in the profile
)
# you can drive a session without the agent / reuse it between agents
await browser_session.start()
page = await browser_session.get_current_page()
await page.goto('https://example.com/first/page')
async def run_search():
agent = Agent(
task='Your task',
llm=llm,
page=page, # optional: pass a specific playwright page to start on
browser_session=browser_session, # optional: pass an existing browser session to an agent
)
```
---
## Summary
- **BrowserSession** (defined in `browser_use/browser/session.py`) handles the live browser connection and runtime state
- **BrowserProfile** (defined in `browser_use/browser/profile.py`) is a template that can store default config parameters for a `BrowserSession(...)`
Configuration parameters defined in both scopes are consumed by these calls, depending on whether we're connecting to or launching a browser:
- `BrowserConnectArgs` - args for `playwright.BrowserType.connect_over_cdp(...)`
- `BrowserLaunchArgs` - args for `playwright.BrowserType.launch(...)`
- `BrowserNewContextArgs` - args for `playwright.Browser.new_context(...)`
- `BrowserLaunchPersistentContextArgs` - args for `playwright.BrowserType.launch_persistent_context(...)`
- Browser Use's own internal methods
For more details on Playwright's browser context options, see their [launch args documentation](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context).
---

View File

@@ -0,0 +1,45 @@
---
title: "Chain Agents"
description: "Chain multiple tasks together with the same agent and browser session."
icon: "link"
mode: "wide"
---
## Chain Agent Tasks
Keep your browser session alive and chain multiple tasks together. Perfect for conversational workflows or multi-step processes.
```python
import asyncio
from dotenv import load_dotenv
load_dotenv()
from browser_use import Agent, BrowserProfile
profile = BrowserProfile(keep_alive=True)
async def main():
agent = Agent(task="Go to reddit.com", browser_profile=profile)
await agent.run(max_steps=1)
while True:
user_response = input('\n👤 New task or "q" to quit: ')
if user_response.lower() == 'q':
break
agent.add_new_task(f'New task: {user_response}')
await agent.run()
if __name__ == '__main__':
asyncio.run(main())
```
## How It Works
1. **Persistent Browser**: `BrowserProfile(keep_alive=True)` prevents browser from closing between tasks
2. **Task Chaining**: Use `agent.add_new_task()` to add follow-up tasks
3. **Context Preservation**: Agent maintains memory and browser state across tasks
4. **Interactive Flow**: Perfect for conversational interfaces or complex workflows
<Note>
The browser session remains active throughout the entire chain, preserving all cookies, local storage, and page state.
</Note>

View File

@@ -1,7 +1,8 @@
---
title: "Custom Functions"
title: "Tools"
description: "Extend default agent and write custom action functions to do certain tasks"
icon: "function"
icon: "wrench"
mode: "wide"
---
Custom actions are functions *you* provide, that are added to our [default actions](https://github.com/browser-use/browser-use/blob/main/browser_use/controller/service.py) the agent can use to accomplish tasks.
@@ -59,10 +60,18 @@ When the LLM calls an action, it sees its argument names & types, and will provi
```python
@controller.action('Click element')
def click_element(css_selector: str, page: Page) -> ActionResult:
async def click_element(css_selector: str, browser_session: Browser) -> ActionResult:
# css_selector is an action param the LLM must provide when calling
# page is a special framework-provided param to access the browser APIs (see below)
await page.locator(css_selector).click()
# browser_session is a special framework-provided param to access the browser APIs (see below)
# Get the current CDP session to interact with the browser
cdp_session = await browser_session.get_or_create_cdp_session()
# Use CDP to evaluate JavaScript and click the element
await cdp_session.cdp_client.send.Runtime.evaluate(
params={'expression': f'document.querySelector("{css_selector}").click()'},
session_id=cdp_session.session_id,
)
return ActionResult(extracted_content=f"Clicked element {css_selector}")
```
@@ -89,12 +98,27 @@ class MyParams(BaseModel):
field4: str = Field(default='abc', description='Detailed description for the LLM')
@controller.action('My action', param_model=MyParams)
def my_action(params: MyParams, page: Page) -> ActionResult:
await page.keyboard.type(params.field2)
return ActionResult(extracted_content=f"Inputted {params} on {page.url}")
async def my_action(params: MyParams, browser_session: Browser) -> ActionResult:
# Get the current CDP session to interact with the browser
cdp_session = await browser_session.get_or_create_cdp_session()
# Use CDP to type text
await cdp_session.cdp_client.send.Input.insertText(
params={'text': params.field2},
session_id=cdp_session.session_id,
)
# Get current URL using CDP
result = await cdp_session.cdp_client.send.Runtime.evaluate(
params={'expression': 'window.location.href', 'returnByValue': True},
session_id=cdp_session.session_id,
)
current_url = result.get('result', {}).get('value', 'unknown')
return ActionResult(extracted_content=f"Inputted {params} on {current_url}")
```
Any special framework-provided arguments (e.g. `page`) will be passed as separate positional arguments after `params`.
Any special framework-provided arguments (e.g. `browser_session`) will be passed as separate positional arguments after `params`.
<Important>
To use a `BaseModel` the arg *must* be called `params`. Action function args are matched and filled like named arguments; arg order doesn't matter but names and types do.
@@ -104,47 +128,134 @@ To use a `BaseModel` the arg *must* be called `params`. Action function args are
These special action parameters are injected by the `Controller` and are passed as extra args to any actions that expect them.
For example, actions that need to run playwright code to interact with the browser should take the argument `page` or `browser_session`.
For example, actions that need to interact with the browser should take the `browser_session` argument.
- `page: Page` - The current Playwright page (shortcut for `browser_session.get_current_page()`)
- `browser_session: BrowserSession` - The current browser session (and playwright context via `browser_session.browser_context`)
- `browser_session: Browser` - The current browser session with access to CDP for browser interaction
- `context: AgentContext` - Any optional top-level context object passed to the Agent, e.g. `Agent(context=user_provided_obj)`
- `page_extraction_llm: BaseChatModel` - LLM instance used for page content extraction
- `available_file_paths: list[str]` - List of available file paths for upload / processing
- `has_sensitive_data: bool` - Whether the action content contains sensitive data markers (check this to avoid logging sensitive data to terminal by accident)
#### Example: Action uses the current `page`
<Note>
Browser Use has moved from Playwright to Chrome DevTools Protocol (CDP) for browser interaction. The `browser_session` provides access to CDP through `browser_session.agent_focus.cdp_client` or `await browser_session.get_or_create_cdp_session()`. Playwright is only used internally to install the browser binary, but all browser interaction is done via CDP.
</Note>
### Understanding the Browser Session Context
The `Browser` object provides multiple ways to interact with the browser:
#### 1. Direct CDP Access
```python
# Get the current CDP session
cdp_session = await browser_session.get_or_create_cdp_session()
# Execute JavaScript
result = await cdp_session.cdp_client.send.Runtime.evaluate(
params={'expression': 'document.title', 'returnByValue': True},
session_id=cdp_session.session_id,
)
# Click at coordinates
await cdp_session.cdp_client.send.Input.dispatchMouseEvent(
params={
'type': 'mousePressed',
'x': 100,
'y': 200,
'button': 'left',
'clickCount': 1,
},
session_id=cdp_session.session_id,
)
await cdp_session.cdp_client.send.Input.dispatchMouseEvent(
params={
'type': 'mouseReleased',
'x': 100,
'y': 200,
'button': 'left',
},
session_id=cdp_session.session_id,
)
```
#### 2. Event-Based Actions
```python
from browser_use.browser.events import ClickElementEvent, TypeTextEvent, NavigateToUrlEvent
# Get a DOM element first
element = await browser_session.get_dom_element_by_index(5)
# Dispatch events through the event bus
click_event = browser_session.event_bus.dispatch(ClickElementEvent(node=element))
await click_event
type_event = browser_session.event_bus.dispatch(TypeTextEvent(node=element, text="Hello"))
await type_event
navigate_event = browser_session.event_bus.dispatch(NavigateToUrlEvent(url="https://example.com"))
await navigate_event
```
#### 3. High-Level Browser Session Methods
```python
# Get current page information
state = await browser_session.get_browser_state_summary()
print(f"Current URL: {state.url}")
print(f"Page title: {state.title}")
# Take a screenshot
screenshot_path = await browser_session.take_screenshot()
# Get page HTML
html = await browser_session.get_page_html()
# Get all open tabs
tabs = await browser_session.get_tabs()
```
#### Example: Action uses the current browser session
```python
from browser_use.browser.types import Page
from browser_use import Controller, ActionResult
from browser_use import Browser, Controller, ActionResult
controller = Controller()
@controller.action('Type keyboard input into a page')
async def input_text_into_page(text: str, page: Page) -> ActionResult:
await page.keyboard.type(text)
return ActionResult(extracted_content='Website opened')
async def input_text_into_page(text: str, browser_session: Browser) -> ActionResult:
# Get the current CDP session to interact with the browser
cdp_session = await browser_session.get_or_create_cdp_session()
# Use CDP to type text
await cdp_session.cdp_client.send.Input.insertText(
params={'text': text},
session_id=cdp_session.session_id,
)
return ActionResult(extracted_content='Text input completed')
```
#### Example: Action uses the `browser_context`
#### Example: Action uses browser session for tab management
```python
from browser_use import BrowserSession, Controller, ActionResult
from browser_use import Browser, Controller, ActionResult
from browser_use.browser.events import NavigateToUrlEvent, SwitchTabEvent
controller = Controller()
@controller.action('Open website')
async def open_website(url: str, browser_session: BrowserSession) -> ActionResult:
# find matching existing tab by looking through all pages in playwright browser_context
all_tabs = await browser_session.browser_context.pages
for tab in all_tabs:
async def open_website(url: str, browser_session: Browser) -> ActionResult:
# Get all open tabs
tabs = await browser_session.get_tabs()
# Check if URL is already open in any tab
for tab in tabs:
if tab.url == url:
await tab.bring_to_foreground()
return ActionResult(extracted_content=f'Switched to tab with url {url}')
# otherwise, create a new tab
new_tab = await browser_session.browser_context.new_page()
await new_tab.goto(url)
# Switch to existing tab using events
switch_event = browser_session.event_bus.dispatch(SwitchTabEvent(target_id=tab.target_id))
await switch_event
return ActionResult(extracted_content=f'Switched to existing tab with url {url}')
# Otherwise, open URL in a new tab using events
navigate_event = browser_session.event_bus.dispatch(NavigateToUrlEvent(url=url, new_tab=True))
await navigate_event
return ActionResult(extracted_content=f'Opened new tab with url {url}')
```
@@ -155,15 +266,15 @@ async def open_website(url: str, browser_session: BrowserSession) -> ActionResul
## Important Rules
1. **Return an [`ActionResult`](https://github.com/search?q=repo%3Abrowser-use%2Fbrowser-use+%22class+ActionResult%28BaseModel%29%22&type=code)**: All actions should return an `ActionResult | str | None`. The stringified version of the result is passed back to the LLM, and optionally persisted in the long-term memory when `ActionResult(..., include_in_memory=True)`.
2. **Type hints on arguments are required**: They are used to verify that action params don't conflict with special arguments injected by the controller (e.g. `page`)
2. **Type hints on arguments are required**: They are used to verify that action params don't conflict with special arguments injected by the controller (e.g. `browser_session`)
3. **Action functions called directly must be passed kwargs**: When calling actions from other actions or Python code, you must **pass all parameters as kwargs only**, even though the actions are usually defined using positional args (for the same reasons as [pluggy](https://pluggy.readthedocs.io/en/stable/index.html#calling-hooks)).
Action arguments are always matched by name and type, **not** positional order, so this helps prevent ambiguity / reordering issues while keeping action signatures short.
```python
@controller.action('Fill in the country form field')
def input_country_field(country: str, page: Page) -> ActionResult:
await some_action(123, page=page) # ❌ not allowed: positional args, use kwarg syntax when calling
await some_action(abc=123, page=page) # ✅ allowed: action params & special kwargs
await some_other_action(params=OtherAction(abc=123), page=page) # ✅ allowed: params=model & special kwargs
async def input_country_field(country: str, browser_session: Browser) -> ActionResult:
await some_action(123, browser_session=browser_session) # ❌ not allowed: positional args, use kwarg syntax when calling
await some_action(abc=123, browser_session=browser_session) # ✅ allowed: action params & special kwargs
await some_other_action(params=OtherAction(abc=123), browser_session=browser_session) # ✅ allowed: params=model & special kwargs
```
```python
@@ -173,12 +284,12 @@ class PinCodeParams(BaseModel):
retries: int = 3 # ✅ supports optional/defaults
@controller.action('...', param_model=PinCodeParams)
async def input_pin_code(params: PinCodeParams, page: Page): ... # ✅ special params at the end
async def input_pin_code(params: PinCodeParams, browser_session: Browser): ... # ✅ special params at the end
# Using function arguments to define action params
async def input_pin_code(code: int, retries: int, page: Page): ... # ✅ params first, special params second, no defaults
async def input_pin_code(code: int, retries: int, browser_session: Browser): ... # ✅ params first, special params second, no defaults
async def input_pin_code(code: int, retries: int=3): ... # ✅ defaults ok only if no special params needed
async def input_pin_code(code: int, retries: int=3, page: Page): ... # ❌ Python SyntaxError! not allowed
async def input_pin_code(code: int, retries: int=3, browser_session: Browser): ... # ❌ Python SyntaxError! not allowed
```
@@ -228,23 +339,8 @@ agent = Agent(controller=controller, ...)
```
If you want actions to only be available on certain pages, and to not tell the LLM about them on other pages,
you can use the `allowed_domains` and `page_filter`:
```python
from pydantic import BaseModel
from browser_use import Controller, ActionResult
controller = Controller()
async def is_ai_allowed(page: Page):
if api.some_service.check_url(page.url):
logger.warning('Allowing AI agent to visit url:', page.url)
return True
return False
@controller.action('Fill out secret_form', allowed_domains=['https://*.example.com'], page_filter=is_ai_allowed)
@controller.action('Fill out secret_form', allowed_domains=['https://*.example.com'])
def fill_out_form(...) -> ActionResult:
... will only be runnable by LLM on pages that match https://*.example.com *AND* where is_ai_allowed(page) returns True
... will only be runnable by LLM on pages that match https://*.example.com
```

View File

@@ -0,0 +1,97 @@
---
title: "Fast Agent"
description: "Optimize agent performance for maximum speed and efficiency."
icon: "bolt"
mode: "wide"
---
```python
import asyncio
from dotenv import load_dotenv
load_dotenv()
from browser_use import Agent, BrowserProfile
# Speed optimization instructions for the model
SPEED_OPTIMIZATION_PROMPT = """
Speed optimization instructions:
- Be extremely concise and direct in your responses
- Get to the goal as quickly as possible
- Use multi-action sequences whenever possible to reduce steps
"""
async def main():
# 1. Use fast LLM - Llama 4 on Groq for ultra-fast inference
from browser_use import ChatGroq
llm = ChatGroq(
model='meta-llama/llama-4-maverick-17b-128e-instruct',
temperature=0.0,
)
# from browser_use import ChatGoogle
# llm = ChatGoogle(model='gemini-2.5-flash')
# 2. Create speed-optimized browser profile
browser_profile = BrowserProfile(
minimum_wait_page_load_time=0.1,
wait_between_actions=0.1,
headless=False,
)
# 3. Define a speed-focused task
task = """
1. Go to reddit https://www.reddit.com/search/?q=browser+agent&type=communities
2. Click directly on the first 5 communities to open each in new tabs
3. Find out what the latest post is about, and switch directly to the next tab
4. Return the latest post summary for each page
"""
# 4. Create agent with all speed optimizations
agent = Agent(
task=task,
llm=llm,
flash_mode=True, # Disables thinking in the LLM output for maximum speed
browser_profile=browser_profile,
extend_system_message=SPEED_OPTIMIZATION_PROMPT,
)
await agent.run()
if __name__ == '__main__':
asyncio.run(main())
```
## Speed Optimization Techniques
### 1. Fast LLM Models
```python
# Groq - Ultra-fast inference
from browser_use import ChatGroq
llm = ChatGroq(model='meta-llama/llama-4-maverick-17b-128e-instruct')
# Google Gemini Flash - Optimized for speed
from browser_use import ChatGoogle
llm = ChatGoogle(model='gemini-2.5-flash')
```
### 2. Browser Optimizations
```python
browser_profile = BrowserProfile(
minimum_wait_page_load_time=0.1, # Reduce wait time
wait_between_actions=0.1, # Faster action execution
headless=True, # No GUI overhead
)
```
### 3. Agent Optimizations
```python
agent = Agent(
task=task,
llm=llm,
flash_mode=True, # Skip LLM thinking process
extend_system_message=SPEED_PROMPT, # Optimize LLM behavior
)
```

View File

@@ -3,6 +3,7 @@ title: "Lifecycle Hooks"
description: "Customize agent behavior with lifecycle hooks"
icon: "Wrench"
author: "Carlos A. Planchón"
mode: "wide"
---
Browser-Use provides lifecycle hooks that allow you to execute custom code at specific points during the agent's execution.
@@ -35,11 +36,10 @@ async def my_step_hook(agent: Agent):
# agent.controller, agent.llm, agent.browser_session
# agent.pause(), agent.resume(), agent.add_new_task(...), etc.
# You also have direct access to the playwright Page and Browser Context
page = await agent.browser_session.get_current_page()
# https://playwright.dev/python/docs/api/class-page
current_url = page.url
# You also have direct access to the browser state
state = await agent.browser_session.get_browser_state_summary()
current_url = state.url
visit_log = agent.history.urls()
previous_url = visit_log[-2] if len(visit_log) >= 2 else None
print(f"Agent was last on URL: {previous_url} and is now on {current_url}")
@@ -68,7 +68,7 @@ async def my_step_hook(agent: Agent):
agent = Agent(
task="Search for the latest news about AI",
llm=ChatOpenAI(model="gpt-4o"),
llm=ChatOpenAI(model="gpt-4.1-mini"),
)
await agent.run(
@@ -96,10 +96,10 @@ When working with agent hooks, you have access to the entire `Agent` instance. H
- `agent.history.model_actions()`: Actions taken by the agent
- `agent.history.extracted_content()`: Content extracted from web pages
- `agent.history.urls()`: URLs visited by the agent
- `agent.browser_session` gives direct access to the `BrowserSession()` and playwright objects
- `agent.browser_session.get_current_page()`: Get the current playwright `Page` object the agent is focused on
- `agent.browser_session.browser_context`: Get the current playwright `BrowserContext` object
- `agent.browser_session.browser_context.pages`: Get all the tabs currently open in the context
- `agent.browser_session` gives direct access to the `Browser()` and CDP interface
- `agent.browser_session.agent_focus`: Get the current CDP session the agent is focused on
- `agent.browser_session.get_or_create_cdp_session()`: Get the current CDP session for browser interaction
- `agent.browser_session.get_tabs()`: Get all tabs currently open
- `agent.browser_session.get_page_html()`: Current page HTML
- `agent.browser_session.take_screenshot()`: Screenshot of the current page
@@ -337,7 +337,7 @@ async def run_agent():
"""Run the Browser-Use agent with the recording hook"""
agent = Agent(
task="Compare the price of gpt-4o and DeepSeek-V3",
llm=ChatOpenAI(model="gpt-4o"),
llm=ChatOpenAI(model="gpt-4.1-mini"),
)
try:

View File

@@ -2,6 +2,7 @@
title: "MCP Client"
description: "Connect external MCP servers to extend browser-use with additional tools and integrations"
icon: "plug"
mode: "wide"
---
The MCP (Model Context Protocol) client allows browser-use agents to connect to external MCP servers, automatically exposing their tools as actions.

View File

@@ -2,6 +2,7 @@
title: "MCP Server"
description: "Expose browser-use capabilities as an MCP server for AI assistants like Claude Desktop"
icon: "server"
mode: "wide"
---
The MCP server exposes browser-use's browser automation capabilities as tools that can be used by AI assistants like Claude Desktop. This allows external MCP clients to control browsers, navigate websites, extract content, and perform automated tasks.
@@ -17,7 +18,7 @@ The MCP server acts as a bridge between MCP-compatible AI assistants and browser
```mermaid
graph LR
A[Claude Desktop] -->|MCP Protocol| B[Browser-use MCP Server]
B --> C[BrowserSession]
B --> C[Browser]
B --> D[Controller]
B --> E[FileSystem]
C --> F[Playwright Browser]
@@ -379,7 +380,7 @@ uvx 'browser-use[cli]' --mcp
playwright install chromium
# Test browser launch
python -c "from browser_use import BrowserSession; import asyncio; asyncio.run(BrowserSession().start())"
python -c "from browser_use import Browser; import asyncio; asyncio.run(Browser().start())"
```
### Connection Errors

View File

@@ -0,0 +1,54 @@
---
title: "More Examples"
description: "Explore additional examples and use cases on GitHub."
icon: "arrow-up-right-from-square"
mode: "wide"
---
## Additional Examples
Explore our comprehensive collection of examples on GitHub for more advanced use cases and integrations.
### 📁 Featured Examples
**🔒 [Secure Setup](https://github.com/browser-use/browser-use/blob/main/examples/features/secure.py)**
Azure OpenAI with enterprise security and data privacy
**🎯 [Custom Functions](https://github.com/browser-use/browser-use/tree/main/examples/custom-functions)**
2FA integration, file uploads, notifications, and more
**🏪 [E-commerce](https://github.com/browser-use/browser-use/blob/main/examples/use-cases/shopping.py)**
Automated shopping and product comparison
**💼 [Job Applications](https://github.com/browser-use/browser-use/blob/main/examples/use-cases/find_and_apply_to_jobs.py)**
CV upload and job application automation
### 🔗 Browse All Examples
**[View Complete Examples Directory →](https://github.com/browser-use/browser-use/tree/main/examples)**
Categories available:
- **Getting Started** - Basic examples for beginners
- **Features** - Advanced functionality demonstrations
- **Custom Functions** - Extend agent capabilities
- **Integrations** - Gmail, Slack, Discord, MCP servers
- **Models** - Different LLM provider examples
- **Use Cases** - Real-world application scenarios
- **Browser** - Browser configuration examples
- **UI** - Gradio and Streamlit interfaces
### 🤝 Contributing Examples
Have a great use case? **[Submit a pull request](https://github.com/browser-use/browser-use/pulls)** with your example!
**What makes a good example:**
- Clear documentation and comments
- Real-world use case
- Follows project conventions
- Includes error handling
### 📞 Need Help?
- **[GitHub Issues](https://github.com/browser-use/browser-use/issues)** - Bug reports and feature requests
- **[Discord Community](https://link.browser-use.com/discord)** - Live support and discussions
- **Enterprise Support** - [support@browser-use.com](mailto:support@browser-use.com)

View File

@@ -1,50 +0,0 @@
---
title: "Output Format"
description: "The default is text. But you can define a structured output format to make post-processing easier."
icon: "code"
---
## Custom output format
With [this example](https://github.com/browser-use/browser-use/blob/main/examples/features/custom_output.py) you can define what output format the agent should return to you.
```python
from pydantic import BaseModel
# Define the output format as a Pydantic model
class Post(BaseModel):
post_title: str
post_url: str
num_comments: int
hours_since_post: int
class Posts(BaseModel):
posts: List[Post]
controller = Controller(output_model=Posts)
async def main():
task = 'Go to hackernews show hn and give me the first 5 posts'
model = ChatOpenAI(model='gpt-4o')
agent = Agent(task=task, llm=model, controller=controller)
history = await agent.run()
result = history.final_result()
if result:
parsed: Posts = Posts.model_validate_json(result)
for post in parsed.posts:
print('\n--------------------------------')
print(f'Title: {post.post_title}')
print(f'URL: {post.post_url}')
print(f'Comments: {post.num_comments}')
print(f'Hours since post: {post.hours_since_post}')
else:
print('No result')
if __name__ == '__main__':
asyncio.run(main())
```

View File

@@ -0,0 +1,47 @@
---
title: "Parallel Agents"
description: "Run multiple agents in parallel with separate browser instances"
icon: "copy"
---
```python
import asyncio
from browser_use import Agent, Browser, ChatOpenAI
async def main():
# Create 3 separate browser instances
browsers = [
Browser(
user_data_dir=f'./temp-profile-{i}',
headless=False,
)
for i in range(3)
]
# Create 3 agents with different tasks
agents = [
Agent(
task='Search for "browser automation" on Google',
browser=browsers[0],
llm=ChatOpenAI(model='gpt-4.1-mini'),
),
Agent(
task='Search for "AI agents" on DuckDuckGo',
browser=browsers[1],
llm=ChatOpenAI(model='gpt-4.1-mini'),
),
Agent(
task='Visit Wikipedia and search for "web scraping"',
browser=browsers[2],
llm=ChatOpenAI(model='gpt-4.1-mini'),
),
]
# Run all agents in parallel
tasks = [agent.run() for agent in agents]
results = await asyncio.gather(*tasks, return_exceptions=True)
print('🎉 All agents completed!')
```
> **Note:** This is experimental, and agents might conflict with each other.

View File

@@ -2,6 +2,7 @@
title: "Connect to your Browser"
description: "Connect to a remote browser or launch a new local browser."
icon: "computer"
mode: "wide"
---
## Overview
@@ -10,7 +11,6 @@ Browser Use supports a wide variety of ways to launch or connect to a browser:
- Launch a new local browser using playwright/patchright chromium (the default)
- Connect to a remote browser using CDP or WSS
- Use an existing playwright `Page`, `Browser`, or `BrowserContext` object
- Connect to a local browser already running using `browser_pid`
<Tip>
@@ -24,13 +24,13 @@ We provide automatic CAPTCHA solving, proxies, human-in-the-loop automation, and
### Method A: Launch a New Local Browser (Default)
Launch a local browser using built-in default (playwright `chromium`) or a provided `executable_path`:
Launch a local browser using built-in default (Playwright-installed `chromium`) or a provided `executable_path`:
```python
from browser_use import Agent, BrowserSession
from browser_use import Agent, Browser
# If no executable_path provided, uses Playwright/Patchright's built-in Chromium
browser_session = BrowserSession(
browser = Browser(
# Path to a specific Chromium-based executable (optional)
executable_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome', # macOS
# For Windows: 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe'
@@ -45,7 +45,7 @@ browser_session = BrowserSession(
agent = Agent(
task="Your task here",
llm=llm,
browser_session=browser_session,
browser=browser,
)
```
@@ -63,41 +63,42 @@ We support most `chromium`-based browsers in `executable_path`, including [Brave
persist over time.
</Warning>
### Method B: Connect Using Existing Playwright Objects
### Method B: Connect to Remote Browser via CDP
Pass existing Playwright `Page`, `BrowserContext`, `Browser`, and/or `playwright` API object to `BrowserSession(...)`:
Connect to a remote browser instance using Chrome DevTools Protocol:
```python
from browser_use import Agent, BrowserSession
from playwright.async_api import async_playwright
# from patchright.async_api import async_playwright # stealth alternative
from browser_use import Agent, Browser
async with async_playwright() as playwright:
browser = await playwright.chromium.launch()
context = await browser.new_context()
page = await context.new_page()
# Connect to a remote browser (e.g., running in Docker, cloud, or another machine)
browser = Browser(
cdp_url="ws://remote-browser:9222/devtools/browser", # Remote CDP WebSocket URL
is_local=False, # Important: set to False for remote connections
)
browser_session = BrowserSession(
page=page,
# browser_context=context, # all these are supported
# browser=browser,
# playwright=playwright,
)
agent = Agent(
task="Your task here",
llm=llm,
browser_session=browser_session,
)
```
You can also pass `page` directly to `Agent(...)` as a shortcut.
```python
agent = Agent(
task="Your task here",
llm=llm,
page=page,
browser=browser,
)
```
<Note>
Playwright Page/Browser/Context objects are no longer supported. Browser Use now uses CDP exclusively for all browser interactions.
</Note>
You can also use HTTP-based CDP connections:
```python
browser = Browser(
cdp_url="http://remote-browser:9222", # Remote CDP HTTP URL
is_local=False,
)
agent = Agent(
task="Your task here",
llm=llm,
browser=browser,
)
```
@@ -106,18 +107,18 @@ agent = Agent(
Connect to a browser with open `--remote-debugging-port`:
```python
from browser_use import Agent, BrowserSession
from browser_use import Agent, Browser
# First, start Chrome with remote debugging:
# "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" --remote-debugging-port=9242
# Then connect using the process ID
browser_session = BrowserSession(browser_pid=12345) # Replace with actual Chrome PID
browser = Browser(browser_pid=12345) # Replace with actual Chrome PID
agent = Agent(
task="Your task here",
llm=llm,
browser_session=browser_session,
browser=browser,
)
```
@@ -126,15 +127,15 @@ agent = Agent(
Connect to Playwright Node.js server providers:
```python
from browser_use import Agent, BrowserSession
from browser_use import Agent, Browser
# Connect to a playwright server
browser_session = BrowserSession(wss_url="wss://your-playwright-server.com/ws")
browser = Browser(wss_url="wss://your-playwright-server.com/ws")
agent = Agent(
task="Your task here",
llm=llm,
browser_session=browser_session,
browser=browser,
)
```
@@ -143,15 +144,15 @@ agent = Agent(
Connect to any remote Chromium-based browser:
```python
from browser_use import Agent, BrowserSession
from browser_use import Agent, Browser
# Connect to Chrome via CDP
browser_session = BrowserSession(cdp_url="http://localhost:9222")
browser = Browser(cdp_url="http://localhost:9222")
agent = Agent(
task="Your task here",
llm=llm,
browser_session=browser_session,
browser=browser,
)
```
@@ -165,7 +166,7 @@ agent = Agent(
- Extensions and their data
Always review the task you're giving to the agent and ensure it aligns with your security requirements!
Use `Agent(sensitive_data={'https://auth.example.com': {x_key: value}})` for any secrets, and restrict the browser with `BrowserSession(allowed_domains=['https://*.example.com'])`.
Use `Agent(sensitive_data={'https://auth.example.com': {x_key: value}})` for any secrets, and restrict the browser with `Browser(allowed_domains=['https://*.example.com'])`.
</Warning>
## Best Practices
@@ -173,7 +174,7 @@ agent = Agent(
1. **Use isolated profiles**: Create separate Chrome profiles for different agents to limit scope of risk:
```python
browser_session = BrowserSession(
browser = Browser(
user_data_dir='~/.config/browseruse/profiles/banking',
# profile_directory='Default'
)
@@ -182,40 +183,40 @@ agent = Agent(
2. **Limit domain access**: Restrict which sites the agent can visit:
```python
browser_session = BrowserSession(
browser = Browser(
allowed_domains=['example.com', 'http*://*.github.com'],
)
```
3. **Enable `keep_alive=True`** If you want to use a single `BrowserSession` with more than one agent:
3. **Enable `keep_alive=True`** if you want to use a single `Browser` with more than one agent:
```python
browser_session = BrowserSession(
browser = Browser(
keep_alive=True,
...
)
await browser_session.start() # start the session yourself before passing to Agent
await browser.start() # start the session yourself before passing to Agent
...
agent = Agent(..., browser_session=browser_session)
agent = Agent(..., browser=browser)
await agent.run()
...
await browser_session.kill() # end the session yourself, shortcut for keep_alive=False + .stop()
await browser.kill() # end the session yourself, shortcut for keep_alive=False + .stop()
```
## Re-Using a Browser
A `BrowserSession` starts when the browser is launched/connected, and ends when the browser process exits/disconnects. A session internally manages a single live playwright browser context, and is normally auto-closed by the agent when its task is complete (_if_ the agent started the session itself). If you pass an existing `BrowserSession` into an Agent, or if you set `BrowserSession(keep_alive=True)`, the session will not be closed and can be re-used between agents.
A `Browser` starts when the browser is launched/connected, and ends when the browser process exits/disconnects. A session internally manages a single live playwright browser context, and is normally auto-closed by the agent when its task is complete (_if_ the agent started the session itself). If you pass an existing `Browser` into an Agent, or if you set `Browser(keep_alive=True)`, the session will not be closed and can be re-used between agents.
Browser Use provides a number of ways to re-use profiles, sessions, and other configuration across multiple agents.
- ✅ sequential agents can re-use a single `user_data_dir` in new `BrowserSession`s
- ✅ sequential agents can re-use a single `BrowserSession` without closing it
- ❌ parallel agents cannot run separate `BrowserSession`s using the same `user_data_dir`
- ✅ parallel agents can run separate `BrowserSession`s using the same `storage_state`
- ✅ parallel agents can share a single `BrowserSession`, working in different tabs
- ⚠️ parallel agents can share a single `BrowserSession`, working in the same tab
- ✅ sequential agents can re-use a single `user_data_dir` in new `Browser`s
- ✅ sequential agents can re-use a single `Browser` without closing it
- ❌ parallel agents cannot run separate `Browser`s using the same `user_data_dir`
- ✅ parallel agents can run separate `Browser`s using the same `storage_state`
- ✅ parallel agents can share a single `Browser`, working in different tabs
- ⚠️ parallel agents can share a single `Browser`, working in the same tab
<Important>
Multiple `BrowserSession`s (aka chrome processes) cannot share the same
Multiple `Browser`s (aka chrome processes) cannot share the same
`user_data_dir` at the same time, but they can share a `storage_state` file or
`BrowserProfile` config.
</Important>
@@ -225,21 +226,21 @@ Browser Use provides a number of ways to re-use profiles, sessions, and other co
If you are only running one agent & browser at a time, they can re-use the same `user_data_dir` sequentially.
```python
from browser_use import Agent, BrowserSession
from browser_use import Agent, Browser
from browser_use.llm import ChatOpenAI
reused_profile = BrowserProfile(user_data_dir='~/.config/browseruse/profiles/default')
agent1 = Agent(
task="The first task...",
llm=ChatOpenAI(model="gpt-4o-mini"),
llm=ChatOpenAI(model="gpt-4.1-mini"),
browser_profile=reused_profile, # pass the profile in, it will auto-create a session
)
await agent1.run()
agent2 = Agent(
task="The second task...",
llm=ChatOpenAI(model="gpt-4o-mini"),
llm=ChatOpenAI(model="gpt-4.1-mini"),
browser_profile=reused_profile, # agent will auto-create its own new session
)
await agent2.run()
@@ -249,14 +250,14 @@ await agent2.run()
### Sequential Agents, Same Profile, Same Browser
If you are only running one agent at a time, they can re-use the same active `BrowserSession` and avoid having to relaunch chrome.
If you are only running one agent at a time, they can re-use the same active `Browser` and avoid having to relaunch chrome.
Each agent will start off looking at the same tab the previous agent left off on.
```python
from browser_use import Agent, BrowserSession
from browser_use import Agent, Browser
from browser_use.llm import ChatOpenAI
reused_session = BrowserSession(
reused_session = Browser(
user_data_dir='~/.config/browseruse/profiles/default',
keep_alive=True, # don't close browser after 1st agent.run() ends
)
@@ -264,15 +265,15 @@ await reused_session.start() # when keep_alive=True, session must be started m
agent1 = Agent(
task="The first task...",
llm=ChatOpenAI(model="gpt-4o-mini"),
browser_session=reused_session,
llm=ChatOpenAI(model="gpt-4.1-mini"),
browser=reused_session,
)
await agent1.run()
agent2 = Agent(
task="The second task...",
llm=ChatOpenAI(model="gpt-4o-mini"),
browser_session=reused_session, # re-use the same session
llm=ChatOpenAI(model="gpt-4.1-mini"),
browser=reused_session, # re-use the same session
)
await agent2.run()
@@ -282,26 +283,40 @@ await reused_session.close()
### Parallel Agents, Same Browser, Multiple Tabs
```python
from browser_use import Agent, BrowserSession
import asyncio
from browser_use import Agent, Browser
from browser_use.llm import ChatOpenAI
from playwright.async_api import async_playwright
from browser_use.browser.events import NavigateToUrlEvent
async with async_playwright() as playwright:
browser_context = await playwright.chromium.launch_persistent_context()
page1 = await browser_context.new_page()
page2 = await browser_context.new_page()
# Create a shared browser session
browser = Browser()
await browser.start()
agent1 = Agent(
task="The first task...",
llm=ChatOpenAI(model="gpt-4o-mini"),
page=page1,
)
agent2 = Agent(
task="The second task...",
llm=ChatOpenAI(model="gpt-4o-mini"),
page=page2,
)
await asyncio.gather(agent1.run(), agent2.run()) # run in parallel
# Create tabs for each agent using events
tab1_event = browser.event_bus.dispatch(NavigateToUrlEvent(url="about:blank", new_tab=True))
await tab1_event
tab2_event = browser.event_bus.dispatch(NavigateToUrlEvent(url="about:blank", new_tab=True))
await tab2_event
# Get tab information
tabs = await browser.get_tabs()
# Create agents that will work with different tabs
agent1 = Agent(
task="The first task...",
llm=ChatOpenAI(model="gpt-4.1-mini"),
browser=browser,
)
agent2 = Agent(
task="The second task...",
llm=ChatOpenAI(model="gpt-4.1-mini"),
browser=browser,
)
# Run agents in parallel (they will automatically coordinate tab switching)
await asyncio.gather(agent1.run(), agent2.run())
```
### Parallel Agents, Same Browser, Same Tab
@@ -313,32 +328,34 @@ async with async_playwright() as playwright:
</Warning>
```python
from browser_use import Agent, BrowserSession
import asyncio
from browser_use import Agent, Browser
from browser_use.llm import ChatOpenAI
from playwright.async_api import async_playwright
from browser_use.browser.events import NavigateToUrlEvent
playwright = await async_playwright().start()
browser = await playwright.chromium.launch(headless=True)
context = await browser.new_context()
shared_page = await context.new_page()
await shared_page.goto('https://example.com', wait_until='load')
shared_session = BrowserSession(page=shared_page, keep_alive=True)
# Create a shared browser session
shared_session = Browser()
await shared_session.start()
# Navigate to the target page
navigate_event = shared_session.event_bus.dispatch(NavigateToUrlEvent(url='https://example.com'))
await navigate_event
agent1 = Agent(
task="Fill out the form in section A...",
llm=ChatOpenAI(model="gpt-4o-mini"),
browser_session=shared_session
llm=ChatOpenAI(model="gpt-4.1-mini"),
browser=shared_session
)
agent2 = Agent(
task="Fill out the form in section B...",
llm=ChatOpenAI(model="gpt-4o-mini"),
browser_session=shared_session,
llm=ChatOpenAI(model="gpt-4.1-mini"),
browser=shared_session,
)
await asyncio.gather(agent1.run(), agent2.run()) # run in parallel
await shared_session.kill()
# Run agents in parallel on the same tab (not recommended)
await asyncio.gather(agent1.run(), agent2.run())
await shared_session.stop()
```
### Parallel Agents, Same Profile, Different Browsers
@@ -356,7 +373,7 @@ playwright open https://example.com/ --load-storage=/tmp/auth.json
```
```python
from browser_use.browser import BrowserProfile, BrowserSession
from browser_use.browser import BrowserProfile, Browser
shared_profile = BrowserProfile(
headless=True,
@@ -365,13 +382,13 @@ shared_profile = BrowserProfile(
keep_alive=True, # don't close the browser after the agent finishes
)
window1 = BrowserSession(browser_profile=shared_profile)
window1 = Browser(browser_profile=shared_profile)
await window1.start()
agent1 = Agent(browser_session=window1)
agent1 = Agent(browser=window1)
window2 = BrowserSession(browser_profile=shared_profile)
window2 = Browser(browser_profile=shared_profile)
await window2.start()
agent2 = Agent(browser_session=window2)
agent2 = Agent(browser=window2)
await asyncio.gather(agent1.run(), agent2.run()) # run in parallel
await window1.save_storage_state() # write storage state (cookies, localStorage, etc.) to auth.json
@@ -404,7 +421,7 @@ If you're having trouble connecting:
If you get a "profile is already in use" error:
1. Close all Chrome instances
2. The profile will automatically be unlocked when BrowserSession starts
2. The profile will automatically be unlocked when Browser starts
3. Alternatively, manually delete the `SingletonLock` file in the profile directory
<Note>

65
docs/customize/secure.mdx Normal file
View File

@@ -0,0 +1,65 @@
---
title: "Secure Setup"
description: "Azure OpenAI with data privacy and security configuration."
icon: "shield-check"
mode: "wide"
---
## Secure Setup with Azure OpenAI
Enterprise-grade security with Azure OpenAI, data privacy protection, and restricted browser access.
```python
import asyncio
import os

from dotenv import load_dotenv

load_dotenv()

# Disable telemetry before browser_use is imported
# (presumably the setting is read at import time; confirm against the library)
os.environ['ANONYMIZED_TELEMETRY'] = 'false'

from browser_use import Agent, BrowserProfile, ChatAzureOpenAI  # noqa: E402

# Azure OpenAI configuration
api_key = os.getenv('AZURE_OPENAI_KEY')
azure_endpoint = os.getenv('AZURE_OPENAI_ENDPOINT')
llm = ChatAzureOpenAI(model='gpt-4.1-mini', api_key=api_key, azure_endpoint=azure_endpoint)

# Secure browser configuration
# Use the '*.domain' wildcard form: a dot-less '*google.com' is not a documented
# pattern and could also match lookalike hosts such as 'notgoogle.com'.
browser_profile = BrowserProfile(
    allowed_domains=['*.google.com', 'browser-use.com'],
    enable_default_extensions=False,
)

# Sensitive data filtering
sensitive_data = {'company_name': 'browser-use'}

# Create secure agent
agent = Agent(
    task='Find the founders of the sensitive company_name',
    llm=llm,
    browser_profile=browser_profile,
    sensitive_data=sensitive_data,
)


async def main():
    """Run the secured agent for at most 10 steps."""
    await agent.run(max_steps=10)


if __name__ == '__main__':
    asyncio.run(main())
```
## Security Features
**Azure OpenAI** — your prompts and completions are:
- NOT used to train OpenAI models
- NOT shared with other customers
- Hosted entirely within Azure
- 30-day retention (or zero with Limited Access Program)
**Browser Security:**
- `allowed_domains`: Restrict navigation to trusted sites
- `enable_default_extensions=False`: Disable potentially dangerous extensions
- `sensitive_data`: Filter sensitive information from LLM input
<Note>
For enterprise deployments contact support@browser-use.com.
</Note>

View File

@@ -2,195 +2,35 @@
title: "Sensitive Data"
description: "Handle sensitive information securely and avoid sending PII & passwords to the LLM."
icon: "shield"
mode: "wide"
---
## Handling Sensitive Data
When working with sensitive information like passwords or PII, you can use the `Agent(sensitive_data=...)` parameter to provide sensitive strings that the model can use in actions without ever seeing directly.
```python
import os
from browser_use import Agent, Browser, ChatOpenAI
os.environ['ANONYMIZED_TELEMETRY'] = "false"
agent = Agent(
task='Log into example.com as user x_username with password x_password',
task='Log into example.com with username x_user and password x_pass',
sensitive_data={
'https://example.com': {
'x_username': 'abc@example.com',
'x_password': 'abc123456', # 'x_placeholder': '<actual secret value>',
'x_user': 'your-real-username@email.com',
'x_pass': 'your-real-password123',
},
},
use_vision=False, # Disable vision to prevent LLM seeing sensitive data in screenshots
llm=ChatOpenAI(model='gpt-4.1-mini'),
)
```
<Note>
You should also configure [`BrowserSession(allowed_domains=...)`](https://docs.browser-use.com/customize/browser-settings#allowed-domains) to prevent the Agent from visiting URLs not needed for the task.
</Note>
### Basic Usage
Here's a basic example of how to use sensitive data:
```python
from dotenv import load_dotenv
load_dotenv()
from browser_use.llm import ChatOpenAI
from browser_use import Agent, BrowserSession
llm = ChatOpenAI(model='gpt-4.1')
# Define sensitive data
# The LLM will only see placeholder names (x_member_number, x_passphrase), never the actual values
sensitive_data = {
'https://*.example.com': {
'x_member_number': '123235325',
'x_passphrase': 'abcwe234',
},
}
# Use the placeholder names in your task description
task = """
1. go to https://travel.example.com
2. sign in with your member number x_member_number and private access code x_passphrase
3. extract today's list of travel deals as JSON
"""
# Recommended: Limit the domains available for the entire browser so the Agent can't be tricked into visiting untrusted URLs
browser_session = BrowserSession(allowed_domains=['https://*.example.com'])
agent = Agent(
task=task,
llm=llm,
sensitive_data=sensitive_data, # Pass the sensitive data to the agent
browser_session=browser_session, # Pass the restricted browser_session to limit URLs Agent can visit
use_vision=False, # Disable vision or else the LLM might see entered values in screenshots
)
async def main():
await agent.run()
if __name__ == '__main__':
asyncio.run(main())
await agent.run()
```
In this example:
## How it Works
1. **Text Filtering**: The LLM only sees placeholders (`x_user`, `x_pass`); your sensitive data is filtered out of the input text.
2. **DOM Actions**: Real values are injected directly into form fields after the LLM call
1. The LLM only ever sees the `x_member_number` and `x_passphrase` placeholders in prompts
2. When the model wants to use your password it outputs x_passphrase - and we replace it with the actual value in the DOM
3. When sensitive data appear in the content of the current page, we replace it in the page summary fed to the LLM - so that the model never has it in its state.
4. The browser will be entirely prevented from going to any site not under `https://*.example.com`
This approach ensures that sensitive information remains secure while still allowing the agent to perform tasks that require authentication.
---
### Best Practices
- Always restrict your sensitive data to only the exact domains that need it, `https://travel.example.com` is better than `*.example.com`
- Always restrict [`BrowserSession(allowed_domains=[...])`](https://docs.browser-use.com/customize/browser-settings#allowed-domains) to only the domains the agent needs to visit to accomplish its task. This helps guard against prompt injection attacks, jailbreaks, and LLM mistakes.
- Only use `sensitive_data` for strings that can be inputted verbatim as text. The LLM never sees the actual values, so it can't "understand" them, adapt them, or split them up for multiple input fields. For example, you can't ask the Agent to click through a datepicker UI to input the sensitive value `1990-12-31`. For these situations you can implement a [custom function](/customize/custom-functions) the LLM can call that updates the DOM using Python / JS.
- Don't use `sensitive_data` for login credentials, it's better to use [`storage_state`](docs.browser-use.com/customize/browser-settings#storage-state) or a [`user_data_dir`](/customize/browser-settings#user-data-dir) to log into the sites the agent needs in advance & reuse the cookies:
```bash
# open a browser to log into the sites you need & save the cookies
$ playwright open https://accounts.google.com --save-storage auth.json
```
Then use those cookies when the agent runs:
```python
agent = Agent(..., browser_session=BrowserSession(storage_state='./auth.json'))
```
<Warning>
Warning: Vision models still see the screenshot of the page by default - where the sensitive data might be visible.
It's recommended to set `Agent(use_vision=False)` when working with `sensitive_data`.
</Warning>
<a name="allowed_domains"></a>
<a name="domain-pattern-format"></a>
### Allowed Domains
Domain patterns in `sensitive_data` follow the same format as [`allowed_domains`](https://docs.browser-use.com/customize/browser-settings#allowed-domains):
- `example.com` - Matches only `https://example.com/*`
- `*.example.com` - Matches `https://example.com/*` and any subdomain `https://*.example.com/*`
- `http*://example.com` - Matches both `http://` and `https://` protocols for `example.com/*`
- `chrome-extension://*` - Matches any Chrome extension URL e.g. `chrome-extension://anyextensionid/options.html`
> **Security Warning**: For security reasons, certain patterns are explicitly rejected:
>
> - Wildcards in TLD part (e.g., `example.*`) are **not allowed** (`google.*` would match `google.ninja`, `google.pizza`, etc. which is a bad idea)
> - Embedded wildcards (e.g., `g*e.com`) are rejected to prevent overly broad matches
> - Multiple wildcards like `*.*.domain` are not supported currently, open an issue if you need this feature
The default protocol when no scheme is specified is now `https://` for enhanced security.
For convenience the system will validate that all domain patterns used in `Agent(sensitive_data)` are also included in `BrowserSession(allowed_domains)`.
### Missing or Empty Values
When working with sensitive data, keep these details in mind:
- If a key referenced by the model (`<secret>key_name</secret>`) is missing from your `sensitive_data` dictionary, a warning will be logged but the substitution tag will be preserved.
- If you provide an empty value for a key in the `sensitive_data` dictionary, it will be treated the same as a missing key.
- The system will always attempt to process all valid substitutions, even if some keys are missing or empty.
---
### Full Example
Here's a more complex example demonstrating multiple domains and sensitive data values.
```python
from dotenv import load_dotenv
load_dotenv()
from browser_use.llm import ChatOpenAI
from browser_use import Agent, BrowserSession
llm = ChatOpenAI(model='gpt-4.1')
# Domain-specific sensitive data
sensitive_data = {
'https://*.google.com': {'x_email': '...', 'x_pass': '...'},
'chrome-extension://abcd1243': {'x_api_key': '...'},
'http*://example.com': {'x_authcode': '123123'}
}
# Set browser session with allowed domains that match all domain patterns in sensitive_data
browser_session = BrowserSession(
allowed_domains=[
'https://*.google.com',
'chrome-extension://abcd',
'http://example.com', # Explicitly include http:// if needed
'https://example.com' # By default, only https:// is matched
]
)
# Pass the sensitive data to the agent
agent = Agent(
task="Log into Google, then check my account information",
llm=llm,
sensitive_data=sensitive_data,
browser_session=browser_session,
use_vision=False,
)
async def main():
await agent.run()
if __name__ == '__main__':
asyncio.run(main())
```
With this approach:
1. The Google credentials (`x_email` and `x_pass`) will only be used on Google domains (any subdomain, https only)
2. The API key (`x_api_key`) will only be used on pages served by the specific Chrome extension `abcd1243`
3. The auth code (`x_authcode`) will only be used on `http://example.com/*` or `https://example.com/*`
## Best Practices
- Use `Browser(allowed_domains=[...])` to restrict navigation
- Set `use_vision=False` to prevent screenshot leaks
- Use `storage_state='./auth.json'` for login cookies instead of passwords when possible

View File

@@ -1,24 +1,23 @@
---
title: "Supported Models"
description: "Using different chat providers with Browser Use"
description: "Choose your favorite LLM"
icon: "robot"
---
## Model Recommendations
### Recommendations
We recommend using `O3` for the best performance. The best price to performance can be achieved using `gemini-2.0-flash-exp`.
- Best accuracy: `O3`
- Fastest: `llama4` on groq
- Balanced: fast + cheap + clever: `gemini-2.5-flash` or `gpt-4.1-mini`
## Supported Models
In addition to all the models below, we support all other models that can be called via OpenAI compatible API (deepseek, novita, x, qwen). We are open to PRs for more providers.
### OpenAI
### OpenAI [example](https://github.com/browser-use/browser-use/blob/main/examples/models/gpt-4.1.py)
`O3` model is recommended for best performance.
```python
from browser_use.llm import ChatOpenAI
from browser_use import Agent
from browser_use import Agent, ChatOpenAI
# Initialize the model
llm = ChatOpenAI(
@@ -44,11 +43,10 @@ OPENAI_API_KEY=
into the normal OpenAI API call).
</Info>
### Anthropic
### Anthropic [example](https://github.com/browser-use/browser-use/blob/main/examples/models/claude-4-sonnet.py)
```python
from browser_use.llm import ChatAnthropic
from browser_use import Agent
from browser_use import Agent, ChatAnthropic
# Initialize the model
llm = ChatAnthropic(
@@ -68,11 +66,10 @@ And add the variable:
ANTHROPIC_API_KEY=
```
### Azure OpenAI
### Azure OpenAI [example](https://github.com/browser-use/browser-use/blob/main/examples/models/azure_openai.py)
```python
from browser_use.llm import ChatAzureOpenAI
from browser_use import Agent
from browser_use import Agent, ChatAzureOpenAI
from pydantic import SecretStr
import os
@@ -95,20 +92,19 @@ AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/
AZURE_OPENAI_API_KEY=
```
### Gemini
### Gemini [example](https://github.com/browser-use/browser-use/blob/main/examples/models/gemini.py)
> [!IMPORTANT] `GEMINI_API_KEY` was the old environment variable name; as of 2025-05 it should be called `GOOGLE_API_KEY`.
```python
from browser_use.llm import ChatGoogle
from browser_use import Agent
from browser_use import Agent, ChatGoogle
from dotenv import load_dotenv
# Read GOOGLE_API_KEY into env
load_dotenv()
# Initialize the model
llm = ChatGoogle(model='gemini-2.0-flash-exp')
llm = ChatGoogle(model='gemini-2.5-flash')
# Create agent with the model
agent = Agent(
@@ -123,15 +119,14 @@ Required environment variables:
GOOGLE_API_KEY=
```
### AWS Bedrock
### AWS Bedrock [example](https://github.com/browser-use/browser-use/blob/main/examples/models/aws.py)
AWS Bedrock provides access to multiple model providers through a single API. We support both a general AWS Bedrock client and provider-specific convenience classes.
#### General AWS Bedrock (supports all providers)
```python
from browser_use.llm import ChatAWSBedrock
from browser_use import Agent
from browser_use import Agent, ChatAWSBedrock
# Works with any Bedrock model (Anthropic, Meta, AI21, etc.)
llm = ChatAWSBedrock(
@@ -149,8 +144,7 @@ agent = Agent(
#### Anthropic Claude via AWS Bedrock (convenience class)
```python
from browser_use.llm import ChatAnthropicBedrock
from browser_use import Agent
from browser_use import Agent, ChatAnthropicBedrock
# Anthropic-specific class with Claude defaults
llm = ChatAnthropicBedrock(
@@ -183,11 +177,10 @@ You can also use AWS profiles or IAM roles instead of environment variables. The
- Session tokens for temporary credentials
- AWS SSO authentication (`aws_sso_auth=True`)
## Groq
## Groq [example](https://github.com/browser-use/browser-use/blob/main/examples/models/llama4-groq.py)
```python
from browser_use.llm import ChatGroq
from browser_use import Agent
from browser_use import Agent, ChatGroq
llm = ChatGroq(model="meta-llama/llama-4-maverick-17b-128e-instruct")
@@ -206,16 +199,21 @@ GROQ_API_KEY=
## Ollama
```python
from browser_use.llm import ChatOllama
from browser_use import Agent
from browser_use import Agent, ChatOllama
llm = ChatOllama(model="llama3.1:8b")
```
## Migration Guides
## Langchain
### From Langchain
[Example](https://github.com/browser-use/browser-use/blob/main/examples/models/langchain) on how to use Langchain with Browser Use.
To migrate the Langchain based code, just replace `from langchain_openai import ChatOpenAI` with `from browser_use.llm import ChatOpenAI` etc. The methods should be compatible(ish).
## Other models (DeepSeek, Novita, X, Qwen...)
We support all other models that can be called via OpenAI compatible API. We are open to PRs for more providers.
**Examples available:**
- [DeepSeek](https://github.com/browser-use/browser-use/blob/main/examples/models/deepseek-chat.py)
- [Novita](https://github.com/browser-use/browser-use/blob/main/examples/models/novita.py)
- [OpenRouter](https://github.com/browser-use/browser-use/blob/main/examples/models/openrouter.py)
We also made an example [here](https://github.com/browser-use/browser-use/blob/main/examples/models/langchain) to help you stay with Langchain in case your workflow requires it.

View File

@@ -2,6 +2,7 @@
title: "System Prompt"
description: "Customize the system prompt to control agent behavior and capabilities"
icon: "message"
mode: "wide"
---
## Overview
@@ -65,8 +66,8 @@ Always suggest exploring multiple options before making a decision.
"""
# Create agent with extended planner system prompt
llm = ChatOpenAI(model='gpt-4o')
planner_llm = ChatOpenAI(model='gpt-4o-mini')
llm = ChatOpenAI(model='gpt-4.1-mini')
planner_llm = ChatOpenAI(model='gpt-4.1-mini')
agent = Agent(
task="Your task here",

View File

@@ -1,6 +1,7 @@
---
title: 'Development'
description: 'Preview changes locally to update your docs'
mode: "wide"
---
<Info>

View File

@@ -2,6 +2,7 @@
title: "Contribution Guide"
description: "Learn how to contribute to Browser Use"
icon: "github"
mode: "wide"
---
# Join the Browser Use Community!

View File

@@ -1,48 +0,0 @@
---
title: "Evaluations"
description: "Test the Browser Use agent on standardized benchmarks"
icon: "chart-bar"
---
## Prerequisites
Browser Use uses proprietary/private test sets that must never be committed to GitHub and must be fetched through an authorized API request.
Accessing these test sets requires an approved Browser Use account.
There are currently no publicly available test sets, but some may be released in the future.
## Get an Api Access Key
First, navigate to https://browser-use.tools and log in with an authorized browser use account.
Then, click the "Account" button at the top right of the page, and click the "Cycle New Key" button on that page.
Copy the resulting url and secret key into your `.env` file. It should look like this:
```bash .env
EVALUATION_TOOL_URL= ...
EVALUATION_TOOL_SECRET_KEY= ...
```
## Running Evaluations
First, ensure your file `eval/service.py` is up to date.
Then run the file:
```bash
python eval/service.py
```
## Configuring Evaluations
You can modify the evaluation by providing flags to the evaluation script. For instance:
```bash
python eval/service.py --parallel_runs 5 --parallel_evaluations 5 --max-steps 25 --start 0 --end 100 --model gpt-4o
```
The evaluations webpage has a convenient GUI for generating these commands. To use it, navigate to https://browser-use.tools/dashboard.
Then click the button "New Eval Run" on the left panel. This will open an interface with selectors, inputs, sliders, and switches.
Input your desired configuration into the interface and copy the resulting python command at the bottom. Then run this command as before.

View File

@@ -2,6 +2,7 @@
title: "Local Setup"
description: "Set up Browser Use development environment locally"
icon: "laptop-code"
mode: "wide"
---
# Welcome to Browser Use Development!

View File

@@ -1,6 +1,7 @@
---
title: 'n8n Integration'
description: 'Learn how to integrate Browser Use with n8n workflows'
mode: "wide"
---
# Browser Use n8n Integration

View File

@@ -2,6 +2,7 @@
title: "Observability"
description: "Trace Browser Use's agent execution steps and browser sessions"
icon: "eye"
mode: "wide"
---
## Overview
@@ -9,20 +10,12 @@ icon: "eye"
Browser Use has a native integration with [Laminar](https://lmnr.ai) - open-source platform for tracing, evals and labeling of AI agents.
Read more about Laminar in the [Laminar docs](https://docs.lmnr.ai).
<Note>
Laminar excels at tracing browser agents by providing unified visibility into
both browser session recordings and agent execution steps.
</Note>
## Setup
To setup Laminar, you need to install the `lmnr` package and set the `LMNR_PROJECT_API_KEY` environment variable.
To get your project API key, you can either:
- Register on [Laminar Cloud](https://lmnr.ai) and get the key from your project settings
- Or spin up a local Laminar instance and get the key from the settings page
Register on [Laminar Cloud](https://lmnr.ai) and get the key from your project settings.
Set the `LMNR_PROJECT_API_KEY` environment variable.
```bash
pip install 'lmnr[all]'
export LMNR_PROJECT_API_KEY=<your-project-api-key>
@@ -33,21 +26,19 @@ export LMNR_PROJECT_API_KEY=<your-project-api-key>
Then, you simply initialize Laminar at the top of your project, and both Browser Use and session recordings will be automatically traced.
```python {5-8}
from browser_use.llm import ChatOpenAI
from browser_use import Agent
from browser_use import Agent, ChatOpenAI
import asyncio
from lmnr import Laminar, Instruments
# this line auto-instruments Browser Use and any browser you use (local or remote)
Laminar.initialize(project_api_key="...", disable_batch=True, disabled_instruments={Instruments.BROWSER_USE}) # you can also pass project api key here
Laminar.initialize(project_api_key="...")
async def main():
agent = Agent(
task="open google, search Laminar AI",
llm=ChatOpenAI(model="gpt-4.1-mini"),
)
result = await agent.run()
print(result)
await agent.run()
asyncio.run(main())
```

View File

@@ -2,6 +2,7 @@
title: "Roadmap"
description: "Future plans and upcoming features for Browser Use"
icon: "road"
mode: "wide"
---
Big things coming soon!

View File

@@ -2,6 +2,7 @@
title: "Telemetry"
description: "Understanding Browser Use's telemetry and privacy settings"
icon: "chart-mixed"
mode: "wide"
---
## Overview

View File

@@ -9,7 +9,10 @@
},
"favicon": "/favicon.ico",
"contextual": {
"options": ["copy", "view"]
"options": [
"copy",
"view"
]
},
"fonts": {
"family": "Geist"
@@ -26,22 +29,50 @@
"groups": [
{
"group": "Get Started",
"pages": ["introduction", "quickstart", "cli"]
"pages": [
"introduction",
"quickstart",
"quickstart_llm"
]
},
{
"group": "Customize",
"pages": [
"customize/supported-models",
"customize/agent-settings",
"customize/browser-settings",
"customize/real-browser",
"customize/output-format",
"customize/system-prompt",
"customize/sensitive-data",
"customize/custom-functions",
"customize/mcp-client",
"customize/mcp-server",
"customize/hooks"
{
"group": "Agent",
"icon": "robot",
"isDefaultOpen": true,
"pages": [
"customize/agent-basic",
"customize/supported-models",
"customize/agent-output-format",
"customize/agent-parameters"
]
},
{
"group": "Browser",
"icon": "window",
"isDefaultOpen": false,
"pages": [
"customize/browser-basic",
"customize/browser-real-browser",
"customize/browser-remote",
"customize/browser-parameters"
]
},
{
"group": "Examples",
"icon": "folder-open",
"pages": [
"customize/fast-agent",
"customize/chain-agents",
"customize/parallel-browser",
"customize/sensitive-data",
"customize/secure",
"customize/more-examples"
]
},
"customize/custom-functions"
]
},
{
@@ -49,10 +80,17 @@
"pages": [
"development/contribution-guide",
"development/local-setup",
{
"group": "MCP",
"icon": "link",
"pages": [
"customize/mcp-client",
"customize/mcp-server"
]
},
"customize/hooks",
"development/telemetry",
"development/observability",
"development/evaluations",
"development/roadmap"
"development/observability"
]
}
]
@@ -126,7 +164,11 @@
"display": "interactive"
},
"examples": {
"languages": ["javascript", "curl", "python"],
"languages": [
"javascript",
"curl",
"python"
],
"required": true
}
},
@@ -154,4 +196,4 @@
"linkedin": "https://linkedin.com/company/browser-use"
}
}
}
}

View File

@@ -1,6 +1,6 @@
---
title: "Introduction"
description: "Repetitive work is dead. Browser Use empowers anyone to automate repetitive online tasks. Simply tell it what do you want done."
description: "Automate browser tasks in plain text."
icon: "book-open"
---
@@ -17,7 +17,7 @@ icon: "book-open"
<CardGroup cols={2}>
<Card title="Local Setup" icon="terminal" href="/quickstart">
Get up and running with Browser Use locally
Open-source Python library.
</Card>
<Card
title="Cloud API"
@@ -25,6 +25,7 @@ icon: "book-open"
href="/cloud/v2/quickstart"
color="#FE750E"
>
Skip the setup and start automating with Browser Use Cloud
Scale up with our cloud.
</Card>
</CardGroup>

View File

@@ -1,23 +1,18 @@
---
title: "Quickstart"
description: "Start using Browser Use with this quickstart guide"
title: "Human Quickstart"
description: ""
icon: "rocket"
---
<Info>
You can skip this steps by using [Browser Use Cloud](/cloud/v2/quickstart)
</Info>
## Prepare the environment
## 1. Easy setup
Use [uv](https://docs.astral.sh/uv/) to setup the Python environment.
Use [uv](https://docs.astral.sh/uv/) to create and activate the environment:
```bash
uv venv --python 3.12
```
and activate it with:
```bash
# For Mac/Linux:
source .venv/bin/activate
@@ -26,50 +21,42 @@ source .venv/bin/activate
.venv\Scripts\activate
```
Install the dependencies:
Install browser-use:
```bash
uv pip install browser-use
```
Then install Chromium from [source](https://www.chromium.org/getting-involved/download-chromium/) or run the command below (this does not install Playwright only Chromium and dependencies).
Install Chromium:
```bash
uvx playwright install chromium --with-deps
```
## Create an agent
Then you can use the agent as follows:
```python agent.py
from browser_use.llm import ChatOpenAI
from browser_use import Agent
from dotenv import load_dotenv
load_dotenv()
import asyncio
llm = ChatOpenAI(model="gpt-5")
async def main():
agent = Agent(
task="Go to Hacker News and find the number 1 trending on Show HN",
llm=llm,
)
result = await agent.run()
print(result)
asyncio.run(main())
```
## Set up your LLM API keys
You need to set up API keys for the LLM you want to use and store them in `.env` file. For example, for OpenAI and Anthropic:
## 2. Choose your favorite LLM
Create a `.env` file and add your API key:
```bash .env
OPENAI_API_KEY=
ANTHROPIC_API_KEY=
```
For other LLM models you can refer to the [Supported Models](/customize/supported-models) page to find how to set them up with their specific API keys.
See [Supported Models](/customize/supported-models) for other models.
## 3. Run your first agent
```python agent.py
from browser_use import Agent, ChatOpenAI
from dotenv import load_dotenv
import asyncio
load_dotenv()
async def main():
llm = ChatOpenAI(model="gpt-4.1-mini")
task = "Find the number 1 post on Show HN"
agent = Agent(task=task, llm=llm)
await agent.run()
if __name__ == "__main__":
asyncio.run(main())
```

11
docs/quickstart_llm.mdx Normal file
View File

@@ -0,0 +1,11 @@
---
title: "LLM Quickstart"
description: ""
icon: "brain"
---
1. Copy all content [🔗 from here](https://docs.browser-use.com/llms-full.txt) (~40k tokens)
2. Paste it into your favorite coding agent (Cursor, Claude, ChatGPT ...).

View File

@@ -0,0 +1,45 @@
import asyncio
from browser_use import Agent, Browser, ChatOpenAI
# NOTE: This is still experimental, and agents might conflict with each other.
async def main():
    """Run three agents concurrently, each driving its own browser instance.

    Every agent gets a separate ``Browser`` with its own ``user_data_dir``.
    The agents are awaited together with ``asyncio.gather``; because
    ``return_exceptions=True`` is used, a failing agent does not cancel the
    others, so each result is inspected afterwards and failures are reported
    instead of being silently dropped.
    """
    # Create 3 separate browser instances
    browsers = [
        Browser(
            user_data_dir=f'./temp-profile-{i}',
            headless=False,
        )
        for i in range(3)
    ]

    # Create 3 agents with different tasks
    agents = [
        Agent(
            task='Search for "browser automation" on Google',
            browser=browsers[0],
            llm=ChatOpenAI(model='gpt-4.1-mini'),
        ),
        Agent(
            task='Search for "AI agents" on DuckDuckGo',
            browser=browsers[1],
            llm=ChatOpenAI(model='gpt-4.1-mini'),
        ),
        Agent(
            task='Visit Wikipedia and search for "web scraping"',
            browser=browsers[2],
            llm=ChatOpenAI(model='gpt-4.1-mini'),
        ),
    ]

    # Run all agents in parallel
    tasks = [agent.run() for agent in agents]
    results = await asyncio.gather(*tasks, return_exceptions=True)

    # With return_exceptions=True, errors come back as values; surface them
    # so a failed run is not reported as a success.
    failures = [(index, outcome) for index, outcome in enumerate(results) if isinstance(outcome, BaseException)]
    for index, error in failures:
        print(f'❌ Agent {index} failed: {error!r}')

    if failures:
        print(f'⚠️ {len(failures)} of {len(agents)} agents failed')
    else:
        print('🎉 All agents completed!')


if __name__ == '__main__':
    asyncio.run(main())

View File

@@ -8,25 +8,22 @@ from dotenv import load_dotenv
load_dotenv()
from browser_use import Agent, BrowserProfile, BrowserSession, ChatOpenAI
from browser_use import Agent, Browser, ChatOpenAI
# SETUP: First copy your real Chrome profile (close Chrome first, then run):
# Mac:
# mkdir -p ~/.config/browseruse/profiles && cp -r ~/Library/Application\ Support/Google/Chrome ~/.config/browseruse/profiles/real-chrome
browser_profile = BrowserProfile(
# Connect to your existing Chrome browser
browser = Browser(
executable_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
user_data_dir='~/.config/browseruse/profiles/real-chrome',
user_data_dir='~/Library/Application Support/Google/Chrome',
profile_directory='Default',
)
browser_session = BrowserSession(browser_profile=browser_profile)
async def main():
agent = Agent(
llm=ChatOpenAI(model='gpt-4.1-mini'),
# Google blocks this approach, so we use a different search engine
task='Visit https://duckduckgo.com and search for "browser-use founders"',
browser_session=browser_session,
browser=browser,
)
await agent.run()

View File

@@ -0,0 +1,53 @@
"""
Simple demonstration of the CDP feature.
To test this locally, follow these steps:
1. Create a shortcut for the executable Chrome file.
2. Add the following argument to the shortcut:
- On Windows: `--remote-debugging-port=9222`
3. Open a web browser and navigate to `http://localhost:9222/json/version` to verify that the Chrome DevTools Protocol (CDP) endpoint is reachable.
4. Launch this example.
@dev You need to set the `OPENAI_API_KEY` environment variable before proceeding.
"""
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from browser_use import Agent, Controller
from browser_use.browser import BrowserProfile, BrowserSession
from browser_use.llm import ChatOpenAI
# Session that attaches to a browser through the CDP endpoint given in `cdp_url`
# (port 9222 matches the `--remote-debugging-port=9222` flag from the module docstring).
browser_session = BrowserSession(
browser_profile=BrowserProfile(
headless=False,
),
cdp_url='http://localhost:9222',
is_local=True, # set to False if you want to use a remote browser
)
# Controller instance handed to the Agent in main().
controller = Controller()
async def main():
    """Run one agent against the CDP-attached browser session.

    The agent uses the module-level ``controller`` and ``browser_session``.
    After the run the session is killed, and the script waits for Enter before
    it exits.
    """
    agent = Agent(
        task='Visit https://duckduckgo.com and search for "browser-use founders"',
        # Fixed keyword: was misspelled `lllm`, so the model never reached the Agent.
        llm=ChatOpenAI(model='gpt-4.1-mini'),
        controller=controller,
        browser_session=browser_session,
    )

    await agent.run()
    await browser_session.kill()

    input('Press Enter to close...')


if __name__ == '__main__':
    asyncio.run(main())

View File

@@ -16,7 +16,7 @@ load_dotenv()
from pydantic import BaseModel
from browser_use import Agent, ChatOpenAI, Controller
from browser_use import Agent, ChatOpenAI
class Post(BaseModel):
@@ -30,13 +30,10 @@ class Posts(BaseModel):
posts: list[Post]
controller = Controller(output_model=Posts)
async def main():
task = 'Go to hackernews show hn and give me the first 5 posts'
model = ChatOpenAI(model='gpt-4.1-mini')
agent = Agent(task=task, llm=model, controller=controller)
agent = Agent(task=task, llm=model, output_model_schema=Posts)
history = await agent.run()

View File

@@ -2,46 +2,30 @@ import asyncio
import os
import sys
from browser_use.browser.profile import BrowserProfile
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from browser_use import Agent, ChatOpenAI, Controller
from browser_use.browser import BrowserProfile, BrowserSession
from browser_use import Agent
# Initialize the model
llm = ChatOpenAI(
model='gpt-4.1',
temperature=0.0,
)
# Get your chrome path
browser_session = BrowserSession(
browser_profile=BrowserProfile(
executable_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
keep_alive=True,
user_data_dir='~/.config/browseruse/profiles/default',
),
)
controller = Controller()
profile = BrowserProfile(keep_alive=True)
task = 'Find the founders of browser-use and draft them a short personalized message'
agent = Agent(task=task, llm=llm, controller=controller, browser_session=browser_session)
task = """Go to reddit.com"""
async def main():
await agent.run()
agent = Agent(task=task, browser_profile=profile)
await agent.run(max_steps=1)
# new_task = input('Type in a new task: ')
new_task = 'Find an image of the founders'
agent.add_new_task(new_task)
await agent.run()
while True:
user_response = input('\n👤 New task or "q" to quit: ')
agent.add_new_task(f'New task: {user_response}')
await agent.run()
if __name__ == '__main__':

View File

@@ -49,8 +49,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(
load_dotenv()
# Disable all telemetry
os.environ['BROWSER_USE_CLOUD_SYNC'] = 'false'
os.environ['ANONYMIZED_TELEMETRY'] = 'false'
@@ -67,7 +66,7 @@ task = 'Find the founders of the sensitive company_name'
# Configuration Browser (optional)
browser_profile = BrowserProfile(allowed_domains=['*google.com', 'browser-use.com'], enable_default_extensions=False)
# Sensitive data (optional) - {key: sensitive_information} - we filter out the sensitive_information from any input to the LLM, it will only wokr with placeholder.
# Sensitive data (optional) - {key: sensitive_information} - we filter out the sensitive_information from any input to the LLM, it will only work with placeholder.
# By default we pass screenshots to the LLM which can contain your information. Set use_vision=False to disable this.
# If you trust your LLM endpoint, you don't need to worry about this.
sensitive_data: dict[str, str | dict[str, str]] = {'company_name': 'browser-use'}

View File

@@ -14,7 +14,7 @@ from browser_use import Agent, BrowserProfile
# Speed optimization instructions for the model
SPEED_OPTIMIZATION_PROMPT = """
SPEED OPTIMIZATION INSTRUCTIONS:
Speed optimization instructions:
- Be extremely concise and direct in your responses
- Get to the goal as quickly as possible
- Use multi-action sequences whenever possible to reduce steps

View File

@@ -1,3 +1,13 @@
from browser_use import Agent
import asyncio
Agent('Find the founders of browser-use').run_sync()
from browser_use import Agent, ChatOpenAI
async def main():
    """Create a single agent that looks up the browser-use founders and run it."""
    agent = Agent(
        task='Find the founders of browser-use',
        llm=ChatOpenAI(model='gpt-4.1-mini'),
    )
    await agent.run()


if __name__ == '__main__':
    asyncio.run(main())

View File

@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__)
async def main():
agent = Agent(
task="""
Objective: Navigate to the following URL whats on page 3?
Objective: Navigate to the following URL, what is on page 3?
URL: https://docs.house.gov/meetings/GO/GO00/20220929/115171/HHRG-117-GO00-20220929-SD010.pdf
""",

View File

@@ -0,0 +1,656 @@
"""Test GetDropdownOptionsEvent and SelectDropdownOptionEvent functionality.
This file consolidates all tests related to dropdown functionality including:
- Native <select> dropdowns
- ARIA role="menu" dropdowns
- Custom dropdown implementations
"""
import pytest
from pytest_httpserver import HTTPServer
from browser_use.agent.views import ActionModel, ActionResult
from browser_use.browser import BrowserSession
from browser_use.browser.events import GetDropdownOptionsEvent, NavigationCompleteEvent, SelectDropdownOptionEvent
from browser_use.browser.profile import BrowserProfile
from browser_use.controller.service import Controller
from browser_use.controller.views import GoToUrlAction
@pytest.fixture(scope='session')
def http_server():
"""Create and provide a test HTTP server that serves static content.

Session-scoped fixture: starts an ``HTTPServer``, registers three static HTML
pages (``/native-dropdown``, ``/aria-menu`` and ``/custom-dropdown``), yields the
running server to the tests and stops it when the fixture is torn down.
"""
server = HTTPServer()
server.start()
# Add route for native dropdown test page
# (a <select id="test-dropdown"> whose change handler writes the chosen text into #result)
server.expect_request('/native-dropdown').respond_with_data(
"""
<!DOCTYPE html>
<html>
<head>
<title>Native Dropdown Test</title>
</head>
<body>
<h1>Native Dropdown Test</h1>
<select id="test-dropdown" name="test-dropdown">
<option value="">Please select</option>
<option value="option1">First Option</option>
<option value="option2">Second Option</option>
<option value="option3">Third Option</option>
</select>
<div id="result">No selection made</div>
<script>
document.getElementById('test-dropdown').addEventListener('change', function(e) {
document.getElementById('result').textContent = 'Selected: ' + e.target.options[e.target.selectedIndex].text;
});
</script>
</body>
</html>
""",
content_type='text/html',
)
# Add route for ARIA menu test page
# (a <ul role="menu"> with five top-level items; "Sort" contains a nested role="menu" submenu)
server.expect_request('/aria-menu').respond_with_data(
"""
<!DOCTYPE html>
<html>
<head>
<title>ARIA Menu Test</title>
<style>
.menu {
list-style: none;
padding: 0;
margin: 0;
border: 1px solid #ccc;
background: white;
width: 200px;
}
.menu-item {
padding: 10px 20px;
border-bottom: 1px solid #eee;
}
.menu-item:hover {
background: #f0f0f0;
}
.menu-item-anchor {
text-decoration: none;
color: #333;
display: block;
}
#result {
margin-top: 20px;
padding: 10px;
border: 1px solid #ddd;
min-height: 20px;
}
</style>
</head>
<body>
<h1>ARIA Menu Test</h1>
<p>This menu uses ARIA roles instead of native select elements</p>
<ul class="menu menu-format-standard menu-regular" role="menu" id="pyNavigation1752753375773" style="display: block;">
<li class="menu-item menu-item-enabled" role="presentation">
<a href="#" onclick="pd(event);" class="menu-item-anchor" tabindex="0" role="menuitem">
<span class="menu-item-title-wrap"><span class="menu-item-title">Filter</span></span>
</a>
</li>
<li class="menu-item menu-item-enabled" role="presentation" id="menu-item-$PpyNavigation1752753375773$ppyElements$l2">
<a href="#" onclick="pd(event);" class="menu-item-anchor menu-item-expand" tabindex="0" role="menuitem" aria-haspopup="true">
<span class="menu-item-title-wrap"><span class="menu-item-title">Sort</span></span>
</a>
<div class="menu-panel-wrapper">
<ul class="menu menu-format-standard menu-regular" role="menu" id="$PpyNavigation1752753375773$ppyElements$l2">
<li class="menu-item menu-item-enabled" role="presentation">
<a href="#" onclick="pd(event);" class="menu-item-anchor" tabindex="0" role="menuitem">
<span class="menu-item-title-wrap"><span class="menu-item-title">Lowest to highest</span></span>
</a>
</li>
<li class="menu-item menu-item-enabled" role="presentation">
<a href="#" onclick="pd(event);" class="menu-item-anchor" tabindex="0" role="menuitem">
<span class="menu-item-title-wrap"><span class="menu-item-title">Highest to lowest</span></span>
</a>
</li>
</ul>
</div>
</li>
<li class="menu-item menu-item-enabled" role="presentation">
<a href="#" onclick="pd(event);" class="menu-item-anchor" tabindex="0" role="menuitem">
<span class="menu-item-title-wrap"><span class="menu-item-title">Appearance</span></span>
</a>
</li>
<li class="menu-item menu-item-enabled" role="presentation">
<a href="#" onclick="pd(event);" class="menu-item-anchor" tabindex="0" role="menuitem">
<span class="menu-item-title-wrap"><span class="menu-item-title">Summarize</span></span>
</a>
</li>
<li class="menu-item menu-item-enabled" role="presentation">
<a href="#" onclick="pd(event);" class="menu-item-anchor" tabindex="0" role="menuitem">
<span class="menu-item-title-wrap"><span class="menu-item-title">Delete</span></span>
</a>
</li>
</ul>
<div id="result">Click an option to see the result</div>
<script>
// Mock the pd function that prevents default
function pd(event) {
event.preventDefault();
const text = event.target.closest('[role="menuitem"]').textContent.trim();
document.getElementById('result').textContent = 'Clicked: ' + text;
}
</script>
</body>
</html>
""",
content_type='text/html',
)
# Add route for custom dropdown test page
# (div-based dropdown with four colour items; the menu is shown unless it carries the "hidden" class)
server.expect_request('/custom-dropdown').respond_with_data(
"""
<!DOCTYPE html>
<html>
<head>
<title>Custom Dropdown Test</title>
<style>
.dropdown {
position: relative;
display: inline-block;
width: 200px;
}
.dropdown-button {
padding: 10px;
border: 1px solid #ccc;
background: white;
cursor: pointer;
width: 100%;
}
.dropdown-menu {
position: absolute;
top: 100%;
left: 0;
right: 0;
border: 1px solid #ccc;
background: white;
display: block;
z-index: 1000;
}
.dropdown-menu.hidden {
display: none;
}
.dropdown .item {
padding: 10px;
cursor: pointer;
}
.dropdown .item:hover {
background: #f0f0f0;
}
.dropdown .item.selected {
background: #e0e0e0;
}
#result {
margin-top: 20px;
padding: 10px;
border: 1px solid #ddd;
}
</style>
</head>
<body>
<h1>Custom Dropdown Test</h1>
<p>This is a custom dropdown implementation (like Semantic UI)</p>
<div class="dropdown ui" id="custom-dropdown">
<div class="dropdown-button" onclick="toggleDropdown()">
<span id="selected-text">Choose an option</span>
</div>
<div class="dropdown-menu" id="dropdown-menu">
<div class="item" data-value="red" onclick="selectOption('Red', 'red')">Red</div>
<div class="item" data-value="green" onclick="selectOption('Green', 'green')">Green</div>
<div class="item" data-value="blue" onclick="selectOption('Blue', 'blue')">Blue</div>
<div class="item" data-value="yellow" onclick="selectOption('Yellow', 'yellow')">Yellow</div>
</div>
</div>
<div id="result">No selection made</div>
<script>
function toggleDropdown() {
const menu = document.getElementById('dropdown-menu');
menu.classList.toggle('hidden');
}
function selectOption(text, value) {
document.getElementById('selected-text').textContent = text;
document.getElementById('result').textContent = 'Selected: ' + text + ' (value: ' + value + ')';
// Mark as selected
document.querySelectorAll('.item').forEach(item => item.classList.remove('selected'));
event.target.classList.add('selected');
// Close dropdown
document.getElementById('dropdown-menu').classList.add('hidden');
}
</script>
</body>
</html>
""",
content_type='text/html',
)
# Hand the running server to the tests; the statement after the yield is the teardown step.
yield server
server.stop()
@pytest.fixture(scope='session')
def base_url(http_server):
    """Provide the root URL (scheme, host and port) of the test HTTP server."""
    host, port = http_server.host, http_server.port
    return f'http://{host}:{port}'
@pytest.fixture(scope='module')
async def browser_session():
    """Start a headless BrowserSession for the module and kill it on teardown."""
    profile = BrowserProfile(
        headless=True,
        user_data_dir=None,
        keep_alive=True,
        chromium_sandbox=False,  # Disable sandbox for CI environment
    )
    session = BrowserSession(browser_profile=profile)
    await session.start()
    yield session
    await session.kill()
@pytest.fixture(scope='function')
def controller():
    """Provide a fresh Controller instance to each test function."""
    fresh_controller = Controller()
    return fresh_controller
class TestGetDropdownOptionsEvent:
    """Test GetDropdownOptionsEvent functionality for various dropdown types.

    The dropdown tests navigate to a page served by the local HTTP server, find
    the dropdown element in the selector map, and then check both the
    ``get_dropdown_options`` controller action and a directly dispatched
    ``GetDropdownOptionsEvent``. The last test covers an unknown element index.
    """

    async def test_native_select_dropdown(self, controller, browser_session: BrowserSession, base_url):
        """Test get_dropdown_options with native HTML select element."""
        # Navigate to the native dropdown test page
        goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/native-dropdown', new_tab=False)}

        class GoToUrlActionModel(ActionModel):
            go_to_url: GoToUrlAction | None = None

        await controller.act(GoToUrlActionModel(**goto_action), browser_session)

        # Initialize the DOM state to populate the selector map
        await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)

        # Get the selector map and find the select element
        selector_map = await browser_session.get_selector_map()
        dropdown_index = None
        for idx, element in selector_map.items():
            if element.tag_name.lower() == 'select' and element.attributes.get('id') == 'test-dropdown':
                dropdown_index = idx
                break

        # Diagnostics are built outside the assertion message to avoid nested f-string quoting.
        available = [f'{idx}: {element.tag_name}' for idx, element in selector_map.items()]
        assert dropdown_index is not None, (
            f'Could not find select element in selector map. Available elements: {available}'
        )

        # Test via controller action
        class GetDropdownOptionsModel(ActionModel):
            get_dropdown_options: dict[str, int]

        result = await controller.act(
            action=GetDropdownOptionsModel(get_dropdown_options={'index': dropdown_index}),
            browser_session=browser_session,
        )

        # Verify the result
        assert isinstance(result, ActionResult)
        assert result.extracted_content is not None

        # Verify all expected options are present
        expected_options = ['Please select', 'First Option', 'Second Option', 'Third Option']
        for option in expected_options:
            assert option in result.extracted_content, f"Option '{option}' not found in result content"

        # Verify instruction is included
        assert 'Use the exact text string' in result.extracted_content and 'select_dropdown_option' in result.extracted_content

        # Also test direct event dispatch
        node = await browser_session.get_element_by_index(dropdown_index)
        assert node is not None
        event = browser_session.event_bus.dispatch(GetDropdownOptionsEvent(node=node))
        dropdown_data = await event.event_result(timeout=3.0)
        assert dropdown_data is not None
        assert 'options' in dropdown_data
        assert 'type' in dropdown_data
        assert dropdown_data['type'] == 'select'

    async def test_aria_menu_dropdown(self, controller, browser_session: BrowserSession, base_url):
        """Test get_dropdown_options with ARIA role='menu' element."""
        # Navigate to the ARIA menu test page
        goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/aria-menu', new_tab=False)}

        class GoToUrlActionModel(ActionModel):
            go_to_url: GoToUrlAction | None = None

        await controller.act(GoToUrlActionModel(**goto_action), browser_session)

        # Initialize the DOM state
        await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)

        # Get the selector map and find the ARIA menu
        selector_map = await browser_session.get_selector_map()
        menu_index = None
        for idx, element in selector_map.items():
            if (
                element.tag_name.lower() == 'ul'
                and element.attributes.get('role') == 'menu'
                and element.attributes.get('id') == 'pyNavigation1752753375773'
            ):
                menu_index = idx
                break

        # Built separately: reusing the outer quote inside a nested f-string is a
        # SyntaxError on Python versions before 3.12.
        available = [
            f"{idx}: {element.tag_name} role={element.attributes.get('role', 'None')}"
            for idx, element in selector_map.items()
        ]
        assert menu_index is not None, (
            f'Could not find ARIA menu element in selector map. Available elements: {available}'
        )

        # Test via controller action
        class GetDropdownOptionsModel(ActionModel):
            get_dropdown_options: dict[str, int]

        result = await controller.act(
            action=GetDropdownOptionsModel(get_dropdown_options={'index': menu_index}),
            browser_session=browser_session,
        )

        # Verify the result
        assert isinstance(result, ActionResult)
        assert result.extracted_content is not None

        # Verify expected ARIA menu options are present
        expected_options = ['Filter', 'Sort', 'Appearance', 'Summarize', 'Delete']
        for option in expected_options:
            assert option in result.extracted_content, f"Option '{option}' not found in result content"

        # Also test direct event dispatch
        node = await browser_session.get_element_by_index(menu_index)
        assert node is not None
        event = browser_session.event_bus.dispatch(GetDropdownOptionsEvent(node=node))
        dropdown_data = await event.event_result(timeout=3.0)
        assert dropdown_data is not None
        assert 'options' in dropdown_data
        assert 'type' in dropdown_data
        assert dropdown_data['type'] == 'aria'

    async def test_custom_dropdown(self, controller, browser_session: BrowserSession, base_url):
        """Test get_dropdown_options with custom dropdown implementation."""
        # Navigate to the custom dropdown test page
        goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/custom-dropdown', new_tab=False)}

        class GoToUrlActionModel(ActionModel):
            go_to_url: GoToUrlAction | None = None

        await controller.act(GoToUrlActionModel(**goto_action), browser_session)

        # Initialize the DOM state
        await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)

        # Get the selector map and find the custom dropdown
        selector_map = await browser_session.get_selector_map()
        dropdown_index = None
        for idx, element in selector_map.items():
            if element.attributes.get('id') == 'custom-dropdown' and 'dropdown' in element.attributes.get('class', ''):
                dropdown_index = idx
                break

        # Built separately: reusing the outer quote inside a nested f-string is a
        # SyntaxError on Python versions before 3.12.
        available = [
            f"{idx}: {element.tag_name} id={element.attributes.get('id', 'None')}"
            for idx, element in selector_map.items()
        ]
        assert dropdown_index is not None, (
            f'Could not find custom dropdown element in selector map. Available elements: {available}'
        )

        # Test via controller action
        class GetDropdownOptionsModel(ActionModel):
            get_dropdown_options: dict[str, int]

        result = await controller.act(
            action=GetDropdownOptionsModel(get_dropdown_options={'index': dropdown_index}),
            browser_session=browser_session,
        )

        # Verify the result
        assert isinstance(result, ActionResult)
        assert result.extracted_content is not None

        # Verify expected custom dropdown options are present
        expected_options = ['Red', 'Green', 'Blue', 'Yellow']
        for option in expected_options:
            assert option in result.extracted_content, f"Option '{option}' not found in result content"

        # Also test direct event dispatch
        node = await browser_session.get_element_by_index(dropdown_index)
        assert node is not None
        event = browser_session.event_bus.dispatch(GetDropdownOptionsEvent(node=node))
        dropdown_data = await event.event_result(timeout=3.0)
        assert dropdown_data is not None
        assert 'options' in dropdown_data
        assert 'type' in dropdown_data
        assert dropdown_data['type'] == 'custom'

    async def test_element_not_found_error(self, controller, browser_session: BrowserSession, base_url):
        """Test get_dropdown_options with invalid element index."""
        # Navigate to any test page
        goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/native-dropdown', new_tab=False)}

        class GoToUrlActionModel(ActionModel):
            go_to_url: GoToUrlAction | None = None

        await controller.act(GoToUrlActionModel(**goto_action), browser_session)
        # NOTE(review): the navigation action above has already been awaited; confirm that
        # expect() still resolves if NavigationCompleteEvent fired before this call, rather
        # than waiting out the 10 s timeout.
        await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0)

        # Try to get dropdown options with invalid index
        class GetDropdownOptionsModel(ActionModel):
            get_dropdown_options: dict[str, int]

        result = await controller.act(
            action=GetDropdownOptionsModel(get_dropdown_options={'index': 99999}),
            browser_session=browser_session,
        )

        # Should return an error
        assert isinstance(result, ActionResult)
        assert result.error is not None
        assert 'not found' in result.error.lower()
class TestSelectDropdownOptionEvent:
    """Test SelectDropdownOptionEvent functionality for various dropdown types."""

    @staticmethod
    async def _open_page(controller, browser_session: BrowserSession, url: str) -> None:
        """Navigate the current tab to *url*, wait for navigation to complete and build the DOM state."""

        class GoToUrlActionModel(ActionModel):
            go_to_url: GoToUrlAction | None = None

        await controller.act(GoToUrlActionModel(go_to_url=GoToUrlAction(url=url, new_tab=False)), browser_session)
        await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0)
        # Initialize the DOM state so the selector map is populated
        await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)

    @staticmethod
    async def _find_index(browser_session: BrowserSession, matches) -> int:
        """Return the selector-map index of the first element for which ``matches(element)`` is true."""
        selector_map = await browser_session.get_selector_map()
        index = next((idx for idx, element in selector_map.items() if matches(element)), None)
        assert index is not None, 'Could not find the target element in the selector map'
        return index

    @staticmethod
    async def _select_option(controller, browser_session: BrowserSession, index: int, text: str) -> None:
        """Run select_dropdown_option through the controller and check the result mentions *text*."""

        class SelectDropdownOptionModel(ActionModel):
            select_dropdown_option: dict

        action_result = await controller.act(
            SelectDropdownOptionModel(select_dropdown_option={'index': index, 'text': text}),
            browser_session,
        )
        assert isinstance(action_result, ActionResult)
        assert action_result.extracted_content is not None
        assert text in action_result.extracted_content

    @staticmethod
    async def _evaluate(browser_session: BrowserSession, expression: str, default):
        """Evaluate a JavaScript *expression* over CDP and return its value, or *default* if absent."""
        cdp_session = await browser_session.get_or_create_cdp_session()
        evaluation = await cdp_session.cdp_client.send.Runtime.evaluate(
            params={'expression': expression, 'returnByValue': True},
            session_id=cdp_session.session_id,
        )
        return evaluation.get('result', {}).get('value', default)

    async def test_select_native_dropdown_option(self, controller, browser_session: BrowserSession, base_url):
        """Test select_dropdown_option with native HTML select element."""
        await self._open_page(controller, browser_session, f'{base_url}/native-dropdown')
        dropdown_index = await self._find_index(
            browser_session,
            lambda element: element.tag_name.lower() == 'select' and element.attributes.get('id') == 'test-dropdown',
        )

        # Test via controller action
        await self._select_option(controller, browser_session, dropdown_index, 'Second Option')

        # Verify the selection actually worked using CDP
        selected_index = await self._evaluate(
            browser_session, "document.getElementById('test-dropdown').selectedIndex", -1
        )
        assert selected_index == 2, f'Expected selected index 2, got {selected_index}'

    async def test_select_aria_menu_option(self, controller, browser_session: BrowserSession, base_url):
        """Test select_dropdown_option with ARIA menu."""
        await self._open_page(controller, browser_session, f'{base_url}/aria-menu')
        menu_index = await self._find_index(
            browser_session,
            lambda element: (
                element.tag_name.lower() == 'ul'
                and element.attributes.get('role') == 'menu'
                and element.attributes.get('id') == 'pyNavigation1752753375773'
            ),
        )

        # Test via controller action
        await self._select_option(controller, browser_session, menu_index, 'Filter')

        # Verify the click had an effect using CDP
        result_text = await self._evaluate(browser_session, "document.getElementById('result').textContent", '')
        assert 'Filter' in result_text, f"Expected 'Filter' in result text, got '{result_text}'"

    async def test_select_custom_dropdown_option(self, controller, browser_session: BrowserSession, base_url):
        """Test select_dropdown_option with custom dropdown."""
        await self._open_page(controller, browser_session, f'{base_url}/custom-dropdown')
        dropdown_index = await self._find_index(
            browser_session,
            lambda element: (
                element.attributes.get('id') == 'custom-dropdown'
                and 'dropdown' in element.attributes.get('class', '')
            ),
        )

        # Test via controller action
        await self._select_option(controller, browser_session, dropdown_index, 'Blue')

        # Verify the selection worked using CDP
        result_text = await self._evaluate(browser_session, "document.getElementById('result').textContent", '')
        assert 'Blue' in result_text, f"Expected 'Blue' in result text, got '{result_text}'"

    async def test_select_invalid_option_error(self, controller, browser_session: BrowserSession, base_url):
        """Test select_dropdown_option with non-existent option text."""
        await self._open_page(controller, browser_session, f'{base_url}/native-dropdown')
        dropdown_index = await self._find_index(
            browser_session,
            lambda element: element.tag_name.lower() == 'select' and element.attributes.get('id') == 'test-dropdown',
        )

        # Try to select non-existent option via direct event
        node = await browser_session.get_element_by_index(dropdown_index)
        assert node is not None
        event = browser_session.event_bus.dispatch(SelectDropdownOptionEvent(node=node, text='Non-existent Option'))

        # Only the awaited call lives inside the try: an AssertionError raised there
        # would otherwise be caught by the broad except below and hide a real failure.
        try:
            selection_data = await event.event_result(timeout=3.0)
        except Exception as e:
            # Raising is acceptable as long as the message says the option is missing
            message = str(e).lower()
            assert 'not found' in message or 'no option' in message
        else:
            # Otherwise the returned data must carry the error
            assert selection_data is not None
            assert 'error' in selection_data or 'not found' in str(selection_data).lower()

View File

@@ -0,0 +1,112 @@
import asyncio
from typing import Any
import pytest
from browser_use.browser import BrowserProfile, BrowserSession
from browser_use.config import CONFIG
def test_chromium_args_include_proxy_flags():
    """Check that proxy server and bypass settings appear as Chromium launch flags."""
    proxy_settings = {
        'server': 'http://proxy.local:8080',
        'bypass': 'localhost,127.0.0.1',
    }
    profile = BrowserProfile(
        headless=True,
        user_data_dir=str(CONFIG.BROWSER_USE_PROFILES_DIR / 'proxy-smoke'),
        proxy=proxy_settings,
    )

    launch_args = profile.get_args()

    # Both flags must be present verbatim; the full arg list is shown on failure.
    assert '--proxy-server=http://proxy.local:8080' in launch_args, launch_args
    assert '--proxy-bypass-list=localhost,127.0.0.1' in launch_args, launch_args
@pytest.mark.asyncio
async def test_cdp_proxy_auth_handler_registers_and_responds():
    """Check that _setup_proxy_auth enables Fetch, registers an auth callback and answers challenges."""
    # Profile carrying proxy credentials only (no proxy server needed for this test)
    proxy_profile = BrowserProfile(
        headless=True,
        user_data_dir=str(CONFIG.BROWSER_USE_PROFILES_DIR / 'proxy-smoke'),
        proxy={'username': 'user', 'password': 'pass'},
    )
    session = BrowserSession(browser_profile=proxy_profile)

    # --- Stub CDP client exposing only the Fetch surface used by this test ---

    class _FetchSend:
        """Records calls made through ``client.send.Fetch`` on the owning stub."""

        def __init__(self, outer: 'StubCDP') -> None:
            self._outer = outer

        async def enable(self, params: dict, session_id: str | None = None) -> None:
            self._outer.enabled = True

        async def continueWithAuth(self, params: dict, session_id: str | None = None) -> None:
            self._outer.last_auth = {'params': params, 'session_id': session_id}

        async def continueRequest(self, params: dict, session_id: str | None = None) -> None:
            # Intentionally does nothing; present so the stub mirrors the CDP API surface
            pass

    class _Send:
        def __init__(self, outer: 'StubCDP') -> None:
            self.Fetch = _FetchSend(outer)

    class _FetchRegister:
        """Captures callbacks registered through ``client.register.Fetch``."""

        def __init__(self, outer: 'StubCDP') -> None:
            self._outer = outer

        def authRequired(self, callback) -> None:
            self._outer.auth_callback = callback

        def requestPaused(self, callback) -> None:
            self._outer.request_paused_callback = callback

    class _Register:
        def __init__(self, outer: 'StubCDP') -> None:
            self.Fetch = _FetchRegister(outer)

    class StubCDP:
        def __init__(self) -> None:
            self.enabled = False
            self.last_auth: dict[str, Any] | None = None
            self.last_default: dict[str, Any] | None = None
            self.auth_callback = None
            self.request_paused_callback = None
            self.send = _Send(self)
            self.register = _Register(self)

    root = StubCDP()

    # Only the root client is stubbed; no CDPSession is attached.
    session._cdp_client_root = root  # type: ignore[attr-defined]

    # Setup must enable Fetch and register the auth handler without raising
    await session._setup_proxy_auth()
    assert root.enabled is True
    assert callable(root.auth_callback)

    # Proxy-originated challenge: credentials from the profile should be supplied
    proxy_event = {'requestId': 'r1', 'authChallenge': {'source': 'Proxy'}}
    root.auth_callback(proxy_event, session_id='s1')  # type: ignore[misc]
    # Give the task scheduled by the callback a chance to run
    await asyncio.sleep(0.05)

    assert root.last_auth is not None
    proxy_response = root.last_auth['params']
    assert proxy_response['authChallengeResponse']['response'] == 'ProvideCredentials'
    assert proxy_response['authChallengeResponse']['username'] == 'user'
    assert proxy_response['authChallengeResponse']['password'] == 'pass'
    assert root.last_auth['session_id'] == 's1'

    # Server-originated challenge: the handler should fall back to the default response
    server_event = {'requestId': 'r2', 'authChallenge': {'source': 'Server'}}
    root.auth_callback(server_event, session_id='s2')  # type: ignore[misc]
    await asyncio.sleep(0.05)

    # last_auth must now describe the second request, answered with 'Default'
    assert root.last_auth is not None
    server_response = root.last_auth['params']
    assert server_response['requestId'] == 'r2'
    assert server_response['authChallengeResponse']['response'] == 'Default'

View File

@@ -58,7 +58,7 @@ async def download_test_server(httpserver):
return httpserver
@pytest.mark.asyncio
@pytest.mark.skip(reason='TODO: fix')
async def test_downloads_watchdog_lifecycle():
"""Test that DownloadsWatchdog starts and stops with browser session."""
# Use temp directory for downloads
@@ -94,7 +94,7 @@ async def test_downloads_watchdog_lifecycle():
await session.event_bus.stop(clear=True, timeout=5)
@pytest.mark.asyncio
@pytest.mark.skip(reason='TODO: fix')
async def test_downloads_watchdog_file_detection(download_test_server):
"""Test that DownloadsWatchdog detects file downloads."""
# Use temp directory for downloads

Some files were not shown because too many files have changed in this diff Show More