mirror of
https://github.com/browser-use/browser-use
synced 2026-04-22 17:45:09 +02:00
cdp_use.CDPClient.send_raw awaits a future that only resolves when the browser sends a response with a matching message id. There is no timeout on that await. Against the cloud browser service, the failure mode we observed is: WebSocket stays alive at the TCP/keepalive layer (proxy keeps pong-ing our pings), but the browser upstream is dead / unhealthy and never sends any CDP response. send_raw's future never resolves, and every higher-level timeout in browser-use (session.start's 15s connect guard, agent.step_timeout, tools.act's action timeout) relies on eventually getting a response — so they all wait forever too. Evidence from a 170k-task collector run: 1,090 empty-history traces, 100% hit the 240s outer watchdog, median duration 582s, max 2214s, with cloud HTTP layer clean throughout (all 200/201). One sample showed /json/version returning 200 OK and then 5 minutes of total silence on the WebSocket before forced stop — classic silent-hang. Fix: add TimeoutWrappedCDPClient, a thin subclass of cdp_use.CDPClient that wraps send_raw in asyncio.wait_for(timeout=cdp_request_timeout_s). Any CDP method that doesn't respond within the cap raises plain TimeoutError, which propagates through existing `except TimeoutError` handlers in session.py / tools/service.py. Uses the same defensive env parse pattern as BROWSER_USE_ACTION_TIMEOUT_S — rejects empty / non-numeric / nan / inf / non-positive values with a warning fallback. Default is 60s: generous for slow operations like Page.captureScreenshot or Page.printToPDF on heavy pages, but well below the 180s step timeout and any typical outer watchdog. Override via BROWSER_USE_CDP_TIMEOUT_S. Wired into both CDPClient construction sites in session.py (initial connect + reconnect path). All 17 existing real-browser tests (test_action_blank_page, test_multi_act_guards) still pass.
4001 lines
152 KiB
Python
4001 lines
152 KiB
Python
"""Event-driven browser session with backwards compatibility."""
|
|
|
|
import asyncio
|
|
import logging
|
|
import re
|
|
import time
|
|
from functools import cached_property
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING, Any, Literal, Self, Union, cast, overload
|
|
from urllib.parse import urlparse, urlunparse
|
|
from uuid import UUID
|
|
|
|
import httpx
|
|
from bubus import EventBus
|
|
from cdp_use import CDPClient
|
|
from cdp_use.cdp.fetch import AuthRequiredEvent, RequestPausedEvent
|
|
from cdp_use.cdp.network import Cookie
|
|
from cdp_use.cdp.target import SessionID, TargetID
|
|
from cdp_use.cdp.target.commands import CreateTargetParameters
|
|
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
|
from uuid_extensions import uuid7str
|
|
|
|
from browser_use.browser._cdp_timeout import TimeoutWrappedCDPClient
|
|
from browser_use.browser.cloud.cloud import CloudBrowserAuthError, CloudBrowserClient, CloudBrowserError
|
|
|
|
# CDP logging is now handled by setup_logging() in logging_config.py
|
|
# It automatically sets CDP logs to the same level as browser_use logs
|
|
from browser_use.browser.cloud.views import CloudBrowserParams, CreateBrowserRequest, ProxyCountryCode
|
|
from browser_use.browser.events import (
|
|
AgentFocusChangedEvent,
|
|
BrowserConnectedEvent,
|
|
BrowserErrorEvent,
|
|
BrowserLaunchEvent,
|
|
BrowserLaunchResult,
|
|
BrowserReconnectedEvent,
|
|
BrowserReconnectingEvent,
|
|
BrowserStartEvent,
|
|
BrowserStateRequestEvent,
|
|
BrowserStopEvent,
|
|
BrowserStoppedEvent,
|
|
CloseTabEvent,
|
|
FileDownloadedEvent,
|
|
NavigateToUrlEvent,
|
|
NavigationCompleteEvent,
|
|
NavigationStartedEvent,
|
|
SwitchTabEvent,
|
|
TabClosedEvent,
|
|
TabCreatedEvent,
|
|
)
|
|
from browser_use.browser.profile import BrowserProfile, ProxySettings
|
|
from browser_use.browser.views import BrowserStateSummary, TabInfo
|
|
from browser_use.dom.views import DOMRect, EnhancedDOMTreeNode, TargetInfo
|
|
from browser_use.observability import observe_debug
|
|
from browser_use.utils import _log_pretty_url, create_task_with_error_handling, is_new_tab_page
|
|
|
|
if TYPE_CHECKING:
|
|
from browser_use.actor.page import Page
|
|
from browser_use.browser.demo_mode import DemoMode
|
|
from browser_use.browser.watchdogs.captcha_watchdog import CaptchaWaitResult
|
|
|
|
DEFAULT_BROWSER_PROFILE = BrowserProfile()
|
|
|
|
_LOGGED_UNIQUE_SESSION_IDS = set() # track unique session IDs that have been logged to make sure we always assign a unique enough id to new sessions and avoid ambiguity in logs
|
|
red = '\033[91m'
|
|
reset = '\033[0m'
|
|
|
|
|
|
class Target(BaseModel):
    """A controllable browser target such as a page, iframe, or worker.

    Every target is a browsing context that carries its own URL, title, and
    type. Several CDP sessions may be attached to one target at once.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True, revalidate_instances='never')

    target_id: TargetID
    # Kind of target, e.g. 'page', 'iframe', 'worker'.
    target_type: str
    url: str = 'about:blank'
    title: str = 'Unknown title'
|
|
|
|
|
|
class CDPSession(BaseModel):
    """A CDP communication channel bound to one target.

    Commands for a specific target are sent over a session; one target may
    have more than one session attached to it.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True, revalidate_instances='never')

    cdp_client: CDPClient
    target_id: TargetID
    session_id: SessionID

    # Lifecycle monitoring state; filled in by SessionManager.
    _lifecycle_events: Any = PrivateAttr(default=None)
    _lifecycle_lock: Any = PrivateAttr(default=None)
|
|
|
|
|
|
class BrowserSession(BaseModel):
|
|
"""Event-driven browser session with backwards compatibility.
|
|
|
|
This class provides a 2-layer architecture:
|
|
- High-level event handling for agents/tools
|
|
- Direct CDP/Playwright calls for browser operations
|
|
|
|
Supports both event-driven and imperative calling styles.
|
|
|
|
Browser configuration is stored in the browser_profile, session identity in direct fields:
|
|
```python
|
|
# Direct settings (recommended for most users)
|
|
session = BrowserSession(headless=True, user_data_dir='./profile')
|
|
|
|
# Or use a profile (for advanced use cases)
|
|
session = BrowserSession(browser_profile=BrowserProfile(...))
|
|
|
|
# Access session fields directly, browser settings via profile or property
|
|
print(session.id) # Session field
|
|
```
|
|
"""
|
|
|
|
model_config = ConfigDict(
|
|
arbitrary_types_allowed=True,
|
|
validate_assignment=True,
|
|
extra='forbid',
|
|
revalidate_instances='never', # resets private attrs on every model rebuild
|
|
)
|
|
|
|
# Overload 1: Cloud browser mode (use cloud-specific params)
|
|
@overload
def __init__(
    self,
    *,
    # Cloud-specific parameters.
    cloud_profile_id: UUID | str | None = None,
    cloud_proxy_country_code: ProxyCountryCode | None = None,
    cloud_timeout: int | None = None,
    # Older spellings kept for backward compatibility.
    profile_id: UUID | str | None = None,
    proxy_country_code: ProxyCountryCode | None = None,
    timeout: int | None = None,
    use_cloud: bool | None = None,
    cloud_browser: bool | None = None,  # backward-compatible alias
    cloud_browser_params: CloudBrowserParams | None = None,
    # Parameters that are also valid in cloud mode.
    id: str | None = None,
    headers: dict[str, str] | None = None,
    allowed_domains: list[str] | None = None,
    prohibited_domains: list[str] | None = None,
    keep_alive: bool | None = None,
    minimum_wait_page_load_time: float | None = None,
    wait_for_network_idle_page_load_time: float | None = None,
    wait_between_actions: float | None = None,
    captcha_solver: bool | None = None,
    auto_download_pdfs: bool | None = None,
    cookie_whitelist_domains: list[str] | None = None,
    cross_origin_iframes: bool | None = None,
    highlight_elements: bool | None = None,
    dom_highlight_elements: bool | None = None,
    paint_order_filtering: bool | None = None,
    max_iframes: int | None = None,
    max_iframe_depth: int | None = None,
) -> None:
    """Typing-only signature: keyword arguments accepted in cloud browser mode."""
    ...
|
|
|
|
# Overload 2: Local browser mode (use local browser params)
|
|
@overload
def __init__(
    self,
    *,
    # Core local configuration.
    id: str | None = None,
    cdp_url: str | None = None,
    browser_profile: BrowserProfile | None = None,
    # Launch parameters for a local browser.
    executable_path: str | Path | None = None,
    headless: bool | None = None,
    user_data_dir: str | Path | None = None,
    args: list[str] | None = None,
    downloads_path: str | Path | None = None,
    # Parameters common to both modes.
    headers: dict[str, str] | None = None,
    allowed_domains: list[str] | None = None,
    prohibited_domains: list[str] | None = None,
    keep_alive: bool | None = None,
    minimum_wait_page_load_time: float | None = None,
    wait_for_network_idle_page_load_time: float | None = None,
    wait_between_actions: float | None = None,
    auto_download_pdfs: bool | None = None,
    cookie_whitelist_domains: list[str] | None = None,
    cross_origin_iframes: bool | None = None,
    highlight_elements: bool | None = None,
    dom_highlight_elements: bool | None = None,
    paint_order_filtering: bool | None = None,
    max_iframes: int | None = None,
    max_iframe_depth: int | None = None,
    # Remaining local-only parameters.
    env: dict[str, str | float | bool] | None = None,
    ignore_default_args: list[str] | Literal[True] | None = None,
    channel: str | None = None,
    chromium_sandbox: bool | None = None,
    devtools: bool | None = None,
    traces_dir: str | Path | None = None,
    accept_downloads: bool | None = None,
    permissions: list[str] | None = None,
    user_agent: str | None = None,
    screen: dict | None = None,
    viewport: dict | None = None,
    no_viewport: bool | None = None,
    device_scale_factor: float | None = None,
    record_har_content: str | None = None,
    record_har_mode: str | None = None,
    record_har_path: str | Path | None = None,
    record_video_dir: str | Path | None = None,
    record_video_framerate: int | None = None,
    record_video_size: dict | None = None,
    storage_state: str | Path | dict[str, Any] | None = None,
    disable_security: bool | None = None,
    deterministic_rendering: bool | None = None,
    proxy: ProxySettings | None = None,
    enable_default_extensions: bool | None = None,
    captcha_solver: bool | None = None,
    window_size: dict | None = None,
    window_position: dict | None = None,
    filter_highlight_ids: bool | None = None,
    profile_directory: str | None = None,
) -> None:
    """Typing-only signature: keyword arguments accepted in local browser mode."""
    ...
|
|
|
|
def __init__(
    self,
    # Core configuration
    id: str | None = None,
    cdp_url: str | None = None,
    is_local: bool = False,
    browser_profile: BrowserProfile | None = None,
    # Cloud browser params (don't mix with local browser params)
    cloud_profile_id: UUID | str | None = None,
    cloud_proxy_country_code: ProxyCountryCode | None = None,
    cloud_timeout: int | None = None,
    # Backward compatibility aliases for cloud params
    profile_id: UUID | str | None = None,
    proxy_country_code: ProxyCountryCode | None = None,
    timeout: int | None = None,
    # BrowserProfile fields that can be passed directly
    # From BrowserConnectArgs
    headers: dict[str, str] | None = None,
    # From BrowserLaunchArgs
    env: dict[str, str | float | bool] | None = None,
    executable_path: str | Path | None = None,
    headless: bool | None = None,
    args: list[str] | None = None,
    ignore_default_args: list[str] | Literal[True] | None = None,
    channel: str | None = None,
    chromium_sandbox: bool | None = None,
    devtools: bool | None = None,
    downloads_path: str | Path | None = None,
    traces_dir: str | Path | None = None,
    # From BrowserContextArgs
    accept_downloads: bool | None = None,
    permissions: list[str] | None = None,
    user_agent: str | None = None,
    screen: dict | None = None,
    viewport: dict | None = None,
    no_viewport: bool | None = None,
    device_scale_factor: float | None = None,
    record_har_content: str | None = None,
    record_har_mode: str | None = None,
    record_har_path: str | Path | None = None,
    record_video_dir: str | Path | None = None,
    record_video_framerate: int | None = None,
    record_video_size: dict | None = None,
    # From BrowserLaunchPersistentContextArgs
    user_data_dir: str | Path | None = None,
    # From BrowserNewContextArgs
    storage_state: str | Path | dict[str, Any] | None = None,
    # BrowserProfile specific fields
    ## Cloud Browser Fields
    use_cloud: bool | None = None,
    cloud_browser: bool | None = None,  # Backward compatibility alias
    cloud_browser_params: CloudBrowserParams | None = None,
    ## Other params
    disable_security: bool | None = None,
    deterministic_rendering: bool | None = None,
    allowed_domains: list[str] | None = None,
    prohibited_domains: list[str] | None = None,
    keep_alive: bool | None = None,
    proxy: ProxySettings | None = None,
    enable_default_extensions: bool | None = None,
    captcha_solver: bool | None = None,
    window_size: dict | None = None,
    window_position: dict | None = None,
    minimum_wait_page_load_time: float | None = None,
    wait_for_network_idle_page_load_time: float | None = None,
    wait_between_actions: float | None = None,
    filter_highlight_ids: bool | None = None,
    auto_download_pdfs: bool | None = None,
    profile_directory: str | None = None,
    cookie_whitelist_domains: list[str] | None = None,
    # DOM extraction layer configuration
    cross_origin_iframes: bool | None = None,
    highlight_elements: bool | None = None,
    dom_highlight_elements: bool | None = None,
    paint_order_filtering: bool | None = None,
    # Iframe processing limits
    max_iframes: int | None = None,
    max_iframe_depth: int | None = None,
):
    """Build the session from direct keyword settings and/or a BrowserProfile.

    Every non-None argument, apart from the session id, the profile object
    itself and the cloud shortcut parameters, is forwarded to BrowserProfile.
    When ``browser_profile`` is given, its explicitly-set fields are used as
    the base and the direct arguments override them.
    """
    # Snapshot the call arguments FIRST: any local bound before this line
    # would leak into the snapshot and be forwarded to BrowserProfile.
    call_args = dict(locals())

    not_profile_fields = (
        'self',
        'browser_profile',
        'id',
        'cloud_profile_id',
        'cloud_proxy_country_code',
        'cloud_timeout',
        'profile_id',
        'proxy_country_code',
        'timeout',
    )
    # Forward only values that were actually supplied, to avoid validation errors.
    profile_kwargs = {}
    for arg_name, arg_value in call_args.items():
        if arg_name in not_profile_fields or arg_value is None:
            continue
        profile_kwargs[arg_name] = arg_value

    # The cloud_* spelling wins over the older alias when both are given.
    chosen_profile_id = profile_id if cloud_profile_id is None else cloud_profile_id
    chosen_country = proxy_country_code if cloud_proxy_country_code is None else cloud_proxy_country_code
    chosen_timeout = timeout if cloud_timeout is None else cloud_timeout

    # Any cloud shortcut parameter implies a cloud browser request.
    if any(item is not None for item in (chosen_profile_id, chosen_country, chosen_timeout)):
        profile_kwargs['cloud_browser_params'] = CreateBrowserRequest(
            cloud_profile_id=chosen_profile_id,
            cloud_proxy_country_code=chosen_country,
            cloud_timeout=chosen_timeout,
        )
        profile_kwargs['use_cloud'] = True

    # Legacy `cloud_browser` is translated into `use_cloud`.
    if 'cloud_browser' in profile_kwargs:
        legacy_flag = profile_kwargs.pop('cloud_browser')
        profile_kwargs['use_cloud'] = legacy_flag

    # An explicit cloud_browser_params object always turns cloud mode on.
    if cloud_browser_params is not None:
        profile_kwargs['use_cloud'] = True

    # An executable path means a local browser even if is_local was left False.
    if executable_path is not None and is_local is False:
        profile_kwargs['is_local'] = True

    # With no cdp_url and no cloud browser (which would supply the URL later),
    # the session has to be local.
    wants_cloud = profile_kwargs.get('use_cloud') or profile_kwargs.get('cloud_browser')
    if not (cdp_url or wants_cloud):
        profile_kwargs['is_local'] = True

    if browser_profile is None:
        resolved_browser_profile = BrowserProfile(**profile_kwargs)
    else:
        # Start from the fields set on the given profile; direct kwargs take precedence.
        merged_kwargs = browser_profile.model_dump(exclude_unset=True)
        merged_kwargs.update(profile_kwargs)
        resolved_browser_profile = BrowserProfile(**merged_kwargs)

    # Hand the two real model fields to pydantic.
    super().__init__(
        id=id or str(uuid7str()),
        browser_profile=resolved_browser_profile,
    )
|
|
|
|
# Identity of this session (a uuid7 string is generated when none is given).
id: str = Field(default_factory=lambda: str(uuid7str()), description='Unique identifier for this browser session')

# Reusable browser configuration; falls back to the module-level default profile.
browser_profile: BrowserProfile = Field(
    default_factory=lambda: DEFAULT_BROWSER_PROFILE,
    description='BrowserProfile() options to use for the session, otherwise a default profile will be used',
)

# Optional (width, height) that screenshots are resized to for the LLM.
llm_screenshot_size: tuple[int, int] | None = Field(
    default=None,
    description='Target size (width, height) to resize screenshots before sending to LLM. Coordinates from LLM will be scaled back to original viewport size.',
)

# Original viewport size, cached when browser state is captured, used for coordinate conversion.
_original_viewport_size: tuple[int, int] | None = PrivateAttr(default=None)
|
|
|
|
@classmethod
def from_system_chrome(cls, profile_directory: str | None = None, **kwargs: Any) -> Self:
    """Create a BrowserSession that uses the system Chrome install and profile.

    Args:
        profile_directory: Chrome profile directory name to use. When None, the
            first profile reported by ``list_chrome_profiles()`` is selected,
            or ``'Default'`` if none are reported.
        **kwargs: Extra keyword arguments passed through to the constructor.

    Returns:
        A new session configured with the detected executable, user data
        directory, and profile directory.

    Raises:
        RuntimeError: If the Chrome executable or the Chrome profile directory
            cannot be detected.
    """
    from browser_use.skill_cli.utils import find_chrome_executable, get_chrome_profile_path, list_chrome_profiles

    executable_path = find_chrome_executable()
    if executable_path is None:
        raise RuntimeError(
            'Chrome not found. Please install Chrome or use Browser() with explicit executable_path.\n'
            'Expected locations:\n'
            ' macOS: /Applications/Google Chrome.app/Contents/MacOS/Google Chrome\n'
            ' Linux: /usr/bin/google-chrome or /usr/bin/chromium\n'
            ' Windows: C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe'
        )

    user_data_dir = get_chrome_profile_path(None)
    if user_data_dir is None:
        raise RuntimeError(
            'Could not detect Chrome profile directory for your platform.\n'
            'Expected locations:\n'
            ' macOS: ~/Library/Application Support/Google/Chrome\n'
            ' Linux: ~/.config/google-chrome or ~/.config/chromium\n'
            ' Windows: %LocalAppData%\\Google\\Chrome\\User Data'
        )

    # Only enumerate profiles when we actually have to pick one; an explicit
    # profile_directory from the caller makes the lookup unnecessary.
    if profile_directory is None:
        profiles = list_chrome_profiles()
        if profiles:
            # Use first available profile
            first_profile = profiles[0]
            profile_directory = first_profile['directory']
            logging.getLogger('browser_use').info(
                f'Auto-selected Chrome profile: {first_profile["name"]} ({profile_directory})'
            )
        else:
            profile_directory = 'Default'

    return cls(
        executable_path=executable_path,
        user_data_dir=user_data_dir,
        profile_directory=profile_directory,
        **kwargs,
    )
|
|
|
|
@classmethod
def list_chrome_profiles(cls) -> list[dict[str, str]]:
    """Return the Chrome profiles found on this system."""
    # Imported lazily and under an alias so it does not shadow this method's name.
    from browser_use.skill_cli.utils import list_chrome_profiles as _discover_profiles

    found_profiles = _discover_profiles()
    return found_profiles
|
|
|
|
# Convenience properties for common browser settings
|
|
@property
def cdp_url(self) -> str | None:
    """The CDP URL stored on the browser profile (None when unset)."""
    profile = self.browser_profile
    return profile.cdp_url
|
|
|
|
@property
def is_local(self) -> bool:
    """The browser profile's flag saying whether this is a local browser instance."""
    profile = self.browser_profile
    return profile.is_local
|
|
|
|
@property
def is_cdp_connected(self) -> bool:
    """Report whether the root CDP WebSocket is currently open.

    True only when a root CDP client exists and its WebSocket state is OPEN.
    Any other state, a missing client or socket, or an error while checking
    yields False, so callers can avoid dispatching CDP commands onto a
    broken connection.
    """
    root_client = self._cdp_client_root
    if root_client is None:
        return False
    if root_client.ws is None:
        return False

    try:
        from websockets.protocol import State

        socket_state = root_client.ws.state
        return socket_state is State.OPEN
    except Exception:
        # Best-effort check: treat any failure as "not connected".
        return False
|
|
|
|
async def wait_if_captcha_solving(self, timeout: float | None = None) -> 'CaptchaWaitResult | None':
    """Delegate to the captcha watchdog's wait, if a captcha watchdog is attached.

    Args:
        timeout: Passed through unchanged to the watchdog.

    Returns:
        Whatever the watchdog returns (a CaptchaWaitResult if we had to wait,
        or None if no captcha was in progress); None when no captcha watchdog
        is attached.
    """
    watchdog = self._captcha_watchdog
    if watchdog is None:
        return None
    return await watchdog.wait_if_captcha_solving(timeout=timeout)
|
|
|
|
@property
def is_reconnecting(self) -> bool:
    """True while a WebSocket reconnection attempt is in progress."""
    in_progress = self._reconnecting
    return in_progress
|
|
|
|
@property
def cloud_browser(self) -> bool:
    """The browser profile's ``use_cloud`` setting."""
    profile = self.browser_profile
    return profile.use_cloud
|
|
|
|
@property
def demo_mode(self) -> 'DemoMode | None':
    """Return the demo-mode helper, creating it on first access.

    Returns None when demo mode is disabled on the browser profile.
    """
    if self.browser_profile.demo_mode:
        if self._demo_mode is None:
            # Imported lazily, only when the helper is first needed.
            from browser_use.browser.demo_mode import DemoMode

            self._demo_mode = DemoMode(self)
        return self._demo_mode
    return None
|
|
|
|
# Event bus shared by the browser session and all of its watchdogs.
event_bus: EventBus = Field(default_factory=EventBus)

# Public mutable state: the target that currently has agent focus.
agent_focus_target_id: TargetID | None = None

# Private mutable state shared between watchdogs.
_cdp_client_root: CDPClient | None = PrivateAttr(default=None)
# asyncio.Lock used to prevent concurrent connections.
_connection_lock: Any = PrivateAttr(default=None)

# Public SessionManager instance; it owns all targets and sessions.
session_manager: Any = Field(default=None, exclude=True)

_cached_browser_state_summary: Any = PrivateAttr(default=None)
_cached_selector_map: dict[int, EnhancedDOMTreeNode] = PrivateAttr(default_factory=dict)
# Files downloaded during this session.
_downloaded_files: list[str] = PrivateAttr(default_factory=list)
# Messages from JavaScript dialogs that were closed automatically.
_closed_popup_messages: list[str] = PrivateAttr(default_factory=list)

# Watchdog slots.
_crash_watchdog: Any | None = PrivateAttr(default=None)
_downloads_watchdog: Any | None = PrivateAttr(default=None)
_aboutblank_watchdog: Any | None = PrivateAttr(default=None)
_security_watchdog: Any | None = PrivateAttr(default=None)
_storage_state_watchdog: Any | None = PrivateAttr(default=None)
_local_browser_watchdog: Any | None = PrivateAttr(default=None)
_default_action_watchdog: Any | None = PrivateAttr(default=None)
_dom_watchdog: Any | None = PrivateAttr(default=None)
_screenshot_watchdog: Any | None = PrivateAttr(default=None)
_permissions_watchdog: Any | None = PrivateAttr(default=None)
_recording_watchdog: Any | None = PrivateAttr(default=None)
_captcha_watchdog: Any | None = PrivateAttr(default=None)
_watchdogs_attached: bool = PrivateAttr(default=False)

_cloud_browser_client: CloudBrowserClient = PrivateAttr(default_factory=lambda: CloudBrowserClient())
_demo_mode: 'DemoMode | None' = PrivateAttr(default=None)

# WebSocket reconnection state.
# Maximum wait = attempts * timeout_per_attempt + sum(delays) + small buffer.
# With the defaults: 3 * 15s + (1+2+4)s + 2s = 54s.
RECONNECT_WAIT_TIMEOUT: float = 54.0
_reconnecting: bool = PrivateAttr(default=False)
_reconnect_event: asyncio.Event = PrivateAttr(default_factory=asyncio.Event)
_reconnect_lock: asyncio.Lock = PrivateAttr(default_factory=asyncio.Lock)
_reconnect_task: asyncio.Task | None = PrivateAttr(default=None)
_intentional_stop: bool = PrivateAttr(default=False)

_logger: Any = PrivateAttr(default=None)
|
|
|
|
@property
def logger(self) -> Any:
    """Logger whose name embeds this session's current string form."""
    # Looked up on every access rather than cached: our id and str(self) can
    # change as the browser connection state changes.
    logger_name = f'browser_use.{self}'
    return logging.getLogger(logger_name)
|
|
|
|
@cached_property
def _id_for_logs(self) -> str:
    """Short, human-friendly id used to tell BrowserSession instances apart in logs.

    Prefers the CDP port when it is numeric, does not start with '922', and has
    not already been claimed by another session; otherwise falls back to the
    last four characters of the session id.
    """
    # Tail of the random uuid: less helpful than a port, but always unique enough.
    uuid_tail = self.id[-4:]

    url_text = self.cdp_url or 'no-cdp'
    after_last_colon = url_text.rsplit(':', 1)[-1]
    port_candidate = after_last_colon.split('/', 1)[0].strip()

    if not port_candidate or not port_candidate.isdigit():
        return uuid_tail
    if port_candidate.startswith('922'):
        # Ports starting with '922' are not treated as random, so they do not identify a session.
        return uuid_tail
    if port_candidate in _LOGGED_UNIQUE_SESSION_IDS:
        return uuid_tail

    # The port is random/unique enough: claim it and use it as our log id.
    _LOGGED_UNIQUE_SESSION_IDS.add(port_candidate)
    return port_candidate
|
|
|
|
@property
def _tab_id_for_logs(self) -> str:
    """Last two characters of the focused target id, or a red '--' when nothing is focused."""
    focused_target = self.agent_focus_target_id
    if focused_target:
        return focused_target[-2:]
    return f'{red}--{reset}'
|
|
|
|
def __repr__(self) -> str:
    """Verbose form: the short log ids plus the CDP URL and the full profile."""
    session_tag = self._id_for_logs
    tab_tag = self._tab_id_for_logs
    return f'BrowserSession🅑 {session_tag} 🅣 {tab_tag} (cdp_url={self.cdp_url}, profile={self.browser_profile})'
|
|
|
|
def __str__(self) -> str:
    """Compact form: just the short session and tab ids used in logs."""
    session_tag = self._id_for_logs
    tab_tag = self._tab_id_for_logs
    return f'BrowserSession🅑 {session_tag} 🅣 {tab_tag}'
|
|
|
|
async def reset(self) -> None:
    """Tear down cached CDP state and return the session to a pristine state.

    Cancels any in-flight reconnect, clears the session manager, closes the
    root CDP client, clears caches and focus, drops every watchdog reference,
    and resets demo mode.
    """
    # Keep the auto-reconnect callback quiet while we tear things down.
    self._intentional_stop = True

    # Abort a reconnection that is still running.
    pending_reconnect = self._reconnect_task
    if pending_reconnect and not pending_reconnect.done():
        pending_reconnect.cancel()
    self._reconnect_task = None
    self._reconnecting = False
    self._reconnect_event.set()  # release anyone waiting on the reconnect

    cdp_status = 'not connected' if not self._cdp_client_root else 'connected'
    session_mgr_status = 'None' if not self.session_manager else 'exists'
    focus_tail = self.agent_focus_target_id[-4:] if self.agent_focus_target_id else "None"
    self.logger.debug(
        f'🔄 Resetting browser session (CDP: {cdp_status}, SessionManager: {session_mgr_status}, '
        f'focus: {focus_tail})'
    )

    # The session manager owns _targets, _sessions and _target_sessions.
    if self.session_manager:
        await self.session_manager.clear()
        self.session_manager = None

    # Close the WebSocket before dropping the client so no stale event handlers remain.
    if self._cdp_client_root:
        try:
            await self._cdp_client_root.stop()
            self.logger.debug('Closed CDP client WebSocket during reset')
        except Exception as e:
            # Best effort: a failed close must not abort the reset.
            self.logger.debug(f'Error closing CDP client during reset: {e}')

    self._cdp_client_root = None  # type: ignore
    self._cached_browser_state_summary = None
    self._cached_selector_map.clear()
    self._downloaded_files.clear()

    self.agent_focus_target_id = None
    if self.is_local:
        self.browser_profile.cdp_url = None

    # Drop every watchdog reference, in declaration order.
    for watchdog_slot in (
        '_crash_watchdog',
        '_downloads_watchdog',
        '_aboutblank_watchdog',
        '_security_watchdog',
        '_storage_state_watchdog',
        '_local_browser_watchdog',
        '_default_action_watchdog',
        '_dom_watchdog',
        '_screenshot_watchdog',
        '_permissions_watchdog',
        '_recording_watchdog',
        '_captcha_watchdog',
    ):
        setattr(self, watchdog_slot, None)
    self._watchdogs_attached = False

    if self._demo_mode:
        self._demo_mode.reset()
        self._demo_mode = None

    self._intentional_stop = False
    self.logger.info('✅ Browser session reset complete')
|
|
|
|
def model_post_init(self, __context) -> None:
    """Create per-instance asyncio primitives and register the session's event handlers.

    Raises:
        RuntimeError: If an ``on_BrowserStartEvent`` handler is already
            registered on the event bus.
    """
    self._connection_lock = asyncio.Lock()

    # The reconnect event starts out set, meaning no reconnection is pending.
    idle_event = asyncio.Event()
    self._reconnect_event = idle_event
    idle_event.set()

    from browser_use.browser.watchdog_base import BaseWatchdog

    # Refuse to register twice on the same bus.
    existing_handlers = self.event_bus.handlers.get('BrowserStartEvent', [])
    already_registered = any(
        'on_BrowserStartEvent' in getattr(handler, '__name__', str(handler)) for handler in existing_handlers
    )
    if already_registered:
        raise RuntimeError(
            '[BrowserSession] Duplicate handler registration attempted! '
            'on_BrowserStartEvent is already registered. '
            'This likely means BrowserSession was initialized multiple times with the same EventBus.'
        )

    handler_table = (
        (BrowserStartEvent, self.on_BrowserStartEvent),
        (BrowserStopEvent, self.on_BrowserStopEvent),
        (NavigateToUrlEvent, self.on_NavigateToUrlEvent),
        (SwitchTabEvent, self.on_SwitchTabEvent),
        (TabCreatedEvent, self.on_TabCreatedEvent),
        (TabClosedEvent, self.on_TabClosedEvent),
        (AgentFocusChangedEvent, self.on_AgentFocusChangedEvent),
        (FileDownloadedEvent, self.on_FileDownloadedEvent),
        (CloseTabEvent, self.on_CloseTabEvent),
    )
    for event_class, handler in handler_table:
        BaseWatchdog.attach_handler_to_session(self, event_class, handler)
|
|
|
|
@observe_debug(ignore_input=True, ignore_output=True, name='browser_session_start')
async def start(self) -> None:
    """Start the browser session by dispatching a BrowserStartEvent and awaiting it."""
    pending_start = self.event_bus.dispatch(BrowserStartEvent())
    await pending_start
    # Surface any exception raised by a handler; a missing result is not an error.
    await pending_start.event_result(raise_if_any=True, raise_if_none=False)
|
|
|
|
async def kill(self) -> None:
    """Force-stop the browser and reset all session state."""
    self._intentional_stop = True
    self.logger.debug('🛑 kill() called - stopping browser with force=True and resetting state')

    from browser_use.browser.events import SaveStorageStateEvent

    # Save storage state first, while CDP is still connected.
    await self.event_bus.dispatch(SaveStorageStateEvent())

    # Dispatch the stop event with force=True to kill the browser.
    forced_stop = self.event_bus.dispatch(BrowserStopEvent(force=True))
    await forced_stop

    # Shut the old bus down, reset state, then start over with a fresh bus.
    await self.event_bus.stop(clear=True, timeout=5)
    await self.reset()
    self.event_bus = EventBus()
|
|
|
|
async def stop(self) -> None:
    """Stop the session gracefully, leaving the browser process alive.

    Event buses and cached state are cleared, but the browser itself is not
    killed, which is useful when you want to free resources and reconnect later.
    """
    self._intentional_stop = True
    self.logger.debug('⏸️ stop() called - stopping browser gracefully (force=False) and resetting state')

    from browser_use.browser.events import SaveStorageStateEvent

    # Save storage state first, while CDP is still connected.
    await self.event_bus.dispatch(SaveStorageStateEvent())

    # Notify the watchdogs that the session is stopping (force=False).
    graceful_stop = self.event_bus.dispatch(BrowserStopEvent(force=False))
    await graceful_stop

    # Shut the old bus down, reset state, then start over with a fresh bus.
    await self.event_bus.stop(clear=True, timeout=5)
    await self.reset()
    self.event_bus = EventBus()
|
|
|
|
async def close(self) -> None:
    """Alias for ``stop()``; delegates to it without any extra work."""
    await self.stop()
|
|
|
|
@observe_debug(ignore_input=True, ignore_output=True, name='browser_start_event_handler')
async def on_BrowserStartEvent(self, event: BrowserStartEvent) -> dict[str, str]:
    """Handle browser start request.

    Resolves a CDP URL when none is configured (cloud browser if the profile asks for
    one, otherwise a local launch via ``BrowserLaunchEvent``), then connects to it while
    holding ``self._connection_lock`` and dispatches ``BrowserConnectedEvent``.

    Returns:
        Dict with 'cdp_url' key containing the CDP URL

    Raises:
        CloudBrowserAuthError: Re-raised unchanged from cloud browser creation.
        CloudBrowserError: If cloud browser creation fails (original error is chained).
        ValueError: If the session is not local and no cdp_url was provided.
        RuntimeError: If connect() does not finish within the connect timeout.

    Any exception is also reported through a ``BrowserErrorEvent`` before being re-raised.

    Note: This method is idempotent - calling start() multiple times is safe.
    - If already connected, it skips reconnection
    - If you need to reset state, call stop() or kill() first
    """
    # Single source of truth for the connect() guard and the message that reports it
    connect_timeout_s = 15.0

    # Initialize and attach all watchdogs FIRST so LocalBrowserWatchdog can handle BrowserLaunchEvent
    await self.attach_all_watchdogs()

    try:
        # If no CDP URL, launch local browser or cloud browser
        if not self.cdp_url:
            if self.browser_profile.use_cloud or self.browser_profile.cloud_browser_params is not None:
                # Use cloud browser service
                try:
                    # Use cloud_browser_params if provided, otherwise create empty request
                    cloud_params = self.browser_profile.cloud_browser_params or CreateBrowserRequest()
                    cloud_browser_response = await self._cloud_browser_client.create_browser(cloud_params)
                    self.browser_profile.cdp_url = cloud_browser_response.cdpUrl
                    self.browser_profile.is_local = False
                    self.logger.info('🌤️ Successfully connected to cloud browser service')
                except CloudBrowserAuthError:
                    raise
                except CloudBrowserError as e:
                    # Chain explicitly so the underlying failure is reported as the direct cause
                    raise CloudBrowserError(f'Failed to create cloud browser: {e}') from e
            elif self.is_local:
                # Launch local browser using event-driven approach
                launch_event = self.event_bus.dispatch(BrowserLaunchEvent())
                await launch_event

                # Get the CDP URL from LocalBrowserWatchdog handler result
                launch_result: BrowserLaunchResult = cast(
                    BrowserLaunchResult, await launch_event.event_result(raise_if_none=True, raise_if_any=True)
                )
                self.browser_profile.cdp_url = launch_result.cdp_url
            else:
                raise ValueError('Got BrowserSession(is_local=False) but no cdp_url was provided to connect to!')

        assert self.cdp_url and '://' in self.cdp_url

        # Use lock to prevent concurrent connection attempts (race condition protection)
        async with self._connection_lock:
            # Only connect if not already connected
            if self._cdp_client_root is None:
                # Setup browser via CDP (for both local and remote cases)
                # Global timeout prevents connect() from hanging indefinitely on
                # slow/broken WebSocket connections (common on Lambda → remote browser)
                try:
                    await asyncio.wait_for(self.connect(cdp_url=self.cdp_url), timeout=connect_timeout_s)
                except TimeoutError:
                    # Timeout cancels connect() via CancelledError, which bypasses
                    # connect()'s `except Exception` cleanup (CancelledError is BaseException).
                    # Clean up the partially-initialized client so future start attempts
                    # don't skip reconnection due to _cdp_client_root being non-None.
                    cdp_client = cast(CDPClient | None, self._cdp_client_root)
                    if cdp_client is not None:
                        try:
                            await cdp_client.stop()
                        except Exception:
                            pass  # best-effort cleanup; the timeout error below is what matters
                        self._cdp_client_root = None
                    manager = self.session_manager
                    if manager is not None:
                        try:
                            await manager.clear()
                        except Exception:
                            pass  # best-effort cleanup
                        self.session_manager = None
                    self.agent_focus_target_id = None
                    raise RuntimeError(
                        f'connect() timed out after {connect_timeout_s:.0f}s — CDP connection to {self.cdp_url} is too slow or unresponsive'
                    )
                assert self.cdp_client is not None

                # Notify that browser is connected (single place)
                # Ensure BrowserConnected handlers (storage_state restore) complete before
                # start() returns so cookies/storage are applied before navigation.
                await self.event_bus.dispatch(BrowserConnectedEvent(cdp_url=self.cdp_url))
            else:
                self.logger.debug('Already connected to CDP, skipping reconnection')

            # Demo overlay setup runs on both the fresh-connect and the already-connected
            # path (still under the lock); failures are logged and never abort startup.
            if self.browser_profile.demo_mode:
                try:
                    demo = self.demo_mode
                    if demo:
                        await demo.ensure_ready()
                except Exception as exc:
                    self.logger.warning(f'[DemoMode] Failed to inject demo overlay: {exc}')

        # Return the CDP URL for other components
        return {'cdp_url': self.cdp_url}

    except Exception as e:
        self.event_bus.dispatch(
            BrowserErrorEvent(
                error_type='BrowserStartEventError',
                message=f'Failed to start browser: {type(e).__name__} {e}',
                details={'cdp_url': self.cdp_url, 'is_local': self.is_local},
            )
        )
        if self.is_local and not isinstance(e, (CloudBrowserAuthError, CloudBrowserError)):
            self.logger.warning(
                'Local browser failed to start. Cloud browsers require no local install and work out of the box.\n'
                '  Try: Browser(use_cloud=True)  |  Get an API key: https://cloud.browser-use.com?utm_source=oss&utm_medium=browser_launch_failure'
            )
        raise
|
|
|
|
async def on_NavigateToUrlEvent(self, event: NavigateToUrlEvent) -> None:
    """Handle navigation requests - core browser functionality.

    Chooses the tab to navigate (the focused tab, an existing about:blank tab, or a
    newly created tab when ``event.new_tab`` is set), switches agent focus to it when
    it is not already focused, runs the navigation, and dispatches
    NavigationStartedEvent / NavigationCompleteEvent / AgentFocusChangedEvent.

    Logs a warning and returns without navigating when there is no agent focus target.

    Raises:
        Exception: Any failure is logged, reported through a NavigationCompleteEvent
            carrying ``error_message`` (when a target was chosen), and then re-raised.
    """
    self.logger.debug(f'[on_NavigateToUrlEvent] Received NavigateToUrlEvent: url={event.url}, new_tab={event.new_tab}')
    if not self.agent_focus_target_id:
        self.logger.warning('Cannot navigate - browser not connected')
        return

    target_id = None
    current_target_id = self.agent_focus_target_id

    # If new_tab=True but we're already in a new tab, set new_tab=False.
    # The lookup result is guarded: with no target object for the focused id there is
    # nothing to reuse, and dereferencing it would fail before the error handling below.
    current_target = self.session_manager.get_target(current_target_id)
    if event.new_tab and current_target is not None and is_new_tab_page(current_target.url):
        self.logger.debug(f'[on_NavigateToUrlEvent] Already on blank tab ({current_target.url}), reusing')
        event.new_tab = False

    try:
        # Find or create target for navigation
        self.logger.debug(f'[on_NavigateToUrlEvent] Processing new_tab={event.new_tab}')

        if event.new_tab:
            page_targets = self.session_manager.get_all_page_targets()
            self.logger.debug(f'[on_NavigateToUrlEvent] Found {len(page_targets)} existing tabs')

            # Look for existing about:blank tab that's not the current one
            for idx, target in enumerate(page_targets):
                self.logger.debug(f'[on_NavigateToUrlEvent] Tab {idx}: url={target.url}, targetId={target.target_id}')
                if target.url == 'about:blank' and target.target_id != current_target_id:
                    target_id = target.target_id
                    self.logger.debug(f'Reusing existing about:blank tab #{target_id[-4:]}')
                    break

            # Create new tab if no reusable one found
            if not target_id:
                self.logger.debug('[on_NavigateToUrlEvent] No reusable about:blank tab found, creating new tab...')
                try:
                    target_id = await self._cdp_create_new_page('about:blank')
                    self.logger.debug(f'Created new tab #{target_id[-4:]}')
                    # Dispatch TabCreatedEvent for new tab
                    await self.event_bus.dispatch(TabCreatedEvent(target_id=target_id, url='about:blank'))
                except Exception as e:
                    self.logger.error(f'[on_NavigateToUrlEvent] Failed to create new tab: {type(e).__name__}: {e}')
                    # Fall back to using current tab
                    target_id = current_target_id
                    self.logger.warning(f'[on_NavigateToUrlEvent] Falling back to current tab #{target_id[-4:]}')
        else:
            # Use current tab
            target_id = current_target_id

        # Switch to target tab if needed (for both new_tab=True and new_tab=False)
        if self.agent_focus_target_id != target_id:
            self.logger.debug(
                f'[on_NavigateToUrlEvent] Switching to target tab {target_id[-4:]} (current: {self.agent_focus_target_id[-4:] if self.agent_focus_target_id else "none"})'
            )
            # Activate target (bring to foreground)
            await self.event_bus.dispatch(SwitchTabEvent(target_id=target_id))
        else:
            self.logger.debug(f'[on_NavigateToUrlEvent] Already on target tab {target_id[-4:]}, skipping SwitchTabEvent')

        assert self.agent_focus_target_id is not None and self.agent_focus_target_id == target_id, (
            'Agent focus not updated to new target_id after SwitchTabEvent should have switched to it'
        )

        # Dispatch navigation started
        await self.event_bus.dispatch(NavigationStartedEvent(target_id=target_id, url=event.url))

        # Navigate to URL with proper lifecycle waiting
        await self._navigate_and_wait(
            event.url,
            target_id,
            timeout=event.timeout_ms / 1000 if event.timeout_ms is not None else None,
            wait_until=event.wait_until,
            nav_timeout=event.event_timeout,
        )

        # Close any extension options pages that might have opened
        await self._close_extension_options_pages()

        # Dispatch navigation complete
        self.logger.debug(f'Dispatching NavigationCompleteEvent for {event.url} (tab #{target_id[-4:]})')
        await self.event_bus.dispatch(
            NavigationCompleteEvent(
                target_id=target_id,
                url=event.url,
                status=None,  # CDP doesn't provide status directly
            )
        )
        await self.event_bus.dispatch(AgentFocusChangedEvent(target_id=target_id, url=event.url))

        # Note: These should be handled by dedicated watchdogs:
        # - Security checks (security_watchdog)
        # - Page health checks (crash_watchdog)
        # - Dialog handling (dialog_watchdog)
        # - Download handling (downloads_watchdog)
        # - DOM rebuilding (dom_watchdog)

    except Exception as e:
        self.logger.error(f'Navigation failed: {type(e).__name__}: {e}')
        # target_id is always bound (initialised to None above); it is only falsy when
        # the failure happened before a tab was chosen, in which case there is nothing to report on
        if target_id:
            await self.event_bus.dispatch(
                NavigationCompleteEvent(
                    target_id=target_id,
                    url=event.url,
                    error_message=f'{type(e).__name__}: {e}',
                )
            )
            await self.event_bus.dispatch(AgentFocusChangedEvent(target_id=target_id, url=event.url))
        raise
|
|
|
|
async def _navigate_and_wait(
|
|
self,
|
|
url: str,
|
|
target_id: str,
|
|
timeout: float | None = None,
|
|
wait_until: str = 'load',
|
|
nav_timeout: float | None = None,
|
|
) -> None:
|
|
"""Navigate to URL and wait for page readiness using CDP lifecycle events.
|
|
|
|
Polls stored lifecycle events (registered once per session in SessionManager).
|
|
wait_until controls the minimum acceptable signal: 'commit', 'domcontentloaded', 'load', 'networkidle'.
|
|
nav_timeout controls the timeout for the CDP Page.navigate() call itself (defaults to 20.0s).
|
|
"""
|
|
cdp_session = await self.get_or_create_cdp_session(target_id, focus=False)
|
|
|
|
if timeout is None:
|
|
target = self.session_manager.get_target(target_id)
|
|
current_url = target.url
|
|
same_domain = (
|
|
url.split('/')[2] == current_url.split('/')[2]
|
|
if url.startswith('http') and current_url.startswith('http')
|
|
else False
|
|
)
|
|
timeout = 3.0 if same_domain else 8.0
|
|
|
|
nav_start_time = asyncio.get_event_loop().time()
|
|
|
|
# Wrap Page.navigate() with timeout — heavy sites can block here for 10s+
|
|
# Use nav_timeout parameter if provided, otherwise default to 20.0
|
|
if nav_timeout is None:
|
|
nav_timeout = 20.0
|
|
try:
|
|
nav_result = await asyncio.wait_for(
|
|
cdp_session.cdp_client.send.Page.navigate(
|
|
params={'url': url, 'transitionType': 'address_bar'},
|
|
session_id=cdp_session.session_id,
|
|
),
|
|
timeout=nav_timeout,
|
|
)
|
|
except TimeoutError:
|
|
duration_ms = (asyncio.get_event_loop().time() - nav_start_time) * 1000
|
|
raise RuntimeError(f'Page.navigate() timed out after {nav_timeout}s ({duration_ms:.0f}ms) for {url}')
|
|
|
|
if nav_result.get('errorText'):
|
|
raise RuntimeError(f'Navigation failed: {nav_result["errorText"]}')
|
|
|
|
if wait_until == 'commit':
|
|
duration_ms = (asyncio.get_event_loop().time() - nav_start_time) * 1000
|
|
self.logger.debug(f'✅ Page ready for {url} (commit, {duration_ms:.0f}ms)')
|
|
return
|
|
|
|
navigation_id = nav_result.get('loaderId')
|
|
start_time = asyncio.get_event_loop().time()
|
|
seen_events = []
|
|
|
|
if not hasattr(cdp_session, '_lifecycle_events'):
|
|
raise RuntimeError(
|
|
f'❌ Lifecycle monitoring not enabled for {cdp_session.target_id[:8]}! '
|
|
f'This is a bug - SessionManager should have initialized it. '
|
|
f'Session: {cdp_session}'
|
|
)
|
|
|
|
# Acceptable events by readiness level (higher is always acceptable)
|
|
acceptable_events: set[str] = {'networkIdle'}
|
|
if wait_until in ('load', 'domcontentloaded'):
|
|
acceptable_events.add('load')
|
|
if wait_until == 'domcontentloaded':
|
|
acceptable_events.add('DOMContentLoaded')
|
|
|
|
poll_interval = 0.05
|
|
while (asyncio.get_event_loop().time() - start_time) < timeout:
|
|
try:
|
|
for event_data in list(cdp_session._lifecycle_events):
|
|
event_name = event_data.get('name')
|
|
event_loader_id = event_data.get('loaderId')
|
|
|
|
event_str = f'{event_name}(loader={event_loader_id[:8] if event_loader_id else "none"})'
|
|
if event_str not in seen_events:
|
|
seen_events.append(event_str)
|
|
|
|
if event_loader_id and navigation_id and event_loader_id != navigation_id:
|
|
continue
|
|
|
|
if event_name in acceptable_events:
|
|
duration_ms = (asyncio.get_event_loop().time() - nav_start_time) * 1000
|
|
self.logger.debug(f'✅ Page ready for {url} ({event_name}, {duration_ms:.0f}ms)')
|
|
return
|
|
|
|
except Exception as e:
|
|
self.logger.debug(f'Error polling lifecycle events: {e}')
|
|
|
|
await asyncio.sleep(poll_interval)
|
|
|
|
duration_ms = (asyncio.get_event_loop().time() - nav_start_time) * 1000
|
|
if not seen_events:
|
|
self.logger.error(
|
|
f'❌ No lifecycle events received for {url} after {duration_ms:.0f}ms! '
|
|
f'Monitoring may have failed. Target: {cdp_session.target_id[:8]}'
|
|
)
|
|
else:
|
|
self.logger.warning(f'⚠️ Page readiness timeout ({timeout}s, {duration_ms:.0f}ms) for {url}')
|
|
|
|
async def on_SwitchTabEvent(self, event: SwitchTabEvent) -> TargetID:
    """Switch agent focus (and the visible browser tab) to another page target.

    With ``event.target_id`` set to None the most recently listed page target is
    chosen; if there are no page targets at all, a new about:blank target is created
    and returned instead.

    Returns:
        The target id that now has (or, for a freshly created tab, will get) focus.

    Raises:
        RuntimeError: If there is no agent focus target, i.e. the browser is not connected.
    """
    if not self.agent_focus_target_id:
        raise RuntimeError('Cannot switch tabs - browser not connected')

    open_pages = self.session_manager.get_all_page_targets()

    if event.target_id is None and not open_pages:
        # Nothing to switch to: open a blank tab
        assert self._cdp_client_root is not None, 'CDP client root not initialized - browser may not be connected yet'
        created = await self._cdp_client_root.send.Target.createTarget(params={'url': 'about:blank'})
        fresh_id = created['targetId']
        # Enqueue without awaiting: these may circularly trigger SwitchTabEvent and could deadlock
        self.event_bus.dispatch(TabCreatedEvent(url='about:blank', target_id=fresh_id))
        self.event_bus.dispatch(AgentFocusChangedEvent(target_id=fresh_id, url='about:blank'))
        return fresh_id

    if event.target_id is None:
        # Default to the last entry in the page-target list, then fall through to the normal switch
        event.target_id = open_pages[-1].target_id

    assert event.target_id is not None, 'target_id must be set at this point'

    # Obtain the session and move agent focus (focus only changes for page/tab targets)
    focused_session = await self.get_or_create_cdp_session(target_id=event.target_id, focus=True)

    # Bring the tab to the foreground in the browser UI.
    # The Force Background Tab extension prevents Chrome from auto-switching when links create new tabs,
    # but an explicit switch requested by the agent must still be honoured.
    await focused_session.cdp_client.send.Target.activateTarget(params={'targetId': event.target_id})

    # Look the target up again to read its current url
    switched_to = self.session_manager.get_target(event.target_id)

    await self.event_bus.dispatch(
        AgentFocusChangedEvent(
            target_id=switched_to.target_id,
            url=switched_to.url,
        )
    )
    return switched_to.target_id
|
|
|
|
async def on_CloseTabEvent(self, event: CloseTabEvent) -> None:
    """Announce a tab closure and close the corresponding CDP target.

    Dispatches ``TabClosedEvent`` first, then asks CDP to close the target. Neither
    step propagates an exception: a failed close is logged at debug level, any other
    failure at warning level.
    """
    try:
        # Let listeners react to the closure before the target goes away
        await self.event_bus.dispatch(TabClosedEvent(target_id=event.target_id))

        try:
            # The close command is sent through the session of the currently focused target
            focus_session = await self.get_or_create_cdp_session(target_id=None, focus=False)
            await focus_session.cdp_client.send.Target.closeTarget(params={'targetId': event.target_id})
        except Exception as close_error:
            # Closing an already-closed target is expected and harmless
            self.logger.debug(f'Target may already be closed: {close_error}')
    except Exception as cleanup_error:
        self.logger.warning(f'Error during tab close cleanup: {cleanup_error}')
|
|
|
|
async def on_TabCreatedEvent(self, event: TabCreatedEvent) -> None:
    """Apply the profile's viewport settings to a newly created tab.

    Does nothing when the profile has no viewport or sets ``no_viewport``. A failure
    to apply the viewport is logged as a warning and not raised.
    """
    # Note: Tab switching prevention is handled by the Force Background Tab extension,
    # which keeps focus on the current tab when new tabs are created.

    if not self.browser_profile.viewport or self.browser_profile.no_viewport:
        return

    try:
        width = self.browser_profile.viewport.width
        height = self.browser_profile.viewport.height
        # Missing/zero scale factor falls back to 1.0
        scale = self.browser_profile.device_scale_factor or 1.0

        self.logger.info(
            f'Setting viewport to {width}x{height} with device scale factor {scale} whereas original device scale factor was {self.browser_profile.device_scale_factor}'
        )
        # Target the new tab explicitly rather than whatever tab has focus
        await self._cdp_set_viewport(width, height, scale, target_id=event.target_id)

        self.logger.debug(f'Applied viewport {width}x{height} to tab {event.target_id[-8:]}')
    except Exception as viewport_error:
        self.logger.warning(f'Failed to set viewport for new tab {event.target_id[-8:]}: {viewport_error}')
|
|
|
|
async def on_TabClosedEvent(self, event: TabClosedEvent) -> None:
    """Move agent focus elsewhere when the focused tab was the one that closed.

    No-op when there is no focused target or when a different tab closed. Otherwise a
    ``SwitchTabEvent(target_id=None)`` is dispatched and awaited.
    """
    focused_id = self.agent_focus_target_id
    if not focused_id:
        return

    if focused_id == event.target_id:
        # target_id=None leaves the choice of the replacement tab to the switch handler
        await self.event_bus.dispatch(SwitchTabEvent(target_id=None))
|
|
|
|
async def on_AgentFocusChangedEvent(self, event: AgentFocusChangedEvent) -> None:
    """Handle agent focus change - update focus and clear cache.

    Clears the DOM watchdog cache (if a DOM watchdog is attached), the cached browser
    state summary and the cached selector map, then moves agent focus to
    ``event.target_id`` and re-applies the profile viewport to that tab.

    Raises:
        RuntimeError: If the event carries no target_id (None or empty). The caches
            have already been cleared at that point.
    """
    # Guard the slice so a None target_id reaches the RuntimeError below instead of
    # failing here with a TypeError.
    target_suffix = event.target_id[-4:] if event.target_id is not None else 'None'
    self.logger.debug(f'🔄 AgentFocusChangedEvent received: target_id=...{target_suffix} url={event.url}')

    # Clear cached DOM state since focus changed
    if self._dom_watchdog:
        self._dom_watchdog.clear_cache()

    # Clear cached browser state
    self._cached_browser_state_summary = None
    self._cached_selector_map.clear()
    self.logger.debug('🔄 Cached browser state cleared')

    # Update agent focus if a specific target_id is provided (only for page/tab targets)
    if event.target_id:
        # Ensure session exists and update agent focus (validates target_type internally)
        await self.get_or_create_cdp_session(target_id=event.target_id, focus=True)

        # Apply viewport settings to the newly focused tab
        if self.browser_profile.viewport and not self.browser_profile.no_viewport:
            try:
                viewport_width = self.browser_profile.viewport.width
                viewport_height = self.browser_profile.viewport.height
                device_scale_factor = self.browser_profile.device_scale_factor or 1.0

                # Use the helper method with the current tab's target_id
                await self._cdp_set_viewport(viewport_width, viewport_height, device_scale_factor, target_id=event.target_id)

                self.logger.debug(f'Applied viewport {viewport_width}x{viewport_height} to tab {event.target_id[-8:]}')
            except Exception as e:
                # Viewport is cosmetic here; a failure must not undo the focus change
                self.logger.warning(f'Failed to set viewport for tab {event.target_id[-8:]}: {e}')
    else:
        raise RuntimeError('AgentFocusChangedEvent received with no target_id for newly focused tab')
|
|
|
|
async def on_FileDownloadedEvent(self, event: FileDownloadedEvent) -> None:
    """Record a downloaded file's path in this session's download list.

    A path is appended to ``self._downloaded_files`` only once. Events without a path
    are logged as a warning; repeated paths are logged at debug level.
    """
    self.logger.debug(f'FileDownloadedEvent received: {event.file_name} at {event.path}')

    if not event.path:
        self.logger.warning(f'FileDownloadedEvent has no path: {event}')
    elif event.path in self._downloaded_files:
        self.logger.debug(f'File already tracked: {event.path}')
    else:
        self._downloaded_files.append(event.path)
        self.logger.info(f'📁 Tracked download: {event.file_name} ({len(self._downloaded_files)} total downloads in session)')
|
|
|
|
def _cloud_session_id_from_cdp_url(self) -> str | None:
|
|
"""Derive cloud browser session ID from a Browser Use CDP URL."""
|
|
if not self.cdp_url:
|
|
return None
|
|
host = urlparse(self.cdp_url).hostname or ''
|
|
match = re.match(r'^([0-9a-fA-F-]{36})\.cdp\d+\.browser-use\.com$', host)
|
|
return match.group(1) if match else None
|
|
|
|
async def on_BrowserStopEvent(self, event: BrowserStopEvent) -> None:
    """Handle browser stop request.

    Unless the profile's ``keep_alive`` is set and the stop is not forced, this stops
    the associated cloud browser session (if one can be identified), calls ``reset()``,
    clears the profile's cdp_url for local browsers and dispatches ``BrowserStoppedEvent``.

    Exceptions are not propagated; they are reported through a ``BrowserErrorEvent``.
    """
    try:
        # keep_alive wins unless the caller forces the stop
        if self.browser_profile.keep_alive and not event.force:
            self.event_bus.dispatch(BrowserStoppedEvent(reason='Kept alive due to keep_alive=True'))
            return

        # A cloud session can come from two places:
        # 1) native use_cloud sessions (current_session_id set by create_browser)
        # 2) reconnected cdp_url sessions (UUID derived from the host name)
        session_to_stop = self._cloud_browser_client.current_session_id or self._cloud_session_id_from_cdp_url()
        if session_to_stop:
            try:
                await self._cloud_browser_client.stop_browser(session_to_stop)
                self.logger.info(f'🌤️ Cloud browser session cleaned up: {session_to_stop}')
            except Exception as cleanup_error:
                self.logger.debug(f'Failed to cleanup cloud browser session {session_to_stop}: {cleanup_error}')
            finally:
                # Always close the httpx client to free connection pool memory
                try:
                    await self._cloud_browser_client.close()
                except Exception:
                    pass

        # Drop CDP sessions and cached state before announcing the stop
        self.logger.info(
            f'📢 on_BrowserStopEvent - Calling reset() (force={event.force}, keep_alive={self.browser_profile.keep_alive})'
        )
        await self.reset()

        # A local browser's CDP URL is no longer valid after the stop
        if self.is_local:
            self.browser_profile.cdp_url = None

        # Notify stop and wait for all handlers to complete
        # LocalBrowserWatchdog listens for BrowserStopEvent and dispatches BrowserKillEvent
        await self.event_bus.dispatch(BrowserStoppedEvent(reason='Stopped by request'))

    except Exception as stop_error:
        self.event_bus.dispatch(
            BrowserErrorEvent(
                error_type='BrowserStopEventError',
                message=f'Failed to stop browser: {type(stop_error).__name__} {stop_error}',
                details={'cdp_url': self.cdp_url, 'is_local': self.is_local},
            )
        )
|
|
|
|
# region - ========== CDP-based replacements for browser_context operations ==========
|
|
@property
def cdp_client(self) -> CDPClient:
    """Return the cached root CDP client stored in ``self._cdp_client_root``.

    Fails an assertion while no root client is set, i.e. before the browser is connected.
    """
    root_client = self._cdp_client_root
    assert root_client is not None, 'CDP client not initialized - browser may not be connected yet'
    return root_client
|
|
|
|
async def new_page(self, url: str | None = None) -> 'Page':
    """Open a new tab through CDP ``Target.createTarget`` and wrap it as an actor Page.

    Args:
        url: Address to open; 'about:blank' is used when omitted or empty.

    Returns:
        An actor ``Page`` bound to this session and the created target id.
    """
    from cdp_use.cdp.target.commands import CreateTargetParameters

    create_params: CreateTargetParameters = {'url': url or 'about:blank'}
    created = await self.cdp_client.send.Target.createTarget(create_params)

    # Imported late to avoid a circular import
    from browser_use.actor.page import Page as Target

    return Target(self, created['targetId'])
|
|
|
|
async def get_current_page(self) -> 'Page | None':
    """Return the current target wrapped as an actor Page.

    Returns:
        A ``Page`` for the target reported by ``get_current_target_info()``, or None
        when that call yields nothing.
    """
    info = await self.get_current_target_info()
    if info:
        from browser_use.actor.page import Page as Target

        return Target(self, info['targetId'])
    return None
|
|
|
|
async def must_get_current_page(self) -> 'Page':
    """Return the current page as an actor Page, failing loudly when there is none.

    Raises:
        RuntimeError: If ``get_current_page()`` returns nothing.
    """
    current = await self.get_current_page()
    if current:
        return current
    raise RuntimeError('No current target found')
|
|
|
|
async def get_pages(self) -> list['Page']:
    """Return an actor Page for every page target known to the SessionManager.

    Yields an empty list while no SessionManager is set.
    """
    # Imported late to avoid a circular import
    from browser_use.actor.page import Page as PageActor

    manager = self.session_manager
    if not manager:
        return []
    return [PageActor(self, page_target.target_id) for page_target in manager.get_all_page_targets()]
|
|
|
|
def get_focused_target(self) -> 'Target | None':
    """Look up the target that currently holds agent focus.

    Returns:
        Whatever the SessionManager reports as the focused target, or None while no
        SessionManager is set.
    """
    manager = self.session_manager
    return manager.get_focused_target() if manager else None
|
|
|
|
def get_page_targets(self) -> list['Target']:
    """List the page/tab targets tracked by the SessionManager.

    Returns:
        The SessionManager's page targets, or an empty list while no SessionManager is set.
    """
    manager = self.session_manager
    return manager.get_all_page_targets() if manager else []
|
|
|
|
async def close_page(self, page: 'Union[Page, str]') -> None:
    """Close a tab through CDP ``Target.closeTarget``.

    Args:
        page: Either an actor Page (its ``_target_id`` is used) or anything else, which
            is converted with ``str()`` and treated as the target id.
    """
    from cdp_use.cdp.target.commands import CloseTargetParameters

    # Imported late to avoid a circular import
    from browser_use.actor.page import Page as Target

    closing_id = page._target_id if isinstance(page, Target) else str(page)

    close_params: CloseTargetParameters = {'targetId': closing_id}
    await self.cdp_client.send.Target.closeTarget(close_params)
|
|
|
|
async def cookies(self) -> list['Cookie']:
    """Return the 'cookies' list from a CDP ``Storage.getCookies`` call on the root client.

    The call is made without parameters, so no filtering is applied here.
    """
    response = await self.cdp_client.send.Storage.getCookies()
    return response['cookies']
|
|
|
|
async def clear_cookies(self) -> None:
    """Clear all browser cookies via CDP ``Network.clearBrowserCookies`` on the root client."""
    await self.cdp_client.send.Network.clearBrowserCookies()
|
|
|
|
async def export_storage_state(self, output_path: str | Path | None = None) -> dict[str, Any]:
|
|
"""Export all browser cookies and storage to storage_state format.
|
|
|
|
Extracts decrypted cookies via CDP, bypassing keychain encryption.
|
|
|
|
Args:
|
|
output_path: Optional path to save storage_state.json. If None, returns dict only.
|
|
|
|
Returns:
|
|
Storage state dict with cookies in Playwright format.
|
|
|
|
"""
|
|
from pathlib import Path
|
|
|
|
# Get all cookies using Storage.getCookies (returns decrypted cookies from all domains)
|
|
cookies = await self._cdp_get_cookies()
|
|
|
|
# Convert CDP cookie format to Playwright storage_state format
|
|
storage_state = {
|
|
'cookies': [
|
|
{
|
|
'name': c['name'],
|
|
'value': c['value'],
|
|
'domain': c['domain'],
|
|
'path': c['path'],
|
|
'expires': c.get('expires', -1),
|
|
'httpOnly': c.get('httpOnly', False),
|
|
'secure': c.get('secure', False),
|
|
'sameSite': c.get('sameSite', 'Lax'),
|
|
}
|
|
for c in cookies
|
|
],
|
|
'origins': [], # Could add localStorage/sessionStorage extraction if needed
|
|
}
|
|
|
|
if output_path:
|
|
import json
|
|
|
|
output_file = Path(output_path).expanduser().resolve()
|
|
output_file.parent.mkdir(parents=True, exist_ok=True)
|
|
output_file.write_text(json.dumps(storage_state, indent=2, ensure_ascii=False), encoding='utf-8')
|
|
self.logger.info(f'💾 Exported {len(cookies)} cookies to {output_file}')
|
|
|
|
return storage_state
|
|
|
|
async def get_or_create_cdp_session(self, target_id: TargetID | None = None, focus: bool = True) -> CDPSession:
    """Get CDP session for a target from the event-driven pool.

    With autoAttach=True, sessions are created automatically by Chrome and added
    to the pool via Target.attachedToTarget events. This method retrieves them,
    polling for up to 2 seconds when the session is not in the pool yet.

    Args:
        target_id: Target ID to get session for. If None, uses current agent focus
            (after the SessionManager has validated it).
        focus: If True, switches agent focus to this target (page targets only) and
            sends a best-effort ``Runtime.runIfWaitingForDebugger`` to the session.

    Returns:
        CDPSession for the specified target.

    Raises:
        ValueError: If target doesn't exist or session is not available.
        AssertionError: If the root CDP client or the SessionManager is not initialized.
    """
    assert self._cdp_client_root is not None, 'Root CDP client not initialized'
    assert self.session_manager is not None, 'SessionManager not initialized'

    # If no target_id specified, ensure current agent focus is valid and wait for recovery if needed
    if target_id is None:
        # Validate and wait for focus recovery if stale (centralized protection)
        focus_valid = await self.session_manager.ensure_valid_focus(timeout=5.0)
        if not focus_valid:
            raise ValueError(
                'No valid agent focus available - target may have detached and recovery failed. '
                'This indicates browser is in an unstable state.'
            )

        assert self.agent_focus_target_id is not None, 'Focus validation passed but agent_focus_target_id is None'
        target_id = self.agent_focus_target_id

    session = self.session_manager._get_session_for_target(target_id)

    if not session:
        # Session not in pool yet - wait for attach event
        self.logger.debug(f'[SessionManager] Waiting for target {target_id[:8]}... to attach...')

        # Wait up to 2 seconds for the attach event (20 polls, 100ms apart).
        # Counting from 1 makes `attempt * 100` the time actually slept so far.
        for attempt in range(1, 21):
            await asyncio.sleep(0.1)
            session = self.session_manager._get_session_for_target(target_id)
            if session:
                self.logger.debug(f'[SessionManager] Target appeared after {attempt * 100}ms')
                break

        if not session:
            # Timeout - target doesn't exist
            raise ValueError(f'Target {target_id} not found - may have detached or never existed')

    # Validate session is still active
    is_valid = await self.session_manager.validate_session(target_id)
    if not is_valid:
        raise ValueError(f'Target {target_id} has detached - no active sessions')

    # Update focus if requested
    # CRITICAL: Only allow focus change to 'page' type targets, not iframes/workers
    if focus and self.agent_focus_target_id != target_id:
        # Get target type from SessionManager
        target = self.session_manager.get_target(target_id)
        target_type = target.target_type if target else 'unknown'

        if target_type == 'page':
            # Format current focus safely (could be None after detach)
            current_focus = self.agent_focus_target_id[:8] if self.agent_focus_target_id else 'None'
            self.logger.debug(f'[SessionManager] Switching focus: {current_focus}... → {target_id[:8]}...')
            self.agent_focus_target_id = target_id
        else:
            # Ignore focus request for non-page targets (iframes, workers, etc.)
            # These can detach at any time, causing agent_focus to point to dead target
            current_focus = self.agent_focus_target_id[:8] if self.agent_focus_target_id else 'None'
            self.logger.debug(
                f'[SessionManager] Ignoring focus request for {target_type} target {target_id[:8]}... '
                f'(agent_focus stays on {current_focus}...)'
            )

    # Resume if waiting for debugger (non-essential, don't let it block connect)
    if focus:
        try:
            await asyncio.wait_for(
                session.cdp_client.send.Runtime.runIfWaitingForDebugger(session_id=session.session_id),
                timeout=3.0,
            )
        except Exception:
            pass  # May fail if not waiting, or timeout — either is fine

    return session
|
|
|
|
async def set_extra_headers(self, headers: dict[str, str], target_id: TargetID | None = None) -> None:
    """Install extra HTTP headers on a target via CDP ``Network.setExtraHTTPHeaders``.

    Once set, the headers are sent with every HTTP request the target makes.
    The Network domain has to be enabled for this to work; it is normally
    enabled for page targets by ``SessionManager._enable_page_monitoring``,
    and this method enables it again defensively.

    Args:
        headers: Mapping of header name -> value to inject into every request.
        target_id: Target to configure. When ``None``, the current agent focus
            target is used; if nothing is focused the call silently does nothing.
    """
    if target_id is None:
        target_id = self.agent_focus_target_id
        if not target_id:
            # No explicit target and no focused target: nothing to configure.
            return

    # focus=False: we only need a session handle, agent focus must not move.
    target_session = await self.get_or_create_cdp_session(target_id, focus=False)
    network = target_session.cdp_client.send.Network
    session_id = target_session.session_id

    # Network.enable is idempotent, so calling it again is safe.
    await network.enable(session_id=session_id)
    await network.setExtraHTTPHeaders(params={'headers': cast(Any, headers)}, session_id=session_id)
|
|
|
|
# endregion - ========== CDP-based ... ==========
|
|
|
|
# region - ========== Helper Methods ==========
|
|
@observe_debug(ignore_input=True, ignore_output=True, name='get_browser_state_summary')
async def get_browser_state_summary(
    self,
    include_screenshot: bool = True,
    cached: bool = False,
    include_recent_events: bool = False,
) -> BrowserStateSummary:
    """Return a summary of the current browser state, optionally served from cache.

    Args:
        include_screenshot: Whether the summary must carry a screenshot.
        cached: Allow returning the previously cached summary. The cache is
            only used when it has a DOM state with at least one entry in its
            selector map and, if a screenshot is requested, a screenshot.
        include_recent_events: Forwarded to the ``BrowserStateRequestEvent``.

    Returns:
        The cached summary when it is usable, otherwise the result produced by
        the handler of a freshly dispatched ``BrowserStateRequestEvent``.
    """
    snapshot = self._cached_browser_state_summary
    if cached and snapshot is not None and snapshot.dom_state:
        selector_map = snapshot.dom_state.selector_map
        screenshot_missing = include_screenshot and not snapshot.screenshot

        cache_usable = not screenshot_missing and bool(selector_map) and len(selector_map) > 0
        if cache_usable:
            self.logger.debug('🔄 Using pre-cached browser state summary for open tab')
            return snapshot

        # Cache rejected: say why, then fall through and fetch a fresh state.
        if screenshot_missing:
            self.logger.debug('⚠️ Cached browser state has no screenshot, fetching fresh state with screenshot')
        else:
            self.logger.debug('⚠️ Cached browser state has 0 interactive elements, fetching fresh state')

    # Dispatch the request on the event bus and wait for the handler's result.
    request = BrowserStateRequestEvent(
        include_dom=True,
        include_screenshot=include_screenshot,
        include_recent_events=include_recent_events,
    )
    dispatched = cast(BrowserStateRequestEvent, self.event_bus.dispatch(request))

    # The handler returns the BrowserStateSummary directly.
    summary = await dispatched.event_result(raise_if_none=True, raise_if_any=True)
    assert summary is not None and summary.dom_state is not None
    return summary
|
|
|
|
async def get_state_as_text(self) -> str:
    """Return the current browser state rendered as text.

    Fetches a browser state summary with default options and returns the
    result of ``llm_representation()`` on its DOM state.
    """
    summary = await self.get_browser_state_summary()
    assert summary.dom_state is not None
    return summary.dom_state.llm_representation()
|
|
|
|
async def attach_all_watchdogs(self) -> None:
    """Instantiate the session's watchdogs and attach each one, at most once.

    Watchdog classes are imported lazily inside the method. For every
    watchdog, ``model_rebuild()`` is called on the class, an instance is
    built with this session's event bus and a reference to the session, the
    instance is stored on a private attribute, and ``attach_to_session()``
    is called on it.

    Always created: downloads, local browser, security, about:blank, popups,
    permissions, default action, screenshot, DOM and recording watchdogs.
    Created only when configured on the browser profile:
      * storage state -- ``storage_state`` or ``user_data_dir`` is set
      * HAR recording -- ``record_har_path`` is set
      * captcha -- ``captcha_solver`` is set

    Repeat calls return immediately because ``_watchdogs_attached`` is set
    once everything has been attached.
    """
    if self._watchdogs_attached:
        self.logger.debug('Watchdogs already attached, skipping duplicate attachment')
        return

    # NOTE: CrashWatchdog is currently not imported or attached here.
    from browser_use.browser.watchdogs.aboutblank_watchdog import AboutBlankWatchdog
    from browser_use.browser.watchdogs.captcha_watchdog import CaptchaWatchdog
    from browser_use.browser.watchdogs.default_action_watchdog import DefaultActionWatchdog
    from browser_use.browser.watchdogs.dom_watchdog import DOMWatchdog
    from browser_use.browser.watchdogs.downloads_watchdog import DownloadsWatchdog
    from browser_use.browser.watchdogs.har_recording_watchdog import HarRecordingWatchdog
    from browser_use.browser.watchdogs.local_browser_watchdog import LocalBrowserWatchdog
    from browser_use.browser.watchdogs.permissions_watchdog import PermissionsWatchdog
    from browser_use.browser.watchdogs.popups_watchdog import PopupsWatchdog
    from browser_use.browser.watchdogs.recording_watchdog import RecordingWatchdog
    from browser_use.browser.watchdogs.screenshot_watchdog import ScreenshotWatchdog
    from browser_use.browser.watchdogs.security_watchdog import SecurityWatchdog
    from browser_use.browser.watchdogs.storage_state_watchdog import StorageStateWatchdog

    profile = self.browser_profile

    def _build(watchdog_cls, **options):
        """Rebuild the watchdog's model, then create an instance bound to this session."""
        watchdog_cls.model_rebuild()
        return watchdog_cls(event_bus=self.event_bus, browser_session=self, **options)

    # Downloads
    self._downloads_watchdog = _build(DownloadsWatchdog)
    self._downloads_watchdog.attach_to_session()
    if profile.auto_download_pdfs:
        self.logger.debug('📄 PDF auto-download enabled for this session')

    # Storage state: only when the user supplied storage_state or user_data_dir,
    # which is taken as a signal that they want persistence.
    wants_persistence = profile.storage_state is not None or profile.user_data_dir is not None
    if wants_persistence:
        self._storage_state_watchdog = _build(
            StorageStateWatchdog,
            # More conservative settings when auto-enabled
            auto_save_interval=60.0,  # 1 minute instead of 30 seconds
            save_on_change=False,  # Only save on shutdown by default
        )
        self._storage_state_watchdog.attach_to_session()
        self.logger.debug(
            f'🍪 StorageStateWatchdog enabled (storage_state: {bool(profile.storage_state)}, user_data_dir: {bool(profile.user_data_dir)})'
        )
    else:
        self.logger.debug('🍪 StorageStateWatchdog disabled (no storage_state or user_data_dir configured)')

    # Local browser
    self._local_browser_watchdog = _build(LocalBrowserWatchdog)
    self._local_browser_watchdog.attach_to_session()

    # Security: enforces the allowed_domains policy only; core navigation is
    # handled by the browser session itself.
    self._security_watchdog = _build(SecurityWatchdog)
    self._security_watchdog.attach_to_session()

    # about:blank pages and the DVD loading animation on first load
    self._aboutblank_watchdog = _build(AboutBlankWatchdog)
    self._aboutblank_watchdog.attach_to_session()

    # JS dialogs: alert, confirm, onbeforeunload, ...
    self._popups_watchdog = _build(PopupsWatchdog)
    self._popups_watchdog.attach_to_session()

    # Browser permissions: clipboard, microphone, camera, ...
    self._permissions_watchdog = _build(PermissionsWatchdog)
    self._permissions_watchdog.attach_to_session()

    # Default actions: click, type, scroll, history navigation, refresh, wait,
    # send keys, upload file, scroll to text, ...
    self._default_action_watchdog = _build(DefaultActionWatchdog)
    self._default_action_watchdog.attach_to_session()

    # Screenshots
    self._screenshot_watchdog = _build(ScreenshotWatchdog)
    self._screenshot_watchdog.attach_to_session()

    # DOM tree / interactive element detection (depends on ScreenshotWatchdog)
    self._dom_watchdog = _build(DOMWatchdog)
    self._dom_watchdog.attach_to_session()

    # Video recording
    self._recording_watchdog = _build(RecordingWatchdog)
    self._recording_watchdog.attach_to_session()

    # HAR capture, only when a HAR path is configured
    if profile.record_har_path:
        self._har_recording_watchdog = _build(HarRecordingWatchdog)
        self._har_recording_watchdog.attach_to_session()

    # Captcha solver events from the browser proxy, only when configured
    if profile.captcha_solver:
        self._captcha_watchdog = _build(CaptchaWatchdog)
        self._captcha_watchdog.attach_to_session()

    # Remember that attachment happened so later calls are no-ops.
    self._watchdogs_attached = True
|
|
|
|
async def connect(self, cdp_url: str | None = None) -> Self:
|
|
"""Connect to a remote chromium-based browser via CDP using cdp-use.
|
|
|
|
This MUST succeed or the browser is unusable. Fails hard on any error.
|
|
"""
|
|
|
|
self.browser_profile.cdp_url = cdp_url or self.cdp_url
|
|
if not self.cdp_url:
|
|
raise RuntimeError('Cannot setup CDP connection without CDP URL')
|
|
|
|
# Prevent duplicate connections - clean up existing connection first
|
|
if self._cdp_client_root is not None:
|
|
self.logger.warning(
|
|
'⚠️ connect() called but CDP client already exists! Cleaning up old connection before creating new one.'
|
|
)
|
|
try:
|
|
await self._cdp_client_root.stop()
|
|
except Exception as e:
|
|
self.logger.debug(f'Error stopping old CDP client: {e}')
|
|
self._cdp_client_root = None
|
|
|
|
if not self.cdp_url.startswith('ws'):
|
|
# If it's an HTTP URL, fetch the WebSocket URL from /json/version endpoint
|
|
parsed_url = urlparse(self.cdp_url)
|
|
path = parsed_url.path.rstrip('/')
|
|
|
|
if not path.endswith('/json/version'):
|
|
path = path + '/json/version'
|
|
|
|
url = urlunparse(
|
|
(parsed_url.scheme, parsed_url.netloc, path, parsed_url.params, parsed_url.query, parsed_url.fragment)
|
|
)
|
|
|
|
# Run a tiny HTTP client to query for the WebSocket URL from the /json/version endpoint
|
|
# Default httpx timeout is 5s which can race the global wait_for(connect(), 15s).
|
|
# Use 30s as a safety net for direct connect() callers; the wait_for is the real deadline.
|
|
# For localhost/127.0.0.1, disable trust_env to prevent proxy env vars (HTTP_PROXY, HTTPS_PROXY)
|
|
# from routing local requests through a proxy, which causes 502 errors on Windows.
|
|
# Remote CDP URLs should still respect proxy settings.
|
|
is_localhost = parsed_url.hostname in ('localhost', '127.0.0.1', '::1')
|
|
async with httpx.AsyncClient(timeout=httpx.Timeout(30.0), trust_env=not is_localhost) as client:
|
|
headers = dict(self.browser_profile.headers or {})
|
|
from browser_use.utils import get_browser_use_version
|
|
|
|
headers.setdefault('User-Agent', f'browser-use/{get_browser_use_version()}')
|
|
version_info = await client.get(url, headers=headers)
|
|
self.logger.debug(f'Raw version info: {str(version_info)}')
|
|
self.browser_profile.cdp_url = version_info.json()['webSocketDebuggerUrl']
|
|
|
|
assert self.cdp_url is not None, 'CDP URL is None.'
|
|
|
|
browser_location = 'local browser' if self.is_local else 'remote browser'
|
|
self.logger.debug(f'🌎 Connecting to existing chromium-based browser via CDP: {self.cdp_url} -> ({browser_location})')
|
|
|
|
try:
|
|
# Create and store the CDP client for direct CDP communication
|
|
headers = dict(getattr(self.browser_profile, 'headers', None) or {})
|
|
if not self.is_local:
|
|
from browser_use.utils import get_browser_use_version
|
|
|
|
headers.setdefault('User-Agent', f'browser-use/{get_browser_use_version()}')
|
|
self._cdp_client_root = TimeoutWrappedCDPClient(
|
|
self.cdp_url,
|
|
additional_headers=headers or None,
|
|
max_ws_frame_size=200 * 1024 * 1024, # Use 200MB limit to handle pages with very large DOMs
|
|
)
|
|
assert self._cdp_client_root is not None
|
|
await self._cdp_client_root.start()
|
|
|
|
# Initialize event-driven session manager FIRST (before enabling autoAttach)
|
|
# SessionManager will:
|
|
# 1. Register attach/detach event handlers
|
|
# 2. Discover and attach to all existing targets
|
|
# 3. Initialize sessions and enable lifecycle monitoring
|
|
# 4. Enable autoAttach for future targets
|
|
from browser_use.browser.session_manager import SessionManager
|
|
|
|
self.session_manager = SessionManager(self)
|
|
await self.session_manager.start_monitoring()
|
|
self.logger.debug('Event-driven session manager started')
|
|
|
|
# Enable auto-attach so Chrome automatically notifies us when NEW targets attach/detach
|
|
# This is the foundation of event-driven session management
|
|
await self._cdp_client_root.send.Target.setAutoAttach(
|
|
params={'autoAttach': True, 'waitForDebuggerOnStart': False, 'flatten': True}
|
|
)
|
|
self.logger.debug('CDP client connected with auto-attach enabled')
|
|
|
|
# Get browser targets from SessionManager (source of truth)
|
|
# SessionManager has already discovered all targets via start_monitoring()
|
|
page_targets_from_manager = self.session_manager.get_all_page_targets()
|
|
|
|
# Check for chrome://newtab pages and redirect them to about:blank (in parallel)
|
|
from browser_use.utils import is_new_tab_page
|
|
|
|
async def _redirect_newtab(target):
|
|
target_url = target.url
|
|
target_id = target.target_id
|
|
self.logger.debug(f'🔄 Redirecting {target_url} to about:blank for target {target_id}')
|
|
try:
|
|
session = await self.get_or_create_cdp_session(target_id, focus=False)
|
|
await session.cdp_client.send.Page.navigate(params={'url': 'about:blank'}, session_id=session.session_id)
|
|
target.url = 'about:blank'
|
|
except Exception as e:
|
|
self.logger.warning(f'Failed to redirect {target_url}: {e}')
|
|
|
|
redirect_tasks = [
|
|
_redirect_newtab(target)
|
|
for target in page_targets_from_manager
|
|
if is_new_tab_page(target.url) and target.url != 'about:blank'
|
|
]
|
|
if redirect_tasks:
|
|
await asyncio.gather(*redirect_tasks, return_exceptions=True)
|
|
|
|
# Ensure we have at least one page
|
|
if not page_targets_from_manager:
|
|
new_target = await self._cdp_client_root.send.Target.createTarget(params={'url': 'about:blank'})
|
|
target_id = new_target['targetId']
|
|
self.logger.debug(f'📄 Created new blank page: {target_id}')
|
|
else:
|
|
target_id = page_targets_from_manager[0].target_id
|
|
self.logger.debug(f'📄 Using existing page: {target_id}')
|
|
|
|
# Set up initial focus using the public API
|
|
# Note: get_or_create_cdp_session() will wait for attach event and set focus
|
|
try:
|
|
await self.get_or_create_cdp_session(target_id, focus=True)
|
|
# agent_focus_target_id is now set by get_or_create_cdp_session
|
|
self.logger.debug(f'📄 Agent focus set to {target_id[:8]}...')
|
|
except ValueError as e:
|
|
raise RuntimeError(f'Failed to get session for initial target {target_id}: {e}') from e
|
|
|
|
# Note: Lifecycle monitoring is enabled automatically in SessionManager._handle_target_attached()
|
|
# when targets attach, so no manual enablement needed!
|
|
|
|
# Enable proxy authentication handling if configured
|
|
await self._setup_proxy_auth()
|
|
|
|
# Attach WS drop detection callback for auto-reconnection
|
|
self._intentional_stop = False
|
|
self._attach_ws_drop_callback()
|
|
|
|
# Verify the target is working
|
|
if self.agent_focus_target_id:
|
|
target = self.session_manager.get_target(self.agent_focus_target_id)
|
|
if target.title == 'Unknown title':
|
|
self.logger.warning('Target created but title is unknown (may be normal for about:blank)')
|
|
|
|
# Dispatch TabCreatedEvent for all initial tabs (so watchdogs can initialize)
|
|
for idx, target in enumerate(page_targets_from_manager):
|
|
target_url = target.url
|
|
self.logger.debug(f'Dispatching TabCreatedEvent for initial tab {idx}: {target_url}')
|
|
self.event_bus.dispatch(TabCreatedEvent(url=target_url, target_id=target.target_id))
|
|
|
|
# Dispatch initial focus event
|
|
if page_targets_from_manager:
|
|
initial_url = page_targets_from_manager[0].url
|
|
self.event_bus.dispatch(AgentFocusChangedEvent(target_id=page_targets_from_manager[0].target_id, url=initial_url))
|
|
self.logger.debug(f'Initial agent focus set to tab 0: {initial_url}')
|
|
|
|
except Exception as e:
|
|
# Fatal error - browser is not usable without CDP connection
|
|
self.logger.error(f'❌ FATAL: Failed to setup CDP connection: {e}')
|
|
self.logger.error('❌ Browser cannot continue without CDP connection')
|
|
|
|
# Clear SessionManager state
|
|
if self.session_manager:
|
|
try:
|
|
await self.session_manager.clear()
|
|
self.logger.debug('Cleared SessionManager state after initialization failure')
|
|
except Exception as cleanup_error:
|
|
self.logger.debug(f'Error clearing SessionManager: {cleanup_error}')
|
|
|
|
# Close CDP client WebSocket and unregister handlers
|
|
if self._cdp_client_root:
|
|
try:
|
|
await self._cdp_client_root.stop() # Close WebSocket and unregister handlers
|
|
self.logger.debug('Closed CDP client WebSocket after initialization failure')
|
|
except Exception as cleanup_error:
|
|
self.logger.debug(f'Error closing CDP client: {cleanup_error}')
|
|
|
|
self.session_manager = None
|
|
self._cdp_client_root = None
|
|
self.agent_focus_target_id = None
|
|
# Re-raise as a fatal error
|
|
raise RuntimeError(f'Failed to establish CDP connection to browser: {e}') from e
|
|
|
|
return self
|
|
|
|
async def _setup_proxy_auth(self) -> None:
|
|
"""Enable CDP Fetch auth handling for authenticated proxy, if credentials provided.
|
|
|
|
Handles HTTP proxy authentication challenges (Basic/Proxy) by providing
|
|
configured credentials from BrowserProfile.
|
|
"""
|
|
|
|
assert self._cdp_client_root
|
|
|
|
try:
|
|
proxy_cfg = self.browser_profile.proxy
|
|
username = proxy_cfg.username if proxy_cfg else None
|
|
password = proxy_cfg.password if proxy_cfg else None
|
|
if not username or not password:
|
|
self.logger.debug('Proxy credentials not provided; skipping proxy auth setup')
|
|
return
|
|
|
|
# Enable Fetch domain with auth handling (do not pause all requests)
|
|
try:
|
|
await self._cdp_client_root.send.Fetch.enable(params={'handleAuthRequests': True})
|
|
self.logger.debug('Fetch.enable(handleAuthRequests=True) enabled on root client')
|
|
except Exception as e:
|
|
self.logger.debug(f'Fetch.enable on root failed: {type(e).__name__}: {e}')
|
|
|
|
# Also enable on the focused target's session if available to ensure events are delivered
|
|
try:
|
|
if self.agent_focus_target_id:
|
|
cdp_session = await self.get_or_create_cdp_session(self.agent_focus_target_id, focus=False)
|
|
await cdp_session.cdp_client.send.Fetch.enable(
|
|
params={'handleAuthRequests': True},
|
|
session_id=cdp_session.session_id,
|
|
)
|
|
self.logger.debug('Fetch.enable(handleAuthRequests=True) enabled on focused session')
|
|
except Exception as e:
|
|
self.logger.debug(f'Fetch.enable on focused session failed: {type(e).__name__}: {e}')
|
|
|
|
def _on_auth_required(event: AuthRequiredEvent, session_id: SessionID | None = None):
|
|
# event keys may be snake_case or camelCase depending on generator; handle both
|
|
request_id = event.get('requestId') or event.get('request_id')
|
|
if not request_id:
|
|
return
|
|
|
|
challenge = event.get('authChallenge') or event.get('auth_challenge') or {}
|
|
source = (challenge.get('source') or '').lower()
|
|
# Only respond to proxy challenges
|
|
if source == 'proxy' and request_id:
|
|
|
|
async def _respond():
|
|
assert self._cdp_client_root
|
|
try:
|
|
await self._cdp_client_root.send.Fetch.continueWithAuth(
|
|
params={
|
|
'requestId': request_id,
|
|
'authChallengeResponse': {
|
|
'response': 'ProvideCredentials',
|
|
'username': username,
|
|
'password': password,
|
|
},
|
|
},
|
|
session_id=session_id,
|
|
)
|
|
except Exception as e:
|
|
self.logger.debug(f'Proxy auth respond failed: {type(e).__name__}: {e}')
|
|
|
|
# schedule
|
|
create_task_with_error_handling(
|
|
_respond(), name='auth_respond', logger_instance=self.logger, suppress_exceptions=True
|
|
)
|
|
else:
|
|
# Default behaviour for non-proxy challenges: let browser handle
|
|
async def _default():
|
|
assert self._cdp_client_root
|
|
try:
|
|
await self._cdp_client_root.send.Fetch.continueWithAuth(
|
|
params={'requestId': request_id, 'authChallengeResponse': {'response': 'Default'}},
|
|
session_id=session_id,
|
|
)
|
|
except Exception as e:
|
|
self.logger.debug(f'Default auth respond failed: {type(e).__name__}: {e}')
|
|
|
|
if request_id:
|
|
create_task_with_error_handling(
|
|
_default(), name='auth_default', logger_instance=self.logger, suppress_exceptions=True
|
|
)
|
|
|
|
def _on_request_paused(event: RequestPausedEvent, session_id: SessionID | None = None):
|
|
# Continue all paused requests to avoid stalling the network
|
|
request_id = event.get('requestId') or event.get('request_id')
|
|
if not request_id:
|
|
return
|
|
|
|
async def _continue():
|
|
assert self._cdp_client_root
|
|
try:
|
|
await self._cdp_client_root.send.Fetch.continueRequest(
|
|
params={'requestId': request_id},
|
|
session_id=session_id,
|
|
)
|
|
except Exception:
|
|
pass
|
|
|
|
create_task_with_error_handling(
|
|
_continue(), name='request_continue', logger_instance=self.logger, suppress_exceptions=True
|
|
)
|
|
|
|
# Register event handler on root client
|
|
try:
|
|
self._cdp_client_root.register.Fetch.authRequired(_on_auth_required)
|
|
self._cdp_client_root.register.Fetch.requestPaused(_on_request_paused)
|
|
if self.agent_focus_target_id:
|
|
cdp_session = await self.get_or_create_cdp_session(self.agent_focus_target_id, focus=False)
|
|
cdp_session.cdp_client.register.Fetch.authRequired(_on_auth_required)
|
|
cdp_session.cdp_client.register.Fetch.requestPaused(_on_request_paused)
|
|
self.logger.debug('Registered Fetch.authRequired handlers')
|
|
except Exception as e:
|
|
self.logger.debug(f'Failed to register authRequired handlers: {type(e).__name__}: {e}')
|
|
|
|
# Ensure Fetch is enabled for the current focused target's session, too
|
|
try:
|
|
if self.agent_focus_target_id:
|
|
# Use safe API with focus=False to avoid changing focus
|
|
cdp_session = await self.get_or_create_cdp_session(self.agent_focus_target_id, focus=False)
|
|
await cdp_session.cdp_client.send.Fetch.enable(
|
|
params={'handleAuthRequests': True, 'patterns': [{'urlPattern': '*'}]},
|
|
session_id=cdp_session.session_id,
|
|
)
|
|
except Exception as e:
|
|
self.logger.debug(f'Fetch.enable on focused session failed: {type(e).__name__}: {e}')
|
|
except Exception as e:
|
|
self.logger.debug(f'Skipping proxy auth setup: {type(e).__name__}: {e}')
|
|
|
|
async def reconnect(self) -> None:
    """Re-establish the CDP WebSocket connection to an already-running browser.

    Lightweight reconnection sequence:
      1. Stop the old CDP client (errors are only logged) and drop it.
      2. Clear and drop the SessionManager; reset agent focus.
      3. Create and start a new CDP client for the same ``cdp_url``.
      4. Start a new SessionManager and re-enable auto-attach.
      5. Restore agent focus: the previously focused target if it is still
         among the page targets, else the first page target, else a newly
         created ``about:blank`` page.
      6. Re-run proxy auth setup and re-attach the WebSocket-drop callback.

    Errors from steps 3-6 propagate to the caller.
    """
    assert self.cdp_url, 'Cannot reconnect without a CDP URL'

    previous_focus = self.agent_focus_target_id

    # 1. Stop old CDPClient (WS is already dead, this just cleans internal state)
    if self._cdp_client_root:
        try:
            await self._cdp_client_root.stop()
        except Exception as e:
            self.logger.debug(f'Error stopping old CDP client during reconnect: {e}')
        self._cdp_client_root = None

    # 2. Clear SessionManager (all sessions are stale after a disconnect)
    if self.session_manager:
        try:
            await self.session_manager.clear()
        except Exception as e:
            self.logger.debug(f'Error clearing SessionManager during reconnect: {e}')
        self.session_manager = None

    self.agent_focus_target_id = None

    # 3. Create new CDPClient with the same cdp_url
    headers = dict(getattr(self.browser_profile, 'headers', None) or {})
    if not self.is_local:
        from browser_use.utils import get_browser_use_version

        headers.setdefault('User-Agent', f'browser-use/{get_browser_use_version()}')
    self._cdp_client_root = TimeoutWrappedCDPClient(
        self.cdp_url,
        additional_headers=headers or None,
        max_ws_frame_size=200 * 1024 * 1024,
    )
    await self._cdp_client_root.start()

    # 4. Re-initialize SessionManager
    from browser_use.browser.session_manager import SessionManager

    self.session_manager = SessionManager(self)
    await self.session_manager.start_monitoring()

    # 5. Re-enable autoAttach
    await self._cdp_client_root.send.Target.setAutoAttach(
        params={'autoAttach': True, 'waitForDebuggerOnStart': False, 'flatten': True}
    )

    # 6. Re-discover page targets and restore focus
    page_targets = self.session_manager.get_all_page_targets()
    previous_still_open = bool(previous_focus) and any(
        candidate.target_id == previous_focus for candidate in page_targets
    )

    if previous_still_open:
        # Prefer the target that was focused before the connection dropped.
        await self.get_or_create_cdp_session(previous_focus, focus=True)
        self.logger.debug(f'🔄 Restored agent focus to previous target {previous_focus[:8]}...')
    elif page_targets:
        fallback_id = page_targets[0].target_id
        await self.get_or_create_cdp_session(fallback_id, focus=True)
        self.logger.debug(f'🔄 Agent focus set to fallback target {fallback_id[:8]}...')
    else:
        # No pages exist — create one
        created = await self._cdp_client_root.send.Target.createTarget(params={'url': 'about:blank'})
        target_id = created['targetId']
        await self.get_or_create_cdp_session(target_id, focus=True)
        self.logger.debug(f'🔄 Created new blank page during reconnect: {target_id[:8]}...')

    # 7. Re-enable proxy auth if configured
    await self._setup_proxy_auth()

    # 8. Attach the WS drop detection callback to the new client
    self._attach_ws_drop_callback()
|
|
|
|
async def _auto_reconnect(self, max_attempts: int = 3) -> None:
|
|
"""Attempt to reconnect with exponential backoff.
|
|
|
|
Dispatches BrowserReconnectingEvent before each attempt and
|
|
BrowserReconnectedEvent on success.
|
|
"""
|
|
async with self._reconnect_lock:
|
|
if self._reconnecting:
|
|
return # already in progress from another caller
|
|
self._reconnecting = True
|
|
self._reconnect_event.clear()
|
|
|
|
start_time = time.time()
|
|
delays = [1.0, 2.0, 4.0]
|
|
|
|
try:
|
|
for attempt in range(1, max_attempts + 1):
|
|
self.event_bus.dispatch(
|
|
BrowserReconnectingEvent(
|
|
cdp_url=self.cdp_url or '',
|
|
attempt=attempt,
|
|
max_attempts=max_attempts,
|
|
)
|
|
)
|
|
self.logger.warning(f'🔄 WebSocket reconnection attempt {attempt}/{max_attempts}...')
|
|
|
|
try:
|
|
await asyncio.wait_for(self.reconnect(), timeout=15.0)
|
|
# Success
|
|
downtime = time.time() - start_time
|
|
self.event_bus.dispatch(
|
|
BrowserReconnectedEvent(
|
|
cdp_url=self.cdp_url or '',
|
|
attempt=attempt,
|
|
downtime_seconds=downtime,
|
|
)
|
|
)
|
|
self.logger.info(f'🔄 WebSocket reconnected after {downtime:.1f}s (attempt {attempt})')
|
|
return
|
|
except Exception as e:
|
|
self.logger.warning(f'🔄 Reconnection attempt {attempt} failed: {type(e).__name__}: {e}')
|
|
if attempt < max_attempts:
|
|
delay = delays[attempt - 1] if attempt - 1 < len(delays) else delays[-1]
|
|
await asyncio.sleep(delay)
|
|
|
|
# All attempts exhausted
|
|
self.logger.error(f'🔄 All {max_attempts} reconnection attempts failed')
|
|
self.event_bus.dispatch(
|
|
BrowserErrorEvent(
|
|
error_type='ReconnectionFailed',
|
|
message=f'Failed to reconnect after {max_attempts} attempts ({time.time() - start_time:.1f}s)',
|
|
details={'cdp_url': self.cdp_url or '', 'max_attempts': max_attempts},
|
|
)
|
|
)
|
|
finally:
|
|
self._reconnecting = False
|
|
self._reconnect_event.set() # wake up all waiters regardless of outcome
|
|
|
|
def _attach_ws_drop_callback(self) -> None:
|
|
"""Attach a done callback to the CDPClient's message handler task to detect WS drops."""
|
|
if not self._cdp_client_root or not hasattr(self._cdp_client_root, '_message_handler_task'):
|
|
return
|
|
|
|
task = self._cdp_client_root._message_handler_task
|
|
if task is None or task.done():
|
|
return
|
|
|
|
def _on_message_handler_done(fut: asyncio.Future) -> None:
|
|
# Guard: skip if intentionally stopped, already reconnecting, or no cdp_url
|
|
if self._intentional_stop or self._reconnecting or not self.cdp_url:
|
|
return
|
|
|
|
# The message handler task exiting means the WS connection dropped
|
|
exc = fut.exception() if not fut.cancelled() else None
|
|
self.logger.warning(
|
|
f'🔌 CDP WebSocket message handler exited unexpectedly'
|
|
f'{f": {type(exc).__name__}: {exc}" if exc else " (connection closed)"}'
|
|
)
|
|
|
|
# Fire auto-reconnect as an asyncio task
|
|
try:
|
|
loop = asyncio.get_running_loop()
|
|
self._reconnect_task = loop.create_task(self._auto_reconnect())
|
|
except RuntimeError:
|
|
# No running event loop — can't reconnect
|
|
self.logger.error('🔌 No event loop available for auto-reconnect')
|
|
|
|
task.add_done_callback(_on_message_handler_done)
|
|
|
|
async def get_tabs(self) -> list[TabInfo]:
    """Describe every open page target known to the SessionManager.

    Titles come from the cached target data and are then adjusted: new-tab
    pages get an empty title, untitled ``chrome://`` pages use their URL, and
    untitled URLs containing ``pdf`` use the last path segment.

    Returns:
        One ``TabInfo`` per page target, or an empty list when the
        SessionManager is not set up yet.
    """
    collected = []

    # Browser not connected yet: nothing to report.
    if not self.session_manager:
        return collected

    for position, page_target in enumerate(self.session_manager.get_all_page_targets()):
        url = page_target.url
        title = page_target.title

        try:
            if is_new_tab_page(url):
                # New-tab pages are reported with an empty title.
                title = ''
            elif url.startswith('chrome://') and not title:
                # Untitled chrome:// pages fall back to their URL.
                title = url

            # Untitled PDF-looking URLs: use the file name from the path.
            if not title and 'pdf' in url:
                try:
                    from urllib.parse import urlparse

                    title = urlparse(url).path.rsplit('/', 1)[-1] or title
                except Exception:
                    pass

        except Exception as e:
            # Anything unexpected: fall back to the basic title rules.
            self.logger.debug(
                f'⚠️ Failed to get target info for tab #{position}: {_log_pretty_url(url)} - {type(e).__name__}'
            )
            title = url if (not is_new_tab_page(url) and url.startswith('chrome://')) else ''

        collected.append(
            TabInfo(
                target_id=page_target.target_id,
                url=url,
                title=title,
                parent_target_id=None,
            )
        )

    return collected
|
|
|
|
# endregion - ========== Helper Methods ==========
|
|
|
|
# region - ========== ID Lookup Methods ==========
|
|
async def get_current_target_info(self) -> TargetInfo | None:
    """Return CDP-style info for the target the agent is focused on.

    The data is read from the SessionManager's cached target, so no CDP call
    is made.

    Returns:
        A dict with ``targetId``, ``url``, ``title``, ``type``, ``attached``
        and ``canAccessOpener``; or ``None`` when there is no focused target,
        the SessionManager is not available, or the focused target is no
        longer known to it.
    """
    focus_id = self.agent_focus_target_id
    # Guard the SessionManager too: it is unset until the browser connects.
    if not focus_id or not self.session_manager:
        return None

    target = self.session_manager.get_target(focus_id)
    if target is None:
        # Focus points at a target that has disappeared from the cache.
        return None

    return {
        'targetId': target.target_id,
        'url': target.url,
        'title': target.title,
        'type': target.target_type,
        # Cached targets are reported as attached with no opener access.
        'attached': True,
        'canAccessOpener': False,
    }
|
|
|
|
async def get_current_page_url(self) -> str:
    """Return the URL of the page the agent is focused on.

    Returns:
        The cached URL of the focused target, or ``'about:blank'`` when there
        is no focused target, the SessionManager is not available, or the
        focused target is no longer known to it.
    """
    focus_id = self.agent_focus_target_id
    if focus_id and self.session_manager:
        target = self.session_manager.get_target(focus_id)
        # A stale focus id must not crash URL lookups; use the default instead.
        if target is not None:
            return target.url
    return 'about:blank'
|
|
|
|
async def get_current_page_title(self) -> str:
    """Return the title of the page the agent is focused on.

    Returns:
        The cached title of the focused target, or ``'Unknown page title'``
        when there is no focused target, the SessionManager is not available,
        or the focused target is no longer known to it.
    """
    focus_id = self.agent_focus_target_id
    if focus_id and self.session_manager:
        target = self.session_manager.get_target(focus_id)
        # A stale focus id must not crash title lookups; use the default instead.
        if target is not None:
            return target.title
    return 'Unknown page title'
|
|
|
|
async def navigate_to(self, url: str, new_tab: bool = False) -> None:
    """Navigate by dispatching a ``NavigateToUrlEvent`` on the event bus.

    The method waits for the event to finish and then for its result, which
    is requested with ``raise_if_any=True`` and ``raise_if_none=False``.

    Args:
        url: Address to load.
        new_tab: Open the URL in a new tab instead of the current one.
    """
    from browser_use.browser.events import NavigateToUrlEvent

    navigation = NavigateToUrlEvent(url=url, new_tab=new_tab)
    dispatched = self.event_bus.dispatch(navigation)
    await dispatched
    await dispatched.event_result(raise_if_any=True, raise_if_none=False)
|
|
|
|
# endregion - ========== ID Lookup Methods ==========
|
|
|
|
# region - ========== DOM Helper Methods ==========
|
|
|
|
async def get_dom_element_by_index(self, index: int) -> EnhancedDOMTreeNode | None:
    """Look up a DOM element by its index in the cached selector map.

    Args:
        index: Element index taken from the serialized DOM.

    Returns:
        The cached ``EnhancedDOMTreeNode``, or ``None`` when the cache is
        empty or holds no entry for ``index``.
    """
    selector_map = self._cached_selector_map
    if not selector_map:
        return None
    return selector_map.get(index)
|
|
|
|
def update_cached_selector_map(self, selector_map: dict[int, EnhancedDOMTreeNode]) -> None:
    """Replace the cached selector map with a freshly built one.

    Intended to be called by the DOM watchdog once it has rebuilt the DOM.
    The mapping is stored as-is (no copy is made).

    Args:
        selector_map: Element index -> node mapping from DOM serialization.
    """
    self._cached_selector_map = selector_map
|
|
|
|
# Backwards-compatible alias
async def get_element_by_index(self, index: int) -> EnhancedDOMTreeNode | None:
    """Delegate to ``get_dom_element_by_index``; kept for older callers."""
    element = await self.get_dom_element_by_index(index)
    return element
|
|
|
|
async def get_dom_element_at_coordinates(self, x: int, y: int) -> EnhancedDOMTreeNode | None:
    """Get the DOM element at viewport coordinates as an EnhancedDOMTreeNode.

    The backend node id is resolved with CDP ``DOM.getNodeForLocation``. If a
    node with that id is in the cached selector map it is returned directly;
    otherwise ``DOM.describeNode`` is used to build a node carrying the real
    tag name and attributes, so safety checks (e.g. for <select> elements and
    file inputs) work correctly. If ``describeNode`` fails, a minimal
    element node with empty name and attributes is returned instead.

    Args:
        x: X coordinate relative to viewport
        y: Y coordinate relative to viewport

    Returns:
        EnhancedDOMTreeNode at the coordinates, or None if no element was
        found or the location lookup failed.

    Raises:
        RuntimeError: If there is no active page.
    """
    from browser_use.dom.views import NodeType

    # Get current page to access CDP session
    page = await self.get_current_page()
    if page is None:
        raise RuntimeError('No active page found')

    # Get session ID for CDP call
    session_id = await page._ensure_session()

    try:
        result = await self.cdp_client.send.DOM.getNodeForLocation(
            params={
                'x': x,
                'y': y,
                'includeUserAgentShadowDOM': False,
                'ignorePointerEventsNone': False,
            },
            session_id=session_id,
        )

        backend_node_id = result.get('backendNodeId')
        if backend_node_id is None:
            self.logger.debug(f'No element found at coordinates ({x}, {y})')
            return None

        # Try to find element in cached selector_map (avoids extra CDP call)
        if self._cached_selector_map:
            for node in self._cached_selector_map.values():
                if node.backend_node_id == backend_node_id:
                    self.logger.debug(f'Found element at ({x}, {y}) in cached selector_map')
                    return node

        def _build_node(node_type, node_name, node_value, attributes):
            # One place for the fields shared by the full and the minimal node:
            # everything not known from the CDP replies is left as None.
            return EnhancedDOMTreeNode(
                node_id=result.get('nodeId', 0),
                backend_node_id=backend_node_id,
                node_type=node_type,
                node_name=node_name,
                node_value=node_value,
                attributes=attributes,
                is_scrollable=None,
                frame_id=result.get('frameId'),
                session_id=session_id,
                target_id=self.agent_focus_target_id or '',
                content_document=None,
                shadow_root_type=None,
                shadow_roots=None,
                parent_node=None,
                children_nodes=None,
                ax_node=None,
                snapshot_node=None,
                is_visible=None,
                absolute_position=None,
            )

        # Not in cache - fall back to CDP DOM.describeNode to get actual node info
        try:
            describe_result = await self.cdp_client.send.DOM.describeNode(
                params={'backendNodeId': backend_node_id},
                session_id=session_id,
            )
            node_info = describe_result.get('node', {})

            # Attributes arrive as a flat list [key1, val1, key2, val2, ...].
            # Pairing via zip tolerates an odd-length or missing list instead
            # of raising and throwing away the rest of the node info.
            attrs_list = node_info.get('attributes') or []
            attributes = dict(zip(attrs_list[::2], attrs_list[1::2]))

            return _build_node(
                NodeType(node_info.get('nodeType', NodeType.ELEMENT_NODE.value)),
                node_info.get('nodeName', ''),
                node_info.get('nodeValue', '') or '',
                attributes,
            )
        except Exception as e:
            self.logger.debug(f'DOM.describeNode failed for backend_node_id={backend_node_id}: {e}')
            # Fall back to minimal node if describeNode fails
            return _build_node(NodeType.ELEMENT_NODE, '', '', {})

    except Exception as e:
        self.logger.warning(f'Failed to get DOM element at coordinates ({x}, {y}): {e}')
        return None
|
|
|
|
async def get_target_id_from_tab_id(self, tab_id: str) -> TargetID:
    """Expand a short tab id into the full TargetID it is a suffix of.

    Candidates come from the SessionManager; the first one that ends with
    ``tab_id`` and passes ``is_target_valid`` wins. Matching but invalid
    (stale) targets are logged and skipped.

    Args:
        tab_id: Trailing characters of the wanted target id.

    Returns:
        The full target id.

    Raises:
        RuntimeError: If the SessionManager is not initialized.
        ValueError: If no valid target id ends with ``tab_id``.
    """
    manager = self.session_manager
    if not manager:
        raise RuntimeError('SessionManager not initialized')

    suffix_matches = (candidate for candidate in manager.get_all_target_ids() if candidate.endswith(tab_id))
    for candidate in suffix_matches:
        if not await manager.is_target_valid(candidate):
            # Stale entry; the detach event listener is expected to clean it up.
            self.logger.debug(f'Found stale target {candidate}, skipping')
            continue
        return candidate

    raise ValueError(f'No TargetID found ending in tab_id=...{tab_id}')
|
|
|
|
async def get_target_id_from_url(self, url: str) -> TargetID:
    """Find the TargetID of a page or tab target by URL.

    Two passes are made over the SessionManager's targets: first an exact URL
    comparison, then a substring match as a fallback. Only targets whose type
    is ``'page'`` or ``'tab'`` are considered.

    Args:
        url: URL (or URL fragment) to look for.

    Returns:
        The id of the first matching target.

    Raises:
        RuntimeError: If the SessionManager is not initialized.
        ValueError: If neither pass finds a target.
    """
    if not self.session_manager:
        raise RuntimeError('SessionManager not initialized')

    # Strict comparison first, looser containment second.
    passes = (
        lambda candidate_url: candidate_url == url,
        lambda candidate_url: url in candidate_url,
    )
    for url_matches in passes:
        for target_id, target in self.session_manager.get_all_targets().items():
            if target.target_type in ('page', 'tab') and url_matches(target.url):
                return target_id

    raise ValueError(f'No TargetID found for url={url}')
|
|
|
|
async def get_most_recently_opened_target_id(self) -> TargetID:
    """Return the id of the last page target reported by the SessionManager.

    Returns:
        ``target_id`` of the final entry of ``get_all_page_targets()``.

    Raises:
        RuntimeError: If the SessionManager is not initialized, or if it
            knows no page targets.
    """
    # Same guard and message as the other SessionManager-backed lookups, so a
    # not-yet-connected session fails with RuntimeError, not AttributeError.
    if not self.session_manager:
        raise RuntimeError('SessionManager not initialized')

    page_targets = self.session_manager.get_all_page_targets()
    if not page_targets:
        raise RuntimeError('No page targets available')
    return page_targets[-1].target_id
|
|
|
|
def is_file_input(self, element: Any) -> bool:
    """Check if element is a file input.

    Delegates to the DOM watchdog when one is attached. Otherwise a local
    check is used: the element must have a ``node_name`` equal to ``INPUT``
    (case-insensitive) and a ``type`` attribute equal to ``file``
    (case-insensitive).

    Args:
        element: The DOM element to check

    Returns:
        True if element is a file input, False otherwise
    """
    if self._dom_watchdog:
        return self._dom_watchdog.is_file_input(element)

    # Fallback if watchdog not available. Elements may carry
    # ``attributes=None`` or a missing node name; those are simply not file
    # inputs rather than a reason to raise.
    node_name = getattr(element, 'node_name', None)
    if not isinstance(node_name, str) or node_name.upper() != 'INPUT':
        return False

    attributes = getattr(element, 'attributes', None)
    if not attributes:
        return False

    input_type = attributes.get('type', '')
    return isinstance(input_type, str) and input_type.lower() == 'file'
|
|
|
|
def find_file_input_near_element(
    self,
    node: 'EnhancedDOMTreeNode',
    max_height: int = 3,
    max_descendant_depth: int = 3,
) -> 'EnhancedDOMTreeNode | None':
    """Locate the file input closest to ``node`` in the DOM tree.

    Starting at ``node`` and climbing at most ``max_height`` parents, each
    level is examined in this order: the node itself, its descendants (down
    to ``max_descendant_depth``), then each sibling and that sibling's
    descendants. The first file input encountered is returned.

    Args:
        node: Element to start from.
        max_height: How many parent levels to climb.
        max_descendant_depth: How deep to look below each examined node.

    Returns:
        The first file input found, or None when the search finds nothing.
    """
    from browser_use.dom.views import EnhancedDOMTreeNode

    def _search_subtree(root: EnhancedDOMTreeNode, remaining_depth: int) -> EnhancedDOMTreeNode | None:
        # Depth-first, pre-order: the root is tested before its children.
        if remaining_depth < 0:
            return None
        if self.is_file_input(root):
            return root
        for child in root.children_nodes or []:
            found = _search_subtree(child, remaining_depth - 1)
            if found:
                return found
        return None

    anchor: EnhancedDOMTreeNode | None = node
    levels_left = max_height + 1
    while levels_left > 0 and anchor is not None:
        levels_left -= 1

        # The anchor itself, then everything beneath it.
        if self.is_file_input(anchor):
            return anchor
        hit = _search_subtree(anchor, max_descendant_depth)
        if hit:
            return hit

        # Then every sibling (and its subtree), skipping the anchor.
        parent = anchor.parent_node
        if parent:
            for sibling in parent.children_nodes or []:
                if sibling is anchor:
                    continue
                if self.is_file_input(sibling):
                    return sibling
                hit = _search_subtree(sibling, max_descendant_depth)
                if hit:
                    return hit

        anchor = parent

    return None
|
|
|
|
async def get_selector_map(self) -> dict[int, EnhancedDOMTreeNode]:
    """Return the current element-index -> node mapping.

    The session's own cache is preferred. If it is empty, the DOM watchdog's
    ``selector_map`` is used when the watchdog exists and has that attribute.

    Returns:
        The selector map, or an empty dict when no source has one.
    """
    cached = self._cached_selector_map
    if cached:
        return cached

    watchdog = self._dom_watchdog
    if watchdog and hasattr(watchdog, 'selector_map'):
        return watchdog.selector_map or {}

    return {}
|
|
|
|
async def get_index_by_id(self, element_id: str) -> int | None:
    """Return the selector-map index of the element with the given ``id``.

    Args:
        element_id: Value the element's ``id`` attribute must equal.

    Returns:
        Index of the first element whose ``id`` matches, or None.
    """
    selector_map = await self.get_selector_map()
    matching_indices = (
        idx
        for idx, element in selector_map.items()
        if element.attributes and element.attributes.get('id') == element_id
    )
    return next(matching_indices, None)
|
|
|
|
async def get_index_by_class(self, class_name: str) -> int | None:
    """Return the selector-map index of the first element carrying a class.

    The element's ``class`` attribute is split on whitespace and
    ``class_name`` must equal one of the resulting tokens.

    Args:
        class_name: Class token to look for.

    Returns:
        Index of the first matching element, or None.
    """
    for idx, element in (await self.get_selector_map()).items():
        attrs = element.attributes
        if not attrs:
            continue
        if class_name in attrs.get('class', '').split():
            return idx
    return None
|
|
|
|
async def remove_highlights(self) -> None:
    """Strip browser-use highlight elements from the current page via CDP.

    Does nothing when both ``highlight_elements`` and
    ``dom_highlight_elements`` are disabled in the browser profile. The
    removal script is run with ``Runtime.evaluate`` under a 3 second timeout;
    any failure is logged as a warning and not raised.
    """
    profile = self.browser_profile
    if not (profile.highlight_elements or profile.dom_highlight_elements):
        return

    try:
        async with asyncio.timeout(3.0):
            # Page-side cleanup: tagged elements, the container by id, then
            # any leftover tooltips. Reports how many tagged elements it saw.
            script = """
            (function() {
                // Remove all browser-use highlight elements
                const highlights = document.querySelectorAll('[data-browser-use-highlight]');
                console.log('Removing', highlights.length, 'browser-use highlight elements');
                highlights.forEach(el => el.remove());

                // Also remove by ID in case selector missed anything
                const highlightContainer = document.getElementById('browser-use-debug-highlights');
                if (highlightContainer) {
                    console.log('Removing highlight container by ID');
                    highlightContainer.remove();
                }

                // Final cleanup - remove any orphaned tooltips
                const orphanedTooltips = document.querySelectorAll('[data-browser-use-highlight="tooltip"]');
                orphanedTooltips.forEach(el => el.remove());

                return { removed: highlights.length };
            })();
            """

            cdp_session = await self.get_or_create_cdp_session()
            result = await cdp_session.cdp_client.send.Runtime.evaluate(
                params={'expression': script, 'returnByValue': True},
                session_id=cdp_session.session_id,
            )

            # Report the count when the reply carries one.
            has_value = bool(result) and 'result' in result and 'value' in result['result']
            if not has_value:
                self.logger.debug('Highlight removal completed')
            else:
                removed_count = result['result']['value'].get('removed', 0)
                self.logger.debug(f'Successfully removed {removed_count} highlight elements')

    except Exception as e:
        self.logger.warning(f'Failed to remove highlights: {e}')
|
|
|
|
@observe_debug(ignore_input=True, ignore_output=True, name='get_element_coordinates')
async def get_element_coordinates(self, backend_node_id: int, cdp_session: CDPSession) -> DOMRect | None:
    """Resolve a backend node's on-page rectangle, trying three strategies.

    Order of attempts: CDP ``DOM.getContentQuads``, then ``DOM.getBoxModel``,
    then ``getBoundingClientRect()`` evaluated on the resolved node. A failure
    in one strategy is logged at debug level and the next one is tried.

    Args:
        backend_node_id: Backend node id of the element.
        cdp_session: CDP session whose client and session id are used.

    Returns:
        A ``DOMRect`` with positive width and height, or None when no
        strategy produced one.
    """
    session_id = cdp_session.session_id
    quads = []

    # Strategy 1: content quads (best for inline elements and complex layouts).
    try:
        content_quads_result = await cdp_session.cdp_client.send.DOM.getContentQuads(
            params={'backendNodeId': backend_node_id}, session_id=session_id
        )
        if content_quads_result.get('quads'):
            quads = content_quads_result['quads']
            self.logger.debug(f'Got {len(quads)} quads from DOM.getContentQuads')
        else:
            self.logger.debug(f'No quads found from DOM.getContentQuads {content_quads_result}')
    except Exception as e:
        self.logger.debug(f'DOM.getContentQuads failed: {e}')

    # Strategy 2: the box model's content quad.
    if not quads:
        try:
            box_model = await cdp_session.cdp_client.send.DOM.getBoxModel(
                params={'backendNodeId': backend_node_id}, session_id=session_id
            )
            if 'model' in box_model and 'content' in box_model['model']:
                content_quad = box_model['model']['content']
                if len(content_quad) >= 8:
                    # First eight numbers are (x1, y1) .. (x4, y4).
                    quads = [[content_quad[i] for i in range(8)]]
                    self.logger.debug('Got quad from DOM.getBoxModel')
        except Exception as e:
            self.logger.debug(f'DOM.getBoxModel failed: {e}')

    # Strategy 3: ask the page itself via getBoundingClientRect.
    if not quads:
        try:
            resolved = await cdp_session.cdp_client.send.DOM.resolveNode(
                params={'backendNodeId': backend_node_id},
                session_id=session_id,
            )
            if 'object' in resolved and 'objectId' in resolved['object']:
                js_result = await cdp_session.cdp_client.send.Runtime.callFunctionOn(
                    params={
                        'objectId': resolved['object']['objectId'],
                        'functionDeclaration': """
                            function() {
                                const rect = this.getBoundingClientRect();
                                return {
                                    x: rect.x,
                                    y: rect.y,
                                    width: rect.width,
                                    height: rect.height
                                };
                            }
                        """,
                        'returnByValue': True,
                    },
                    session_id=session_id,
                )
                if 'result' in js_result and 'value' in js_result['result']:
                    rect_data = js_result['result']['value']
                    if rect_data['width'] > 0 and rect_data['height'] > 0:
                        return DOMRect(
                            x=rect_data['x'],
                            y=rect_data['y'],
                            width=rect_data['width'],
                            height=rect_data['height'],
                        )
        except Exception as e:
            self.logger.debug(f'JavaScript getBoundingClientRect failed: {e}')

    if not quads:
        return None

    # Only the first quad is used; its axis-aligned bounding box is returned.
    quad = quads[0]
    if len(quad) < 8:
        return None

    x_coords = [quad[i] for i in (0, 2, 4, 6)]
    y_coords = [quad[i] for i in (1, 3, 5, 7)]
    left, top = min(x_coords), min(y_coords)
    width = max(x_coords) - left
    height = max(y_coords) - top

    if width > 0 and height > 0:
        return DOMRect(x=left, y=top, width=width, height=height)
    return None
|
|
|
|
async def highlight_interaction_element(self, node: 'EnhancedDOMTreeNode') -> None:
    """Briefly outline the element being interacted with.

    Four animated corner brackets are injected around the element's current
    rectangle and removed by page-side timers after the configured highlight
    duration. Nothing happens when ``highlight_elements`` is disabled or the
    element has no coordinates. Failures are logged at debug level and never
    raised, so highlighting cannot fail the surrounding action.

    Args:
        node: DOM node whose ``backend_node_id`` is used to look up coordinates.
    """
    if not self.browser_profile.highlight_elements:
        return

    try:
        import json

        cdp_session = await self.get_or_create_cdp_session()

        # Coordinates are looked up fresh for every highlight.
        rect = await self.get_element_coordinates(node.backend_node_id, cdp_session)

        color = self.browser_profile.interaction_highlight_color
        duration_ms = int(self.browser_profile.interaction_highlight_duration * 1000)

        if not rect:
            self.logger.debug(f'No coordinates found for backend node {node.backend_node_id}')
            return

        # JSON-encode the values that get embedded into the script.
        rect_literal = json.dumps({'x': rect.x, 'y': rect.y, 'width': rect.width, 'height': rect.height})
        color_literal = json.dumps(color)

        # Corner brackets start offset from the element and animate inward.
        script = f"""
        (function() {{
            const rect = {rect_literal};
            const color = {color_literal};
            const duration = {duration_ms};

            // Scale corner size based on element dimensions to ensure gaps between corners
            const maxCornerSize = 20;
            const minCornerSize = 8;
            const cornerSize = Math.max(
                minCornerSize,
                Math.min(maxCornerSize, Math.min(rect.width, rect.height) * 0.35)
            );
            const borderWidth = 3;
            const startOffset = 10; // Starting offset in pixels
            const finalOffset = -3; // Final position slightly outside the element

            // Get current scroll position
            const scrollX = window.pageXOffset || document.documentElement.scrollLeft || 0;
            const scrollY = window.pageYOffset || document.documentElement.scrollTop || 0;

            // Create container for all corners
            const container = document.createElement('div');
            container.setAttribute('data-browser-use-interaction-highlight', 'true');
            container.style.cssText = `
                position: absolute;
                left: ${{rect.x + scrollX}}px;
                top: ${{rect.y + scrollY}}px;
                width: ${{rect.width}}px;
                height: ${{rect.height}}px;
                pointer-events: none;
                z-index: 2147483647;
            `;

            // Create 4 corner brackets
            const corners = [
                {{ pos: 'top-left', startX: -startOffset, startY: -startOffset, finalX: finalOffset, finalY: finalOffset }},
                {{ pos: 'top-right', startX: startOffset, startY: -startOffset, finalX: -finalOffset, finalY: finalOffset }},
                {{ pos: 'bottom-left', startX: -startOffset, startY: startOffset, finalX: finalOffset, finalY: -finalOffset }},
                {{ pos: 'bottom-right', startX: startOffset, startY: startOffset, finalX: -finalOffset, finalY: -finalOffset }}
            ];

            corners.forEach(corner => {{
                const bracket = document.createElement('div');
                bracket.style.cssText = `
                    position: absolute;
                    width: ${{cornerSize}}px;
                    height: ${{cornerSize}}px;
                    pointer-events: none;
                    transition: all 0.15s ease-out;
                `;

                // Position corners
                if (corner.pos === 'top-left') {{
                    bracket.style.top = '0';
                    bracket.style.left = '0';
                    bracket.style.borderTop = `${{borderWidth}}px solid ${{color}}`;
                    bracket.style.borderLeft = `${{borderWidth}}px solid ${{color}}`;
                    bracket.style.transform = `translate(${{corner.startX}}px, ${{corner.startY}}px)`;
                }} else if (corner.pos === 'top-right') {{
                    bracket.style.top = '0';
                    bracket.style.right = '0';
                    bracket.style.borderTop = `${{borderWidth}}px solid ${{color}}`;
                    bracket.style.borderRight = `${{borderWidth}}px solid ${{color}}`;
                    bracket.style.transform = `translate(${{corner.startX}}px, ${{corner.startY}}px)`;
                }} else if (corner.pos === 'bottom-left') {{
                    bracket.style.bottom = '0';
                    bracket.style.left = '0';
                    bracket.style.borderBottom = `${{borderWidth}}px solid ${{color}}`;
                    bracket.style.borderLeft = `${{borderWidth}}px solid ${{color}}`;
                    bracket.style.transform = `translate(${{corner.startX}}px, ${{corner.startY}}px)`;
                }} else if (corner.pos === 'bottom-right') {{
                    bracket.style.bottom = '0';
                    bracket.style.right = '0';
                    bracket.style.borderBottom = `${{borderWidth}}px solid ${{color}}`;
                    bracket.style.borderRight = `${{borderWidth}}px solid ${{color}}`;
                    bracket.style.transform = `translate(${{corner.startX}}px, ${{corner.startY}}px)`;
                }}

                container.appendChild(bracket);

                // Animate to final position slightly outside the element
                setTimeout(() => {{
                    bracket.style.transform = `translate(${{corner.finalX}}px, ${{corner.finalY}}px)`;
                }}, 10);
            }});

            document.body.appendChild(container);

            // Auto-remove after duration
            setTimeout(() => {{
                container.style.opacity = '0';
                container.style.transition = 'opacity 0.3s ease-out';
                setTimeout(() => container.remove(), 300);
            }}, duration);

            return {{ created: true }};
        }})();
        """

        # The evaluate call returns once the overlay is injected; the
        # animation and removal run later on the page's own timers.
        await cdp_session.cdp_client.send.Runtime.evaluate(
            params={'expression': script, 'returnByValue': True},
            session_id=cdp_session.session_id,
        )

    except Exception as e:
        # Don't fail the action if highlighting fails
        self.logger.debug(f'Failed to highlight interaction element: {e}')
|
|
|
|
async def highlight_coordinate_click(self, x: int, y: int) -> None:
    """Briefly mark the position of a coordinate-based click.

    A ring and a centre dot are injected at the given viewport coordinates
    (offset by the page's scroll position) and removed by page-side timers
    after the configured highlight duration. Nothing happens when
    ``highlight_elements`` is disabled. Failures are logged at debug level and
    never raised, so highlighting cannot fail the surrounding action.

    Args:
        x: Horizontal coordinate relative to the viewport's left edge.
        y: Vertical coordinate relative to the viewport's top edge.
    """
    if not self.browser_profile.highlight_elements:
        return

    try:
        import json

        cdp_session = await self.get_or_create_cdp_session()

        color = self.browser_profile.interaction_highlight_color
        duration_ms = int(self.browser_profile.interaction_highlight_duration * 1000)

        # JSON-encode the colour so it lands in the script as a JS string.
        color_literal = json.dumps(color)

        # Animated outer circle plus centre dot at the click position.
        script = f"""
        (function() {{
            const x = {x};
            const y = {y};
            const color = {color_literal};
            const duration = {duration_ms};

            // Get current scroll position
            const scrollX = window.pageXOffset || document.documentElement.scrollLeft || 0;
            const scrollY = window.pageYOffset || document.documentElement.scrollTop || 0;

            // Create container
            const container = document.createElement('div');
            container.setAttribute('data-browser-use-coordinate-highlight', 'true');
            container.style.cssText = `
                position: absolute;
                left: ${{x + scrollX}}px;
                top: ${{y + scrollY}}px;
                width: 0;
                height: 0;
                pointer-events: none;
                z-index: 2147483647;
            `;

            // Create outer circle
            const outerCircle = document.createElement('div');
            outerCircle.style.cssText = `
                position: absolute;
                left: -15px;
                top: -15px;
                width: 30px;
                height: 30px;
                border: 3px solid ${{color}};
                border-radius: 50%;
                opacity: 0;
                transform: scale(0.3);
                transition: all 0.2s ease-out;
            `;
            container.appendChild(outerCircle);

            // Create center dot
            const centerDot = document.createElement('div');
            centerDot.style.cssText = `
                position: absolute;
                left: -4px;
                top: -4px;
                width: 8px;
                height: 8px;
                background: ${{color}};
                border-radius: 50%;
                opacity: 0;
                transform: scale(0);
                transition: all 0.15s ease-out;
            `;
            container.appendChild(centerDot);

            document.body.appendChild(container);

            // Animate in
            setTimeout(() => {{
                outerCircle.style.opacity = '0.8';
                outerCircle.style.transform = 'scale(1)';
                centerDot.style.opacity = '1';
                centerDot.style.transform = 'scale(1)';
            }}, 10);

            // Animate out and remove
            setTimeout(() => {{
                outerCircle.style.opacity = '0';
                outerCircle.style.transform = 'scale(1.5)';
                centerDot.style.opacity = '0';
                setTimeout(() => container.remove(), 300);
            }}, duration);

            return {{ created: true }};
        }})();
        """

        # The evaluate call returns once the overlay is injected; the
        # animation and removal run later on the page's own timers.
        await cdp_session.cdp_client.send.Runtime.evaluate(
            params={'expression': script, 'returnByValue': True},
            session_id=cdp_session.session_id,
        )

    except Exception as e:
        # Don't fail the action if highlighting fails
        self.logger.debug(f'Failed to highlight coordinate click: {e}')
|
|
|
|
async def add_highlights(self, selector_map: dict[int, 'EnhancedDOMTreeNode']) -> None:
|
|
"""Add visual highlights to the browser DOM for user visibility."""
|
|
if not self.browser_profile.dom_highlight_elements or not selector_map:
|
|
return
|
|
|
|
try:
|
|
import json
|
|
|
|
# Convert selector_map to the format expected by the highlighting script
|
|
elements_data = []
|
|
for _, node in selector_map.items():
|
|
# Get bounding box using absolute position (includes iframe translations) if available
|
|
if node.absolute_position:
|
|
# Use absolute position which includes iframe coordinate translations
|
|
rect = node.absolute_position
|
|
bbox = {'x': rect.x, 'y': rect.y, 'width': rect.width, 'height': rect.height}
|
|
|
|
# Only include elements with valid bounding boxes
|
|
if bbox and bbox.get('width', 0) > 0 and bbox.get('height', 0) > 0:
|
|
element = {
|
|
'x': bbox['x'],
|
|
'y': bbox['y'],
|
|
'width': bbox['width'],
|
|
'height': bbox['height'],
|
|
'element_name': node.node_name,
|
|
'is_clickable': node.snapshot_node.is_clickable if node.snapshot_node else True,
|
|
'is_scrollable': getattr(node, 'is_scrollable', False),
|
|
'attributes': node.attributes or {},
|
|
'frame_id': getattr(node, 'frame_id', None),
|
|
'node_id': node.node_id,
|
|
'backend_node_id': node.backend_node_id,
|
|
'xpath': node.xpath,
|
|
'text_content': node.get_all_children_text()[:50]
|
|
if hasattr(node, 'get_all_children_text')
|
|
else node.node_value[:50],
|
|
}
|
|
elements_data.append(element)
|
|
|
|
if not elements_data:
|
|
self.logger.debug('⚠️ No valid elements to highlight')
|
|
return
|
|
|
|
self.logger.debug(f'📍 Creating highlights for {len(elements_data)} elements')
|
|
|
|
# Always remove existing highlights first
|
|
await self.remove_highlights()
|
|
|
|
# Add a small delay to ensure removal completes
|
|
import asyncio
|
|
|
|
await asyncio.sleep(0.05)
|
|
|
|
# Get CDP session
|
|
cdp_session = await self.get_or_create_cdp_session()
|
|
|
|
# Create the proven highlighting script from v0.6.0 with fixed positioning
|
|
script = f"""
|
|
(function() {{
|
|
// Interactive elements data
|
|
const interactiveElements = {json.dumps(elements_data)};
|
|
|
|
console.log('=== BROWSER-USE HIGHLIGHTING ===');
|
|
console.log('Highlighting', interactiveElements.length, 'interactive elements');
|
|
|
|
// Double-check: Remove any existing highlight container first
|
|
const existingContainer = document.getElementById('browser-use-debug-highlights');
|
|
if (existingContainer) {{
|
|
console.log('⚠️ Found existing highlight container, removing it first');
|
|
existingContainer.remove();
|
|
}}
|
|
|
|
// Also remove any stray highlight elements
|
|
const strayHighlights = document.querySelectorAll('[data-browser-use-highlight]');
|
|
if (strayHighlights.length > 0) {{
|
|
console.log('⚠️ Found', strayHighlights.length, 'stray highlight elements, removing them');
|
|
strayHighlights.forEach(el => el.remove());
|
|
}}
|
|
|
|
// Use maximum z-index for visibility
|
|
const HIGHLIGHT_Z_INDEX = 2147483647;
|
|
|
|
// Create container for all highlights - use FIXED positioning (key insight from v0.6.0)
|
|
const container = document.createElement('div');
|
|
container.id = 'browser-use-debug-highlights';
|
|
container.setAttribute('data-browser-use-highlight', 'container');
|
|
|
|
container.style.cssText = `
|
|
position: absolute;
|
|
top: 0;
|
|
left: 0;
|
|
width: 100vw;
|
|
height: 100vh;
|
|
pointer-events: none;
|
|
z-index: ${{HIGHLIGHT_Z_INDEX}};
|
|
overflow: visible;
|
|
margin: 0;
|
|
padding: 0;
|
|
border: none;
|
|
outline: none;
|
|
box-shadow: none;
|
|
background: none;
|
|
font-family: inherit;
|
|
`;
|
|
|
|
// Helper function to create text elements safely
|
|
function createTextElement(tag, text, styles) {{
|
|
const element = document.createElement(tag);
|
|
element.textContent = text;
|
|
if (styles) element.style.cssText = styles;
|
|
return element;
|
|
}}
|
|
|
|
// Add highlights for each element
|
|
interactiveElements.forEach((element, index) => {{
|
|
const highlight = document.createElement('div');
|
|
highlight.setAttribute('data-browser-use-highlight', 'element');
|
|
highlight.setAttribute('data-element-id', element.backend_node_id);
|
|
highlight.style.cssText = `
|
|
position: absolute;
|
|
left: ${{element.x}}px;
|
|
top: ${{element.y}}px;
|
|
width: ${{element.width}}px;
|
|
height: ${{element.height}}px;
|
|
outline: 2px dashed #4a90e2;
|
|
outline-offset: -2px;
|
|
background: transparent;
|
|
pointer-events: none;
|
|
box-sizing: content-box;
|
|
transition: outline 0.2s ease;
|
|
margin: 0;
|
|
padding: 0;
|
|
border: none;
|
|
`;
|
|
|
|
// Enhanced label with backend node ID
|
|
const label = createTextElement('div', element.backend_node_id, `
|
|
position: absolute;
|
|
top: -20px;
|
|
left: 0;
|
|
background-color: #4a90e2;
|
|
color: white;
|
|
padding: 2px 6px;
|
|
font-size: 11px;
|
|
font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
|
|
font-weight: bold;
|
|
border-radius: 3px;
|
|
white-space: nowrap;
|
|
z-index: ${{HIGHLIGHT_Z_INDEX + 1}};
|
|
box-shadow: 0 2px 4px rgba(0,0,0,0.3);
|
|
border: none;
|
|
outline: none;
|
|
margin: 0;
|
|
line-height: 1.2;
|
|
`);
|
|
|
|
highlight.appendChild(label);
|
|
container.appendChild(highlight);
|
|
}});
|
|
|
|
// Add container to document
|
|
document.body.appendChild(container);
|
|
|
|
console.log('Highlighting complete - added', interactiveElements.length, 'highlights');
|
|
return {{ added: interactiveElements.length }};
|
|
}})();
|
|
"""
|
|
|
|
# Execute the script
|
|
result = await cdp_session.cdp_client.send.Runtime.evaluate(
|
|
params={'expression': script, 'returnByValue': True}, session_id=cdp_session.session_id
|
|
)
|
|
|
|
# Log the result
|
|
if result and 'result' in result and 'value' in result['result']:
|
|
added_count = result['result']['value'].get('added', 0)
|
|
self.logger.debug(f'Successfully added {added_count} highlight elements to browser DOM')
|
|
else:
|
|
self.logger.debug('Browser highlight injection completed')
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f'Failed to add browser highlights: {e}')
|
|
import traceback
|
|
|
|
self.logger.debug(f'Browser highlight traceback: {traceback.format_exc()}')
|
|
|
|
async def _close_extension_options_pages(self) -> None:
    """Close extension pages that open themselves (options / welcome / onboarding).

    Walks every page target known to the session manager and closes those whose
    URL is a ``chrome-extension://`` URL containing one of the known page names.
    This is best-effort: failures closing one page, or failures enumerating
    targets, are logged at debug level and never raised.
    """
    auto_opened_pages = ('options.html', 'welcome.html', 'onboarding.html')
    try:
        # SessionManager is the source of the page target list
        for page in self.session_manager.get_all_page_targets():
            page_url = page.url
            page_id = page.target_id

            if 'chrome-extension://' not in page_url:
                continue
            if not any(name in page_url for name in auto_opened_pages):
                continue

            self.logger.info(f'[BrowserSession] 🚫 Closing extension options page: {page_url}')
            try:
                await self._cdp_close_page(page_id)
            except Exception as e:
                # One page failing to close must not stop the sweep
                self.logger.debug(f'[BrowserSession] Could not close extension page {page_id}: {e}')
    except Exception as e:
        self.logger.debug(f'[BrowserSession] Error closing extension options pages: {e}')
|
|
|
|
async def send_demo_mode_log(self, message: str, level: str = 'info', metadata: dict[str, Any] | None = None) -> None:
|
|
"""Send a message to the in-browser demo panel if enabled."""
|
|
if not self.browser_profile.demo_mode:
|
|
return
|
|
demo = self.demo_mode
|
|
if not demo:
|
|
return
|
|
try:
|
|
await demo.send_log(message=message, level=level, metadata=metadata or {})
|
|
except Exception as exc:
|
|
self.logger.debug(f'[DemoMode] Failed to send log: {exc}')
|
|
|
|
@property
def downloaded_files(self) -> list[str]:
    """Files downloaded during this browser session.

    Returns:
        list[str]: A shallow copy of the session's recorded download paths, so
        callers can modify the result without affecting the session's own list.
    """
    return list(self._downloaded_files)
|
|
|
|
# endregion - ========== Helper Methods ==========
|
|
|
|
# region - ========== CDP-based replacements for browser_context operations ==========
|
|
|
|
async def _cdp_get_all_pages(
    self,
    include_http: bool = True,
    include_about: bool = True,
    include_pages: bool = True,
    include_iframes: bool = False,
    include_workers: bool = False,
    include_chrome: bool = False,
    include_chrome_extensions: bool = False,
    include_chrome_error: bool = False,
) -> list[TargetInfo]:
    """List browser targets tracked by the SessionManager, filtered by kind.

    Each ``include_*`` flag is forwarded unchanged to ``_is_valid_target``, which
    decides whether a target is kept.

    Returns:
        TargetInfo dicts (``targetId``, ``type``, ``title``, ``url``, plus fixed
        ``attached=True`` / ``canAccessOpener=False``) for every accepted target.
        Empty when no session manager exists yet.
    """
    # Browser not connected yet -> nothing to report
    if not self.session_manager:
        return []

    filter_flags = {
        'include_http': include_http,
        'include_about': include_about,
        'include_pages': include_pages,
        'include_iframes': include_iframes,
        'include_workers': include_workers,
        'include_chrome': include_chrome,
        'include_chrome_extensions': include_chrome_extensions,
        'include_chrome_error': include_chrome_error,
    }

    accepted = []
    for known in self.session_manager.get_all_targets().values():
        # Rebuild a CDP-shaped TargetInfo from the SessionManager's own record
        info: TargetInfo = {
            'targetId': known.target_id,
            'type': known.target_type,
            'title': known.title,
            'url': known.url,
            'attached': True,
            'canAccessOpener': False,
        }
        if self._is_valid_target(info, **filter_flags):
            accepted.append(info)

    return accepted
|
|
|
|
async def _cdp_create_new_page(self, url: str = 'about:blank', background: bool = False, new_window: bool = False) -> str:
    """Open a new page/tab through CDP ``Target.createTarget``.

    Args:
        url: URL the new target is created with.
        background: Passed through as the ``background`` create parameter.
        new_window: When True, ``newWindow`` is added to the request; otherwise
            the key is omitted so Chrome decides about windows itself.

    Returns:
        The ``targetId`` reported for the new target.
    """
    create_params = CreateTargetParameters(url=url, background=background)
    if new_window:
        create_params['newWindow'] = True

    # Tabs are created at browser level: prefer the root client, else fall back to cdp_client
    client = self._cdp_client_root or self.cdp_client
    created = await client.send.Target.createTarget(params=create_params)
    return created['targetId']
|
|
|
|
async def _cdp_close_page(self, target_id: TargetID) -> None:
    """Close the page/tab identified by ``target_id`` via CDP ``Target.closeTarget``."""
    close_request = {'targetId': target_id}
    await self.cdp_client.send.Target.closeTarget(params=close_request)
|
|
|
|
async def _cdp_get_cookies(self) -> list[Cookie]:
    """Fetch cookies through CDP ``Storage.getCookies``.

    The request is bounded to 8 seconds with ``asyncio.wait_for``.

    Returns:
        The ``cookies`` list from the response, or an empty list when the
        response carries no ``cookies`` key.
    """
    session = await self.get_or_create_cdp_session(target_id=None)
    pending_reply = session.cdp_client.send.Storage.getCookies(session_id=session.session_id)
    reply = await asyncio.wait_for(pending_reply, timeout=8.0)
    return reply.get('cookies', [])
|
|
|
|
async def _cdp_set_cookies(self, cookies: list[Cookie]) -> None:
    """Install cookies through CDP ``Storage.setCookies``.

    Does nothing when there is no agent focus target or ``cookies`` is empty.
    """
    if not (self.agent_focus_target_id and cookies):
        return

    session = await self.get_or_create_cdp_session(target_id=None)
    # Storage.setCookies takes a params dict holding the list under 'cookies'
    request = {'cookies': cookies}
    await session.cdp_client.send.Storage.setCookies(
        params=request,  # type: ignore[arg-type]
        session_id=session.session_id,
    )
|
|
|
|
async def _cdp_clear_cookies(self) -> None:
    """Remove all cookies through CDP ``Storage.clearCookies``."""
    session = await self.get_or_create_cdp_session()
    storage_domain = session.cdp_client.send.Storage
    await storage_domain.clearCookies(session_id=session.session_id)
|
|
|
|
async def _cdp_grant_permissions(self, permissions: list[str], origin: str | None = None) -> None:
|
|
"""Grant permissions using CDP Browser.grantPermissions."""
|
|
params = {'permissions': permissions}
|
|
# if origin:
|
|
# params['origin'] = origin
|
|
cdp_session = await self.get_or_create_cdp_session()
|
|
# await cdp_session.cdp_client.send.Browser.grantPermissions(params=params, session_id=cdp_session.session_id)
|
|
raise NotImplementedError('Not implemented yet')
|
|
|
|
async def _cdp_set_geolocation(self, latitude: float, longitude: float, accuracy: float = 100) -> None:
    """Override the reported geolocation via CDP ``Emulation.setGeolocationOverride``.

    Args:
        latitude: Latitude to report.
        longitude: Longitude to report.
        accuracy: Accuracy value sent with the override (default 100).
    """
    override = {'latitude': latitude, 'longitude': longitude, 'accuracy': accuracy}
    await self.cdp_client.send.Emulation.setGeolocationOverride(params=override)
|
|
|
|
async def _cdp_clear_geolocation(self) -> None:
    """Drop any geolocation override via CDP ``Emulation.clearGeolocationOverride``."""
    emulation_domain = self.cdp_client.send.Emulation
    await emulation_domain.clearGeolocationOverride()
|
|
|
|
async def _cdp_add_init_script(self, script: str) -> str:
    """Register a script via CDP ``Page.addScriptToEvaluateOnNewDocument``.

    The request is sent with ``runImmediately`` set to True.

    Args:
        script: JavaScript source to register.

    Returns:
        The ``identifier`` from the CDP response.
    """
    assert self._cdp_client_root is not None
    session = await self.get_or_create_cdp_session()

    registration_params = {'source': script, 'runImmediately': True}
    registration = await session.cdp_client.send.Page.addScriptToEvaluateOnNewDocument(
        params=registration_params, session_id=session.session_id
    )
    return registration['identifier']
|
|
|
|
async def _cdp_remove_init_script(self, identifier: str) -> None:
    """Unregister an init script via CDP ``Page.removeScriptToEvaluateOnNewDocument``.

    Args:
        identifier: Identifier of the script to remove.
    """
    session = await self.get_or_create_cdp_session(target_id=None)
    removal_params = {'identifier': identifier}
    await session.cdp_client.send.Page.removeScriptToEvaluateOnNewDocument(
        params=removal_params, session_id=session.session_id
    )
|
|
|
|
async def _cdp_set_viewport(
|
|
self, width: int, height: int, device_scale_factor: float = 1.0, mobile: bool = False, target_id: str | None = None
|
|
) -> None:
|
|
"""Set viewport using CDP Emulation.setDeviceMetricsOverride.
|
|
|
|
Args:
|
|
width: Viewport width
|
|
height: Viewport height
|
|
device_scale_factor: Device scale factor (default 1.0)
|
|
mobile: Whether to emulate mobile device (default False)
|
|
target_id: Optional target ID to set viewport for. If not provided, uses agent_focus.
|
|
"""
|
|
if target_id:
|
|
# Set viewport for specific target
|
|
cdp_session = await self.get_or_create_cdp_session(target_id, focus=False)
|
|
elif self.agent_focus_target_id:
|
|
# Use current focus - use safe API with focus=False to avoid changing focus
|
|
try:
|
|
cdp_session = await self.get_or_create_cdp_session(self.agent_focus_target_id, focus=False)
|
|
except ValueError:
|
|
self.logger.warning('Cannot set viewport: focused target has no sessions')
|
|
return
|
|
else:
|
|
self.logger.warning('Cannot set viewport: no target_id provided and agent_focus not initialized')
|
|
return
|
|
|
|
await cdp_session.cdp_client.send.Emulation.setDeviceMetricsOverride(
|
|
params={'width': width, 'height': height, 'deviceScaleFactor': device_scale_factor, 'mobile': mobile},
|
|
session_id=cdp_session.session_id,
|
|
)
|
|
|
|
async def _cdp_get_origins(self) -> list[dict[str, Any]]:
|
|
"""Get origins with localStorage and sessionStorage using CDP."""
|
|
origins = []
|
|
cdp_session = await self.get_or_create_cdp_session(target_id=None)
|
|
|
|
try:
|
|
# Enable DOMStorage domain to track storage
|
|
await cdp_session.cdp_client.send.DOMStorage.enable(session_id=cdp_session.session_id)
|
|
|
|
try:
|
|
# Get all frames to find unique origins
|
|
frames_result = await cdp_session.cdp_client.send.Page.getFrameTree(session_id=cdp_session.session_id)
|
|
|
|
# Extract unique origins from frames
|
|
unique_origins = set()
|
|
|
|
def _extract_origins(frame_tree):
|
|
"""Recursively extract origins from frame tree."""
|
|
frame = frame_tree.get('frame', {})
|
|
origin = frame.get('securityOrigin')
|
|
if origin and origin != 'null':
|
|
unique_origins.add(origin)
|
|
|
|
# Process child frames
|
|
for child in frame_tree.get('childFrames', []):
|
|
_extract_origins(child)
|
|
|
|
async def _get_storage_items(origin: str, is_local_storage: bool) -> list[dict[str, str]] | None:
|
|
"""Helper to get storage items for an origin."""
|
|
storage_type = 'localStorage' if is_local_storage else 'sessionStorage'
|
|
try:
|
|
result = await cdp_session.cdp_client.send.DOMStorage.getDOMStorageItems(
|
|
params={'storageId': {'securityOrigin': origin, 'isLocalStorage': is_local_storage}},
|
|
session_id=cdp_session.session_id,
|
|
)
|
|
|
|
items = []
|
|
for item in result.get('entries', []):
|
|
if len(item) == 2: # Each item is [key, value]
|
|
items.append({'name': item[0], 'value': item[1]})
|
|
|
|
return items if items else None
|
|
except Exception as e:
|
|
self.logger.debug(f'Failed to get {storage_type} for {origin}: {e}')
|
|
return None
|
|
|
|
_extract_origins(frames_result.get('frameTree', {}))
|
|
|
|
# For each unique origin, get localStorage and sessionStorage
|
|
for origin in unique_origins:
|
|
origin_data = {'origin': origin}
|
|
|
|
# Get localStorage
|
|
local_storage = await _get_storage_items(origin, is_local_storage=True)
|
|
if local_storage:
|
|
origin_data['localStorage'] = local_storage
|
|
|
|
# Get sessionStorage
|
|
session_storage = await _get_storage_items(origin, is_local_storage=False)
|
|
if session_storage:
|
|
origin_data['sessionStorage'] = session_storage
|
|
|
|
# Only add origin if it has storage data
|
|
if 'localStorage' in origin_data or 'sessionStorage' in origin_data:
|
|
origins.append(origin_data)
|
|
|
|
finally:
|
|
# Always disable DOMStorage tracking when done
|
|
await cdp_session.cdp_client.send.DOMStorage.disable(session_id=cdp_session.session_id)
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f'Failed to get origins: {e}')
|
|
|
|
return origins
|
|
|
|
async def _cdp_get_storage_state(self) -> dict:
    """Snapshot cookies and per-origin web storage using CDP.

    Returns:
        ``{'cookies': <result of _cdp_get_cookies>, 'origins': <result of _cdp_get_origins>}``.
    """
    # Cookies are fetched first, then the origin storage data
    snapshot = {'cookies': await self._cdp_get_cookies()}
    snapshot['origins'] = await self._cdp_get_origins()
    return snapshot
|
|
|
|
async def _cdp_navigate(self, url: str, target_id: TargetID | None = None) -> None:
    """Navigate a target to ``url`` using CDP ``Page.navigate``.

    Args:
        url: Destination URL.
        target_id: Target to navigate; the agent's focused target is used when omitted.

    Both the root CDP client and the agent focus target must already be
    initialized (asserted), even when ``target_id`` is given.
    """
    assert self._cdp_client_root is not None, 'CDP client not initialized - browser may not be connected yet'
    assert self.agent_focus_target_id is not None, 'Agent focus not initialized - browser may not be connected yet'

    # Explicit target wins; otherwise fall back to the focused one
    destination_target = target_id or self.agent_focus_target_id
    session = await self.get_or_create_cdp_session(destination_target, focus=True)

    navigate_params = {'url': url}
    await session.cdp_client.send.Page.navigate(params=navigate_params, session_id=session.session_id)
|
|
|
|
@staticmethod
def _is_valid_target(
    target_info: TargetInfo,
    include_http: bool = True,
    include_chrome: bool = False,
    include_chrome_extensions: bool = False,
    include_chrome_error: bool = False,
    include_about: bool = True,
    include_iframes: bool = True,
    include_pages: bool = True,
    include_workers: bool = False,
) -> bool:
    """Decide whether a target passes the URL and type filters.

    A target is accepted only when BOTH its URL and its type are allowed.

    Args:
        target_info: Target info dict from CDP (``type`` and ``url`` are read).
        include_http: Allow ``http://`` / ``https://`` URLs.
        include_chrome: Allow ``chrome://`` URLs.
        include_chrome_extensions: Allow ``chrome-extension://`` URLs.
        include_chrome_error: Allow ``chrome-error://`` URLs.
        include_about: Allow exactly ``about:blank`` (no other ``about:`` page).
        include_iframes: Allow ``iframe`` / ``webview`` target types.
        include_pages: Allow ``page`` / ``tab`` target types.
        include_workers: Allow ``service_worker`` / ``shared_worker`` / ``worker`` types.

    Returns:
        True if target should be processed, False if it should be skipped
    """
    from browser_use.utils import is_new_tab_page

    kind = target_info.get('type', '')
    address = target_info.get('url', '')

    # Each entry is one independent way for the URL to be allowed.
    url_rules = [
        # New tab pages are always let through so connect() can redirect them to about:blank
        is_new_tab_page(address),
        address.startswith('chrome-error://') and include_chrome_error,
        address.startswith('chrome://') and include_chrome,
        address.startswith('chrome-extension://') and include_chrome_extensions,
        # Only about:blank; about:srcdoc and other rare about: pages stay excluded
        address == 'about:blank' and include_about,
        (address.startswith('http://') or address.startswith('https://')) and include_http,
    ]
    url_allowed = any(url_rules)

    frame_like_allowed = kind in ('iframe', 'webview') and include_iframes
    type_rules = [
        kind in ('service_worker', 'shared_worker', 'worker') and include_workers,
        kind in ('page', 'tab') and include_pages,
        frame_like_allowed,
    ]
    type_allowed = any(type_rules)

    # Chrome often reports empty URLs for cross-origin iframe targets (OOPIFs)
    # initially via attachedToTarget, but they are still valid and accessible via CDP.
    # Let them through so get_all_frames() can resolve their frame trees.
    if frame_like_allowed and not address:
        url_allowed = True

    return url_allowed and type_allowed
|
|
|
|
async def get_all_frames(self) -> tuple[dict[str, dict], dict[str, str]]:
    """Build a frame hierarchy from the browser targets.

    With ``browser_profile.cross_origin_iframes`` disabled, only the agent's
    focused target is inspected and cross-origin frames are left out. With it
    enabled, iframe targets are included and a second pass
    (``_populate_frame_metadata``) adds parent-target and DOM node info.

    Returns:
        Tuple of (all_frames, target_sessions) where:
        - all_frames: dict mapping frame_id -> frame info dict (the CDP frame data
          plus ``frameTargetId``, ``parentFrameId``, ``childFrameIds``,
          ``isCrossOrigin`` and ``isValidTarget``)
        - target_sessions: dict mapping target_id -> session_id for the sessions used
    """
    frames_by_id = {}  # frame_id -> frame info dict
    sessions_by_target = {}  # target_id -> session_id

    cross_origin_enabled = self.browser_profile.cross_origin_iframes

    def _record_frames(node, owner, owner_id, parent_frame_id=None):
        """Add the frame at ``node`` and its descendants to ``frames_by_id``."""
        frame = node.get('frame', {})
        frame_id = frame.get('id')
        if not frame_id:
            return

        info = {
            **frame,  # keep every original CDP field: id, url, parentId, ...
            'frameTargetId': owner_id,  # target that can access this frame
            # A parentId on the frame itself marks an OOPIF whose parent lives in another target
            'parentFrameId': frame.get('parentId') or parent_frame_id,
            'childFrameIds': [],  # filled in below
            'isCrossOrigin': False,  # decided below
            'isValidTarget': self._is_valid_target(
                owner,
                include_http=True,
                include_about=True,
                include_pages=True,
                include_iframes=True,
                include_workers=False,
                include_chrome=False,  # chrome://newtab, chrome://settings, etc. are not frames we control
                include_chrome_extensions=False,  # chrome-extension://
                include_chrome_error=False,  # chrome-error:// (e.g. iframes that failed to load or were blocked)
            ),
        }

        owner_is_iframe = owner.get('type') == 'iframe'
        isolation = frame.get('crossOriginIsolatedContextType')
        # Cross-origin when CDP reports an isolated context, or when the owning target is an iframe target
        if (isolation and isolation != 'NotIsolated') or owner_is_iframe:
            info['isCrossOrigin'] = True

        # With cross-origin support off, drop this frame together with its subtree
        if not cross_origin_enabled and info.get('isCrossOrigin'):
            return

        # Direct children listed in this tree (OOPIFs will not appear here)
        children = node.get('childFrames', [])
        for child in children:
            child_id = child.get('frame', {}).get('id')
            if child_id:
                info['childFrameIds'].append(child_id)

        if frame_id not in frames_by_id:
            frames_by_id[frame_id] = info
        elif owner_is_iframe:
            # Already seen via another target: the iframe target has direct access, so it wins
            known = frames_by_id[frame_id]
            known['frameTargetId'] = owner_id
            known['isCrossOrigin'] = True

        # Reaching this point already means the frame was not skipped, so always descend
        for child in children:
            _record_frames(child, owner, owner_id, frame_id)

    # Iframe targets and error pages are only requested when cross-origin support is on
    discovered = await self._cdp_get_all_pages(
        include_http=True,
        include_about=True,
        include_pages=True,
        include_iframes=cross_origin_enabled,
        include_workers=False,
        include_chrome=False,
        include_chrome_extensions=False,
        include_chrome_error=cross_origin_enabled,
    )

    # First pass: collect the frame tree of every eligible target
    for target in discovered:
        target_id = target['targetId']

        if not cross_origin_enabled and target.get('type') == 'iframe':
            continue

        if cross_origin_enabled:
            # Session for this specific target; focus=False because we are only iterating
            try:
                cdp_session = await self.get_or_create_cdp_session(target_id, focus=False)
            except ValueError:
                continue  # target may have detached between discovery and session creation
        else:
            # Only the currently focused target is processed in this mode
            if self.agent_focus_target_id and target_id != self.agent_focus_target_id:
                continue
            try:
                cdp_session = await self.get_or_create_cdp_session(self.agent_focus_target_id, focus=False)
            except ValueError:
                continue  # no session available

        if not cdp_session:
            continue

        sessions_by_target[target_id] = cdp_session.session_id

        try:
            # Not every target type supports Page.getFrameTree
            tree_reply = await cdp_session.cdp_client.send.Page.getFrameTree(session_id=cdp_session.session_id)
            _record_frames(tree_reply.get('frameTree', {}), target, target_id)
        except Exception as e:
            self.logger.debug(f'Failed to get frame tree for target {target_id}: {e}')

    # Second pass (cross-origin mode only): backend node IDs and parent target IDs
    if cross_origin_enabled:
        await self._populate_frame_metadata(frames_by_id, sessions_by_target)

    return frames_by_id, sessions_by_target
|
|
|
|
async def _populate_frame_metadata(self, all_frames: dict[str, dict], target_sessions: dict[str, str]) -> None:
    """Add parent-target and DOM node info to the frames in ``all_frames`` (in place).

    For every frame whose ``parentFrameId`` is itself present in ``all_frames``,
    ``parentTargetId`` is set to the parent's ``frameTargetId``. When that parent
    target has a session in ``target_sessions``, ``DOM.getFrameOwner`` is queried
    through it and ``backendNodeId`` / ``nodeId`` are stored as well.

    Args:
        all_frames: Frame hierarchy dict to populate
        target_sessions: Mapping of target_id -> session_id
    """
    for frame_id, info in all_frames.items():
        parent_id = info.get('parentFrameId')
        if not parent_id or parent_id not in all_frames:
            continue

        owner_target_id = all_frames[parent_id].get('frameTargetId')
        info['parentTargetId'] = owner_target_id

        # The owner element can only be looked up through a session of the parent's target
        if owner_target_id not in target_sessions:
            continue
        assert owner_target_id is not None
        owner_session_id = target_sessions[owner_target_id]

        try:
            dom_domain = self.cdp_client.send.DOM
            await dom_domain.enable(session_id=owner_session_id)

            owner = await dom_domain.getFrameOwner(params={'frameId': frame_id}, session_id=owner_session_id)
            if owner:
                info['backendNodeId'] = owner.get('backendNodeId')
                info['nodeId'] = owner.get('nodeId')
        except Exception:
            # Frame owner not available (likely cross-origin); leave the node IDs unset
            pass
|
|
|
|
async def find_frame_target(self, frame_id: str, all_frames: dict[str, dict] | None = None) -> dict | None:
|
|
"""Find the frame info for a specific frame ID.
|
|
|
|
Args:
|
|
frame_id: The frame ID to search for
|
|
all_frames: Optional pre-built frame hierarchy. If None, will call get_all_frames()
|
|
|
|
Returns:
|
|
Frame info dict if found, None otherwise
|
|
"""
|
|
if all_frames is None:
|
|
all_frames, _ = await self.get_all_frames()
|
|
|
|
return all_frames.get(frame_id)
|
|
|
|
async def cdp_client_for_target(self, target_id: TargetID) -> CDPSession:
    """Return the CDP session for ``target_id`` without changing the agent's focus."""
    session = await self.get_or_create_cdp_session(target_id, focus=False)
    return session
|
|
|
|
async def cdp_client_for_frame(self, frame_id: str) -> CDPSession:
    """Get a CDP session attached to the target that contains ``frame_id``.

    When cross-origin iframe support is disabled the main session is returned
    directly. Otherwise the unified frame hierarchy is built from all targets so
    that any frame, including OOPIFs (out-of-process iframes), can be resolved to
    its owning target.

    Args:
        frame_id: The frame ID to search for

    Returns:
        The CDP session for the target containing the frame (focus is not changed)

    Raises:
        ValueError: If the frame is not found, or its target has no collected session
    """
    # Without cross-origin iframe support everything lives in the main session
    if not self.browser_profile.cross_origin_iframes:
        return await self.get_or_create_cdp_session()

    hierarchy, sessions_by_target = await self.get_all_frames()
    located = await self.find_frame_target(frame_id, hierarchy)

    if located:
        owning_target = located.get('frameTargetId')
        # Only targets for which a session was collected during the scan qualify
        if owning_target in sessions_by_target:
            assert owning_target is not None
            return await self.get_or_create_cdp_session(owning_target, focus=False)

    raise ValueError(f"Frame with ID '{frame_id}' not found in any target")
|
|
|
|
async def cdp_client_for_node(self, node: EnhancedDOMTreeNode) -> CDPSession:
    """Pick the CDP session to use for a DOM node.

    IMPORTANT: backend_node_id is only valid in the session where the DOM was captured,
    so the node's own session_id / frame_id / target_id are trusted, in that order,
    instead of searching all sessions. If none of them yields a session, the agent's
    focused target is used, and finally the main session.
    """
    manager = self.session_manager

    # Strategy 1: the exact session recorded on the node (most specific)
    if node.session_id and manager:
        try:
            by_session = manager.get_session(node.session_id)
            if by_session:
                # Target is looked up only to report its URL
                target = manager.get_target(by_session.target_id)
                self.logger.debug(f'✅ Using session from node.session_id for node {node.backend_node_id}: {target.url}')
                return by_session
        except Exception as e:
            self.logger.debug(f'Failed to get session by session_id {node.session_id}: {e}')

    # Strategy 2: the session of the frame the node belongs to
    if node.frame_id:
        try:
            by_frame = await self.cdp_client_for_frame(node.frame_id)
            target = self.session_manager.get_target(by_frame.target_id)
            self.logger.debug(f'✅ Using session from node.frame_id for node {node.backend_node_id}: {target.url}')
            return by_frame
        except Exception as e:
            self.logger.debug(f'Failed to get session for frame {node.frame_id}: {e}')

    # Strategy 3: the session of the node's target
    if node.target_id:
        try:
            by_target = await self.get_or_create_cdp_session(target_id=node.target_id, focus=False)
            target = self.session_manager.get_target(by_target.target_id)
            self.logger.debug(f'✅ Using session from node.target_id for node {node.backend_node_id}: {target.url}')
            return by_target
        except Exception as e:
            self.logger.debug(f'Failed to get session for target {node.target_id}: {e}')

    # Strategy 4: fall back to the page the agent is currently working on
    if self.agent_focus_target_id:
        target = self.session_manager.get_target(self.agent_focus_target_id)
        try:
            # focus=False so the lookup itself does not change focus
            by_focus = await self.get_or_create_cdp_session(self.agent_focus_target_id, focus=False)
        except ValueError:
            by_focus = None  # continue to the last resort below
        else:
            if target:
                self.logger.warning(
                    f'⚠️ Node {node.backend_node_id} has no session/frame/target info. Using agent_focus session: {target.url}'
                )
            return by_focus

    # Last resort: use main session
    self.logger.error(f'❌ No session info for node {node.backend_node_id} and no agent_focus available. Using main session.')
    return await self.get_or_create_cdp_session()
|
|
|
|
@observe_debug(ignore_input=True, ignore_output=True, name='take_screenshot')
async def take_screenshot(
    self,
    path: str | None = None,
    full_page: bool = False,
    format: str = 'png',
    quality: int | None = None,
    clip: dict | None = None,
) -> bytes:
    """Capture a screenshot via CDP ``Page.captureScreenshot``.

    Args:
        path: Optional file path; when given, the image bytes are also written there
        full_page: Sent as ``captureBeyondViewport`` (capture beyond the viewport)
        format: Image format ('png', 'jpeg', 'webp')
        quality: Quality 0-100; only sent when ``format`` is 'jpeg'
        clip: Region to capture {'x': int, 'y': int, 'width': int, 'height': int};
            it is sent with a fixed scale of 1

    Returns:
        Screenshot data as bytes

    Raises:
        Exception: If the CDP response is empty or has no ``data`` field
    """
    import base64

    from cdp_use.cdp.page import CaptureScreenshotParameters

    session = await self.get_or_create_cdp_session()

    # Assemble the request as a plain dict first so optional keys can be added
    request: CaptureScreenshotParameters = {
        'format': format,
        'captureBeyondViewport': full_page,
    }

    if format == 'jpeg' and quality is not None:
        request['quality'] = quality

    if clip:
        region = {edge: clip[edge] for edge in ('x', 'y', 'width', 'height')}
        region['scale'] = 1
        request['clip'] = region

    request = CaptureScreenshotParameters(**request)

    reply = await session.cdp_client.send.Page.captureScreenshot(params=request, session_id=session.session_id)

    if not reply or 'data' not in reply:
        raise Exception('Screenshot failed - no data returned')

    # CDP returns the image base64-encoded
    image_bytes = base64.b64decode(reply['data'])

    if path:
        Path(path).write_bytes(image_bytes)

    return image_bytes
|
|
|
|
async def screenshot_element(
    self,
    selector: str,
    path: str | None = None,
    format: str = 'png',
    quality: int | None = None,
) -> bytes:
    """Capture a screenshot clipped to a single element.

    The element's bounds are looked up with ``_get_element_bounds`` and then
    handed to ``take_screenshot`` as the clip region.

    Args:
        selector: CSS selector for the element
        path: Optional file path to save screenshot
        format: Image format ('png', 'jpeg', 'webp')
        quality: Quality 0-100 for JPEG format

    Returns:
        Screenshot data as bytes

    Raises:
        ValueError: If no bounds could be obtained for the selector
    """
    element_box = await self._get_element_bounds(selector)

    if element_box:
        return await self.take_screenshot(path=path, format=format, quality=quality, clip=element_box)

    raise ValueError(f"Element '{selector}' not found or has no bounds")
|
|
|
|
async def _get_element_bounds(self, selector: str) -> dict | None:
    """Look up an element's bounding box through CDP DOM calls.

    Args:
        selector: CSS selector passed to ``DOM.querySelector`` on the document root

    Returns:
        A dict with 'x', 'y', 'width' and 'height' derived from the box model's
        content quad, or None when the selector yields no node id or the
        response has no box model
    """
    session = await self.get_or_create_cdp_session()

    # Resolve the document root, which is the starting node for the selector query.
    document = await session.cdp_client.send.DOM.getDocument(params={'depth': 1}, session_id=session.session_id)
    root_id = document['root']['nodeId']

    match = await session.cdp_client.send.DOM.querySelector(
        params={'nodeId': root_id, 'selector': selector},
        session_id=session.session_id,
    )
    matched_id = match.get('nodeId')
    if not matched_id:
        return None

    box = await session.cdp_client.send.DOM.getBoxModel(
        params={'nodeId': matched_id},
        session_id=session.session_id,
    )
    model = box.get('model')
    if not model:
        return None

    # The content quad is read as four (x, y) pairs; the bounds are the
    # axis-aligned extent of those four points.
    quad = model['content']
    xs = [quad[i] for i in (0, 2, 4, 6)]
    ys = [quad[i] for i in (1, 3, 5, 7)]
    left, top = min(xs), min(ys)

    return {
        'x': left,
        'y': top,
        'width': max(xs) - left,
        'height': max(ys) - top,
    }
|