Files
browser-use/browser_use/browser/session.py
2025-08-06 20:52:12 -07:00

1680 lines
58 KiB
Python

"""Event-driven browser session with backwards compatibility."""
import asyncio
import logging
import weakref
from pathlib import Path
from typing import TYPE_CHECKING, Any
from bubus import EventBus
from bubus.helpers import retry
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
from uuid_extensions import uuid7str
from browser_use.browser.events import (
BrowserConnectedEvent,
BrowserErrorEvent,
BrowserLaunchEvent,
BrowserStartEvent,
BrowserStopEvent,
BrowserStoppedEvent,
)
from browser_use.browser.profile import BrowserProfile
from browser_use.browser.views import (
BrowserStateSummary,
PageInfo,
TabInfo,
)
from browser_use.observability import observe_debug
from browser_use.utils import (
_log_pretty_url,
is_new_tab_page,
logger,
time_execution_async,
)
if TYPE_CHECKING:
from cdp_use import CDPClient
from browser_use.dom.views import EnhancedDOMTreeNode
class CachedSession:
"""Container for cached CDP session to allow weak references."""
def __init__(self, client: Any, session_id: str, target_id: str, frame_id: str | None = None):
self.client = client
self.session_id = session_id
self.target_id = target_id
self.frame_id = frame_id
def __hash__(self):
return hash(self.client)
_GLOB_WARNING_SHOWN = False  # used inside _is_url_allowed to avoid spamming the logs with the same warning multiple times
MAX_SCREENSHOT_HEIGHT = 2000
MAX_SCREENSHOT_WIDTH = 1920


def _log_glob_warning(domain: str, glob: str, logger: logging.Logger):
    """Warn (once per process) that a glob pattern matched a concrete domain.

    Glob patterns are very easy to mess up and match too many domains by accident:
    e.g. if you only need gmail, don't use *.google.com — an attacker could convince
    the agent to visit a malicious doc on docs.google.com to set up a prompt injection.
    """
    global _GLOB_WARNING_SHOWN
    if _GLOB_WARNING_SHOWN:
        return
    logger.warning(
        f"⚠️ Allowing agent to visit {domain} based on allowed_domains=['{glob}', ...]. Set allowed_domains=['{domain}', ...] explicitly to avoid matching too many domains!"
    )
    _GLOB_WARNING_SHOWN = True
# Shared default profile — used by every BrowserSession that does not supply its own browser_profile.
DEFAULT_BROWSER_PROFILE = BrowserProfile()
class BrowserSession(BaseModel):
"""Event-driven browser session with backwards compatibility.
This class provides a 2-layer architecture:
- High-level event handling for agents/controllers
- Direct CDP/Playwright calls for browser operations
Supports both event-driven and imperative calling styles.
"""
model_config = ConfigDict(
arbitrary_types_allowed=True,
validate_assignment=True,
extra='forbid',
)
# Core configuration
id: str = Field(default_factory=lambda: uuid7str())
browser_profile: BrowserProfile = Field(default_factory=lambda: DEFAULT_BROWSER_PROFILE)
# Connection info (for backwards compatibility)
cdp_url: str | None = None
is_local: bool = Field(default=True)
# Mutable state
current_target_id: str | None = None
"""Current active target ID for the main page"""
# Event bus
event_bus: EventBus = Field(default_factory=EventBus)
# PDF handling
_auto_download_pdfs: bool = PrivateAttr(default=True)
def model_post_init(self, __context) -> None:
"""Register event handlers after model initialization."""
# Register BrowserSession's event handlers manually since it's not a BaseWatchdog
self.event_bus.on('BrowserStartEvent', self.on_BrowserStartEvent)
self.event_bus.on('BrowserStopEvent', self.on_BrowserStopEvent)
# Watchdogs
_crash_watchdog: Any = PrivateAttr(default=None)
_downloads_watchdog: Any = PrivateAttr(default=None)
_aboutblank_watchdog: Any = PrivateAttr(default=None)
_navigation_watchdog: Any = PrivateAttr(default=None)
_storage_state_watchdog: Any = PrivateAttr(default=None)
_local_browser_watchdog: Any = PrivateAttr(default=None)
_default_action_watchdog: Any = PrivateAttr(default=None)
_dom_watchdog: Any = PrivateAttr(default=None)
_screenshot_watchdog: Any = PrivateAttr(default=None)
# Navigation tracking now handled by watchdogs
# Cached browser state for synchronous access
_cached_browser_state_summary: Any = PrivateAttr(default=None)
_cached_selector_map: dict[int, 'EnhancedDOMTreeNode'] = PrivateAttr(default_factory=dict)
"""Cached mapping of element indices to DOM nodes"""
# CDP client
_cdp_client: 'CDPClient | None' = PrivateAttr(default=None)
"""Cached CDP client instance"""
# CDP session cache
_cdp_session_cache: weakref.WeakValueDictionary = PrivateAttr(default_factory=weakref.WeakValueDictionary)
"""Cache of CDP sessions by target_id -> (client, session_id) tuple"""
_cdp_cache_enabled: bool = PrivateAttr(default=True)
"""Flag to enable/disable CDP session caching"""
_logger: Any = PrivateAttr(default=None)
@property
def logger(self) -> Any:
"""Get instance-specific logger with session ID in the name"""
if self._logger is None: # keep updating the name pre-init because our id and str(self) can change
import logging
self._logger = logging.getLogger(f'browser_use.{self}')
return self._logger
@property
def cdp_client(self) -> 'CDPClient':
"""Get the cached CDP client.
The client is created and started in setup_browser_via_cdp_url().
Returns:
The CDP client instance
Raises:
RuntimeError: If CDP client is not initialized yet
"""
if self._cdp_client is None:
raise RuntimeError('CDP client not initialized - browser may not be connected yet')
return self._cdp_client
async def get_cdp_session(self, target_id: str) -> tuple[Any, str]:
"""Get or create a CDP session for a target, using cache when enabled.
Args:
target_id: The target ID to get a session for
Returns:
Tuple of (cdp_client, session_id)
"""
# If caching is disabled, always create a new session
if not self._cdp_cache_enabled:
client = self.cdp_client
session = await client.send.Target.attachToTarget(params={'targetId': target_id, 'flatten': True})
session_id = session['sessionId']
await self._enable_all_domains(client, session_id)
return client, session_id
# Check cache first
cached = self._cdp_session_cache.get(target_id)
if cached:
try:
# Quick ping to verify it's still alive (0.1s timeout)
await asyncio.wait_for(
cached.client.send.Runtime.evaluate(params={'expression': '1'}, session_id=cached.session_id),
timeout=0.1
)
return cached.client, cached.session_id
except:
# Dead session, remove from cache
self._cdp_session_cache.pop(target_id, None)
# Create new session
client = self.cdp_client
session = await client.send.Target.attachToTarget(params={'targetId': target_id, 'flatten': True})
session_id = session['sessionId']
# Enable all necessary domains at creation time
await self._enable_all_domains(client, session_id)
# Cache it using CachedSession (which supports weak references)
cache_value = CachedSession(client, session_id, target_id)
self._cdp_session_cache[target_id] = cache_value
return client, session_id
async def _enable_all_domains(self, client: Any, session_id: str) -> None:
"""Enable all necessary CDP domains for a session."""
# Enable auto-attach for related targets (iframes, etc)
await client.send.Target.setAutoAttach(
params={'autoAttach': True, 'waitForDebuggerOnStart': False, 'flatten': True},
session_id=session_id
)
# Enable all commonly used domains in parallel
await asyncio.gather(
client.send.Page.enable(session_id=session_id),
# TEMPORARILY DISABLED: Network.enable causes excessive event logging
# client.send.Network.enable(session_id=session_id),
client.send.Runtime.enable(session_id=session_id),
client.send.DOM.enable(session_id=session_id),
client.send.DOMSnapshot.enable(session_id=session_id),
client.send.Accessibility.enable(session_id=session_id),
client.send.Inspector.enable(session_id=session_id),
return_exceptions=True # Don't fail if some domains aren't available
)
async def release_cdp_session(self, target_id: str) -> None:
"""Explicitly release a CDP session (detach and remove from cache).
Args:
target_id: The target ID to release the session for
"""
# If caching is disabled, nothing to release from cache
if not self._cdp_cache_enabled:
return
cached = self._cdp_session_cache.pop(target_id, None)
if cached:
try:
client, session_id = cached
await client.send.Target.detachFromTarget(params={'sessionId': session_id})
except:
pass # Session might already be dead
async def clear_cdp_cache(self) -> None:
"""Clear all cached CDP sessions with proper cleanup."""
for target_id in list(self._cdp_session_cache.keys()):
await self.release_cdp_session(target_id)
def __repr__(self) -> str:
port_number = (self.cdp_url or 'no-cdp').rsplit(':', 1)[-1].split('/', 1)[0]
return f'BrowserSession🆂 {self.id[-4:]}:{port_number} #{str(id(self))[-2:]} (cdp_url={self.cdp_url}, profile={self.browser_profile})'
def __str__(self) -> str:
# Note: _original_browser_session tracking moved to Agent class
port_number = (self.cdp_url or 'no-cdp').rsplit(':', 1)[-1].split('/', 1)[0]
return (
f'BrowserSession🆂 {self.id[-4:]}:{port_number} #{str(id(self))[-2:]}' # ' 🅟 {str(id(self.current_target_id))[-2:]}'
)
    async def on_BrowserStartEvent(self, event: BrowserStartEvent) -> dict[str, str]:
        """Handle browser start request.

        Launches a local browser (via LocalBrowserWatchdog) when no cdp_url is set,
        then connects over CDP and announces BrowserConnectedEvent.

        Returns:
            Dict with 'cdp_url' key containing the CDP URL

        Raises:
            Exception: re-raised after a BrowserErrorEvent is dispatched when any step fails.
        """
        # Initialize and attach all watchdogs FIRST so LocalBrowserWatchdog can handle BrowserLaunchEvent
        await self.attach_all_watchdogs()
        try:
            # If no CDP URL, launch local browser
            if not self.cdp_url:
                if self.is_local:
                    # Launch local browser using event-driven approach
                    launch_event = self.event_bus.dispatch(BrowserLaunchEvent())
                    await launch_event
                    # Get the CDP URL from LocalBrowserWatchdog handler result
                    results = await launch_event.event_results_flat_dict()
                    self.cdp_url = results.get('cdp_url')
                    if not self.cdp_url:
                        raise Exception('No CDP URL returned from LocalBrowserWatchdog')
                else:
                    raise Exception('No CDP URL provided for remote browser connection')
            # NOTE(review): `assert` is stripped under `python -O`; consider an explicit check
            assert self.cdp_url and '://' in self.cdp_url
            # Setup browser via CDP (for both local and remote cases)
            await self.setup_browser_via_cdp_url()
            # Notify that browser is connected (single place)
            self.event_bus.dispatch(BrowserConnectedEvent(cdp_url=self.cdp_url))
            # Return the CDP URL for other components
            return {'cdp_url': self.cdp_url}
        except Exception as e:
            # Surface the failure on the bus for observers, then re-raise for the caller.
            self.event_bus.dispatch(
                BrowserErrorEvent(
                    error_type='BrowserStartEventError',
                    message=f'Failed to start browser: {type(e).__name__} {e}',
                    details={'cdp_url': self.cdp_url, 'is_local': self.is_local},
                )
            )
            raise
    async def on_BrowserStopEvent(self, event: BrowserStopEvent) -> None:
        """Handle browser stop request.

        Honors browser_profile.keep_alive unless event.force is set. Never raises:
        failures are reported as a BrowserErrorEvent on the bus instead.
        """
        try:
            # Check if we should keep the browser alive
            if self.browser_profile.keep_alive and not event.force:
                self.event_bus.dispatch(BrowserStoppedEvent(reason='Kept alive due to keep_alive=True'))
                return
            # Clear CDP session cache before stopping
            await self.clear_cdp_cache()
            # Reset state — only local sessions own their cdp_url
            if self.is_local:
                self.cdp_url = None
            # Notify stop and wait for all handlers to complete
            # LocalBrowserWatchdog listens for BrowserStopEvent and dispatches BrowserKillEvent
            stop_event = self.event_bus.dispatch(BrowserStoppedEvent(reason='Stopped by request'))
            await stop_event
        except Exception as e:
            self.event_bus.dispatch(
                BrowserErrorEvent(
                    error_type='BrowserStopEventError',
                    message=f'Failed to stop browser: {type(e).__name__} {e}',
                    details={'cdp_url': self.cdp_url, 'is_local': self.is_local},
                )
            )
# ========== Helper Methods ==========
async def get_browser_state_with_recovery(
self, cache_clickable_elements_hashes: bool = True, include_screenshot: bool = False
) -> 'BrowserStateSummary':
"""Get browser state using event system.
This is a compatibility wrapper that dispatches BrowserStateRequestEvent.
Args:
cache_clickable_elements_hashes: Whether to cache element hashes (for compatibility)
include_screenshot: Whether to include screenshot in state
Returns:
BrowserStateSummary from the event handler
"""
from browser_use.browser.events import BrowserStateRequestEvent
# Dispatch the event and wait for result
event = self.event_bus.dispatch(
BrowserStateRequestEvent(
include_dom=True,
include_screenshot=include_screenshot,
cache_clickable_elements_hashes=cache_clickable_elements_hashes,
)
)
# The handler returns the BrowserStateSummary directly
result = await event.event_result()
return result
async def get_state_summary(self, cache_clickable_elements_hashes: bool = True) -> 'BrowserStateSummary':
"""Alias for get_browser_state_with_recovery for backwards compatibility."""
return await self.get_browser_state_with_recovery(
cache_clickable_elements_hashes=cache_clickable_elements_hashes, include_screenshot=False
)
async def attach_all_watchdogs(self) -> None:
"""Initialize and attach all watchdogs in one go."""
from browser_use.browser.aboutblank_watchdog import AboutBlankWatchdog
from browser_use.browser.crash_watchdog import CrashWatchdog
from browser_use.browser.default_action_watchdog import DefaultActionWatchdog
from browser_use.browser.dom_watchdog import DOMWatchdog
from browser_use.browser.downloads_watchdog import DownloadsWatchdog
from browser_use.browser.local_browser_watchdog import LocalBrowserWatchdog
from browser_use.browser.navigation_watchdog import NavigationWatchdog
from browser_use.browser.screenshot_watchdog import ScreenshotWatchdog
from browser_use.browser.storage_state_watchdog import StorageStateWatchdog
watchdog_configs = [
('_crash_watchdog', CrashWatchdog),
('_downloads_watchdog', DownloadsWatchdog),
('_storage_state_watchdog', StorageStateWatchdog),
('_local_browser_watchdog', LocalBrowserWatchdog),
('_navigation_watchdog', NavigationWatchdog),
('_aboutblank_watchdog', AboutBlankWatchdog),
('_default_action_watchdog', DefaultActionWatchdog),
('_dom_watchdog', DOMWatchdog),
('_screenshot_watchdog', ScreenshotWatchdog),
]
for attr_name, watchdog_class in watchdog_configs:
if not hasattr(self, attr_name) or getattr(self, attr_name) is None:
try:
watchdog = watchdog_class(event_bus=self.event_bus, browser_session=self)
await watchdog.attach_to_session()
setattr(self, attr_name, watchdog)
# logger.debug(f'[Session] Initialized and attached {watchdog_class.__name__}')
except Exception as e:
logger.warning(f'[Session] Failed to initialize {watchdog_class.__name__}: {e}')
else:
# Watchdog already exists, don't re-initialize to avoid duplicate handlers
logger.debug(f'[Session] {watchdog_class.__name__} already initialized, skipping')
    async def setup_browser_via_cdp_url(self) -> None:
        """Connect to a remote chromium-based browser via CDP using cdp-use.

        This MUST succeed or the browser is unusable. Fails hard on any error.

        Side effects: sets self._cdp_client and self.current_target_id, and
        pre-warms the CDP session cache for the chosen target.

        Raises:
            RuntimeError: if cdp_url is unset or any connection step fails.
        """
        if not self.cdp_url:
            raise RuntimeError('Cannot setup CDP connection without CDP URL')
        self.logger.info(f'🌎 Connecting to existing chromium-based browser via CDP: {self.cdp_url} -> (remote browser)')
        try:
            # Import cdp-use client
            import httpx
            from cdp_use import CDPClient

            # Convert HTTP URL to WebSocket URL if needed
            ws_url = self.cdp_url
            if not ws_url.startswith('ws'):
                # If it's an HTTP URL, fetch the WebSocket URL from /json/version endpoint
                url = ws_url.rstrip('/')
                if not url.endswith('/json/version'):
                    url = url + '/json/version'
                async with httpx.AsyncClient() as client:
                    version_info = await client.get(url)
                    ws_url = version_info.json()['webSocketDebuggerUrl']
            # Create and store the CDP client for direct CDP communication.
            # Re-entrant: an existing client instance is reused, but start() is
            # always called — NOTE(review): confirm start() is safe to call twice.
            if self._cdp_client is None:
                self._cdp_client = CDPClient(ws_url)
            assert self._cdp_client is not None
            await self._cdp_client.start()
            self.logger.info('✅ CDP client connected successfully')
            assert self._cdp_client is not None
            # Get browser targets to find available contexts/pages
            targets = await self._cdp_client.send.Target.getTargets()
            # Find main browser pages (avoiding iframes, workers, extensions, etc.)
            page_targets = [
                t
                for t in targets['targetInfos']
                if self._is_valid_target(
                    t, include_http=True, include_about=True, include_pages=True, include_iframes=False, include_workers=False
                )
            ]
            # Check for chrome://newtab pages and immediately redirect them
            # to about:blank to avoid JS issues from CDP on chrome://* urls
            from browser_use.utils import is_new_tab_page

            for target in page_targets:
                target_url = target.get('url', '')
                if is_new_tab_page(target_url) and target_url != 'about:blank':
                    # Redirect chrome://newtab to about:blank to avoid JS issues preventing driving chrome://newtab
                    target_id = target['targetId']
                    self.logger.info(f'🔄 Redirecting {target_url} to about:blank for target {target_id}')
                    try:
                        # Use cached session to navigate to about:blank
                        client, session_id = await self.get_cdp_session(target_id)
                        await client.send.Page.navigate(params={'url': 'about:blank'}, session_id=session_id)
                    except Exception as e:
                        self.logger.warning(f'Failed to redirect {target_url} to about:blank: {e}')
            if not page_targets:
                # No pages found, create a new one
                new_target = await self._cdp_client.send.Target.createTarget(params={'url': 'about:blank'})
                target_id = new_target['targetId']
                self.logger.info(f'📄 Created new blank page with target ID: {target_id}')
            else:
                # Use the first available page
                target_id = page_targets[0]['targetId']
                self.logger.info(f'📄 Using existing page with target ID: {target_id}')
            # Store the current page target ID
            self.current_target_id = target_id
            # Pre-create cached session for the current target (enables all domains)
            try:
                await self.get_cdp_session(target_id)
                self.logger.info(f'🌐 CDP session cached and domains enabled for target {target_id[:8]}...')
            except Exception as e:
                self.logger.warning(f'Failed to create CDP session: {e}')
        except Exception as e:
            # Fatal error - browser is not usable without CDP connection
            self.logger.error(f'❌ FATAL: Failed to setup CDP connection: {e}')
            self.logger.error('❌ Browser cannot continue without CDP connection')
            # Clean up any partial state
            self._cdp_client = None
            self.current_target_id = None
            # Re-raise as a fatal error
            raise RuntimeError(f'Failed to establish CDP connection to browser: {e}') from e
    async def setup_domservice_init_scripts(self, retry_count: int = 0) -> None:
        """Build the DOM-service page init script (permissions shim + addEventListener tracker).

        NOTE(review): currently a no-op — the script is defined but never injected;
        the Playwright `add_init_script` call below is commented out pending a
        CDP-based replacement, and `retry_count` is unused.
        """
        # self.logger.debug('Setting up init scripts in browser')
        init_script = """
        // check to make sure we're not inside the PDF viewer
        window.isPdfViewer = !!document?.body?.querySelector('body > embed[type="application/pdf"][width="100%"]')
        if (!window.isPdfViewer) {

            // Permissions
            const originalQuery = window.navigator.permissions.query;
            window.navigator.permissions.query = (parameters) => (
                parameters.name === 'notifications' ?
                    Promise.resolve({ state: Notification.permission }) :
                    originalQuery(parameters)
            );
            (() => {
                if (window._eventListenerTrackerInitialized) return;
                window._eventListenerTrackerInitialized = true;

                const originalAddEventListener = EventTarget.prototype.addEventListener;
                const eventListenersMap = new WeakMap();

                EventTarget.prototype.addEventListener = function(type, listener, options) {
                    if (typeof listener === "function") {
                        let listeners = eventListenersMap.get(this);
                        if (!listeners) {
                            listeners = [];
                            eventListenersMap.set(this, listeners);
                        }

                        listeners.push({
                            type,
                            listener,
                            listenerPreview: listener.toString().slice(0, 100),
                            options
                        });
                    }

                    return originalAddEventListener.call(this, type, listener, options);
                };

                window.getEventListenersForNode = (node) => {
                    const listeners = eventListenersMap.get(node) || [];
                    return listeners.map(({ type, listenerPreview, options }) => ({
                        type,
                        listenerPreview,
                        options
                    }));
                };
            })();
        }
        """
        # TODO: convert this to pure cdp-use and/or move it to the dom_watchdog.py
        # await self.browser_context.add_init_script(init_script)
@property
async def target_ids(self) -> list[str]:
"""Get all open page target IDs using CDP."""
try:
pages = await self._cdp_get_all_pages()
return [page['targetId'] for page in pages]
except Exception:
return []
async def get_target_id_by_tab_index(self, tab_index: int) -> str | None:
"""Get target ID by tab index."""
target_ids = await self.target_ids
if 0 <= tab_index < len(target_ids):
return target_ids[tab_index]
return None
async def get_tab_index(self, target_id: str) -> int:
"""Get tab index for a target ID."""
target_ids = await self.target_ids
if target_id in target_ids:
return target_ids.index(target_id)
return -1
    async def get_tabs_info(self) -> list[TabInfo]:
        """Get information about all open tabs using CDP Target.getTargetInfo for speed.

        Returns:
            One TabInfo per page target (CDP enumeration order). Titles fall back
            to URL-derived values when the target info lookup fails.
        """
        tabs = []
        # Get all page targets using CDP
        pages = await self._cdp_get_all_pages()
        for i, page_target in enumerate(pages):
            target_id = page_target['targetId']
            url = page_target['url']
            # Try to get the title directly from Target.getTargetInfo - much faster!
            # The initial getTargets() doesn't include title, but getTargetInfo does
            try:
                target_info = await self.cdp_client.send.Target.getTargetInfo(params={'targetId': target_id})
                # The title is directly available in targetInfo
                title = target_info.get('targetInfo', {}).get('title', '')
                # Skip JS execution for chrome:// pages and new tab pages
                if is_new_tab_page(url) or url.startswith('chrome://'):
                    # Use URL as title for chrome pages, or mark new tabs as unusable
                    if is_new_tab_page(url):
                        title = 'ignore this tab and do not use it'
                    elif not title:
                        # For chrome:// pages without a title, use the URL itself
                        title = url
                # Special handling for PDF pages without titles
                # NOTE(review): `'pdf' in url` also matches non-PDF URLs containing "pdf" — confirm intent
                if (not title or title == '') and (url.endswith('.pdf') or 'pdf' in url):
                    # PDF pages might not have a title, use URL filename
                    try:
                        from urllib.parse import urlparse

                        filename = urlparse(url).path.split('/')[-1]
                        if filename:
                            title = filename
                    except Exception:
                        pass
            except Exception as e:
                # Fallback to basic title handling when getTargetInfo fails
                self.logger.debug(f'⚠️ Failed to get target info for tab #{i}: {_log_pretty_url(url)} - {type(e).__name__}')
                if is_new_tab_page(url):
                    title = 'ignore this tab and do not use it'
                elif url.startswith('chrome://'):
                    title = url
                else:
                    title = ''
            tab_info = TabInfo(
                page_id=i,
                url=url,
                title=title,
                parent_page_id=None,
                id=target_id,  # Use target ID as the unique identifier
                index=i,
            )
            tabs.append(tab_info)
        return tabs
# DOM element methods
# Removed duplicate get_browser_state_with_recovery - using the decorated version below
@observe_debug(ignore_input=True, ignore_output=True, name='get_minimal_state_summary')
@time_execution_async('--get_minimal_state_summary')
async def get_minimal_state_summary(self) -> BrowserStateSummary:
"""Get basic page info without DOM processing, but try to capture screenshot"""
from browser_use.browser.views import BrowserStateSummary
from browser_use.dom.views import EnhancedDOMTreeNode as DOMElementNode
from browser_use.dom.views import NodeType, SerializedDOMState
# Get basic info - no DOM parsing to avoid errors
url = await self.get_current_page_url() or 'unknown'
# Try to get title safely
try:
# timeout after 2 seconds
title = await asyncio.wait_for(self.get_current_page_title(), timeout=2.0)
except Exception:
title = 'Page Load Error'
# Try to get tabs info safely
try:
# timeout after 2 seconds
tabs_info = await retry(timeout=2, retries=0)(self.get_tabs_info)()
except Exception:
tabs_info = []
# Create minimal DOM element for error state
minimal_element_tree = DOMElementNode(
node_id=1,
backend_node_id=1,
node_type=NodeType.ELEMENT_NODE,
node_name='body',
node_value='',
attributes={},
is_scrollable=False,
is_visible=True,
absolute_position=None,
frame_id=None,
target_id=self.current_target_id,
content_document=None,
shadow_root_type=None,
shadow_roots=None,
parent_node=None,
children_nodes=[],
ax_node=None,
snapshot_node=None,
)
# Check if current page is a PDF viewer
is_pdf_viewer = await self._is_pdf_viewer(page)
# Create minimal SerializedDOMState
minimal_dom_state = SerializedDOMState(
_root=None, # No simplified tree for minimal state
selector_map={}, # Empty selector map
)
return BrowserStateSummary(
dom_state=minimal_dom_state,
url=url,
title=title,
tabs=tabs_info,
pixels_above=0,
pixels_below=0,
browser_errors=[f'Page state retrieval failed, minimal recovery applied for {url}'],
is_pdf_viewer=is_pdf_viewer,
recent_events='',
)
    @observe_debug(ignore_input=True, ignore_output=True, name='get_updated_state')
    async def _get_updated_state(self, focus_element: int = -1, include_screenshot: bool = True) -> BrowserStateSummary:
        """Update and return state.

        Builds a full BrowserStateSummary: fast path for empty/chrome:// pages,
        otherwise DOM processing via the event bus with a minimal-state fallback.
        Caches the result on self.browser_state_summary and returns it.

        NOTE(review): several calls below still reference an undefined name `page`
        (a leftover from the removed Playwright API) — flagged inline. They raise
        NameError at runtime, which the surrounding try/excepts absorb or convert
        into the generic failure path. `focus_element` is currently unused.

        NOTE(review): self.browser_state_summary is not a declared pydantic field in
        the visible class body — confirm it is declared elsewhere, otherwise the
        assignment will be rejected by pydantic (extra='forbid').
        """
        # Get current page URL
        page_url = await self.get_current_page_url()
        # Check if this is a new tab or chrome:// page early for optimization
        is_empty_page = is_new_tab_page(page_url) or page_url.startswith('chrome://')
        try:
            # Fast path for empty pages - skip all expensive operations
            if is_empty_page:
                self.logger.debug(f'⚡ Fast path for empty page: {page_url}')
                # Create minimal DOM state immediately - just return None for now
                # since DOM classes have been refactored
                content = None
                # Get tabs info
                tabs_info = await self.get_tabs_info()
                # Skip screenshot for empty pages
                screenshot_b64 = None
                # Use default viewport dimensions from browser profile
                viewport = self.browser_profile.viewport or {'width': 1280, 'height': 720}
                page_info = PageInfo(
                    viewport_width=viewport['width'],
                    viewport_height=viewport['height'],
                    page_width=viewport['width'],
                    page_height=viewport['height'],
                    scroll_x=0,
                    scroll_y=0,
                    pixels_above=0,
                    pixels_below=0,
                    pixels_left=0,
                    pixels_right=0,
                )
                # Return minimal state immediately
                self.browser_state_summary = BrowserStateSummary(
                    dom_state=content,
                    url=page_url,
                    title='New Tab' if is_new_tab_page(page_url) else 'Chrome Page',
                    tabs=tabs_info,
                    screenshot=screenshot_b64,
                    page_info=page_info,
                    pixels_above=0,
                    pixels_below=0,
                    browser_errors=[],
                    is_pdf_viewer=False,
                )
                return self.browser_state_summary
            # Normal path for regular pages
            self.logger.debug('🧹 Removing highlights...')
            try:
                await self.remove_highlights()
            except TimeoutError:
                self.logger.debug('Timeout to remove highlights')
            # Check for PDF and auto-download if needed
            try:
                # NOTE(review): `page` is undefined here (Playwright leftover) — this
                # raises NameError, which the except below logs at debug level, so the
                # PDF auto-download is effectively dead code. TODO: fix the argument.
                pdf_path = await self._auto_download_pdf_if_needed(page)
                if pdf_path:
                    self.logger.info(f'📄 PDF auto-downloaded: {pdf_path}')
            except Exception as e:
                self.logger.debug(f'PDF auto-download check failed: {type(e).__name__}: {e}')
            self.logger.debug('🌳 Starting DOM processing...')
            from browser_use.browser.events import BrowserStateRequestEvent
            from browser_use.dom.views import SerializedDOMState

            try:
                # Use the DOMWatchdog via event bus - request state with DOM
                result = await asyncio.wait_for(
                    self.event_bus.dispatch(BrowserStateRequestEvent(include_dom=True, include_screenshot=False)),
                    timeout=45.0,  # 45 second timeout for DOM processing - generous for complex pages
                )
                state_summary = await result.event_result()
                content = state_summary.dom_state if state_summary else None
                self.logger.debug('✅ DOM processing completed')
            except (TimeoutError, Exception) as e:
                if isinstance(e, TimeoutError):
                    self.logger.warning(f'DOM processing timed out after 45 seconds for {page_url}')
                else:
                    self.logger.warning(f'DOM processing failed: {e}')
                self.logger.warning('🔄 Falling back to minimal DOM state to allow basic navigation...')
                # Create minimal DOM state for basic navigation
                content = SerializedDOMState(
                    _root=None,  # No simplified tree for minimal state
                    selector_map={},  # Empty selector map
                )
            self.logger.debug('📋 Getting tabs info...')
            tabs_info = await self.get_tabs_info()
            self.logger.debug('✅ Tabs info completed')
            # Get all cross-origin iframes within the page and open them in new tabs
            # mark the titles of the new tabs so the LLM knows to check them for additional content
            # unfortunately too buggy for now, too many sites use invisible cross-origin iframes for ads, tracking, youtube videos, social media, etc.
            # and it distracts the bot by opening a lot of new tabs
            # iframe_urls = await dom_service.get_cross_origin_iframes()
            # outer_page = self.current_target_id
            # for url in iframe_urls:
            #     if url in [tab.url for tab in tabs_info]:
            #         continue  # skip if the iframe if we already have it open in a tab
            #     new_page_id = tabs_info[-1].page_id + 1
            #     self.logger.debug(f'Opening cross-origin iframe in new tab #{new_page_id}: {url}')
            #     await self.create_new_tab(url)
            #     tabs_info.append(
            #         TabInfo(
            #             page_id=new_page_id,
            #             url=url,
            #             title=f'iFrame opened as new tab, treat as if embedded inside page {outer_page.url}: {page.url}',
            #             parent_page_url=outer_page.url,
            #         )
            #     )
            if include_screenshot:
                try:
                    self.logger.debug('📸 Capturing screenshot...')
                    # Reasonable timeout for screenshot
                    screenshot_b64 = await self.take_screenshot()
                    # self.logger.debug('✅ Screenshot completed')
                except Exception as e:
                    # NOTE(review): `page.url` below is undefined (Playwright leftover);
                    # if the screenshot fails, this line itself raises NameError and
                    # escapes to the outer except. TODO: use page_url instead.
                    self.logger.warning(f'❌ Screenshot failed for {_log_pretty_url(page.url)}: {type(e).__name__} {e}')
                    screenshot_b64 = None
            else:
                screenshot_b64 = None
            # Get comprehensive page information
            # NOTE(review): `page` is undefined here — NameError, caught by the outer
            # except, so the whole normal path currently fails. TODO: fix the argument.
            page_info = await self.get_page_info(page)
            try:
                self.logger.debug('📏 Getting scroll info...')
                # NOTE(review): `page` is undefined here too — caught by the except below.
                pixels_above, pixels_below = await asyncio.wait_for(self.get_scroll_info(page), timeout=5.0)
                self.logger.debug('✅ Scroll info completed')
            except Exception as e:
                self.logger.warning(f'Failed to get scroll info: {type(e).__name__}')
                pixels_above, pixels_below = 0, 0
            try:
                title = await asyncio.wait_for(self.get_current_page_title(), timeout=3.0)
            except Exception:
                title = 'Title unavailable'
            # Check if this is a minimal fallback state
            browser_errors = []
            if not content.selector_map:  # Empty selector map indicates fallback state
                browser_errors.append(
                    f'DOM processing timed out for {page_url} - using minimal state. Basic navigation still available via go_to_url, scroll, and search actions.'
                )
            # Check if current page is a PDF viewer
            # NOTE(review): `page` is undefined here as well — TODO: fix the argument.
            is_pdf_viewer = await self._is_pdf_viewer(page)
            self.browser_state_summary = BrowserStateSummary(
                dom_state=content,
                url=page_url,
                title=title,
                tabs=tabs_info,
                screenshot=screenshot_b64,
                page_info=page_info,
                pixels_above=pixels_above,
                pixels_below=pixels_below,
                browser_errors=browser_errors,
                is_pdf_viewer=is_pdf_viewer,
            )
            self.logger.debug('✅ get_state_summary completed successfully')
            return self.browser_state_summary
        except Exception as e:
            self.logger.error(f'❌ Failed to update browser_state_summary: {type(e).__name__}: {e}')
            # Return last known good state if available
            if hasattr(self, 'browser_state_summary'):
                return self.browser_state_summary
            raise
# ========== CDP Helper Methods ==========
async def cdp_clients_for_target(self, target_id: str) -> list['CDPClient']:
"""Get CDP clients for a target, including main and iframe sessions.
Returns list with root target session first, then iframe sessions.
"""
if not self.cdp_client:
raise ValueError('CDP client not initialized')
clients = []
# Get cached session for main target
client, session_id = await self.get_cdp_session(target_id)
# For now, return just the main client with session
# In future, we'd enumerate iframes and attach to them too
clients.append(client)
return clients
async def cdp_client_for_node(self, node: 'EnhancedDOMTreeNode') -> 'CDPClient':
"""Get CDP client for a specific DOM node based on its frame."""
if node.frame_id:
return await self.cdp_client_for_frame(node.frame_id)
return self.cdp_client
async def frames_by_target(self, target_id: str) -> list[str]:
"""Get all frame IDs for a target."""
# Get frame tree using helper
frame_tree = await self._cdp_execute_on_target(target_id, commands=[('Page.getFrameTree', {})])
# Extract frame IDs recursively
frame_ids = []
def extract_frames(tree_node):
frame_ids.append(tree_node['frame']['id'])
for child in tree_node.get('childFrames', []):
extract_frames(child)
extract_frames(frame_tree['frameTree'])
return frame_ids
async def target_id_by_frame_id(self, frame_id: str) -> str | None:
"""Get target ID for a given frame ID.
Note: This requires iterating through all targets to find the frame.
"""
targets = await self.cdp_client.send.Target.getTargets()
for target in targets['targetInfos']:
# Skip invalid targets
if not self._is_valid_target(
target, include_http=True, include_about=True, include_pages=True, include_iframes=True, include_workers=False
):
continue
# Check if this target contains the frame
frames = await self.frames_by_target(target['targetId'])
if frame_id in frames:
return target['targetId']
return None
async def get_current_page_cdp_session_id(self) -> str | None:
"""Get the CDP session ID for the current page."""
if not hasattr(self, 'current_target_id') or not self.current_target_id:
return None
# Get cached session ID
client, session_id = await self.get_cdp_session(self.current_target_id)
return session_id
async def _create_fresh_cdp_client(self) -> Any:
"""Create a new CDP client instance. Caller is responsible for cleanup."""
if not self.cdp_url:
raise ValueError('CDP URL is not set')
import httpx
from cdp_use import CDPClient
# If the cdp_url is already a websocket URL, use it as-is.
if self.cdp_url.startswith('ws'):
ws_url = self.cdp_url
else:
# Otherwise, treat it as the DevTools HTTP root and fetch the websocket URL.
url = self.cdp_url.rstrip('/')
if not url.endswith('/json/version'):
url = url + '/json/version'
async with httpx.AsyncClient() as client:
version_info = await client.get(url)
ws_url = version_info.json()['webSocketDebuggerUrl']
cdp_client = CDPClient(ws_url)
await cdp_client.start()
return cdp_client
async def create_cdp_session_for_target(self, target_id: str) -> Any:
    """Return the cached (CDPClient, session_id) pair for *target_id*.

    Despite the name, no fresh attach happens: this delegates to the session cache.
    """
    return await self.get_cdp_session(target_id)
async def create_cdp_session_for_frame(self, frame_id: str) -> Any:
    """Locate the page target whose frame tree contains *frame_id*.

    Returns:
        The cached (CDPClient, session_id) pair of the owning target.

    Raises:
        ValueError: when no page target owns the frame.
    """

    def contains(node: dict) -> bool:
        # Depth-first search of a Page.getFrameTree node for frame_id.
        if node['frame']['id'] == frame_id:
            return True
        return any(contains(child) for child in node.get('childFrames', []))

    targets = await self.cdp_client.send.Target.getTargets()
    for info in targets['targetInfos']:
        # Only page-type, valid targets can own a same-process frame tree.
        if not self._is_valid_target(info) or info['type'] != 'page':
            continue
        client, session_id = await self.get_cdp_session(info['targetId'])
        tree = await client.send.Page.getFrameTree(session_id=session_id)
        if contains(tree['frameTree']):
            return await self.get_cdp_session(info['targetId'])
    raise ValueError(f'Frame with ID {frame_id} not found in any target')
async def create_cdp_session_for_node(self, node: Any) -> Any:
    """Return (CDPClient, session_id) for the target hosting *node*.

    Raises:
        ValueError: when the node lacks a target_id, or its backend node
            cannot be described inside that target.
    """
    target_id = getattr(node, 'target_id', None)
    if not target_id:
        raise ValueError(f'Node does not have a target_id: {node}')
    client, session_id = await self.get_cdp_session(target_id)
    try:
        # describeNode doubles as an existence check for the backend node.
        await client.send.DOM.describeNode(params={'backendNodeId': node.backend_node_id}, session_id=session_id)
    except Exception as e:
        raise ValueError(f'Node with backend_node_id {node.backend_node_id} not found in target {node.target_id}: {e}')
    return client, session_id
async def get_current_target_info(self) -> dict | None:
"""Get info about the current active target using CDP."""
if not self.current_target_id:
return None
targets = await self.cdp_client.send.Target.getTargets()
for target in targets.get('targetInfos', []):
if target.get('targetId') == self.current_target_id:
# Still return even if it's not a "valid" target since we're looking for a specific ID
return target
return None
async def get_current_page_url(self) -> str:
    """Return the active page's URL, or '' when no target is active."""
    info = await self.get_current_target_info()
    return info.get('url', '') if info else ''
async def get_current_page_title(self) -> str:
    """Return the active page's document.title via a throwaway CDP session.

    Returns '' when no target is active or any CDP call fails; title lookup
    is cosmetic and must never raise.
    """
    if not self.current_target_id:
        return ''
    try:
        session = await self.cdp_client.send.Target.attachToTarget(
            params={'targetId': self.current_target_id, 'flatten': True}
        )
        session_id = session['sessionId']
        try:
            title_result = await self.cdp_client.send.Runtime.evaluate(
                params={'expression': 'document.title'}, session_id=session_id
            )
            return title_result.get('result', {}).get('value', '')
        finally:
            # Detach even when evaluate fails, so the temporary session is not
            # leaked (previously the detach was skipped on evaluate errors).
            await self.cdp_client.send.Target.detachFromTarget(params={'sessionId': session_id})
    except Exception:
        return ''
# ========== DOM Helper Methods ==========
def update_cached_selector_map(self, selector_map: dict[int, 'EnhancedDOMTreeNode']) -> None:
    """Replace the cached selector map; the DOM watchdog calls this after each rebuild.

    Args:
        selector_map: Fresh index -> node mapping from DOM serialization.
    """
    self._cached_selector_map = selector_map
async def get_dom_element_by_index(self, index: int) -> 'EnhancedDOMTreeNode | None':
    """Look up a serialized-DOM element by its index.

    Prefers the cached selector map; otherwise asks the DOM watchdog, which
    may rebuild the DOM as a side effect.

    Args:
        index: Element index assigned during DOM serialization.

    Returns:
        The matching node, or None when the index is unknown.
    """
    cached = self._cached_selector_map
    if cached and index in cached:
        return cached[index]
    watchdog = self._dom_watchdog
    if not watchdog:
        return None
    node = await watchdog.get_element_by_index(index)
    # Adopt the watchdog's selector map in case it just rebuilt the DOM.
    if watchdog.selector_map:
        self._cached_selector_map = watchdog.selector_map
    return node
# Alias for backwards compatibility
async def get_element_by_index(self, index: int) -> 'EnhancedDOMTreeNode | None':
    """Backwards-compatible alias for get_dom_element_by_index."""
    return await self.get_dom_element_by_index(index)
def is_file_input(self, element: Any) -> bool:
    """Tell whether *element* is an <input type="file"> control.

    Args:
        element: DOM node to inspect.

    Returns:
        True for file inputs, False otherwise.
    """
    if self._dom_watchdog:
        # The watchdog has the authoritative check.
        return self._dom_watchdog.is_file_input(element)
    # Watchdog unavailable: inspect the node's tag and type attribute directly.
    if not (hasattr(element, 'node_name') and element.node_name.upper() == 'INPUT'):
        return False
    if not hasattr(element, 'attributes'):
        return False
    return element.attributes.get('type', '').lower() == 'file'
def clear_dom_cache(self) -> None:
    """Drop cached DOM state so the next access triggers a rebuild."""
    watchdog = self._dom_watchdog
    if watchdog:
        watchdog.clear_cache()
async def get_selector_map(self) -> dict[int, 'EnhancedDOMTreeNode']:
    """Return the current index -> node selector map (possibly empty).

    Resolution order: session cache first, then the DOM watchdog's map,
    then an empty dict.
    """
    if self._cached_selector_map:
        return self._cached_selector_map
    watchdog = self._dom_watchdog
    if watchdog and hasattr(watchdog, 'selector_map'):
        return watchdog.selector_map or {}
    return {}
async def remove_highlights(self) -> None:
    """Best-effort removal of browser-use highlight overlays from the current page."""
    if not self.current_target_id:
        return
    try:
        client, session_id = await self.get_cdp_session(self.current_target_id)
        script = """
        // Remove all browser-use highlight elements
        const highlights = document.querySelectorAll('[data-browser-use-highlight]');
        highlights.forEach(el => el.remove());
        """
        await client.send.Runtime.evaluate(params={'expression': script}, session_id=session_id)
    except Exception as e:
        # Highlight cleanup is non-critical; log and move on.
        self.logger.debug(f'Failed to remove highlights: {e}')
@property
def downloaded_files(self) -> list[str]:
    """Sorted paths of all plain files in the profile's downloads directory."""
    downloads_path = self.browser_profile.downloads_path
    if not downloads_path:
        return []
    directory = Path(downloads_path)
    if not directory.exists():
        return []
    # Plain files only; nested directories are ignored.
    return sorted(str(entry) for entry in directory.iterdir() if entry.is_file())
# ========== CDP-based replacements for browser_context operations ==========
async def _cdp_execute_on_target(
self, target_id: str, commands: list[tuple[str, dict]] | None = None, callable_fn: Any | None = None
) -> Any:
"""Execute CDP commands on a specific target using cached session.
Args:
target_id: The target ID to execute commands on
commands: List of (method, params) tuples to execute, e.g. [('Runtime.evaluate', {'expression': '...'})]
callable_fn: Alternative - async function that receives (cdp_client, session_id) and returns result
Returns:
Result of the last command or callable_fn return value
"""
# Get cached session or create new one
client, session_id = await self.get_cdp_session(target_id)
if callable_fn:
return await callable_fn(client, session_id)
elif commands:
result = None
for method, params in commands:
domain, command = method.split('.')
domain_obj = getattr(client.send, domain)
cmd_func = getattr(domain_obj, command)
result = await cmd_func(params=params, session_id=session_id) if params else await cmd_func(session_id=session_id)
return result
else:
return session_id
async def _cdp_get_all_pages(self) -> list[dict]:
    """List all valid page/tab targets via Target.getTargets."""
    result = await self.cdp_client.send.Target.getTargets()
    pages = []
    for info in result.get('targetInfos', []):
        if self._is_valid_target(info) and info.get('type') in ('page', 'tab'):
            pages.append(info)
    return pages
async def _cdp_create_new_page(self, url: str = 'about:blank') -> str:
    """Open a new foreground tab via Target.createTarget and return its target ID."""
    created = await self.cdp_client.send.Target.createTarget(
        params={'url': url, 'newWindow': False, 'background': False}
    )
    return created['targetId']
async def _cdp_close_page(self, target_id: str) -> None:
    """Close the tab identified by *target_id* via Target.closeTarget."""
    await self.cdp_client.send.Target.closeTarget(params={'targetId': target_id})
async def _cdp_activate_page(self, target_id: str) -> None:
    """Bring the tab identified by *target_id* to the foreground via Target.activateTarget."""
    await self.cdp_client.send.Target.activateTarget(params={'targetId': target_id})
async def _cdp_get_cookies(self, urls: list[str] | None = None) -> list[dict]:
"""Get cookies using CDP Network.getCookies."""
if not self.current_target_id:
return []
client, session_id = await self.get_cdp_session(self.current_target_id)
params = {'urls': urls} if urls else {}
result = await client.send.Network.getCookies(params=params, session_id=session_id)
return result.get('cookies', [])
async def _cdp_set_cookies(self, cookies: list[dict]) -> None:
    """Install *cookies* on the current target via Network.setCookies (no-op when empty)."""
    if not cookies or not self.current_target_id:
        return
    client, session_id = await self.get_cdp_session(self.current_target_id)
    await client.send.Network.setCookies(params={'cookies': cookies}, session_id=session_id)
async def _cdp_clear_cookies(self) -> None:
    """Wipe all browser cookies via Network.clearBrowserCookies."""
    if not self.current_target_id:
        return
    client, session_id = await self.get_cdp_session(self.current_target_id)
    await client.send.Network.clearBrowserCookies(session_id=session_id)
async def _cdp_set_extra_headers(self, headers: dict[str, str]) -> None:
    """Apply extra HTTP headers to the current target via Network.setExtraHTTPHeaders."""
    if not self.current_target_id:
        return
    client, session_id = await self.get_cdp_session(self.current_target_id)
    await client.send.Network.setExtraHTTPHeaders(params={'headers': headers}, session_id=session_id)
async def _cdp_grant_permissions(self, permissions: list[str], origin: str | None = None) -> None:
"""Grant permissions using CDP Browser.grantPermissions."""
params = {'permissions': permissions}
if origin:
params['origin'] = origin
await self.cdp_client.send.Browser.grantPermissions(**params)
async def _cdp_set_geolocation(self, latitude: float, longitude: float, accuracy: float = 100) -> None:
    """Override the browser geolocation via Emulation.setGeolocationOverride."""
    payload = {'latitude': latitude, 'longitude': longitude, 'accuracy': accuracy}
    await self.cdp_client.send.Emulation.setGeolocationOverride(params=payload)
async def _cdp_clear_geolocation(self) -> None:
    """Drop any geolocation override via Emulation.clearGeolocationOverride."""
    await self.cdp_client.send.Emulation.clearGeolocationOverride()
async def _cdp_add_init_script(self, script: str) -> str:
    """Register *script* to run on every new document; returns its CDP identifier."""
    registered = await self.cdp_client.send.Page.addScriptToEvaluateOnNewDocument(params={'source': script})
    return registered['identifier']
async def _cdp_remove_init_script(self, identifier: str) -> None:
    """Unregister a script previously added via addScriptToEvaluateOnNewDocument."""
    await self.cdp_client.send.Page.removeScriptToEvaluateOnNewDocument(params={'identifier': identifier})
async def _cdp_set_viewport(self, width: int, height: int, device_scale_factor: float = 1.0, mobile: bool = False) -> None:
    """Force viewport metrics via Emulation.setDeviceMetricsOverride."""
    metrics = {'width': width, 'height': height, 'deviceScaleFactor': device_scale_factor, 'mobile': mobile}
    await self.cdp_client.send.Emulation.setDeviceMetricsOverride(params=metrics)
async def _cdp_get_storage_state(self) -> dict:
    """Snapshot storage state via CDP.

    Currently cookies only: localStorage/sessionStorage would require
    per-origin JavaScript evaluation, so 'origins' stays empty.
    """
    return {
        'cookies': await self._cdp_get_cookies(),
        'origins': [],
    }
async def _cdp_navigate(self, url: str, target_id: str | None = None) -> None:
"""Navigate to URL using CDP Page.navigate."""
# Use provided target_id or fall back to current_target_id
target_to_use = target_id or self.current_target_id
if not target_to_use:
# If no target available, get the first page target
targets = await self._cdp_get_all_pages()
if targets:
target_to_use = targets[0]['targetId']
self.current_target_id = target_to_use
else:
raise ValueError('No target available for navigation')
# Use helper to navigate on the target
await self._cdp_execute_on_target(target_to_use, commands=[('Page.enable', {}), ('Page.navigate', {'url': url})])
@staticmethod
def _is_valid_target(
    target_info: dict,
    include_http: bool = True,
    include_chrome: bool = False,
    include_chrome_extensions: bool = False,
    include_chrome_error: bool = False,
    include_about: bool = True,
    include_iframes: bool = True,
    include_pages: bool = True,
    include_workers: bool = False,
) -> bool:
    """Decide whether a CDP target should be processed.

    A target passes only when BOTH its URL scheme and its target type are
    enabled by the corresponding include_* flag.

    Args:
        target_info: Target info dict from CDP (reads 'type' and 'url').

    Returns:
        True when the target should be processed, False to skip it.
    """
    target_type = target_info.get('type', '')
    url = target_info.get('url', '')

    url_ok = (
        (include_chrome_error and url.startswith('chrome-error://'))
        or (include_chrome and url.startswith('chrome://'))
        or (include_chrome_extensions and url.startswith('chrome-extension://'))
        # Exact match only: about:srcdoc and other rare about: pages stay excluded.
        or (include_about and url == 'about:blank')
        or (include_http and url.startswith(('http://', 'https://')))
    )
    type_ok = (
        (include_workers and target_type in ('service_worker', 'shared_worker', 'worker'))
        or (include_pages and target_type in ('page', 'tab'))
        or (include_iframes and target_type in ('iframe', 'webview'))
    )
    return url_ok and type_ok
async def get_all_frames(self) -> tuple[dict[str, dict], dict[str, str]]:
    """Get a complete frame hierarchy from all browser targets.

    Walks every valid target's Page.getFrameTree and merges the results so
    that OOPIFs (out-of-process iframes), which appear both as child entries
    in their parent's tree and as standalone iframe targets, end up as a
    single entry pointing at the target that can actually access them.

    Returns:
        Tuple of (all_frames, target_sessions) where:
        - all_frames: dict mapping frame_id -> frame info dict with all metadata
        - target_sessions: dict mapping target_id -> session_id for active sessions
    """
    all_frames = {}  # frame_id -> FrameInfo dict
    target_sessions = {}  # target_id -> session_id (keep sessions alive during collection)
    # Get all targets
    targets = await self.cdp_client.send.Target.getTargets()
    all_targets = targets.get('targetInfos', [])
    # First pass: collect frame trees from ALL targets
    for target in all_targets:
        target_id = target.get('targetId')
        if not target_id:
            continue
        # Skip invalid targets
        if not self._is_valid_target(
            target, include_http=True, include_about=True, include_pages=True, include_iframes=True, include_workers=False
        ):
            continue
        # Get cached session for this target
        client, session_id = await self.get_cdp_session(target_id)
        target_sessions[target_id] = session_id
        try:
            # Try to get frame tree (not all target types support this)
            try:
                frame_tree_result = await client.send.Page.getFrameTree(session_id=session_id)

                # Process the frame tree recursively.
                # NOTE: closes over `target`/`target_id` of the current loop
                # iteration; it is defined and fully consumed inside this
                # iteration, so the late-binding closure is safe here.
                def process_frame_tree(node, parent_frame_id=None):
                    """Recursively process frame tree and add to all_frames."""
                    frame = node.get('frame', {})
                    current_frame_id = frame.get('id')
                    if current_frame_id:
                        # For iframe targets, check if the frame has a parentId field
                        # This indicates it's an OOPIF with a parent in another target
                        actual_parent_id = frame.get('parentId') or parent_frame_id
                        # Create frame info with all CDP response data plus our additions
                        frame_info = {
                            **frame,  # Include all original frame data: id, url, parentId, etc.
                            'frameTargetId': target_id,  # Target that can access this frame
                            'parentFrameId': actual_parent_id,  # Use parentId from frame if available
                            'childFrameIds': [],  # Will be populated below
                            'isCrossOrigin': False,  # Will be determined based on context
                            'isUrlValid': _is_url_valid(frame.get('url', '')),
                        }
                        # Check if frame is cross-origin based on crossOriginIsolatedContextType
                        cross_origin_type = frame.get('crossOriginIsolatedContextType')
                        if cross_origin_type and cross_origin_type != 'NotIsolated':
                            frame_info['isCrossOrigin'] = True
                        # For iframe targets, the frame itself is likely cross-origin
                        if target.get('type') == 'iframe':
                            frame_info['isCrossOrigin'] = True
                        # Add child frame IDs (note: OOPIFs won't appear here)
                        child_frames = node.get('childFrames', [])
                        for child in child_frames:
                            child_frame = child.get('frame', {})
                            child_frame_id = child_frame.get('id')
                            if child_frame_id:
                                frame_info['childFrameIds'].append(child_frame_id)
                        # Store or merge frame info
                        if current_frame_id in all_frames:
                            # Frame already seen from another target, merge info
                            existing = all_frames[current_frame_id]
                            # If this is an iframe target, it has direct access to the frame
                            if target.get('type') == 'iframe':
                                existing['frameTargetId'] = target_id
                                existing['isCrossOrigin'] = True
                        else:
                            all_frames[current_frame_id] = frame_info
                        # Process child frames recursively
                        for child in child_frames:
                            process_frame_tree(child, current_frame_id)

                # Process the entire frame tree
                process_frame_tree(frame_tree_result.get('frameTree', {}))
            except Exception:
                # Target doesn't support Page domain or has no frames
                pass
        except Exception:
            # Error processing this target
            pass
    # Second pass: populate backend node IDs and parent target IDs
    await self._populate_frame_metadata(all_frames, target_sessions)
    return all_frames, target_sessions
async def _populate_frame_metadata(self, all_frames: dict[str, dict], target_sessions: dict[str, str]) -> None:
    """Annotate frames with their parent target ID and owning DOM node IDs.

    Args:
        all_frames: Frame hierarchy dict, mutated in place.
        target_sessions: target_id -> session_id for already-attached targets.
    """
    for frame_id, info in all_frames.items():
        parent_id = info.get('parentFrameId')
        parent_info = all_frames.get(parent_id) if parent_id else None
        if parent_info is None:
            continue
        parent_target_id = parent_info.get('frameTargetId')
        info['parentTargetId'] = parent_target_id
        parent_session_id = target_sessions.get(parent_target_id)
        if parent_session_id is None:
            continue
        try:
            # The DOM domain must be enabled before getFrameOwner works.
            await self.cdp_client.send.DOM.enable(session_id=parent_session_id)
            owner = await self.cdp_client.send.DOM.getFrameOwner(
                params={'frameId': frame_id}, session_id=parent_session_id
            )
            if owner:
                info['backendNodeId'] = owner.get('backendNodeId')
                info['nodeId'] = owner.get('nodeId')
        except Exception:
            # Frame owner lookup fails for cross-origin frames; leave fields unset.
            pass
async def find_frame_target(self, frame_id: str, all_frames: dict[str, dict] | None = None) -> dict | None:
"""Find the frame info for a specific frame ID.
Args:
frame_id: The frame ID to search for
all_frames: Optional pre-built frame hierarchy. If None, will call get_all_frames()
Returns:
Frame info dict if found, None otherwise
"""
if all_frames is None:
all_frames, _ = await self.get_all_frames()
return all_frames.get(frame_id)
async def cdp_client_for_frame(self, frame_id: str) -> Any:
    """Resolve the CDP client/session/target owning *frame_id* (handles OOPIFs).

    Builds the unified frame hierarchy across all targets so out-of-process
    iframes resolve to the target that can actually reach them.

    Args:
        frame_id: Frame to locate across all targets.

    Returns:
        Tuple of (cdp_client, session_id, target_id).

    Raises:
        ValueError: when no target contains the frame.
    """
    all_frames, target_sessions = await self.get_all_frames()
    frame_info = await self.find_frame_target(frame_id, all_frames)
    if frame_info:
        owning_target = frame_info.get('frameTargetId')
        session_id = target_sessions.get(owning_target)
        if session_id is not None:
            return self.cdp_client, session_id, owning_target
    raise ValueError(f"Frame with ID '{frame_id}' not found in any target")
async def cdp_client_for_target(self, target_id: str) -> Any:
    """Return the cached (cdp_client, session_id) pair for *target_id*.

    Thin convenience wrapper over the session cache.
    """
    return await self.get_cdp_session(target_id)
# Import uuid7str for ID generation
try:
    from uuid_extensions import uuid7str
except ImportError:
    import uuid
    # Fallback when uuid_extensions is unavailable: plain UUID4 strings.
    # NOTE(review): the module header already imports uuid7str unconditionally,
    # so this except branch looks unreachable in practice - confirm.
    def uuid7str() -> str:
        return str(uuid.uuid4())
# Fix Pydantic circular dependency for all watchdogs
# This must be called after BrowserSession class is fully defined
_watchdog_modules = [
    'browser_use.browser.crash_watchdog.CrashWatchdog',
    'browser_use.browser.downloads_watchdog.DownloadsWatchdog',
    'browser_use.browser.local_browser_watchdog.LocalBrowserWatchdog',
    'browser_use.browser.storage_state_watchdog.StorageStateWatchdog',
    'browser_use.browser.navigation_watchdog.NavigationWatchdog',
    'browser_use.browser.aboutblank_watchdog.AboutBlankWatchdog',
    'browser_use.browser.default_action_watchdog.DefaultActionWatchdog',
    'browser_use.browser.dom_watchdog.DOMWatchdog',
    'browser_use.browser.screenshot_watchdog.ScreenshotWatchdog',
]
for module_path in _watchdog_modules:
    try:
        # Resolve 'pkg.module.ClassName' to the class and rebuild its schema.
        module_name, class_name = module_path.rsplit('.', 1)
        watchdog_class = getattr(__import__(module_name, fromlist=[class_name]), class_name)
        watchdog_class.model_rebuild()
    except Exception:
        pass  # Ignore if watchdog can't be imported or rebuilt