"""Event-driven browser session with backwards compatibility.""" import asyncio import logging import weakref from pathlib import Path from typing import TYPE_CHECKING, Any from bubus import EventBus from bubus.helpers import retry from pydantic import BaseModel, ConfigDict, Field, PrivateAttr from uuid_extensions import uuid7str from browser_use.browser.events import ( BrowserConnectedEvent, BrowserErrorEvent, BrowserLaunchEvent, BrowserStartEvent, BrowserStopEvent, BrowserStoppedEvent, ) from browser_use.browser.profile import BrowserProfile from browser_use.browser.views import ( BrowserStateSummary, PageInfo, TabInfo, ) from browser_use.observability import observe_debug from browser_use.utils import ( _log_pretty_url, is_new_tab_page, logger, time_execution_async, ) if TYPE_CHECKING: from cdp_use import CDPClient from browser_use.dom.views import EnhancedDOMTreeNode class CachedSession: """Container for cached CDP session to allow weak references.""" def __init__(self, client: Any, session_id: str, target_id: str, frame_id: str | None = None): self.client = client self.session_id = session_id self.target_id = target_id self.frame_id = frame_id def __hash__(self): return hash(self.client) _GLOB_WARNING_SHOWN = False # used inside _is_url_allowed to avoid spamming the logs with the same warning multiple times MAX_SCREENSHOT_HEIGHT = 2000 MAX_SCREENSHOT_WIDTH = 1920 def _log_glob_warning(domain: str, glob: str, logger: logging.Logger): global _GLOB_WARNING_SHOWN if not _GLOB_WARNING_SHOWN: logger.warning( # glob patterns are very easy to mess up and match too many domains by accident # e.g. if you only need to access gmail, don't use *.google.com because an attacker could convince the agent to visit a malicious doc # on docs.google.com/s/some/evil/doc to set up a prompt injection attack f"⚠️ Allowing agent to visit {domain} based on allowed_domains=['{glob}', ...]. Set allowed_domains=['{domain}', ...] explicitly to avoid matching too many domains!" 
) _GLOB_WARNING_SHOWN = True DEFAULT_BROWSER_PROFILE = BrowserProfile() class BrowserSession(BaseModel): """Event-driven browser session with backwards compatibility. This class provides a 2-layer architecture: - High-level event handling for agents/controllers - Direct CDP/Playwright calls for browser operations Supports both event-driven and imperative calling styles. """ model_config = ConfigDict( arbitrary_types_allowed=True, validate_assignment=True, extra='forbid', ) # Core configuration id: str = Field(default_factory=lambda: uuid7str()) browser_profile: BrowserProfile = Field(default_factory=lambda: DEFAULT_BROWSER_PROFILE) # Connection info (for backwards compatibility) cdp_url: str | None = None is_local: bool = Field(default=True) # Mutable state current_target_id: str | None = None """Current active target ID for the main page""" # Event bus event_bus: EventBus = Field(default_factory=EventBus) # PDF handling _auto_download_pdfs: bool = PrivateAttr(default=True) def model_post_init(self, __context) -> None: """Register event handlers after model initialization.""" # Register BrowserSession's event handlers manually since it's not a BaseWatchdog self.event_bus.on('BrowserStartEvent', self.on_BrowserStartEvent) self.event_bus.on('BrowserStopEvent', self.on_BrowserStopEvent) # Watchdogs _crash_watchdog: Any = PrivateAttr(default=None) _downloads_watchdog: Any = PrivateAttr(default=None) _aboutblank_watchdog: Any = PrivateAttr(default=None) _navigation_watchdog: Any = PrivateAttr(default=None) _storage_state_watchdog: Any = PrivateAttr(default=None) _local_browser_watchdog: Any = PrivateAttr(default=None) _default_action_watchdog: Any = PrivateAttr(default=None) _dom_watchdog: Any = PrivateAttr(default=None) _screenshot_watchdog: Any = PrivateAttr(default=None) # Navigation tracking now handled by watchdogs # Cached browser state for synchronous access _cached_browser_state_summary: Any = PrivateAttr(default=None) _cached_selector_map: dict[int, 
'EnhancedDOMTreeNode'] = PrivateAttr(default_factory=dict) """Cached mapping of element indices to DOM nodes""" # CDP client _cdp_client: 'CDPClient | None' = PrivateAttr(default=None) """Cached CDP client instance""" # CDP session cache _cdp_session_cache: weakref.WeakValueDictionary = PrivateAttr(default_factory=weakref.WeakValueDictionary) """Cache of CDP sessions by target_id -> (client, session_id) tuple""" _cdp_cache_enabled: bool = PrivateAttr(default=True) """Flag to enable/disable CDP session caching""" _logger: Any = PrivateAttr(default=None) @property def logger(self) -> Any: """Get instance-specific logger with session ID in the name""" if self._logger is None: # keep updating the name pre-init because our id and str(self) can change import logging self._logger = logging.getLogger(f'browser_use.{self}') return self._logger @property def cdp_client(self) -> 'CDPClient': """Get the cached CDP client. The client is created and started in setup_browser_via_cdp_url(). Returns: The CDP client instance Raises: RuntimeError: If CDP client is not initialized yet """ if self._cdp_client is None: raise RuntimeError('CDP client not initialized - browser may not be connected yet') return self._cdp_client async def get_cdp_session(self, target_id: str) -> tuple[Any, str]: """Get or create a CDP session for a target, using cache when enabled. 
Args: target_id: The target ID to get a session for Returns: Tuple of (cdp_client, session_id) """ # If caching is disabled, always create a new session if not self._cdp_cache_enabled: client = self.cdp_client session = await client.send.Target.attachToTarget(params={'targetId': target_id, 'flatten': True}) session_id = session['sessionId'] await self._enable_all_domains(client, session_id) return client, session_id # Check cache first cached = self._cdp_session_cache.get(target_id) if cached: try: # Quick ping to verify it's still alive (0.1s timeout) await asyncio.wait_for( cached.client.send.Runtime.evaluate(params={'expression': '1'}, session_id=cached.session_id), timeout=0.1 ) return cached.client, cached.session_id except: # Dead session, remove from cache self._cdp_session_cache.pop(target_id, None) # Create new session client = self.cdp_client session = await client.send.Target.attachToTarget(params={'targetId': target_id, 'flatten': True}) session_id = session['sessionId'] # Enable all necessary domains at creation time await self._enable_all_domains(client, session_id) # Cache it using CachedSession (which supports weak references) cache_value = CachedSession(client, session_id, target_id) self._cdp_session_cache[target_id] = cache_value return client, session_id async def _enable_all_domains(self, client: Any, session_id: str) -> None: """Enable all necessary CDP domains for a session.""" # Enable auto-attach for related targets (iframes, etc) await client.send.Target.setAutoAttach( params={'autoAttach': True, 'waitForDebuggerOnStart': False, 'flatten': True}, session_id=session_id ) # Enable all commonly used domains in parallel await asyncio.gather( client.send.Page.enable(session_id=session_id), # TEMPORARILY DISABLED: Network.enable causes excessive event logging # client.send.Network.enable(session_id=session_id), client.send.Runtime.enable(session_id=session_id), client.send.DOM.enable(session_id=session_id), 
client.send.DOMSnapshot.enable(session_id=session_id), client.send.Accessibility.enable(session_id=session_id), client.send.Inspector.enable(session_id=session_id), return_exceptions=True # Don't fail if some domains aren't available ) async def release_cdp_session(self, target_id: str) -> None: """Explicitly release a CDP session (detach and remove from cache). Args: target_id: The target ID to release the session for """ # If caching is disabled, nothing to release from cache if not self._cdp_cache_enabled: return cached = self._cdp_session_cache.pop(target_id, None) if cached: try: client, session_id = cached await client.send.Target.detachFromTarget(params={'sessionId': session_id}) except: pass # Session might already be dead async def clear_cdp_cache(self) -> None: """Clear all cached CDP sessions with proper cleanup.""" for target_id in list(self._cdp_session_cache.keys()): await self.release_cdp_session(target_id) def __repr__(self) -> str: port_number = (self.cdp_url or 'no-cdp').rsplit(':', 1)[-1].split('/', 1)[0] return f'BrowserSessionπŸ†‚ {self.id[-4:]}:{port_number} #{str(id(self))[-2:]} (cdp_url={self.cdp_url}, profile={self.browser_profile})' def __str__(self) -> str: # Note: _original_browser_session tracking moved to Agent class port_number = (self.cdp_url or 'no-cdp').rsplit(':', 1)[-1].split('/', 1)[0] return ( f'BrowserSessionπŸ†‚ {self.id[-4:]}:{port_number} #{str(id(self))[-2:]}' # ' πŸ…Ÿ {str(id(self.current_target_id))[-2:]}' ) async def on_BrowserStartEvent(self, event: BrowserStartEvent) -> dict[str, str]: """Handle browser start request. 
Returns: Dict with 'cdp_url' key containing the CDP URL """ # Initialize and attach all watchdogs FIRST so LocalBrowserWatchdog can handle BrowserLaunchEvent await self.attach_all_watchdogs() try: # If no CDP URL, launch local browser if not self.cdp_url: if self.is_local: # Launch local browser using event-driven approach launch_event = self.event_bus.dispatch(BrowserLaunchEvent()) await launch_event # Get the CDP URL from LocalBrowserWatchdog handler result results = await launch_event.event_results_flat_dict() self.cdp_url = results.get('cdp_url') if not self.cdp_url: raise Exception('No CDP URL returned from LocalBrowserWatchdog') else: raise Exception('No CDP URL provided for remote browser connection') assert self.cdp_url and '://' in self.cdp_url # Setup browser via CDP (for both local and remote cases) await self.setup_browser_via_cdp_url() # Notify that browser is connected (single place) self.event_bus.dispatch(BrowserConnectedEvent(cdp_url=self.cdp_url)) # Return the CDP URL for other components return {'cdp_url': self.cdp_url} except Exception as e: self.event_bus.dispatch( BrowserErrorEvent( error_type='BrowserStartEventError', message=f'Failed to start browser: {type(e).__name__} {e}', details={'cdp_url': self.cdp_url, 'is_local': self.is_local}, ) ) raise async def on_BrowserStopEvent(self, event: BrowserStopEvent) -> None: """Handle browser stop request.""" try: # Check if we should keep the browser alive if self.browser_profile.keep_alive and not event.force: self.event_bus.dispatch(BrowserStoppedEvent(reason='Kept alive due to keep_alive=True')) return # Clear CDP session cache before stopping await self.clear_cdp_cache() # Reset state if self.is_local: self.cdp_url = None # Notify stop and wait for all handlers to complete # LocalBrowserWatchdog listens for BrowserStopEvent and dispatches BrowserKillEvent stop_event = self.event_bus.dispatch(BrowserStoppedEvent(reason='Stopped by request')) await stop_event except Exception as e: 
self.event_bus.dispatch( BrowserErrorEvent( error_type='BrowserStopEventError', message=f'Failed to stop browser: {type(e).__name__} {e}', details={'cdp_url': self.cdp_url, 'is_local': self.is_local}, ) ) # ========== Helper Methods ========== async def get_browser_state_with_recovery( self, cache_clickable_elements_hashes: bool = True, include_screenshot: bool = False ) -> 'BrowserStateSummary': """Get browser state using event system. This is a compatibility wrapper that dispatches BrowserStateRequestEvent. Args: cache_clickable_elements_hashes: Whether to cache element hashes (for compatibility) include_screenshot: Whether to include screenshot in state Returns: BrowserStateSummary from the event handler """ from browser_use.browser.events import BrowserStateRequestEvent # Dispatch the event and wait for result event = self.event_bus.dispatch( BrowserStateRequestEvent( include_dom=True, include_screenshot=include_screenshot, cache_clickable_elements_hashes=cache_clickable_elements_hashes, ) ) # The handler returns the BrowserStateSummary directly result = await event.event_result() return result async def get_state_summary(self, cache_clickable_elements_hashes: bool = True) -> 'BrowserStateSummary': """Alias for get_browser_state_with_recovery for backwards compatibility.""" return await self.get_browser_state_with_recovery( cache_clickable_elements_hashes=cache_clickable_elements_hashes, include_screenshot=False ) async def attach_all_watchdogs(self) -> None: """Initialize and attach all watchdogs in one go.""" from browser_use.browser.aboutblank_watchdog import AboutBlankWatchdog from browser_use.browser.crash_watchdog import CrashWatchdog from browser_use.browser.default_action_watchdog import DefaultActionWatchdog from browser_use.browser.dom_watchdog import DOMWatchdog from browser_use.browser.downloads_watchdog import DownloadsWatchdog from browser_use.browser.local_browser_watchdog import LocalBrowserWatchdog from browser_use.browser.navigation_watchdog 
import NavigationWatchdog from browser_use.browser.screenshot_watchdog import ScreenshotWatchdog from browser_use.browser.storage_state_watchdog import StorageStateWatchdog watchdog_configs = [ ('_crash_watchdog', CrashWatchdog), ('_downloads_watchdog', DownloadsWatchdog), ('_storage_state_watchdog', StorageStateWatchdog), ('_local_browser_watchdog', LocalBrowserWatchdog), ('_navigation_watchdog', NavigationWatchdog), ('_aboutblank_watchdog', AboutBlankWatchdog), ('_default_action_watchdog', DefaultActionWatchdog), ('_dom_watchdog', DOMWatchdog), ('_screenshot_watchdog', ScreenshotWatchdog), ] for attr_name, watchdog_class in watchdog_configs: if not hasattr(self, attr_name) or getattr(self, attr_name) is None: try: watchdog = watchdog_class(event_bus=self.event_bus, browser_session=self) await watchdog.attach_to_session() setattr(self, attr_name, watchdog) # logger.debug(f'[Session] Initialized and attached {watchdog_class.__name__}') except Exception as e: logger.warning(f'[Session] Failed to initialize {watchdog_class.__name__}: {e}') else: # Watchdog already exists, don't re-initialize to avoid duplicate handlers logger.debug(f'[Session] {watchdog_class.__name__} already initialized, skipping') async def setup_browser_via_cdp_url(self) -> None: """Connect to a remote chromium-based browser via CDP using cdp-use. This MUST succeed or the browser is unusable. Fails hard on any error. 
""" if not self.cdp_url: raise RuntimeError('Cannot setup CDP connection without CDP URL') self.logger.info(f'🌎 Connecting to existing chromium-based browser via CDP: {self.cdp_url} -> (remote browser)') try: # Import cdp-use client import httpx from cdp_use import CDPClient # Convert HTTP URL to WebSocket URL if needed ws_url = self.cdp_url if not ws_url.startswith('ws'): # If it's an HTTP URL, fetch the WebSocket URL from /json/version endpoint url = ws_url.rstrip('/') if not url.endswith('/json/version'): url = url + '/json/version' async with httpx.AsyncClient() as client: version_info = await client.get(url) ws_url = version_info.json()['webSocketDebuggerUrl'] # Create and store the CDP client for direct CDP communication if self._cdp_client is None: self._cdp_client = CDPClient(ws_url) assert self._cdp_client is not None await self._cdp_client.start() self.logger.info('βœ… CDP client connected successfully') assert self._cdp_client is not None # Get browser targets to find available contexts/pages targets = await self._cdp_client.send.Target.getTargets() # Find main browser pages (avoiding iframes, workers, extensions, etc.) 
page_targets = [ t for t in targets['targetInfos'] if self._is_valid_target( t, include_http=True, include_about=True, include_pages=True, include_iframes=False, include_workers=False ) ] # Check for chrome://newtab pages and immediately redirect them # to about:blank to avoid JS issues from CDP on chrome://* urls from browser_use.utils import is_new_tab_page for target in page_targets: target_url = target.get('url', '') if is_new_tab_page(target_url) and target_url != 'about:blank': # Redirect chrome://newtab to about:blank to avoid JS issues preventing driving chrome://newtab target_id = target['targetId'] self.logger.info(f'πŸ”„ Redirecting {target_url} to about:blank for target {target_id}') try: # Use cached session to navigate to about:blank client, session_id = await self.get_cdp_session(target_id) await client.send.Page.navigate(params={'url': 'about:blank'}, session_id=session_id) except Exception as e: self.logger.warning(f'Failed to redirect {target_url} to about:blank: {e}') if not page_targets: # No pages found, create a new one new_target = await self._cdp_client.send.Target.createTarget(params={'url': 'about:blank'}) target_id = new_target['targetId'] self.logger.info(f'πŸ“„ Created new blank page with target ID: {target_id}') else: # Use the first available page target_id = page_targets[0]['targetId'] self.logger.info(f'πŸ“„ Using existing page with target ID: {target_id}') # Store the current page target ID self.current_target_id = target_id # Pre-create cached session for the current target (enables all domains) try: await self.get_cdp_session(target_id) self.logger.info(f'🌐 CDP session cached and domains enabled for target {target_id[:8]}...') except Exception as e: self.logger.warning(f'Failed to create CDP session: {e}') except Exception as e: # Fatal error - browser is not usable without CDP connection self.logger.error(f'❌ FATAL: Failed to setup CDP connection: {e}') self.logger.error('❌ Browser cannot continue without CDP connection') # 
Clean up any partial state self._cdp_client = None self.current_target_id = None # Re-raise as a fatal error raise RuntimeError(f'Failed to establish CDP connection to browser: {e}') from e async def setup_domservice_init_scripts(self, retry_count: int = 0) -> None: # self.logger.debug('Setting up init scripts in browser') init_script = """ // check to make sure we're not inside the PDF viewer window.isPdfViewer = !!document?.body?.querySelector('body > embed[type="application/pdf"][width="100%"]') if (!window.isPdfViewer) { // Permissions const originalQuery = window.navigator.permissions.query; window.navigator.permissions.query = (parameters) => ( parameters.name === 'notifications' ? Promise.resolve({ state: Notification.permission }) : originalQuery(parameters) ); (() => { if (window._eventListenerTrackerInitialized) return; window._eventListenerTrackerInitialized = true; const originalAddEventListener = EventTarget.prototype.addEventListener; const eventListenersMap = new WeakMap(); EventTarget.prototype.addEventListener = function(type, listener, options) { if (typeof listener === "function") { let listeners = eventListenersMap.get(this); if (!listeners) { listeners = []; eventListenersMap.set(this, listeners); } listeners.push({ type, listener, listenerPreview: listener.toString().slice(0, 100), options }); } return originalAddEventListener.call(this, type, listener, options); }; window.getEventListenersForNode = (node) => { const listeners = eventListenersMap.get(node) || []; return listeners.map(({ type, listenerPreview, options }) => ({ type, listenerPreview, options })); }; })(); } """ # TODO: convert this to pure cdp-use and/or move it to the dom_watchdog.py # await self.browser_context.add_init_script(init_script) @property async def target_ids(self) -> list[str]: """Get all open page target IDs using CDP.""" try: pages = await self._cdp_get_all_pages() return [page['targetId'] for page in pages] except Exception: return [] async def 
get_target_id_by_tab_index(self, tab_index: int) -> str | None: """Get target ID by tab index.""" target_ids = await self.target_ids if 0 <= tab_index < len(target_ids): return target_ids[tab_index] return None async def get_tab_index(self, target_id: str) -> int: """Get tab index for a target ID.""" target_ids = await self.target_ids if target_id in target_ids: return target_ids.index(target_id) return -1 async def get_tabs_info(self) -> list[TabInfo]: """Get information about all open tabs using CDP Target.getTargetInfo for speed.""" tabs = [] # Get all page targets using CDP pages = await self._cdp_get_all_pages() for i, page_target in enumerate(pages): target_id = page_target['targetId'] url = page_target['url'] # Try to get the title directly from Target.getTargetInfo - much faster! # The initial getTargets() doesn't include title, but getTargetInfo does try: target_info = await self.cdp_client.send.Target.getTargetInfo(params={'targetId': target_id}) # The title is directly available in targetInfo title = target_info.get('targetInfo', {}).get('title', '') # Skip JS execution for chrome:// pages and new tab pages if is_new_tab_page(url) or url.startswith('chrome://'): # Use URL as title for chrome pages, or mark new tabs as unusable if is_new_tab_page(url): title = 'ignore this tab and do not use it' elif not title: # For chrome:// pages without a title, use the URL itself title = url # Special handling for PDF pages without titles if (not title or title == '') and (url.endswith('.pdf') or 'pdf' in url): # PDF pages might not have a title, use URL filename try: from urllib.parse import urlparse filename = urlparse(url).path.split('/')[-1] if filename: title = filename except Exception: pass except Exception as e: # Fallback to basic title handling self.logger.debug(f'⚠️ Failed to get target info for tab #{i}: {_log_pretty_url(url)} - {type(e).__name__}') if is_new_tab_page(url): title = 'ignore this tab and do not use it' elif url.startswith('chrome://'): 
title = url else: title = '' tab_info = TabInfo( page_id=i, url=url, title=title, parent_page_id=None, id=target_id, # Use target ID as the unique identifier index=i, ) tabs.append(tab_info) return tabs # DOM element methods # Removed duplicate get_browser_state_with_recovery - using the decorated version below @observe_debug(ignore_input=True, ignore_output=True, name='get_minimal_state_summary') @time_execution_async('--get_minimal_state_summary') async def get_minimal_state_summary(self) -> BrowserStateSummary: """Get basic page info without DOM processing, but try to capture screenshot""" from browser_use.browser.views import BrowserStateSummary from browser_use.dom.views import EnhancedDOMTreeNode as DOMElementNode from browser_use.dom.views import NodeType, SerializedDOMState # Get basic info - no DOM parsing to avoid errors url = await self.get_current_page_url() or 'unknown' # Try to get title safely try: # timeout after 2 seconds title = await asyncio.wait_for(self.get_current_page_title(), timeout=2.0) except Exception: title = 'Page Load Error' # Try to get tabs info safely try: # timeout after 2 seconds tabs_info = await retry(timeout=2, retries=0)(self.get_tabs_info)() except Exception: tabs_info = [] # Create minimal DOM element for error state minimal_element_tree = DOMElementNode( node_id=1, backend_node_id=1, node_type=NodeType.ELEMENT_NODE, node_name='body', node_value='', attributes={}, is_scrollable=False, is_visible=True, absolute_position=None, frame_id=None, target_id=self.current_target_id, content_document=None, shadow_root_type=None, shadow_roots=None, parent_node=None, children_nodes=[], ax_node=None, snapshot_node=None, ) # Check if current page is a PDF viewer is_pdf_viewer = await self._is_pdf_viewer(page) # Create minimal SerializedDOMState minimal_dom_state = SerializedDOMState( _root=None, # No simplified tree for minimal state selector_map={}, # Empty selector map ) return BrowserStateSummary( dom_state=minimal_dom_state, url=url, 
title=title, tabs=tabs_info, pixels_above=0, pixels_below=0, browser_errors=[f'Page state retrieval failed, minimal recovery applied for {url}'], is_pdf_viewer=is_pdf_viewer, recent_events='', ) @observe_debug(ignore_input=True, ignore_output=True, name='get_updated_state') async def _get_updated_state(self, focus_element: int = -1, include_screenshot: bool = True) -> BrowserStateSummary: """Update and return state.""" # Get current page URL page_url = await self.get_current_page_url() # Check if this is a new tab or chrome:// page early for optimization is_empty_page = is_new_tab_page(page_url) or page_url.startswith('chrome://') try: # Fast path for empty pages - skip all expensive operations if is_empty_page: self.logger.debug(f'⚑ Fast path for empty page: {page_url}') # Create minimal DOM state immediately - just return None for now # since DOM classes have been refactored content = None # Get tabs info tabs_info = await self.get_tabs_info() # Skip screenshot for empty pages screenshot_b64 = None # Use default viewport dimensions from browser profile viewport = self.browser_profile.viewport or {'width': 1280, 'height': 720} page_info = PageInfo( viewport_width=viewport['width'], viewport_height=viewport['height'], page_width=viewport['width'], page_height=viewport['height'], scroll_x=0, scroll_y=0, pixels_above=0, pixels_below=0, pixels_left=0, pixels_right=0, ) # Return minimal state immediately self.browser_state_summary = BrowserStateSummary( dom_state=content, url=page_url, title='New Tab' if is_new_tab_page(page_url) else 'Chrome Page', tabs=tabs_info, screenshot=screenshot_b64, page_info=page_info, pixels_above=0, pixels_below=0, browser_errors=[], is_pdf_viewer=False, ) return self.browser_state_summary # Normal path for regular pages self.logger.debug('🧹 Removing highlights...') try: await self.remove_highlights() except TimeoutError: self.logger.debug('Timeout to remove highlights') # Check for PDF and auto-download if needed try: pdf_path = await 
self._auto_download_pdf_if_needed(page) if pdf_path: self.logger.info(f'πŸ“„ PDF auto-downloaded: {pdf_path}') except Exception as e: self.logger.debug(f'PDF auto-download check failed: {type(e).__name__}: {e}') self.logger.debug('🌳 Starting DOM processing...') from browser_use.browser.events import BrowserStateRequestEvent from browser_use.dom.views import SerializedDOMState try: # Use the DOMWatchdog via event bus - request state with DOM result = await asyncio.wait_for( self.event_bus.dispatch(BrowserStateRequestEvent(include_dom=True, include_screenshot=False)), timeout=45.0, # 45 second timeout for DOM processing - generous for complex pages ) state_summary = await result.event_result() content = state_summary.dom_state if state_summary else None self.logger.debug('βœ… DOM processing completed') except (TimeoutError, Exception) as e: if isinstance(e, TimeoutError): self.logger.warning(f'DOM processing timed out after 45 seconds for {page_url}') else: self.logger.warning(f'DOM processing failed: {e}') self.logger.warning('πŸ”„ Falling back to minimal DOM state to allow basic navigation...') # Create minimal DOM state for basic navigation content = SerializedDOMState( _root=None, # No simplified tree for minimal state selector_map={}, # Empty selector map ) self.logger.debug('πŸ“‹ Getting tabs info...') tabs_info = await self.get_tabs_info() self.logger.debug('βœ… Tabs info completed') # Get all cross-origin iframes within the page and open them in new tabs # mark the titles of the new tabs so the LLM knows to check them for additional content # unfortunately too buggy for now, too many sites use invisible cross-origin iframes for ads, tracking, youtube videos, social media, etc. 
# and it distracts the bot by opening a lot of new tabs # iframe_urls = await dom_service.get_cross_origin_iframes() # outer_page = self.current_target_id # for url in iframe_urls: # if url in [tab.url for tab in tabs_info]: # continue # skip if the iframe if we already have it open in a tab # new_page_id = tabs_info[-1].page_id + 1 # self.logger.debug(f'Opening cross-origin iframe in new tab #{new_page_id}: {url}') # await self.create_new_tab(url) # tabs_info.append( # TabInfo( # page_id=new_page_id, # url=url, # title=f'iFrame opened as new tab, treat as if embedded inside page {outer_page.url}: {page.url}', # parent_page_url=outer_page.url, # ) # ) if include_screenshot: try: self.logger.debug('πŸ“Έ Capturing screenshot...') # Reasonable timeout for screenshot screenshot_b64 = await self.take_screenshot() # self.logger.debug('βœ… Screenshot completed') except Exception as e: self.logger.warning(f'❌ Screenshot failed for {_log_pretty_url(page.url)}: {type(e).__name__} {e}') screenshot_b64 = None else: screenshot_b64 = None # Get comprehensive page information page_info = await self.get_page_info(page) try: self.logger.debug('πŸ“ Getting scroll info...') pixels_above, pixels_below = await asyncio.wait_for(self.get_scroll_info(page), timeout=5.0) self.logger.debug('βœ… Scroll info completed') except Exception as e: self.logger.warning(f'Failed to get scroll info: {type(e).__name__}') pixels_above, pixels_below = 0, 0 try: title = await asyncio.wait_for(self.get_current_page_title(), timeout=3.0) except Exception: title = 'Title unavailable' # Check if this is a minimal fallback state browser_errors = [] if not content.selector_map: # Empty selector map indicates fallback state browser_errors.append( f'DOM processing timed out for {page_url} - using minimal state. Basic navigation still available via go_to_url, scroll, and search actions.' 
) # Check if current page is a PDF viewer is_pdf_viewer = await self._is_pdf_viewer(page) self.browser_state_summary = BrowserStateSummary( dom_state=content, url=page_url, title=title, tabs=tabs_info, screenshot=screenshot_b64, page_info=page_info, pixels_above=pixels_above, pixels_below=pixels_below, browser_errors=browser_errors, is_pdf_viewer=is_pdf_viewer, ) self.logger.debug('βœ… get_state_summary completed successfully') return self.browser_state_summary except Exception as e: self.logger.error(f'❌ Failed to update browser_state_summary: {type(e).__name__}: {e}') # Return last known good state if available if hasattr(self, 'browser_state_summary'): return self.browser_state_summary raise # ========== CDP Helper Methods ========== async def cdp_clients_for_target(self, target_id: str) -> list['CDPClient']: """Get CDP clients for a target, including main and iframe sessions. Returns list with root target session first, then iframe sessions. """ if not self.cdp_client: raise ValueError('CDP client not initialized') clients = [] # Get cached session for main target client, session_id = await self.get_cdp_session(target_id) # For now, return just the main client with session # In future, we'd enumerate iframes and attach to them too clients.append(client) return clients async def cdp_client_for_node(self, node: 'EnhancedDOMTreeNode') -> 'CDPClient': """Get CDP client for a specific DOM node based on its frame.""" if node.frame_id: return await self.cdp_client_for_frame(node.frame_id) return self.cdp_client async def frames_by_target(self, target_id: str) -> list[str]: """Get all frame IDs for a target.""" # Get frame tree using helper frame_tree = await self._cdp_execute_on_target(target_id, commands=[('Page.getFrameTree', {})]) # Extract frame IDs recursively frame_ids = [] def extract_frames(tree_node): frame_ids.append(tree_node['frame']['id']) for child in tree_node.get('childFrames', []): extract_frames(child) extract_frames(frame_tree['frameTree']) return 
frame_ids async def target_id_by_frame_id(self, frame_id: str) -> str | None: """Get target ID for a given frame ID. Note: This requires iterating through all targets to find the frame. """ targets = await self.cdp_client.send.Target.getTargets() for target in targets['targetInfos']: # Skip invalid targets if not self._is_valid_target( target, include_http=True, include_about=True, include_pages=True, include_iframes=True, include_workers=False ): continue # Check if this target contains the frame frames = await self.frames_by_target(target['targetId']) if frame_id in frames: return target['targetId'] return None async def get_current_page_cdp_session_id(self) -> str | None: """Get the CDP session ID for the current page.""" if not hasattr(self, 'current_target_id') or not self.current_target_id: return None # Get cached session ID client, session_id = await self.get_cdp_session(self.current_target_id) return session_id async def _create_fresh_cdp_client(self) -> Any: """Create a new CDP client instance. Caller is responsible for cleanup.""" if not self.cdp_url: raise ValueError('CDP URL is not set') import httpx from cdp_use import CDPClient # If the cdp_url is already a websocket URL, use it as-is. if self.cdp_url.startswith('ws'): ws_url = self.cdp_url else: # Otherwise, treat it as the DevTools HTTP root and fetch the websocket URL. url = self.cdp_url.rstrip('/') if not url.endswith('/json/version'): url = url + '/json/version' async with httpx.AsyncClient() as client: version_info = await client.get(url) ws_url = version_info.json()['webSocketDebuggerUrl'] cdp_client = CDPClient(ws_url) await cdp_client.start() return cdp_client async def create_cdp_session_for_target(self, target_id: str) -> Any: """Create a new CDP session attached to a specific target/frame. 
async def create_cdp_session_for_frame(self, frame_id: str) -> Any:
    """Locate the page target whose frame tree contains ``frame_id`` and return its session.

    Every valid target of type ``page`` is probed through its cached session: the
    frame tree is fetched and walked until the requested frame is found.

    Args:
        frame_id: The frame ID to look for.

    Returns:
        Whatever ``get_cdp_session`` returns for the owning target
        (a ``(CDPClient, session_id)`` pair).

    Raises:
        ValueError: If no page target contains ``frame_id``.
    """

    def tree_contains_frame(node) -> bool:
        # Pre-order walk: check this frame first, then descend into its children
        if node['frame']['id'] == frame_id:
            return True
        return any(tree_contains_frame(child) for child in node.get('childFrames', []))

    listing = await self.cdp_client.send.Target.getTargets()

    for info in listing['targetInfos']:
        # Only valid page targets are candidates
        if not self._is_valid_target(info) or info['type'] != 'page':
            continue

        owner_id = info['targetId']
        client, probe_session_id = await self.get_cdp_session(owner_id)
        tree = await client.send.Page.getFrameTree(session_id=probe_session_id)

        if tree_contains_frame(tree['frameTree']):
            # Hand back the cached session for the target that owns the frame
            return await self.get_cdp_session(owner_id)

    raise ValueError(f'Frame with ID {frame_id} not found in any target')
async def get_current_page_title(self) -> str:
    """Get the title of the current page using CDP.

    Attaches a temporary flattened session to ``current_target_id``, evaluates
    ``document.title`` in it, and always detaches the session again, including
    when the evaluation fails, so temporary sessions are not leaked.

    Returns:
        The page title, or ``''`` when there is no current target or when
        attaching or evaluating fails.
    """
    if not self.current_target_id:
        return ''

    try:
        session = await self.cdp_client.send.Target.attachToTarget(
            params={'targetId': self.current_target_id, 'flatten': True}
        )
        session_id = session['sessionId']
    except Exception:
        # Nothing was attached, so there is nothing to clean up
        return ''

    try:
        title_result = await self.cdp_client.send.Runtime.evaluate(
            params={'expression': 'document.title'}, session_id=session_id
        )
        return title_result.get('result', {}).get('value', '')
    except Exception:
        return ''
    finally:
        # Detach on success and on failure so the temporary session never outlives this call
        try:
            await self.cdp_client.send.Target.detachFromTarget(params={'sessionId': session_id})
        except Exception as e:
            # Best-effort cleanup: a failed detach must not hide a title we already have
            self.logger.debug(f'Failed to detach temporary title session: {type(e).__name__}: {e}')
async def get_selector_map(self) -> dict[int, 'EnhancedDOMTreeNode']:
    """Return the current selector map, preferring the session-level cache.

    Lookup order:
        1. ``self._cached_selector_map`` when it is non-empty.
        2. The DOM watchdog's ``selector_map`` attribute, when a watchdog exists
           and exposes one.
        3. An empty dict.

    Returns:
        Dictionary mapping element indices to EnhancedDOMTreeNode objects.
    """
    cached = self._cached_selector_map
    if cached:
        return cached

    watchdog = self._dom_watchdog
    if not watchdog or not hasattr(watchdog, 'selector_map'):
        return {}

    # A watchdog may hold None or an empty map; normalise both to {}
    return watchdog.selector_map or {}
for f in downloads_dir.iterdir() if f.is_file()] return sorted(files) # ========== CDP-based replacements for browser_context operations ========== async def _cdp_execute_on_target( self, target_id: str, commands: list[tuple[str, dict]] | None = None, callable_fn: Any | None = None ) -> Any: """Execute CDP commands on a specific target using cached session. Args: target_id: The target ID to execute commands on commands: List of (method, params) tuples to execute, e.g. [('Runtime.evaluate', {'expression': '...'})] callable_fn: Alternative - async function that receives (cdp_client, session_id) and returns result Returns: Result of the last command or callable_fn return value """ # Get cached session or create new one client, session_id = await self.get_cdp_session(target_id) if callable_fn: return await callable_fn(client, session_id) elif commands: result = None for method, params in commands: domain, command = method.split('.') domain_obj = getattr(client.send, domain) cmd_func = getattr(domain_obj, command) result = await cmd_func(params=params, session_id=session_id) if params else await cmd_func(session_id=session_id) return result else: return session_id async def _cdp_get_all_pages(self) -> list[dict]: """Get all browser pages/tabs using CDP Target.getTargets.""" targets = await self.cdp_client.send.Target.getTargets() # Filter for valid page/tab targets only return [t for t in targets.get('targetInfos', []) if self._is_valid_target(t) and t.get('type') in ('page', 'tab')] async def _cdp_create_new_page(self, url: str = 'about:blank') -> str: """Create a new page/tab using CDP Target.createTarget. 
Returns target ID.""" result = await self.cdp_client.send.Target.createTarget(params={'url': url, 'newWindow': False, 'background': False}) return result['targetId'] async def _cdp_close_page(self, target_id: str) -> None: """Close a page/tab using CDP Target.closeTarget.""" await self.cdp_client.send.Target.closeTarget(params={'targetId': target_id}) async def _cdp_activate_page(self, target_id: str) -> None: """Activate/focus a page using CDP Target.activateTarget.""" await self.cdp_client.send.Target.activateTarget(params={'targetId': target_id}) async def _cdp_get_cookies(self, urls: list[str] | None = None) -> list[dict]: """Get cookies using CDP Network.getCookies.""" if not self.current_target_id: return [] client, session_id = await self.get_cdp_session(self.current_target_id) params = {'urls': urls} if urls else {} result = await client.send.Network.getCookies(params=params, session_id=session_id) return result.get('cookies', []) async def _cdp_set_cookies(self, cookies: list[dict]) -> None: """Set cookies using CDP Network.setCookies.""" if not self.current_target_id or not cookies: return client, session_id = await self.get_cdp_session(self.current_target_id) await client.send.Network.setCookies(params={'cookies': cookies}, session_id=session_id) async def _cdp_clear_cookies(self) -> None: """Clear all cookies using CDP Network.clearBrowserCookies.""" if not self.current_target_id: return client, session_id = await self.get_cdp_session(self.current_target_id) await client.send.Network.clearBrowserCookies(session_id=session_id) async def _cdp_set_extra_headers(self, headers: dict[str, str]) -> None: """Set extra HTTP headers using CDP Network.setExtraHTTPHeaders.""" if not self.current_target_id: return client, session_id = await self.get_cdp_session(self.current_target_id) await client.send.Network.setExtraHTTPHeaders(params={'headers': headers}, session_id=session_id) async def _cdp_grant_permissions(self, permissions: list[str], origin: str | None = 
None) -> None: """Grant permissions using CDP Browser.grantPermissions.""" params = {'permissions': permissions} if origin: params['origin'] = origin await self.cdp_client.send.Browser.grantPermissions(**params) async def _cdp_set_geolocation(self, latitude: float, longitude: float, accuracy: float = 100) -> None: """Set geolocation using CDP Emulation.setGeolocationOverride.""" await self.cdp_client.send.Emulation.setGeolocationOverride( params={'latitude': latitude, 'longitude': longitude, 'accuracy': accuracy} ) async def _cdp_clear_geolocation(self) -> None: """Clear geolocation override using CDP.""" await self.cdp_client.send.Emulation.clearGeolocationOverride() async def _cdp_add_init_script(self, script: str) -> str: """Add script to evaluate on new document using CDP Page.addScriptToEvaluateOnNewDocument.""" result = await self.cdp_client.send.Page.addScriptToEvaluateOnNewDocument(params={'source': script}) return result['identifier'] async def _cdp_remove_init_script(self, identifier: str) -> None: """Remove script added with addScriptToEvaluateOnNewDocument.""" await self.cdp_client.send.Page.removeScriptToEvaluateOnNewDocument(params={'identifier': identifier}) async def _cdp_set_viewport(self, width: int, height: int, device_scale_factor: float = 1.0, mobile: bool = False) -> None: """Set viewport using CDP Emulation.setDeviceMetricsOverride.""" await self.cdp_client.send.Emulation.setDeviceMetricsOverride( params={'width': width, 'height': height, 'deviceScaleFactor': device_scale_factor, 'mobile': mobile} ) async def _cdp_get_storage_state(self) -> dict: """Get storage state (cookies, localStorage, sessionStorage) using CDP.""" # Use the _cdp_get_cookies helper which handles session attachment cookies = await self._cdp_get_cookies() # Get localStorage and sessionStorage would require evaluating JavaScript # on each origin, which is more complex. For now, return cookies only. 
@staticmethod
def _is_valid_target(
    target_info: dict,
    include_http: bool = True,
    include_chrome: bool = False,
    include_chrome_extensions: bool = False,
    include_chrome_error: bool = False,
    include_about: bool = True,
    include_iframes: bool = True,
    include_pages: bool = True,
    include_workers: bool = False,
) -> bool:
    """Decide whether a CDP target should be processed.

    A target passes only when BOTH its URL and its type are enabled by the flags.

    Args:
        target_info: Target info dict from CDP; ``type`` and ``url`` are read.
        include_http: Allow ``http://`` and ``https://`` URLs.
        include_chrome: Allow ``chrome://`` URLs.
        include_chrome_extensions: Allow ``chrome-extension://`` URLs.
        include_chrome_error: Allow ``chrome-error://`` URLs.
        include_about: Allow exactly ``about:blank`` (no other ``about:`` page).
        include_iframes: Allow ``iframe`` and ``webview`` target types.
        include_pages: Allow ``page`` and ``tab`` target types.
        include_workers: Allow ``service_worker``, ``shared_worker`` and ``worker`` types.

    Returns:
        True if target should be processed, False if it should be skipped.
    """
    target_type = target_info.get('type', '')
    url = target_info.get('url', '')

    # (does the URL match this family?, is that family enabled?)
    url_rules = (
        (url.startswith('chrome-error://'), include_chrome_error),
        (url.startswith('chrome://'), include_chrome),
        (url.startswith('chrome-extension://'), include_chrome_extensions),
        # exact match only: about:srcdoc and other rare about: pages stay excluded
        (url == 'about:blank', include_about),
        (url.startswith(('http://', 'https://')), include_http),
    )
    url_allowed = any(matches and enabled for matches, enabled in url_rules)

    # (target types in this family, is that family enabled?)
    type_rules = (
        (('service_worker', 'shared_worker', 'worker'), include_workers),
        (('page', 'tab'), include_pages),
        (('iframe', 'webview'), include_iframes),
    )
    type_allowed = any(target_type in kinds and enabled for kinds, enabled in type_rules)

    return url_allowed and type_allowed
parentId field # This indicates it's an OOPIF with a parent in another target actual_parent_id = frame.get('parentId') or parent_frame_id # Create frame info with all CDP response data plus our additions frame_info = { **frame, # Include all original frame data: id, url, parentId, etc. 'frameTargetId': target_id, # Target that can access this frame 'parentFrameId': actual_parent_id, # Use parentId from frame if available 'childFrameIds': [], # Will be populated below 'isCrossOrigin': False, # Will be determined based on context 'isUrlValid': _is_url_valid(frame.get('url', '')), } # Check if frame is cross-origin based on crossOriginIsolatedContextType cross_origin_type = frame.get('crossOriginIsolatedContextType') if cross_origin_type and cross_origin_type != 'NotIsolated': frame_info['isCrossOrigin'] = True # For iframe targets, the frame itself is likely cross-origin if target.get('type') == 'iframe': frame_info['isCrossOrigin'] = True # Add child frame IDs (note: OOPIFs won't appear here) child_frames = node.get('childFrames', []) for child in child_frames: child_frame = child.get('frame', {}) child_frame_id = child_frame.get('id') if child_frame_id: frame_info['childFrameIds'].append(child_frame_id) # Store or merge frame info if current_frame_id in all_frames: # Frame already seen from another target, merge info existing = all_frames[current_frame_id] # If this is an iframe target, it has direct access to the frame if target.get('type') == 'iframe': existing['frameTargetId'] = target_id existing['isCrossOrigin'] = True else: all_frames[current_frame_id] = frame_info # Process child frames recursively for child in child_frames: process_frame_tree(child, current_frame_id) # Process the entire frame tree process_frame_tree(frame_tree_result.get('frameTree', {})) except Exception: # Target doesn't support Page domain or has no frames pass except Exception: # Error processing this target pass # Second pass: populate backend node IDs and parent target IDs await 
async def cdp_client_for_frame(self, frame_id: str) -> Any:
    """Resolve the CDP client and session for the target that owns ``frame_id``.

    Builds the frame hierarchy with ``get_all_frames()`` and looks the frame up in
    it, then maps the frame's ``frameTargetId`` to the session collected for that
    target.

    Args:
        frame_id: The frame ID to search for.

    Returns:
        Tuple of ``(self.cdp_client, session_id, target_id)`` for the owning target.

    Raises:
        ValueError: If the frame is unknown, or its owning target has no session in
            the collected target sessions.
    """
    frames, sessions_by_target = await self.get_all_frames()
    info = await self.find_frame_target(frame_id, frames)

    owner_target = info.get('frameTargetId') if info else None
    if not info or owner_target not in sessions_by_target:
        raise ValueError(f"Frame with ID '{frame_id}' not found in any target")

    # Reuse the session gathered while the frame hierarchy was built
    return self.cdp_client, sessions_by_target[owner_target], owner_target
# Import uuid7str for ID generation
# NOTE(review): the top of this file already imports uuid7str from uuid_extensions
# unconditionally, so this fallback looks unreachable unless that import changes - confirm.
try:
    from uuid_extensions import uuid7str
except ImportError:
    import uuid

    def uuid7str() -> str:
        """Fallback ID generator: returns a random UUID4 string."""
        return str(uuid.uuid4())


# Fix Pydantic circular dependency for all watchdogs
# This must be called after BrowserSession class is fully defined
_watchdog_modules = [
    'browser_use.browser.crash_watchdog.CrashWatchdog',
    'browser_use.browser.downloads_watchdog.DownloadsWatchdog',
    'browser_use.browser.local_browser_watchdog.LocalBrowserWatchdog',
    'browser_use.browser.storage_state_watchdog.StorageStateWatchdog',
    'browser_use.browser.navigation_watchdog.NavigationWatchdog',
    'browser_use.browser.aboutblank_watchdog.AboutBlankWatchdog',
    'browser_use.browser.default_action_watchdog.DefaultActionWatchdog',
    'browser_use.browser.dom_watchdog.DOMWatchdog',
    'browser_use.browser.screenshot_watchdog.ScreenshotWatchdog',
]


def _rebuild_watchdog_models(class_paths: list[str]) -> None:
    """Call ``model_rebuild()`` on each class named by a dotted ``module.Class`` path.

    Best-effort: a class that cannot be imported or rebuilt is skipped, and the
    failure is logged at debug level instead of being silently discarded.

    Args:
        class_paths: Dotted paths whose last component is the class name.
    """
    import importlib
    import logging

    log = logging.getLogger(__name__)
    for class_path in class_paths:
        try:
            module_name, class_name = class_path.rsplit('.', 1)
            watchdog_class = getattr(importlib.import_module(module_name), class_name)
            watchdog_class.model_rebuild()
        except Exception as exc:
            # Ignore if watchdog can't be imported or rebuilt, but leave a trace for debugging
            log.debug('Skipping model_rebuild for %s: %s: %s', class_path, type(exc).__name__, exc)


_rebuild_watchdog_models(_watchdog_modules)