Files
browser-use/browser_use/browser/session.py
2025-08-06 20:52:12 -07:00

1680 lines
58 KiB
Python

"""Event-driven browser session with backwards compatibility."""
import asyncio
import logging
import weakref
from pathlib import Path
from typing import TYPE_CHECKING, Any
from bubus import EventBus
from bubus.helpers import retry
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
from uuid_extensions import uuid7str
from browser_use.browser.events import (
BrowserConnectedEvent,
BrowserErrorEvent,
BrowserLaunchEvent,
BrowserStartEvent,
BrowserStopEvent,
BrowserStoppedEvent,
)
from browser_use.browser.profile import BrowserProfile
from browser_use.browser.views import (
BrowserStateSummary,
PageInfo,
TabInfo,
)
from browser_use.observability import observe_debug
from browser_use.utils import (
_log_pretty_url,
is_new_tab_page,
logger,
time_execution_async,
)
if TYPE_CHECKING:
from cdp_use import CDPClient
from browser_use.dom.views import EnhancedDOMTreeNode
class CachedSession:
"""Container for cached CDP session to allow weak references."""
def __init__(self, client: Any, session_id: str, target_id: str, frame_id: str | None = None):
self.client = client
self.session_id = session_id
self.target_id = target_id
self.frame_id = frame_id
def __hash__(self):
return hash(self.client)
_GLOB_WARNING_SHOWN = False  # used inside _is_url_allowed to avoid spamming the logs with the same warning multiple times
MAX_SCREENSHOT_HEIGHT = 2000
MAX_SCREENSHOT_WIDTH = 1920


def _log_glob_warning(domain: str, glob: str, logger: logging.Logger):
    """Warn (once per process) that a glob pattern matched a concrete domain.

    Glob patterns are very easy to mess up and match too many domains by accident:
    e.g. if you only need gmail, don't use *.google.com — an attacker could convince
    the agent to visit a malicious doc on docs.google.com to set up a prompt injection.
    """
    global _GLOB_WARNING_SHOWN
    if _GLOB_WARNING_SHOWN:
        return
    logger.warning(
        f"⚠️ Allowing agent to visit {domain} based on allowed_domains=['{glob}', ...]. Set allowed_domains=['{domain}', ...] explicitly to avoid matching too many domains!"
    )
    _GLOB_WARNING_SHOWN = True
# Shared default profile — used by every BrowserSession that does not supply its own browser_profile.
DEFAULT_BROWSER_PROFILE = BrowserProfile()
class BrowserSession(BaseModel):
"""Event-driven browser session with backwards compatibility.
This class provides a 2-layer architecture:
- High-level event handling for agents/controllers
- Direct CDP/Playwright calls for browser operations
Supports both event-driven and imperative calling styles.
"""
model_config = ConfigDict(
arbitrary_types_allowed=True,
validate_assignment=True,
extra='forbid',
)
# Core configuration
id: str = Field(default_factory=lambda: uuid7str())
browser_profile: BrowserProfile = Field(default_factory=lambda: DEFAULT_BROWSER_PROFILE)
# Connection info (for backwards compatibility)
cdp_url: str | None = None
is_local: bool = Field(default=True)
# Mutable state
current_target_id: str | None = None
"""Current active target ID for the main page"""
# Event bus
event_bus: EventBus = Field(default_factory=EventBus)
# PDF handling
_auto_download_pdfs: bool = PrivateAttr(default=True)
def model_post_init(self, __context) -> None:
"""Register event handlers after model initialization."""
# Register BrowserSession's event handlers manually since it's not a BaseWatchdog
self.event_bus.on('BrowserStartEvent', self.on_BrowserStartEvent)
self.event_bus.on('BrowserStopEvent', self.on_BrowserStopEvent)
# Watchdogs
_crash_watchdog: Any = PrivateAttr(default=None)
_downloads_watchdog: Any = PrivateAttr(default=None)
_aboutblank_watchdog: Any = PrivateAttr(default=None)
_navigation_watchdog: Any = PrivateAttr(default=None)
_storage_state_watchdog: Any = PrivateAttr(default=None)
_local_browser_watchdog: Any = PrivateAttr(default=None)
_default_action_watchdog: Any = PrivateAttr(default=None)
_dom_watchdog: Any = PrivateAttr(default=None)
_screenshot_watchdog: Any = PrivateAttr(default=None)
# Navigation tracking now handled by watchdogs
# Cached browser state for synchronous access
_cached_browser_state_summary: Any = PrivateAttr(default=None)
_cached_selector_map: dict[int, 'EnhancedDOMTreeNode'] = PrivateAttr(default_factory=dict)
"""Cached mapping of element indices to DOM nodes"""
# CDP client
_cdp_client: 'CDPClient | None' = PrivateAttr(default=None)
"""Cached CDP client instance"""
# CDP session cache
_cdp_session_cache: weakref.WeakValueDictionary = PrivateAttr(default_factory=weakref.WeakValueDictionary)
"""Cache of CDP sessions by target_id -> (client, session_id) tuple"""
_cdp_cache_enabled: bool = PrivateAttr(default=True)
"""Flag to enable/disable CDP session caching"""
_logger: Any = PrivateAttr(default=None)
@property
def logger(self) -> Any:
"""Get instance-specific logger with session ID in the name"""
if self._logger is None: # keep updating the name pre-init because our id and str(self) can change
import logging
self._logger = logging.getLogger(f'browser_use.{self}')
return self._logger
@property
def cdp_client(self) -> 'CDPClient':
"""Get the cached CDP client.
The client is created and started in setup_browser_via_cdp_url().
Returns:
The CDP client instance
Raises:
RuntimeError: If CDP client is not initialized yet
"""
if self._cdp_client is None:
raise RuntimeError('CDP client not initialized - browser may not be connected yet')
return self._cdp_client
async def get_cdp_session(self, target_id: str) -> tuple[Any, str]:
"""Get or create a CDP session for a target, using cache when enabled.
Args:
target_id: The target ID to get a session for
Returns:
Tuple of (cdp_client, session_id)
"""
# If caching is disabled, always create a new session
if not self._cdp_cache_enabled:
client = self.cdp_client
session = await client.send.Target.attachToTarget(params={'targetId': target_id, 'flatten': True})
session_id = session['sessionId']
await self._enable_all_domains(client, session_id)
return client, session_id
# Check cache first
cached = self._cdp_session_cache.get(target_id)
if cached:
try:
# Quick ping to verify it's still alive (0.1s timeout)
await asyncio.wait_for(
cached.client.send.Runtime.evaluate(params={'expression': '1'}, session_id=cached.session_id),
timeout=0.1
)
return cached.client, cached.session_id
except:
# Dead session, remove from cache
self._cdp_session_cache.pop(target_id, None)
# Create new session
client = self.cdp_client
session = await client.send.Target.attachToTarget(params={'targetId': target_id, 'flatten': True})
session_id = session['sessionId']
# Enable all necessary domains at creation time
await self._enable_all_domains(client, session_id)
# Cache it using CachedSession (which supports weak references)
cache_value = CachedSession(client, session_id, target_id)
self._cdp_session_cache[target_id] = cache_value
return client, session_id
async def _enable_all_domains(self, client: Any, session_id: str) -> None:
"""Enable all necessary CDP domains for a session."""
# Enable auto-attach for related targets (iframes, etc)
await client.send.Target.setAutoAttach(
params={'autoAttach': True, 'waitForDebuggerOnStart': False, 'flatten': True},
session_id=session_id
)
# Enable all commonly used domains in parallel
await asyncio.gather(
client.send.Page.enable(session_id=session_id),
# TEMPORARILY DISABLED: Network.enable causes excessive event logging
# client.send.Network.enable(session_id=session_id),
client.send.Runtime.enable(session_id=session_id),
client.send.DOM.enable(session_id=session_id),
client.send.DOMSnapshot.enable(session_id=session_id),
client.send.Accessibility.enable(session_id=session_id),
client.send.Inspector.enable(session_id=session_id),
return_exceptions=True # Don't fail if some domains aren't available
)
async def release_cdp_session(self, target_id: str) -> None:
"""Explicitly release a CDP session (detach and remove from cache).
Args:
target_id: The target ID to release the session for
"""
# If caching is disabled, nothing to release from cache
if not self._cdp_cache_enabled:
return
cached = self._cdp_session_cache.pop(target_id, None)
if cached:
try:
client, session_id = cached
await client.send.Target.detachFromTarget(params={'sessionId': session_id})
except:
pass # Session might already be dead
async def clear_cdp_cache(self) -> None:
"""Clear all cached CDP sessions with proper cleanup."""
for target_id in list(self._cdp_session_cache.keys()):
await self.release_cdp_session(target_id)
def __repr__(self) -> str:
port_number = (self.cdp_url or 'no-cdp').rsplit(':', 1)[-1].split('/', 1)[0]
return f'BrowserSession🆂 {self.id[-4:]}:{port_number} #{str(id(self))[-2:]} (cdp_url={self.cdp_url}, profile={self.browser_profile})'
def __str__(self) -> str:
# Note: _original_browser_session tracking moved to Agent class
port_number = (self.cdp_url or 'no-cdp').rsplit(':', 1)[-1].split('/', 1)[0]
return (
f'BrowserSession🆂 {self.id[-4:]}:{port_number} #{str(id(self))[-2:]}' # ' 🅟 {str(id(self.current_target_id))[-2:]}'
)
    async def on_BrowserStartEvent(self, event: BrowserStartEvent) -> dict[str, str]:
        """Handle browser start request.

        Launches a local browser (via LocalBrowserWatchdog) when no cdp_url is set,
        then connects over CDP and announces BrowserConnectedEvent.

        Returns:
            Dict with 'cdp_url' key containing the CDP URL

        Raises:
            Exception: re-raised after a BrowserErrorEvent is dispatched when any step fails.
        """
        # Initialize and attach all watchdogs FIRST so LocalBrowserWatchdog can handle BrowserLaunchEvent
        await self.attach_all_watchdogs()
        try:
            # If no CDP URL, launch local browser
            if not self.cdp_url:
                if self.is_local:
                    # Launch local browser using event-driven approach
                    launch_event = self.event_bus.dispatch(BrowserLaunchEvent())
                    await launch_event
                    # Get the CDP URL from LocalBrowserWatchdog handler result
                    results = await launch_event.event_results_flat_dict()
                    self.cdp_url = results.get('cdp_url')
                    if not self.cdp_url:
                        raise Exception('No CDP URL returned from LocalBrowserWatchdog')
                else:
                    raise Exception('No CDP URL provided for remote browser connection')
            # NOTE(review): `assert` is stripped under `python -O`; consider an explicit check
            assert self.cdp_url and '://' in self.cdp_url
            # Setup browser via CDP (for both local and remote cases)
            await self.setup_browser_via_cdp_url()
            # Notify that browser is connected (single place)
            self.event_bus.dispatch(BrowserConnectedEvent(cdp_url=self.cdp_url))
            # Return the CDP URL for other components
            return {'cdp_url': self.cdp_url}
        except Exception as e:
            # Surface the failure on the bus for observers, then re-raise for the caller.
            self.event_bus.dispatch(
                BrowserErrorEvent(
                    error_type='BrowserStartEventError',
                    message=f'Failed to start browser: {type(e).__name__} {e}',
                    details={'cdp_url': self.cdp_url, 'is_local': self.is_local},
                )
            )
            raise
    async def on_BrowserStopEvent(self, event: BrowserStopEvent) -> None:
        """Handle browser stop request.

        Honors browser_profile.keep_alive unless event.force is set. Never raises:
        failures are reported as a BrowserErrorEvent on the bus instead.
        """
        try:
            # Check if we should keep the browser alive
            if self.browser_profile.keep_alive and not event.force:
                self.event_bus.dispatch(BrowserStoppedEvent(reason='Kept alive due to keep_alive=True'))
                return
            # Clear CDP session cache before stopping
            await self.clear_cdp_cache()
            # Reset state — only local sessions own their cdp_url
            if self.is_local:
                self.cdp_url = None
            # Notify stop and wait for all handlers to complete
            # LocalBrowserWatchdog listens for BrowserStopEvent and dispatches BrowserKillEvent
            stop_event = self.event_bus.dispatch(BrowserStoppedEvent(reason='Stopped by request'))
            await stop_event
        except Exception as e:
            self.event_bus.dispatch(
                BrowserErrorEvent(
                    error_type='BrowserStopEventError',
                    message=f'Failed to stop browser: {type(e).__name__} {e}',
                    details={'cdp_url': self.cdp_url, 'is_local': self.is_local},
                )
            )
# ========== Helper Methods ==========
async def get_browser_state_with_recovery(
self, cache_clickable_elements_hashes: bool = True, include_screenshot: bool = False
) -> 'BrowserStateSummary':
"""Get browser state using event system.
This is a compatibility wrapper that dispatches BrowserStateRequestEvent.
Args:
cache_clickable_elements_hashes: Whether to cache element hashes (for compatibility)
include_screenshot: Whether to include screenshot in state
Returns:
BrowserStateSummary from the event handler
"""
from browser_use.browser.events import BrowserStateRequestEvent
# Dispatch the event and wait for result
event = self.event_bus.dispatch(
BrowserStateRequestEvent(
include_dom=True,
include_screenshot=include_screenshot,
cache_clickable_elements_hashes=cache_clickable_elements_hashes,
)
)
# The handler returns the BrowserStateSummary directly
result = await event.event_result()
return result
async def get_state_summary(self, cache_clickable_elements_hashes: bool = True) -> 'BrowserStateSummary':
"""Alias for get_browser_state_with_recovery for backwards compatibility."""
return await self.get_browser_state_with_recovery(
cache_clickable_elements_hashes=cache_clickable_elements_hashes, include_screenshot=False
)
async def attach_all_watchdogs(self) -> None:
"""Initialize and attach all watchdogs in one go."""
from browser_use.browser.aboutblank_watchdog import AboutBlankWatchdog
from browser_use.browser.crash_watchdog import CrashWatchdog
from browser_use.browser.default_action_watchdog import DefaultActionWatchdog
from browser_use.browser.dom_watchdog import DOMWatchdog
from browser_use.browser.downloads_watchdog import DownloadsWatchdog
from browser_use.browser.local_browser_watchdog import LocalBrowserWatchdog
from browser_use.browser.navigation_watchdog import NavigationWatchdog
from browser_use.browser.screenshot_watchdog import ScreenshotWatchdog
from browser_use.browser.storage_state_watchdog import StorageStateWatchdog
watchdog_configs = [
('_crash_watchdog', CrashWatchdog),
('_downloads_watchdog', DownloadsWatchdog),
('_storage_state_watchdog', StorageStateWatchdog),
('_local_browser_watchdog', LocalBrowserWatchdog),
('_navigation_watchdog', NavigationWatchdog),
('_aboutblank_watchdog', AboutBlankWatchdog),
('_default_action_watchdog', DefaultActionWatchdog),
('_dom_watchdog', DOMWatchdog),
('_screenshot_watchdog', ScreenshotWatchdog),
]
for attr_name, watchdog_class in watchdog_configs:
if not hasattr(self, attr_name) or getattr(self, attr_name) is None:
try:
watchdog = watchdog_class(event_bus=self.event_bus, browser_session=self)
await watchdog.attach_to_session()
setattr(self, attr_name, watchdog)
# logger.debug(f'[Session] Initialized and attached {watchdog_class.__name__}')
except Exception as e:
logger.warning(f'[Session] Failed to initialize {watchdog_class.__name__}: {e}')
else:
# Watchdog already exists, don't re-initialize to avoid duplicate handlers
logger.debug(f'[Session] {watchdog_class.__name__} already initialized, skipping')
    async def setup_browser_via_cdp_url(self) -> None:
        """Connect to a remote chromium-based browser via CDP using cdp-use.

        This MUST succeed or the browser is unusable. Fails hard on any error.

        Side effects: sets self._cdp_client and self.current_target_id, and
        pre-warms the CDP session cache for the chosen target.

        Raises:
            RuntimeError: if cdp_url is unset or any connection step fails.
        """
        if not self.cdp_url:
            raise RuntimeError('Cannot setup CDP connection without CDP URL')
        self.logger.info(f'🌎 Connecting to existing chromium-based browser via CDP: {self.cdp_url} -> (remote browser)')
        try:
            # Import cdp-use client
            import httpx
            from cdp_use import CDPClient

            # Convert HTTP URL to WebSocket URL if needed
            ws_url = self.cdp_url
            if not ws_url.startswith('ws'):
                # If it's an HTTP URL, fetch the WebSocket URL from /json/version endpoint
                url = ws_url.rstrip('/')
                if not url.endswith('/json/version'):
                    url = url + '/json/version'
                async with httpx.AsyncClient() as client:
                    version_info = await client.get(url)
                    ws_url = version_info.json()['webSocketDebuggerUrl']
            # Create and store the CDP client for direct CDP communication.
            # Re-entrant: an existing client instance is reused, but start() is
            # always called — NOTE(review): confirm start() is safe to call twice.
            if self._cdp_client is None:
                self._cdp_client = CDPClient(ws_url)
            assert self._cdp_client is not None
            await self._cdp_client.start()
            self.logger.info('✅ CDP client connected successfully')
            assert self._cdp_client is not None
            # Get browser targets to find available contexts/pages
            targets = await self._cdp_client.send.Target.getTargets()
            # Find main browser pages (avoiding iframes, workers, extensions, etc.)
            page_targets = [
                t
                for t in targets['targetInfos']
                if self._is_valid_target(
                    t, include_http=True, include_about=True, include_pages=True, include_iframes=False, include_workers=False
                )
            ]
            # Check for chrome://newtab pages and immediately redirect them
            # to about:blank to avoid JS issues from CDP on chrome://* urls
            from browser_use.utils import is_new_tab_page

            for target in page_targets:
                target_url = target.get('url', '')
                if is_new_tab_page(target_url) and target_url != 'about:blank':
                    # Redirect chrome://newtab to about:blank to avoid JS issues preventing driving chrome://newtab
                    target_id = target['targetId']
                    self.logger.info(f'🔄 Redirecting {target_url} to about:blank for target {target_id}')
                    try:
                        # Use cached session to navigate to about:blank
                        client, session_id = await self.get_cdp_session(target_id)
                        await client.send.Page.navigate(params={'url': 'about:blank'}, session_id=session_id)
                    except Exception as e:
                        self.logger.warning(f'Failed to redirect {target_url} to about:blank: {e}')
            if not page_targets:
                # No pages found, create a new one
                new_target = await self._cdp_client.send.Target.createTarget(params={'url': 'about:blank'})
                target_id = new_target['targetId']
                self.logger.info(f'📄 Created new blank page with target ID: {target_id}')
            else:
                # Use the first available page
                target_id = page_targets[0]['targetId']
                self.logger.info(f'📄 Using existing page with target ID: {target_id}')
            # Store the current page target ID
            self.current_target_id = target_id
            # Pre-create cached session for the current target (enables all domains)
            try:
                await self.get_cdp_session(target_id)
                self.logger.info(f'🌐 CDP session cached and domains enabled for target {target_id[:8]}...')
            except Exception as e:
                self.logger.warning(f'Failed to create CDP session: {e}')
        except Exception as e:
            # Fatal error - browser is not usable without CDP connection
            self.logger.error(f'❌ FATAL: Failed to setup CDP connection: {e}')
            self.logger.error('❌ Browser cannot continue without CDP connection')
            # Clean up any partial state
            self._cdp_client = None
            self.current_target_id = None
            # Re-raise as a fatal error
            raise RuntimeError(f'Failed to establish CDP connection to browser: {e}') from e
    async def setup_domservice_init_scripts(self, retry_count: int = 0) -> None:
        """Build the DOM-service page init script (permissions shim + addEventListener tracker).

        NOTE(review): currently a no-op — the script is defined but never injected;
        the Playwright `add_init_script` call below is commented out pending a
        CDP-based replacement, and `retry_count` is unused.
        """
        # self.logger.debug('Setting up init scripts in browser')
        init_script = """
        // check to make sure we're not inside the PDF viewer
        window.isPdfViewer = !!document?.body?.querySelector('body > embed[type="application/pdf"][width="100%"]')
        if (!window.isPdfViewer) {

            // Permissions
            const originalQuery = window.navigator.permissions.query;
            window.navigator.permissions.query = (parameters) => (
                parameters.name === 'notifications' ?
                    Promise.resolve({ state: Notification.permission }) :
                    originalQuery(parameters)
            );
            (() => {
                if (window._eventListenerTrackerInitialized) return;
                window._eventListenerTrackerInitialized = true;

                const originalAddEventListener = EventTarget.prototype.addEventListener;
                const eventListenersMap = new WeakMap();

                EventTarget.prototype.addEventListener = function(type, listener, options) {
                    if (typeof listener === "function") {
                        let listeners = eventListenersMap.get(this);
                        if (!listeners) {
                            listeners = [];
                            eventListenersMap.set(this, listeners);
                        }

                        listeners.push({
                            type,
                            listener,
                            listenerPreview: listener.toString().slice(0, 100),
                            options
                        });
                    }

                    return originalAddEventListener.call(this, type, listener, options);
                };

                window.getEventListenersForNode = (node) => {
                    const listeners = eventListenersMap.get(node) || [];
                    return listeners.map(({ type, listenerPreview, options }) => ({
                        type,
                        listenerPreview,
                        options
                    }));
                };
            })();
        }
        """
        # TODO: convert this to pure cdp-use and/or move it to the dom_watchdog.py
        # await self.browser_context.add_init_script(init_script)
@property
async def target_ids(self) -> list[str]:
"""Get all open page target IDs using CDP."""
try:
pages = await self._cdp_get_all_pages()
return [page['targetId'] for page in pages]
except Exception:
return []
async def get_target_id_by_tab_index(self, tab_index: int) -> str | None:
"""Get target ID by tab index."""
target_ids = await self.target_ids
if 0 <= tab_index < len(target_ids):
return target_ids[tab_index]
return None
async def get_tab_index(self, target_id: str) -> int:
"""Get tab index for a target ID."""
target_ids = await self.target_ids
if target_id in target_ids:
return target_ids.index(target_id)
return -1
    async def get_tabs_info(self) -> list[TabInfo]:
        """Get information about all open tabs using CDP Target.getTargetInfo for speed.

        Returns:
            One TabInfo per page target (CDP enumeration order). Titles fall back
            to URL-derived values when the target info lookup fails.
        """
        tabs = []
        # Get all page targets using CDP
        pages = await self._cdp_get_all_pages()
        for i, page_target in enumerate(pages):
            target_id = page_target['targetId']
            url = page_target['url']
            # Try to get the title directly from Target.getTargetInfo - much faster!
            # The initial getTargets() doesn't include title, but getTargetInfo does
            try:
                target_info = await self.cdp_client.send.Target.getTargetInfo(params={'targetId': target_id})
                # The title is directly available in targetInfo
                title = target_info.get('targetInfo', {}).get('title', '')
                # Skip JS execution for chrome:// pages and new tab pages
                if is_new_tab_page(url) or url.startswith('chrome://'):
                    # Use URL as title for chrome pages, or mark new tabs as unusable
                    if is_new_tab_page(url):
                        title = 'ignore this tab and do not use it'
                    elif not title:
                        # For chrome:// pages without a title, use the URL itself
                        title = url
                # Special handling for PDF pages without titles
                # NOTE(review): `'pdf' in url` also matches non-PDF URLs containing "pdf" — confirm intent
                if (not title or title == '') and (url.endswith('.pdf') or 'pdf' in url):
                    # PDF pages might not have a title, use URL filename
                    try:
                        from urllib.parse import urlparse

                        filename = urlparse(url).path.split('/')[-1]
                        if filename:
                            title = filename
                    except Exception:
                        pass
            except Exception as e:
                # Fallback to basic title handling when getTargetInfo fails
                self.logger.debug(f'⚠️ Failed to get target info for tab #{i}: {_log_pretty_url(url)} - {type(e).__name__}')
                if is_new_tab_page(url):
                    title = 'ignore this tab and do not use it'
                elif url.startswith('chrome://'):
                    title = url
                else:
                    title = ''
            tab_info = TabInfo(
                page_id=i,
                url=url,
                title=title,
                parent_page_id=None,
                id=target_id,  # Use target ID as the unique identifier
                index=i,
            )
            tabs.append(tab_info)
        return tabs
# DOM element methods
# Removed duplicate get_browser_state_with_recovery - using the decorated version below
@observe_debug(ignore_input=True, ignore_output=True, name='get_minimal_state_summary')
@time_execution_async('--get_minimal_state_summary')
async def get_minimal_state_summary(self) -> BrowserStateSummary:
"""Get basic page info without DOM processing, but try to capture screenshot"""
from browser_use.browser.views import BrowserStateSummary
from browser_use.dom.views import EnhancedDOMTreeNode as DOMElementNode
from browser_use.dom.views import NodeType, SerializedDOMState
# Get basic info - no DOM parsing to avoid errors
url = await self.get_current_page_url() or 'unknown'
# Try to get title safely
try:
# timeout after 2 seconds
title = await asyncio.wait_for(self.get_current_page_title(), timeout=2.0)
except Exception:
title = 'Page Load Error'
# Try to get tabs info safely
try:
# timeout after 2 seconds
tabs_info = await retry(timeout=2, retries=0)(self.get_tabs_info)()
except Exception:
tabs_info = []
# Create minimal DOM element for error state
minimal_element_tree = DOMElementNode(
node_id=1,
backend_node_id=1,
node_type=NodeType.ELEMENT_NODE,
node_name='body',
node_value='',
attributes={},
is_scrollable=False,
is_visible=True,
absolute_position=None,
frame_id=None,
target_id=self.current_target_id,
content_document=None,
shadow_root_type=None,
shadow_roots=None,
parent_node=None,
children_nodes=[],
ax_node=None,
snapshot_node=None,
)
# Check if current page is a PDF viewer
is_pdf_viewer = await self._is_pdf_viewer(page)
# Create minimal SerializedDOMState
minimal_dom_state = SerializedDOMState(
_root=None, # No simplified tree for minimal state
selector_map={}, # Empty selector map
)
return BrowserStateSummary(
dom_state=minimal_dom_state,
url=url,
title=title,
tabs=tabs_info,
pixels_above=0,
pixels_below=0,
browser_errors=[f'Page state retrieval failed, minimal recovery applied for {url}'],
is_pdf_viewer=is_pdf_viewer,
recent_events='',
)
    @observe_debug(ignore_input=True, ignore_output=True, name='get_updated_state')
    async def _get_updated_state(self, focus_element: int = -1, include_screenshot: bool = True) -> BrowserStateSummary:
        """Update and return state.

        Builds a full BrowserStateSummary: fast path for empty/chrome:// pages,
        otherwise DOM processing via the event bus with a minimal-state fallback.
        Caches the result on self.browser_state_summary and returns it.

        NOTE(review): several calls below still reference an undefined name `page`
        (a leftover from the removed Playwright API) — flagged inline. They raise
        NameError at runtime, which the surrounding try/excepts absorb or convert
        into the generic failure path. `focus_element` is currently unused.

        NOTE(review): self.browser_state_summary is not a declared pydantic field in
        the visible class body — confirm it is declared elsewhere, otherwise the
        assignment will be rejected by pydantic (extra='forbid').
        """
        # Get current page URL
        page_url = await self.get_current_page_url()
        # Check if this is a new tab or chrome:// page early for optimization
        is_empty_page = is_new_tab_page(page_url) or page_url.startswith('chrome://')
        try:
            # Fast path for empty pages - skip all expensive operations
            if is_empty_page:
                self.logger.debug(f'⚡ Fast path for empty page: {page_url}')
                # Create minimal DOM state immediately - just return None for now
                # since DOM classes have been refactored
                content = None
                # Get tabs info
                tabs_info = await self.get_tabs_info()
                # Skip screenshot for empty pages
                screenshot_b64 = None
                # Use default viewport dimensions from browser profile
                viewport = self.browser_profile.viewport or {'width': 1280, 'height': 720}
                page_info = PageInfo(
                    viewport_width=viewport['width'],
                    viewport_height=viewport['height'],
                    page_width=viewport['width'],
                    page_height=viewport['height'],
                    scroll_x=0,
                    scroll_y=0,
                    pixels_above=0,
                    pixels_below=0,
                    pixels_left=0,
                    pixels_right=0,
                )
                # Return minimal state immediately
                self.browser_state_summary = BrowserStateSummary(
                    dom_state=content,
                    url=page_url,
                    title='New Tab' if is_new_tab_page(page_url) else 'Chrome Page',
                    tabs=tabs_info,
                    screenshot=screenshot_b64,
                    page_info=page_info,
                    pixels_above=0,
                    pixels_below=0,
                    browser_errors=[],
                    is_pdf_viewer=False,
                )
                return self.browser_state_summary
            # Normal path for regular pages
            self.logger.debug('🧹 Removing highlights...')
            try:
                await self.remove_highlights()
            except TimeoutError:
                self.logger.debug('Timeout to remove highlights')
            # Check for PDF and auto-download if needed
            try:
                # NOTE(review): `page` is undefined here (Playwright leftover) — this
                # raises NameError, which the except below logs at debug level, so the
                # PDF auto-download is effectively dead code. TODO: fix the argument.
                pdf_path = await self._auto_download_pdf_if_needed(page)
                if pdf_path:
                    self.logger.info(f'📄 PDF auto-downloaded: {pdf_path}')
            except Exception as e:
                self.logger.debug(f'PDF auto-download check failed: {type(e).__name__}: {e}')
            self.logger.debug('🌳 Starting DOM processing...')
            from browser_use.browser.events import BrowserStateRequestEvent
            from browser_use.dom.views import SerializedDOMState

            try:
                # Use the DOMWatchdog via event bus - request state with DOM
                result = await asyncio.wait_for(
                    self.event_bus.dispatch(BrowserStateRequestEvent(include_dom=True, include_screenshot=False)),
                    timeout=45.0,  # 45 second timeout for DOM processing - generous for complex pages
                )
                state_summary = await result.event_result()
                content = state_summary.dom_state if state_summary else None
                self.logger.debug('✅ DOM processing completed')
            except (TimeoutError, Exception) as e:
                if isinstance(e, TimeoutError):
                    self.logger.warning(f'DOM processing timed out after 45 seconds for {page_url}')
                else:
                    self.logger.warning(f'DOM processing failed: {e}')
                self.logger.warning('🔄 Falling back to minimal DOM state to allow basic navigation...')
                # Create minimal DOM state for basic navigation
                content = SerializedDOMState(
                    _root=None,  # No simplified tree for minimal state
                    selector_map={},  # Empty selector map
                )
            self.logger.debug('📋 Getting tabs info...')
            tabs_info = await self.get_tabs_info()
            self.logger.debug('✅ Tabs info completed')
            # Get all cross-origin iframes within the page and open them in new tabs
            # mark the titles of the new tabs so the LLM knows to check them for additional content
            # unfortunately too buggy for now, too many sites use invisible cross-origin iframes for ads, tracking, youtube videos, social media, etc.
            # and it distracts the bot by opening a lot of new tabs
            # iframe_urls = await dom_service.get_cross_origin_iframes()
            # outer_page = self.current_target_id
            # for url in iframe_urls:
            #     if url in [tab.url for tab in tabs_info]:
            #         continue  # skip if the iframe if we already have it open in a tab
            #     new_page_id = tabs_info[-1].page_id + 1
            #     self.logger.debug(f'Opening cross-origin iframe in new tab #{new_page_id}: {url}')
            #     await self.create_new_tab(url)
            #     tabs_info.append(
            #         TabInfo(
            #             page_id=new_page_id,
            #             url=url,
            #             title=f'iFrame opened as new tab, treat as if embedded inside page {outer_page.url}: {page.url}',
            #             parent_page_url=outer_page.url,
            #         )
            #     )
            if include_screenshot:
                try:
                    self.logger.debug('📸 Capturing screenshot...')
                    # Reasonable timeout for screenshot
                    screenshot_b64 = await self.take_screenshot()
                    # self.logger.debug('✅ Screenshot completed')
                except Exception as e:
                    # NOTE(review): `page.url` below is undefined (Playwright leftover);
                    # if the screenshot fails, this line itself raises NameError and
                    # escapes to the outer except. TODO: use page_url instead.
                    self.logger.warning(f'❌ Screenshot failed for {_log_pretty_url(page.url)}: {type(e).__name__} {e}')
                    screenshot_b64 = None
            else:
                screenshot_b64 = None
            # Get comprehensive page information
            # NOTE(review): `page` is undefined here — NameError, caught by the outer
            # except, so the whole normal path currently fails. TODO: fix the argument.
            page_info = await self.get_page_info(page)
            try:
                self.logger.debug('📏 Getting scroll info...')
                # NOTE(review): `page` is undefined here too — caught by the except below.
                pixels_above, pixels_below = await asyncio.wait_for(self.get_scroll_info(page), timeout=5.0)
                self.logger.debug('✅ Scroll info completed')
            except Exception as e:
                self.logger.warning(f'Failed to get scroll info: {type(e).__name__}')
                pixels_above, pixels_below = 0, 0
            try:
                title = await asyncio.wait_for(self.get_current_page_title(), timeout=3.0)
            except Exception:
                title = 'Title unavailable'
            # Check if this is a minimal fallback state
            browser_errors = []
            if not content.selector_map:  # Empty selector map indicates fallback state
                browser_errors.append(
                    f'DOM processing timed out for {page_url} - using minimal state. Basic navigation still available via go_to_url, scroll, and search actions.'
                )
            # Check if current page is a PDF viewer
            # NOTE(review): `page` is undefined here as well — TODO: fix the argument.
            is_pdf_viewer = await self._is_pdf_viewer(page)
            self.browser_state_summary = BrowserStateSummary(
                dom_state=content,
                url=page_url,
                title=title,
                tabs=tabs_info,
                screenshot=screenshot_b64,
                page_info=page_info,
                pixels_above=pixels_above,
                pixels_below=pixels_below,
                browser_errors=browser_errors,
                is_pdf_viewer=is_pdf_viewer,
            )
            self.logger.debug('✅ get_state_summary completed successfully')
            return self.browser_state_summary
        except Exception as e:
            self.logger.error(f'❌ Failed to update browser_state_summary: {type(e).__name__}: {e}')
            # Return last known good state if available
            if hasattr(self, 'browser_state_summary'):
                return self.browser_state_summary
            raise
# ========== CDP Helper Methods ==========
async def cdp_clients_for_target(self, target_id: str) -> list['CDPClient']:
"""Get CDP clients for a target, including main and iframe sessions.
Returns list with root target session first, then iframe sessions.
"""
if not self.cdp_client:
raise ValueError('CDP client not initialized')
clients = []
# Get cached session for main target
client, session_id = await self.get_cdp_session(target_id)
# For now, return just the main client with session
# In future, we'd enumerate iframes and attach to them too
clients.append(client)
return clients
async def cdp_client_for_node(self, node: 'EnhancedDOMTreeNode') -> 'CDPClient':
"""Get CDP client for a specific DOM node based on its frame."""
if node.frame_id:
return await self.cdp_client_for_frame(node.frame_id)
return self.cdp_client
async def frames_by_target(self, target_id: str) -> list[str]:
"""Get all frame IDs for a target."""
# Get frame tree using helper
frame_tree = await self._cdp_execute_on_target(target_id, commands=[('Page.getFrameTree', {})])
# Extract frame IDs recursively
frame_ids = []
def extract_frames(tree_node):
frame_ids.append(tree_node['frame']['id'])
for child in tree_node.get('childFrames', []):
extract_frames(child)
extract_frames(frame_tree['frameTree'])
return frame_ids
async def target_id_by_frame_id(self, frame_id: str) -> str | None:
"""Get target ID for a given frame ID.
Note: This requires iterating through all targets to find the frame.
"""
targets = await self.cdp_client.send.Target.getTargets()
for target in targets['targetInfos']:
# Skip invalid targets
if not self._is_valid_target(
target, include_http=True, include_about=True, include_pages=True, include_iframes=True, include_workers=False
):
continue
# Check if this target contains the frame
frames = await self.frames_by_target(target['targetId'])
if frame_id in frames:
return target['targetId']
return None
async def get_current_page_cdp_session_id(self) -> str | None:
"""Get the CDP session ID for the current page."""
if not hasattr(self, 'current_target_id') or not self.current_target_id:
return None
# Get cached session ID
client, session_id = await self.get_cdp_session(self.current_target_id)
return session_id
async def _create_fresh_cdp_client(self) -> Any:
"""Create a new CDP client instance. Caller is responsible for cleanup."""
if not self.cdp_url:
raise ValueError('CDP URL is not set')
import httpx
from cdp_use import CDPClient
# If the cdp_url is already a websocket URL, use it as-is.
if self.cdp_url.startswith('ws'):
ws_url = self.cdp_url
else:
# Otherwise, treat it as the DevTools HTTP root and fetch the websocket URL.
url = self.cdp_url.rstrip('/')
if not url.endswith('/json/version'):
url = url + '/json/version'
async with httpx.AsyncClient() as client:
version_info = await client.get(url)
ws_url = version_info.json()['webSocketDebuggerUrl']
cdp_client = CDPClient(ws_url)
await cdp_client.start()
return cdp_client
async def create_cdp_session_for_target(self, target_id: str) -> Any:
    """Return the cached (CDPClient, session_id) pair for *target_id*.

    Despite the name, no fresh attach happens: this delegates to the session cache.
    """
    return await self.get_cdp_session(target_id)
async def create_cdp_session_for_frame(self, frame_id: str) -> Any:
    """Locate the page target whose frame tree contains *frame_id*.

    Returns:
        The cached (CDPClient, session_id) pair of the owning target.

    Raises:
        ValueError: when no page target owns the frame.
    """

    def contains(node: dict) -> bool:
        # Depth-first search of a Page.getFrameTree node for frame_id.
        if node['frame']['id'] == frame_id:
            return True
        return any(contains(child) for child in node.get('childFrames', []))

    targets = await self.cdp_client.send.Target.getTargets()
    for info in targets['targetInfos']:
        # Only page-type, valid targets can own a same-process frame tree.
        if not self._is_valid_target(info) or info['type'] != 'page':
            continue
        client, session_id = await self.get_cdp_session(info['targetId'])
        tree = await client.send.Page.getFrameTree(session_id=session_id)
        if contains(tree['frameTree']):
            return await self.get_cdp_session(info['targetId'])
    raise ValueError(f'Frame with ID {frame_id} not found in any target')
async def create_cdp_session_for_node(self, node: Any) -> Any:
    """Return (CDPClient, session_id) for the target hosting *node*.

    Raises:
        ValueError: when the node lacks a target_id, or its backend node
            cannot be described inside that target.
    """
    target_id = getattr(node, 'target_id', None)
    if not target_id:
        raise ValueError(f'Node does not have a target_id: {node}')
    client, session_id = await self.get_cdp_session(target_id)
    try:
        # describeNode doubles as an existence check for the backend node.
        await client.send.DOM.describeNode(params={'backendNodeId': node.backend_node_id}, session_id=session_id)
    except Exception as e:
        raise ValueError(f'Node with backend_node_id {node.backend_node_id} not found in target {node.target_id}: {e}')
    return client, session_id
async def get_current_target_info(self) -> dict | None:
"""Get info about the current active target using CDP."""
if not self.current_target_id:
return None
targets = await self.cdp_client.send.Target.getTargets()
for target in targets.get('targetInfos', []):
if target.get('targetId') == self.current_target_id:
# Still return even if it's not a "valid" target since we're looking for a specific ID
return target
return None
async def get_current_page_url(self) -> str:
    """Return the active page's URL, or '' when no target is active."""
    info = await self.get_current_target_info()
    return info.get('url', '') if info else ''
async def get_current_page_title(self) -> str:
    """Return the active page's document.title via a throwaway CDP session.

    Returns '' when no target is active or any CDP call fails; title lookup
    is cosmetic and must never raise.
    """
    if not self.current_target_id:
        return ''
    try:
        session = await self.cdp_client.send.Target.attachToTarget(
            params={'targetId': self.current_target_id, 'flatten': True}
        )
        session_id = session['sessionId']
        try:
            title_result = await self.cdp_client.send.Runtime.evaluate(
                params={'expression': 'document.title'}, session_id=session_id
            )
            return title_result.get('result', {}).get('value', '')
        finally:
            # Detach even when evaluate fails, so the temporary session is not
            # leaked (previously the detach was skipped on evaluate errors).
            await self.cdp_client.send.Target.detachFromTarget(params={'sessionId': session_id})
    except Exception:
        return ''
# ========== DOM Helper Methods ==========
def update_cached_selector_map(self, selector_map: dict[int, 'EnhancedDOMTreeNode']) -> None:
    """Replace the cached selector map; the DOM watchdog calls this after each rebuild.

    Args:
        selector_map: Fresh index -> node mapping from DOM serialization.
    """
    self._cached_selector_map = selector_map
async def get_dom_element_by_index(self, index: int) -> 'EnhancedDOMTreeNode | None':
    """Look up a serialized-DOM element by its index.

    Prefers the cached selector map; otherwise asks the DOM watchdog, which
    may rebuild the DOM as a side effect.

    Args:
        index: Element index assigned during DOM serialization.

    Returns:
        The matching node, or None when the index is unknown.
    """
    cached = self._cached_selector_map
    if cached and index in cached:
        return cached[index]
    watchdog = self._dom_watchdog
    if not watchdog:
        return None
    node = await watchdog.get_element_by_index(index)
    # Adopt the watchdog's selector map in case it just rebuilt the DOM.
    if watchdog.selector_map:
        self._cached_selector_map = watchdog.selector_map
    return node
# Alias for backwards compatibility
async def get_element_by_index(self, index: int) -> 'EnhancedDOMTreeNode | None':
    """Backwards-compatible alias for get_dom_element_by_index."""
    return await self.get_dom_element_by_index(index)
def is_file_input(self, element: Any) -> bool:
    """Tell whether *element* is an <input type="file"> control.

    Args:
        element: DOM node to inspect.

    Returns:
        True for file inputs, False otherwise.
    """
    if self._dom_watchdog:
        # The watchdog has the authoritative check.
        return self._dom_watchdog.is_file_input(element)
    # Watchdog unavailable: inspect the node's tag and type attribute directly.
    if not (hasattr(element, 'node_name') and element.node_name.upper() == 'INPUT'):
        return False
    if not hasattr(element, 'attributes'):
        return False
    return element.attributes.get('type', '').lower() == 'file'
def clear_dom_cache(self) -> None:
    """Drop cached DOM state so the next access triggers a rebuild."""
    watchdog = self._dom_watchdog
    if watchdog:
        watchdog.clear_cache()
async def get_selector_map(self) -> dict[int, 'EnhancedDOMTreeNode']:
    """Return the current index -> node selector map (possibly empty).

    Resolution order: session cache first, then the DOM watchdog's map,
    then an empty dict.
    """
    if self._cached_selector_map:
        return self._cached_selector_map
    watchdog = self._dom_watchdog
    if watchdog and hasattr(watchdog, 'selector_map'):
        return watchdog.selector_map or {}
    return {}
async def remove_highlights(self) -> None:
    """Best-effort removal of browser-use highlight overlays from the current page."""
    if not self.current_target_id:
        return
    try:
        client, session_id = await self.get_cdp_session(self.current_target_id)
        script = """
        // Remove all browser-use highlight elements
        const highlights = document.querySelectorAll('[data-browser-use-highlight]');
        highlights.forEach(el => el.remove());
        """
        await client.send.Runtime.evaluate(params={'expression': script}, session_id=session_id)
    except Exception as e:
        # Highlight cleanup is non-critical; log and move on.
        self.logger.debug(f'Failed to remove highlights: {e}')
@property
def downloaded_files(self) -> list[str]:
    """Sorted paths of all plain files in the profile's downloads directory."""
    downloads_path = self.browser_profile.downloads_path
    if not downloads_path:
        return []
    directory = Path(downloads_path)
    if not directory.exists():
        return []
    # Plain files only; nested directories are ignored.
    return sorted(str(entry) for entry in directory.iterdir() if entry.is_file())
# ========== CDP-based replacements for browser_context operations ==========
async def _cdp_execute_on_target(
self, target_id: str, commands: list[tuple[str, dict]] | None = None, callable_fn: Any | None = None
) -> Any:
"""Execute CDP commands on a specific target using cached session.
Args:
target_id: The target ID to execute commands on
commands: List of (method, params) tuples to execute, e.g. [('Runtime.evaluate', {'expression': '...'})]
callable_fn: Alternative - async function that receives (cdp_client, session_id) and returns result
Returns:
Result of the last command or callable_fn return value
"""
# Get cached session or create new one
client, session_id = await self.get_cdp_session(target_id)
if callable_fn:
return await callable_fn(client, session_id)
elif commands:
result = None
for method, params in commands:
domain, command = method.split('.')
domain_obj = getattr(client.send, domain)
cmd_func = getattr(domain_obj, command)
result = await cmd_func(params=params, session_id=session_id) if params else await cmd_func(session_id=session_id)
return result
else:
return session_id
async def _cdp_get_all_pages(self) -> list[dict]:
    """List all valid page/tab targets via Target.getTargets."""
    result = await self.cdp_client.send.Target.getTargets()
    pages = []
    for info in result.get('targetInfos', []):
        if self._is_valid_target(info) and info.get('type') in ('page', 'tab'):
            pages.append(info)
    return pages
async def _cdp_create_new_page(self, url: str = 'about:blank') -> str:
    """Open a new foreground tab via Target.createTarget and return its target ID."""
    created = await self.cdp_client.send.Target.createTarget(
        params={'url': url, 'newWindow': False, 'background': False}
    )
    return created['targetId']
async def _cdp_close_page(self, target_id: str) -> None:
    """Close the tab identified by *target_id* via Target.closeTarget."""
    await self.cdp_client.send.Target.closeTarget(params={'targetId': target_id})
async def _cdp_activate_page(self, target_id: str) -> None:
    """Bring the tab identified by *target_id* to the foreground via Target.activateTarget."""
    await self.cdp_client.send.Target.activateTarget(params={'targetId': target_id})
async def _cdp_get_cookies(self, urls: list[str] | None = None) -> list[dict]:
"""Get cookies using CDP Network.getCookies."""
if not self.current_target_id:
return []
client, session_id = await self.get_cdp_session(self.current_target_id)
params = {'urls': urls} if urls else {}
result = await client.send.Network.getCookies(params=params, session_id=session_id)
return result.get('cookies', [])
async def _cdp_set_cookies(self, cookies: list[dict]) -> None:
    """Install *cookies* on the current target via Network.setCookies (no-op when empty)."""
    if not cookies or not self.current_target_id:
        return
    client, session_id = await self.get_cdp_session(self.current_target_id)
    await client.send.Network.setCookies(params={'cookies': cookies}, session_id=session_id)
async def _cdp_clear_cookies(self) -> None:
    """Wipe all browser cookies via Network.clearBrowserCookies."""
    if not self.current_target_id:
        return
    client, session_id = await self.get_cdp_session(self.current_target_id)
    await client.send.Network.clearBrowserCookies(session_id=session_id)
async def _cdp_set_extra_headers(self, headers: dict[str, str]) -> None:
    """Apply extra HTTP headers to the current target via Network.setExtraHTTPHeaders."""
    if not self.current_target_id:
        return
    client, session_id = await self.get_cdp_session(self.current_target_id)
    await client.send.Network.setExtraHTTPHeaders(params={'headers': headers}, session_id=session_id)
async def _cdp_grant_permissions(self, permissions: list[str], origin: str | None = None) -> None:
"""Grant permissions using CDP Browser.grantPermissions."""
params = {'permissions': permissions}
if origin:
params['origin'] = origin
await self.cdp_client.send.Browser.grantPermissions(**params)
async def _cdp_set_geolocation(self, latitude: float, longitude: float, accuracy: float = 100) -> None:
    """Override the browser geolocation via Emulation.setGeolocationOverride."""
    payload = {'latitude': latitude, 'longitude': longitude, 'accuracy': accuracy}
    await self.cdp_client.send.Emulation.setGeolocationOverride(params=payload)
async def _cdp_clear_geolocation(self) -> None:
    """Drop any geolocation override via Emulation.clearGeolocationOverride."""
    await self.cdp_client.send.Emulation.clearGeolocationOverride()
async def _cdp_add_init_script(self, script: str) -> str:
    """Register *script* to run on every new document; returns its CDP identifier."""
    registered = await self.cdp_client.send.Page.addScriptToEvaluateOnNewDocument(params={'source': script})
    return registered['identifier']
async def _cdp_remove_init_script(self, identifier: str) -> None:
    """Unregister a script previously added via addScriptToEvaluateOnNewDocument."""
    await self.cdp_client.send.Page.removeScriptToEvaluateOnNewDocument(params={'identifier': identifier})
async def _cdp_set_viewport(self, width: int, height: int, device_scale_factor: float = 1.0, mobile: bool = False) -> None:
    """Force viewport metrics via Emulation.setDeviceMetricsOverride."""
    metrics = {'width': width, 'height': height, 'deviceScaleFactor': device_scale_factor, 'mobile': mobile}
    await self.cdp_client.send.Emulation.setDeviceMetricsOverride(params=metrics)
async def _cdp_get_storage_state(self) -> dict:
    """Snapshot storage state via CDP.

    Currently cookies only: localStorage/sessionStorage would require
    per-origin JavaScript evaluation, so 'origins' stays empty.
    """
    return {
        'cookies': await self._cdp_get_cookies(),
        'origins': [],
    }
async def _cdp_navigate(self, url: str, target_id: str | None = None) -> None:
"""Navigate to URL using CDP Page.navigate."""
# Use provided target_id or fall back to current_target_id
target_to_use = target_id or self.current_target_id
if not target_to_use:
# If no target available, get the first page target
targets = await self._cdp_get_all_pages()
if targets:
target_to_use = targets[0]['targetId']
self.current_target_id = target_to_use
else:
raise ValueError('No target available for navigation')
# Use helper to navigate on the target
await self._cdp_execute_on_target(target_to_use, commands=[('Page.enable', {}), ('Page.navigate', {'url': url})])
@staticmethod
def _is_valid_target(
    target_info: dict,
    include_http: bool = True,
    include_chrome: bool = False,
    include_chrome_extensions: bool = False,
    include_chrome_error: bool = False,
    include_about: bool = True,
    include_iframes: bool = True,
    include_pages: bool = True,
    include_workers: bool = False,
) -> bool:
    """Decide whether a CDP target should be processed.

    A target passes only when BOTH its URL scheme and its target type are
    enabled by the corresponding include_* flag.

    Args:
        target_info: Target info dict from CDP (reads 'type' and 'url').

    Returns:
        True when the target should be processed, False to skip it.
    """
    target_type = target_info.get('type', '')
    url = target_info.get('url', '')

    url_ok = (
        (include_chrome_error and url.startswith('chrome-error://'))
        or (include_chrome and url.startswith('chrome://'))
        or (include_chrome_extensions and url.startswith('chrome-extension://'))
        # Exact match only: about:srcdoc and other rare about: pages stay excluded.
        or (include_about and url == 'about:blank')
        or (include_http and url.startswith(('http://', 'https://')))
    )
    type_ok = (
        (include_workers and target_type in ('service_worker', 'shared_worker', 'worker'))
        or (include_pages and target_type in ('page', 'tab'))
        or (include_iframes and target_type in ('iframe', 'webview'))
    )
    return url_ok and type_ok
async def get_all_frames(self) -> tuple[dict[str, dict], dict[str, str]]:
    """Get a complete frame hierarchy from all browser targets.

    Walks every valid target's Page.getFrameTree and merges the results so
    that OOPIFs (out-of-process iframes), which appear both as child entries
    in their parent's tree and as standalone iframe targets, end up as a
    single entry pointing at the target that can actually access them.

    Returns:
        Tuple of (all_frames, target_sessions) where:
        - all_frames: dict mapping frame_id -> frame info dict with all metadata
        - target_sessions: dict mapping target_id -> session_id for active sessions
    """
    all_frames = {}  # frame_id -> FrameInfo dict
    target_sessions = {}  # target_id -> session_id (keep sessions alive during collection)
    # Get all targets
    targets = await self.cdp_client.send.Target.getTargets()
    all_targets = targets.get('targetInfos', [])
    # First pass: collect frame trees from ALL targets
    for target in all_targets:
        target_id = target.get('targetId')
        if not target_id:
            continue
        # Skip invalid targets
        if not self._is_valid_target(
            target, include_http=True, include_about=True, include_pages=True, include_iframes=True, include_workers=False
        ):
            continue
        # Get cached session for this target
        client, session_id = await self.get_cdp_session(target_id)
        target_sessions[target_id] = session_id
        try:
            # Try to get frame tree (not all target types support this)
            try:
                frame_tree_result = await client.send.Page.getFrameTree(session_id=session_id)

                # Process the frame tree recursively.
                # NOTE: closes over `target`/`target_id` of the current loop
                # iteration; it is defined and fully consumed inside this
                # iteration, so the late-binding closure is safe here.
                def process_frame_tree(node, parent_frame_id=None):
                    """Recursively process frame tree and add to all_frames."""
                    frame = node.get('frame', {})
                    current_frame_id = frame.get('id')
                    if current_frame_id:
                        # For iframe targets, check if the frame has a parentId field
                        # This indicates it's an OOPIF with a parent in another target
                        actual_parent_id = frame.get('parentId') or parent_frame_id
                        # Create frame info with all CDP response data plus our additions
                        frame_info = {
                            **frame,  # Include all original frame data: id, url, parentId, etc.
                            'frameTargetId': target_id,  # Target that can access this frame
                            'parentFrameId': actual_parent_id,  # Use parentId from frame if available
                            'childFrameIds': [],  # Will be populated below
                            'isCrossOrigin': False,  # Will be determined based on context
                            'isUrlValid': _is_url_valid(frame.get('url', '')),
                        }
                        # Check if frame is cross-origin based on crossOriginIsolatedContextType
                        cross_origin_type = frame.get('crossOriginIsolatedContextType')
                        if cross_origin_type and cross_origin_type != 'NotIsolated':
                            frame_info['isCrossOrigin'] = True
                        # For iframe targets, the frame itself is likely cross-origin
                        if target.get('type') == 'iframe':
                            frame_info['isCrossOrigin'] = True
                        # Add child frame IDs (note: OOPIFs won't appear here)
                        child_frames = node.get('childFrames', [])
                        for child in child_frames:
                            child_frame = child.get('frame', {})
                            child_frame_id = child_frame.get('id')
                            if child_frame_id:
                                frame_info['childFrameIds'].append(child_frame_id)
                        # Store or merge frame info
                        if current_frame_id in all_frames:
                            # Frame already seen from another target, merge info
                            existing = all_frames[current_frame_id]
                            # If this is an iframe target, it has direct access to the frame
                            if target.get('type') == 'iframe':
                                existing['frameTargetId'] = target_id
                                existing['isCrossOrigin'] = True
                        else:
                            all_frames[current_frame_id] = frame_info
                        # Process child frames recursively
                        for child in child_frames:
                            process_frame_tree(child, current_frame_id)

                # Process the entire frame tree
                process_frame_tree(frame_tree_result.get('frameTree', {}))
            except Exception:
                # Target doesn't support Page domain or has no frames
                pass
        except Exception:
            # Error processing this target
            pass
    # Second pass: populate backend node IDs and parent target IDs
    await self._populate_frame_metadata(all_frames, target_sessions)
    return all_frames, target_sessions
async def _populate_frame_metadata(self, all_frames: dict[str, dict], target_sessions: dict[str, str]) -> None:
    """Annotate frames with their parent target ID and owning DOM node IDs.

    Args:
        all_frames: Frame hierarchy dict, mutated in place.
        target_sessions: target_id -> session_id for already-attached targets.
    """
    for frame_id, info in all_frames.items():
        parent_id = info.get('parentFrameId')
        parent_info = all_frames.get(parent_id) if parent_id else None
        if parent_info is None:
            continue
        parent_target_id = parent_info.get('frameTargetId')
        info['parentTargetId'] = parent_target_id
        parent_session_id = target_sessions.get(parent_target_id)
        if parent_session_id is None:
            continue
        try:
            # The DOM domain must be enabled before getFrameOwner works.
            await self.cdp_client.send.DOM.enable(session_id=parent_session_id)
            owner = await self.cdp_client.send.DOM.getFrameOwner(
                params={'frameId': frame_id}, session_id=parent_session_id
            )
            if owner:
                info['backendNodeId'] = owner.get('backendNodeId')
                info['nodeId'] = owner.get('nodeId')
        except Exception:
            # Frame owner lookup fails for cross-origin frames; leave fields unset.
            pass
async def find_frame_target(self, frame_id: str, all_frames: dict[str, dict] | None = None) -> dict | None:
"""Find the frame info for a specific frame ID.
Args:
frame_id: The frame ID to search for
all_frames: Optional pre-built frame hierarchy. If None, will call get_all_frames()
Returns:
Frame info dict if found, None otherwise
"""
if all_frames is None:
all_frames, _ = await self.get_all_frames()
return all_frames.get(frame_id)
async def cdp_client_for_frame(self, frame_id: str) -> Any:
    """Resolve the CDP client/session/target owning *frame_id* (handles OOPIFs).

    Builds the unified frame hierarchy across all targets so out-of-process
    iframes resolve to the target that can actually reach them.

    Args:
        frame_id: Frame to locate across all targets.

    Returns:
        Tuple of (cdp_client, session_id, target_id).

    Raises:
        ValueError: when no target contains the frame.
    """
    all_frames, target_sessions = await self.get_all_frames()
    frame_info = await self.find_frame_target(frame_id, all_frames)
    if frame_info:
        owning_target = frame_info.get('frameTargetId')
        session_id = target_sessions.get(owning_target)
        if session_id is not None:
            return self.cdp_client, session_id, owning_target
    raise ValueError(f"Frame with ID '{frame_id}' not found in any target")
async def cdp_client_for_target(self, target_id: str) -> Any:
    """Return the cached (cdp_client, session_id) pair for *target_id*.

    Thin convenience wrapper over the session cache.
    """
    return await self.get_cdp_session(target_id)
# Import uuid7str for ID generation
try:
    from uuid_extensions import uuid7str
except ImportError:
    import uuid
    # Fallback when uuid_extensions is unavailable: plain UUID4 strings.
    # NOTE(review): the module header already imports uuid7str unconditionally,
    # so this except branch looks unreachable in practice - confirm.
    def uuid7str() -> str:
        return str(uuid.uuid4())
# Fix Pydantic circular dependency for all watchdogs
# This must be called after BrowserSession class is fully defined
_watchdog_modules = [
    'browser_use.browser.crash_watchdog.CrashWatchdog',
    'browser_use.browser.downloads_watchdog.DownloadsWatchdog',
    'browser_use.browser.local_browser_watchdog.LocalBrowserWatchdog',
    'browser_use.browser.storage_state_watchdog.StorageStateWatchdog',
    'browser_use.browser.navigation_watchdog.NavigationWatchdog',
    'browser_use.browser.aboutblank_watchdog.AboutBlankWatchdog',
    'browser_use.browser.default_action_watchdog.DefaultActionWatchdog',
    'browser_use.browser.dom_watchdog.DOMWatchdog',
    'browser_use.browser.screenshot_watchdog.ScreenshotWatchdog',
]
for module_path in _watchdog_modules:
    try:
        # Resolve 'pkg.module.ClassName' to the class and rebuild its schema.
        module_name, class_name = module_path.rsplit('.', 1)
        watchdog_class = getattr(__import__(module_name, fromlist=[class_name]), class_name)
        watchdog_class.model_rebuild()
    except Exception:
        pass  # Ignore if watchdog can't be imported or rebuilt