Merge branch 'main' into os-font-fix

This commit is contained in:
Mert Unsal
2025-08-31 00:48:49 +02:00
committed by GitHub
24 changed files with 1653 additions and 769 deletions

View File

@@ -172,6 +172,11 @@ jobs:
const score = `${passed}/${total}`;
const percentage = Math.round((passed / total) * 100);
// Fail the workflow if 0% pass rate
if (percentage === 0) {
core.setFailed(`Evaluation failed: 0% pass rate (${passed}/${total})`);
}
// Create detailed table
let tableRows = '';
detailedResults.forEach(result => {

View File

@@ -212,10 +212,16 @@ class MessageManager:
# Build the history item
if model_output is None:
# Only add error history item if we have a valid step number
if step_number is not None and step_number > 0:
history_item = HistoryItem(step_number=step_number, error='Agent failed to output in the right format.')
self.state.agent_history_items.append(history_item)
# Add history item for initial actions (step 0) or errors (step > 0)
if step_number is not None:
if step_number == 0 and action_results:
# Step 0 with initial action results
history_item = HistoryItem(step_number=step_number, action_results=action_results)
self.state.agent_history_items.append(history_item)
elif step_number > 0:
# Error case for steps > 0
history_item = HistoryItem(step_number=step_number, error='Agent failed to output in the right format.')
self.state.agent_history_items.append(history_item)
else:
history_item = HistoryItem(
step_number=step_number,

View File

@@ -270,8 +270,19 @@ class Agent(Generic[Context, AgentStructuredOutput]):
# Action setup
self._setup_action_models()
self._set_browser_use_version_and_source(source)
self.initial_actions = self._convert_initial_actions(initial_actions) if initial_actions else None
initial_url = None
# only load url if no initial actions are provided
if self.directly_open_url and not self.state.follow_up_task and not initial_actions:
initial_url = self._extract_url_from_task(self.task)
if initial_url:
self.logger.info(f'🔗 Found URL in task: {initial_url}, adding as initial action...')
initial_actions = [{'go_to_url': {'url': initial_url, 'new_tab': False}}]
self.initial_url = initial_url
self.initial_actions = self._convert_initial_actions(initial_actions) if initial_actions else None
# Verify we can connect to the model
self._verify_and_setup_llm()
@@ -588,7 +599,6 @@ class Agent(Generic[Context, AgentStructuredOutput]):
if hasattr(self, 'cloud_sync') and self.cloud_sync and self.enable_cloud_sync:
self.eventbus.on('*', self.cloud_sync.handle_event)
@observe_debug(ignore_input=True, ignore_output=True, name='_raise_if_stopped_or_paused')
async def _raise_if_stopped_or_paused(self) -> None:
"""Utility function that raises an InterruptedError if the agent is stopped or paused."""
@@ -635,14 +645,10 @@ class Agent(Generic[Context, AgentStructuredOutput]):
self.logger.debug(f'🌐 Step {self.state.n_steps}: Getting browser state...')
# Always take screenshots for all steps
# Use caching based on directly_open_url setting - if directly_open_url is False, don't use cached state
is_first_step = self.state.n_steps in (0, 1)
use_cache = is_first_step and self.directly_open_url
self.logger.debug(f'📸 Requesting browser state with include_screenshot=True, cached={use_cache}')
self.logger.debug('📸 Requesting browser state with include_screenshot=True')
browser_state_summary = await self.browser_session.get_browser_state_summary(
cache_clickable_elements_hashes=True,
include_screenshot=True, # always capture even if use_vision=False so that cloud sync is useful (it's fast now anyway)
cached=use_cache,
include_recent_events=self.include_recent_events,
)
if browser_state_summary.screenshot:
@@ -1160,7 +1166,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
unique_urls = list(set(found_urls))
# If multiple URLs found, skip directly_open_urling
if len(unique_urls) > 1:
self.logger.debug(f'📍 Multiple URLs found ({len(found_urls)}), skipping directly_open_url to avoid ambiguity')
self.logger.debug(f'Multiple URLs found ({len(found_urls)}), skipping directly_open_url to avoid ambiguity')
return None
# If exactly one URL found, return it
@@ -1239,45 +1245,13 @@ class Agent(Generic[Context, AgentStructuredOutput]):
self.logger.debug('🔧 Browser session started with watchdogs attached')
# Check if task contains a URL and add it as an initial action (only if directly_open_url is enabled)
if self.directly_open_url and not self.state.follow_up_task:
initial_url = self._extract_url_from_task(self.task)
if initial_url:
self.logger.info(f'🔗 Found URL in task: {initial_url}, adding as initial action...')
# Ensure browser focus is properly established before executing initial actions
if self.browser_session and self.browser_session.agent_focus:
self.logger.debug(f'🎯 Browser focus established on target: {self.browser_session.agent_focus.target_id[-4:]}')
else:
self.logger.warning('⚠️ No browser focus established, may cause navigation issues')
# Create a go_to_url action for the initial URL
go_to_url_action = {
'go_to_url': {
'url': initial_url,
'new_tab': False, # Navigate in current tab
}
}
# Add to initial_actions or create new list if none exist
if self.initial_actions:
# Convert back to dict format, prepend URL navigation, then convert back
initial_actions_dicts = []
for action in self.initial_actions:
action_data = action.model_dump(exclude_unset=True)
initial_actions_dicts.append(action_data)
# Prepend the go_to_url action
initial_actions_dicts = [go_to_url_action] + initial_actions_dicts
# Convert back to ActionModel instances
self.initial_actions = self._convert_initial_actions(initial_actions_dicts)
else:
# Create new initial_actions with just the go_to_url
self.initial_actions = self._convert_initial_actions([go_to_url_action])
self.logger.debug(f'✅ Added navigation to {initial_url} as initial action')
# Execute initial actions if provided
if self.initial_actions and not self.state.follow_up_task:
self.logger.debug(f'⚡ Executing {len(self.initial_actions)} initial actions...')
result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
self.state.last_result = result
self.logger.debug('✅ Initial actions completed')
await self._execute_initial_actions()
self.logger.debug(f'🔄 Starting main execution loop with max {max_steps} steps...')
for step in range(max_steps):
@@ -1519,6 +1493,8 @@ class Agent(Generic[Context, AgentStructuredOutput]):
new_element_hashes = {e.parent_branch_hash() for e in new_selector_map.values()}
if check_for_new_elements and not new_element_hashes.issubset(cached_element_hashes):
# next action requires index but there are new elements on the page
# log difference in len debug
self.logger.debug(f'New elements: {abs(len(new_element_hashes) - len(cached_element_hashes))}')
remaining_actions_str = get_remaining_actions_str(actions, i)
msg = f'Something new appeared after action {i} / {total_actions}: actions {remaining_actions_str} were not executed'
logger.info(msg)
@@ -1653,6 +1629,17 @@ class Agent(Generic[Context, AgentStructuredOutput]):
return results
async def _execute_initial_actions(self) -> None:
# Execute initial actions if provided
if self.initial_actions and not self.state.follow_up_task:
self.logger.debug(f'⚡ Executing {len(self.initial_actions)} initial actions...')
result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
# Update the first result to mention that it was automatically loaded
if result and self.initial_url and result[0].long_term_memory:
result[0].long_term_memory = f'Found initial url and automatically loaded it. {result[0].long_term_memory}'
self.state.last_result = result
self.logger.debug('Initial actions completed')
async def _execute_history_step(self, history_item: AgentHistory, delay: float) -> list[ActionResult]:
"""Execute a single step from history with element validation"""
assert self.browser_session is not None, 'BrowserSession is not set up'

View File

@@ -583,9 +583,14 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
# --- UI/viewport/DOM ---
highlight_elements: bool = Field(default=True, description='Highlight interactive elements on the page.')
filter_highlight_ids: bool = Field(
default=True, description='Only show element IDs in highlights if llm_representation is less than 10 characters.'
)
# --- Downloads ---
auto_download_pdfs: bool = Field(default=True, description='Automatically download PDFs when navigating to PDF viewer pages.')
auto_download_pdfs: bool = Field(
default=False, description='Automatically download PDFs when navigating to PDF viewer pages.'
)
profile_directory: str = 'Default' # e.g. 'Profile 1', 'Profile 2', 'Custom Profile', etc.

View File

@@ -0,0 +1,476 @@
"""Python-based highlighting system for drawing bounding boxes on screenshots.
This module replaces JavaScript-based highlighting with fast Python image processing
to draw bounding boxes around interactive elements directly on screenshots.
"""
import base64
import io
import logging
from PIL import Image, ImageDraw, ImageFont
from browser_use.dom.views import DOMSelectorMap
from browser_use.observability import observe_debug
from browser_use.utils import time_execution_async
logger = logging.getLogger(__name__)
# Color scheme for different element types
ELEMENT_COLORS = {
'button': '#FF6B6B', # Red for buttons
'input': '#4ECDC4', # Teal for inputs
'select': '#45B7D1', # Blue for dropdowns
'a': '#96CEB4', # Green for links
'textarea': '#FF8C42', # Orange for text areas (was yellow, now more visible)
'default': '#DDA0DD', # Light purple for other interactive elements
}
# Element type mappings
ELEMENT_TYPE_MAP = {
'button': 'button',
'input': 'input',
'select': 'select',
'a': 'a',
'textarea': 'textarea',
}
def get_element_color(tag_name: str, element_type: str | None = None) -> str:
"""Get color for element based on tag name and type."""
# Check input type first
if tag_name == 'input' and element_type:
if element_type in ['button', 'submit']:
return ELEMENT_COLORS['button']
# Use tag-based color
return ELEMENT_COLORS.get(tag_name.lower(), ELEMENT_COLORS['default'])
def should_show_index_overlay(element_index: int | None) -> bool:
"""Determine if index overlay should be shown."""
return element_index is not None
def draw_enhanced_bounding_box_with_text(
    draw,  # ImageDraw.Draw - avoiding type annotation due to PIL typing issues
    bbox: tuple[int, int, int, int],
    color: str,
    text: str | None = None,
    font: ImageFont.FreeTypeFont | None = None,
    element_type: str = 'div',
    image_size: tuple[int, int] = (2000, 1500),
) -> None:
    """Draw a dashed bounding box and, optionally, a large index label.

    Args:
        draw: PIL ``ImageDraw`` object to render onto.
        bbox: Box as ``(x1, y1, x2, y2)`` in image (device-pixel) coordinates.
        color: Hex color string for the box outline and label background.
        text: Index label to render; skipped when ``None`` or empty.
        font: Caller-provided fallback font used when no system TrueType font loads.
        element_type: Tag/type of the element (currently unused when drawing).
        image_size: ``(width, height)`` of the target image; used to scale the
            label font/padding and to keep the label inside the image.
    """
    x1, y1, x2, y2 = bbox

    # Dash pattern for the box outline: 4px dash, 8px gap, 2px stroke.
    dash_length = 4
    gap_length = 8
    line_width = 2

    def draw_dashed_line(start_x, start_y, end_x, end_y):
        """Draw a dashed axis-aligned segment between two points.

        Endpoints are normalized so the dash loop always advances: the previous
        version silently drew nothing when start > end, which lost the bottom
        and left edges of the rectangle.
        """
        if start_x == end_x:  # Vertical line
            y_lo, y_hi = sorted((start_y, end_y))
            y = y_lo
            while y < y_hi:
                dash_end = min(y + dash_length, y_hi)
                draw.line([(start_x, y), (start_x, dash_end)], fill=color, width=line_width)
                y += dash_length + gap_length
        else:  # Horizontal line
            x_lo, x_hi = sorted((start_x, end_x))
            x = x_lo
            while x < x_hi:
                dash_end = min(x + dash_length, x_hi)
                draw.line([(x, start_y), (dash_end, start_y)], fill=color, width=line_width)
                x += dash_length + gap_length

    # Draw all four edges of the dashed rectangle.
    draw_dashed_line(x1, y1, x2, y1)  # Top
    draw_dashed_line(x2, y1, x2, y2)  # Right
    draw_dashed_line(x1, y2, x2, y2)  # Bottom
    draw_dashed_line(x1, y1, x1, y2)  # Left

    # Draw the index overlay, scaled to the viewport, if we have label text.
    if text:
        try:
            img_width, img_height = image_size
            # Font size scales with viewport width (~3%), clamped to 16-48px
            # (36px for a 1200px viewport) for consistent appearance.
            base_font_size = max(16, min(48, int(img_width * 0.03)))
            big_font = None
            try:
                big_font = ImageFont.truetype('/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', base_font_size)
            except OSError:
                try:
                    big_font = ImageFont.truetype('arial.ttf', base_font_size)
                except OSError:
                    # Try system fonts on different platforms
                    try:
                        big_font = ImageFont.truetype('Arial Bold.ttf', base_font_size)
                    except OSError:
                        big_font = font  # Fallback to the caller-provided font

            # Measure the label with whichever font we ended up with.
            if big_font:
                bbox_text = draw.textbbox((0, 0), text, font=big_font)
            else:
                bbox_text = draw.textbbox((0, 0), text)
            text_width = bbox_text[2] - bbox_text[0]
            text_height = bbox_text[3] - bbox_text[1]

            # Padding also scales with viewport width (~0.5%).
            padding = max(4, int(img_width * 0.005))
            element_width = x2 - x1
            element_height = y2 - y1
            container_width = text_width + padding * 2
            container_height = text_height + padding * 2

            # Place the label inside the top-left corner when it fits,
            # otherwise just above the element's top-left corner.
            if element_width >= container_width and element_height >= container_height:
                bg_x1 = x1 + 2  # Small offset from edge
                bg_y1 = y1 + 2
            else:
                bg_x1 = x1
                bg_y1 = max(0, y1 - container_height)
            bg_x2 = bg_x1 + container_width
            bg_y2 = bg_y1 + container_height

            # Center the label in its container; subtracting the text bbox's
            # top offset compensates for the font baseline so digits aren't clipped.
            text_x = bg_x1 + (container_width - text_width) // 2
            text_y = bg_y1 + (container_height - text_height) // 2 - bbox_text[1]

            # Shift the container (and label) back inside the image bounds.
            if bg_x1 < 0:
                offset = -bg_x1
                bg_x1 += offset
                bg_x2 += offset
                text_x += offset
            if bg_y1 < 0:
                offset = -bg_y1
                bg_y1 += offset
                bg_y2 += offset
                text_y += offset
            if bg_x2 > img_width:
                offset = bg_x2 - img_width
                bg_x1 -= offset
                bg_x2 -= offset
                text_x -= offset
            if bg_y2 > img_height:
                offset = bg_y2 - img_height
                bg_y1 -= offset
                bg_y2 -= offset
                text_y -= offset

            # Filled background with a white border, then white label text.
            draw.rectangle([bg_x1, bg_y1, bg_x2, bg_y2], fill=color, outline='white', width=2)
            draw.text((text_x, text_y), text, fill='white', font=big_font or font)
        except Exception as e:
            logger.debug(f'Failed to draw enhanced text overlay: {e}')
def draw_bounding_box_with_text(
    draw,  # ImageDraw.Draw - avoiding type annotation due to PIL typing issues
    bbox: tuple[int, int, int, int],
    color: str,
    text: str | None = None,
    font: ImageFont.FreeTypeFont | None = None,
    image_size: tuple[int, int] = (1200, 800),
) -> None:
    """Draw a dashed bounding box with an optional high-contrast index label.

    Args:
        draw: PIL ``ImageDraw`` object to render onto.
        bbox: Box as ``(x1, y1, x2, y2)`` in image coordinates.
        color: Color for the dashed outline.
        text: Optional index label; its position depends on the element's size.
        font: Optional font for the label (PIL default font when ``None``).
        image_size: ``(width, height)`` used to keep the label on-screen.
            Defaults to the previously hard-coded 1200x800 viewport so
            existing callers see identical behavior.
    """
    x1, y1, x2, y2 = bbox
    img_width, img_height = image_size

    # Dash pattern; each dash is drawn twice, one pixel apart, to thicken the stroke.
    dash_length = 2
    gap_length = 6

    # Top edge
    x = x1
    while x < x2:
        end_x = min(x + dash_length, x2)
        draw.line([(x, y1), (end_x, y1)], fill=color, width=2)
        draw.line([(x, y1 + 1), (end_x, y1 + 1)], fill=color, width=2)
        x += dash_length + gap_length
    # Bottom edge
    x = x1
    while x < x2:
        end_x = min(x + dash_length, x2)
        draw.line([(x, y2), (end_x, y2)], fill=color, width=2)
        draw.line([(x, y2 - 1), (end_x, y2 - 1)], fill=color, width=2)
        x += dash_length + gap_length
    # Left edge
    y = y1
    while y < y2:
        end_y = min(y + dash_length, y2)
        draw.line([(x1, y), (x1, end_y)], fill=color, width=2)
        draw.line([(x1 + 1, y), (x1 + 1, end_y)], fill=color, width=2)
        y += dash_length + gap_length
    # Right edge
    y = y1
    while y < y2:
        end_y = min(y + dash_length, y2)
        draw.line([(x2, y), (x2, end_y)], fill=color, width=2)
        draw.line([(x2 - 1, y), (x2 - 1, end_y)], fill=color, width=2)
        y += dash_length + gap_length

    # Draw index overlay if we have index text.
    if not text:
        return
    try:
        # Measure the label.
        if font:
            bbox_text = draw.textbbox((0, 0), text, font=font)
        else:
            # Fallback for default font
            bbox_text = draw.textbbox((0, 0), text)
        text_width = bbox_text[2] - bbox_text[0]
        text_height = bbox_text[3] - bbox_text[1]

        # Choose a position based on the element's size relative to the label.
        padding = 5
        element_width = x2 - x1
        element_height = y2 - y1
        element_area = element_width * element_height
        index_box_area = (text_width + padding * 2) * (text_height + padding * 2)
        size_ratio = element_area / max(index_box_area, 1)

        if size_ratio < 4:
            # Very small elements: place outside, in the bottom-right corner.
            text_x = x2 + padding
            text_y = y2 - text_height
            # Keep the label on-screen (bound was previously hard-coded to 1200px).
            text_x = min(text_x, img_width - text_width - padding)
            text_y = max(text_y, 0)
        elif size_ratio < 16:
            # Medium elements: bottom-right corner, inside the box.
            text_x = x2 - text_width - padding
            text_y = y2 - text_height - padding
        else:
            # Large elements: centered.
            text_x = x1 + (element_width - text_width) // 2
            text_y = y1 + (element_height - text_height) // 2

        # Clamp the label into the image (bounds were previously 1200x800).
        text_x = max(0, min(text_x, img_width - text_width))
        text_y = max(0, min(text_y, img_height - text_height))

        # White background with a thick black border for maximum contrast.
        bg_x1 = text_x - padding
        bg_y1 = text_y - padding
        bg_x2 = text_x + text_width + padding
        bg_y2 = text_y + text_height + padding
        draw.rectangle([bg_x1, bg_y1, bg_x2, bg_y2], fill='white', outline='black', width=2)
        # Bold dark text on the light background for best contrast.
        draw.text((text_x, text_y), text, fill='black', font=font)
    except Exception as e:
        logger.debug(f'Failed to draw text overlay: {e}')
def process_element_highlight(
    element_id: int,
    element,
    draw,
    device_pixel_ratio: float,
    font,
    filter_highlight_ids: bool,
    image_size: tuple[int, int],
) -> None:
    """Render the highlight (dashed box plus optional index label) for one element.

    Any failure is swallowed and logged at debug level so a single bad element
    cannot break the whole screenshot.
    """
    try:
        bounds = element.absolute_position
        if not bounds:
            return

        # Coordinates arrive in CSS pixels; the screenshot is captured at
        # device-pixel resolution, so scale by the device pixel ratio.
        img_width, img_height = image_size
        left = int(bounds.x * device_pixel_ratio)
        top = int(bounds.y * device_pixel_ratio)
        right = int((bounds.x + bounds.width) * device_pixel_ratio)
        bottom = int((bounds.y + bounds.height) * device_pixel_ratio)

        # Clamp the box into the image, keeping right >= left and bottom >= top.
        left = max(0, min(left, img_width))
        top = max(0, min(top, img_height))
        right = max(left, min(right, img_width))
        bottom = max(top, min(bottom, img_height))

        # Nothing visible to draw for (near-)degenerate boxes.
        if right - left < 2 or bottom - top < 2:
            return

        # Color is chosen from the tag name and, for inputs, the type attribute.
        tag_name = element.tag_name if hasattr(element, 'tag_name') else 'div'
        attributes = getattr(element, 'attributes', None)
        element_type = attributes.get('type') if attributes else None
        color = get_element_color(tag_name, element_type)

        # Decide whether to draw the numeric index label.
        element_index = getattr(element, 'element_index', None)
        index_text = None
        if element_index is not None:
            if not filter_highlight_ids:
                # Always show the ID when filtering is disabled.
                index_text = str(element_index)
            elif len(element.get_meaningful_text_for_llm()) < 5:
                # Label only elements whose LLM-visible text is too short to
                # identify them on its own.
                # NOTE(review): threshold here is 5 chars, but the profile
                # option describes 10 — confirm which is intended.
                index_text = str(element_index)

        draw_enhanced_bounding_box_with_text(
            draw, (left, top, right, bottom), color, index_text, font, tag_name, image_size
        )
    except Exception as e:
        logger.debug(f'Failed to draw highlight for element {element_id}: {e}')
@observe_debug(ignore_input=True, ignore_output=True, name='create_highlighted_screenshot')
@time_execution_async('create_highlighted_screenshot')
async def create_highlighted_screenshot(
    screenshot_b64: str,
    selector_map: DOMSelectorMap,
    device_pixel_ratio: float = 1.0,
    viewport_offset_x: int = 0,
    viewport_offset_y: int = 0,
    filter_highlight_ids: bool = True,
) -> str:
    """Overlay bounding boxes for interactive elements onto a screenshot.

    Args:
        screenshot_b64: Base64 encoded screenshot.
        selector_map: Map of interactive elements with their positions.
        device_pixel_ratio: Device pixel ratio for scaling coordinates.
        viewport_offset_x: X offset for viewport positioning (not read by this body).
        viewport_offset_y: Y offset for viewport positioning (not read by this body).
        filter_highlight_ids: When True, only label elements with short LLM-visible text.

    Returns:
        Base64 encoded highlighted screenshot; the original screenshot is
        returned unchanged if anything goes wrong.
    """
    try:
        raw = base64.b64decode(screenshot_b64)
        image = Image.open(io.BytesIO(raw)).convert('RGBA')
        draw = ImageDraw.Draw(image)

        # Best-effort font loading: the first system font that exists wins;
        # fall back to PIL's built-in default font (None) otherwise.
        font = None
        for candidate in ('/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', 'arial.ttf'):
            try:
                font = ImageFont.truetype(candidate, 12)
                break
            except OSError:
                continue

        # PIL's ImageDraw is not thread-safe, so elements are drawn one at a time.
        for element_id, element in selector_map.items():
            process_element_highlight(
                element_id, element, draw, device_pixel_ratio, font, filter_highlight_ids, image.size
            )

        # Re-encode the annotated image back to base64 PNG.
        buffer = io.BytesIO()
        image.save(buffer, format='PNG')
        buffer.seek(0)
        encoded = base64.b64encode(buffer.getvalue()).decode('utf-8')
        logger.debug(f'Successfully created highlighted screenshot with {len(selector_map)} elements')
        return encoded
    except Exception as e:
        logger.error(f'Failed to create highlighted screenshot: {e}')
        # Return original screenshot on error
        return screenshot_b64
async def get_viewport_info_from_cdp(cdp_session) -> tuple[float, int, int]:
    """Read viewport metrics from a CDP session via ``Page.getLayoutMetrics``.

    The device pixel ratio is derived as visual-viewport width divided by
    CSS-viewport width; scroll offsets come from the CSS visual viewport.

    Returns:
        ``(device_pixel_ratio, scroll_x, scroll_y)``; falls back to
        ``(1.0, 0, 0)`` when the CDP call fails.
    """
    try:
        metrics = await cdp_session.cdp_client.send.Page.getLayoutMetrics(session_id=cdp_session.session_id)

        visual = metrics.get('visualViewport', {})
        css_visual = metrics.get('cssVisualViewport', {})
        css_layout = metrics.get('cssLayoutViewport', {})

        # Ratio of device-pixel width to CSS-pixel width (guard against /0).
        css_width = css_visual.get('clientWidth', css_layout.get('clientWidth', 1280.0))
        device_width = visual.get('clientWidth', css_width)
        ratio = device_width / css_width if css_width > 0 else 1.0

        # Scroll position in CSS pixels.
        scroll_x = int(css_visual.get('pageX', 0))
        scroll_y = int(css_visual.get('pageY', 0))
        return float(ratio), scroll_x, scroll_y
    except Exception as e:
        logger.debug(f'Failed to get viewport info from CDP: {e}')
        return 1.0, 0, 0
@observe_debug(ignore_input=True, ignore_output=True, name='create_highlighted_screenshot_async')
@time_execution_async('create_highlighted_screenshot_async')
async def create_highlighted_screenshot_async(
    screenshot_b64: str, selector_map: DOMSelectorMap, cdp_session=None, filter_highlight_ids: bool = True
) -> str:
    """Convenience wrapper that pulls viewport info from CDP before highlighting.

    Args:
        screenshot_b64: Base64 encoded screenshot.
        selector_map: Map of interactive elements.
        cdp_session: Optional CDP session used to read the device pixel ratio
            and scroll offsets.
        filter_highlight_ids: Forwarded to ``create_highlighted_screenshot``.

    Returns:
        Base64 encoded highlighted screenshot.
    """
    # Defaults used when no CDP session is available or the lookup fails.
    device_pixel_ratio, offset_x, offset_y = 1.0, 0, 0
    if cdp_session:
        try:
            device_pixel_ratio, offset_x, offset_y = await get_viewport_info_from_cdp(cdp_session)
        except Exception as e:
            logger.debug(f'Failed to get viewport info from CDP: {e}')

    return await create_highlighted_screenshot(
        screenshot_b64, selector_map, device_pixel_ratio, offset_x, offset_y, filter_highlight_ids
    )

View File

@@ -39,7 +39,8 @@ from browser_use.browser.events import (
from browser_use.browser.profile import BrowserProfile, ProxySettings
from browser_use.browser.views import BrowserStateSummary, TabInfo
from browser_use.dom.views import EnhancedDOMTreeNode, TargetInfo
from browser_use.utils import _log_pretty_url, is_new_tab_page
from browser_use.observability import observe_debug
from browser_use.utils import _log_pretty_url, is_new_tab_page, time_execution_async
DEFAULT_BROWSER_PROFILE = BrowserProfile()
@@ -264,6 +265,7 @@ class BrowserSession(BaseModel):
wait_for_network_idle_page_load_time: float | None = None,
wait_between_actions: float | None = None,
highlight_elements: bool | None = None,
filter_highlight_ids: bool | None = None,
auto_download_pdfs: bool | None = None,
profile_directory: str | None = None,
):
@@ -536,6 +538,18 @@ class BrowserSession(BaseModel):
target_id = None
# If new_tab=True but we're already in a new tab, set new_tab=False
if event.new_tab:
try:
current_url = await self.get_current_page_url()
from browser_use.utils import is_new_tab_page
if is_new_tab_page(current_url):
self.logger.debug(f'[on_NavigateToUrlEvent] Already in new tab ({current_url}), setting new_tab=False')
event.new_tab = False
except Exception as e:
self.logger.debug(f'[on_NavigateToUrlEvent] Could not check current URL: {e}')
# check if the url is already open in a tab somewhere that we're not currently on, if so, short-circuit and just switch to it
targets = await self._cdp_get_all_pages()
for target in targets:
@@ -584,10 +598,18 @@ class BrowserSession(BaseModel):
# Use current tab
target_id = target_id or self.agent_focus.target_id
# Activate target (bring to foreground)
await self.event_bus.dispatch(SwitchTabEvent(target_id=target_id))
# which does this for us:
# self.agent_focus = await self.get_or_create_cdp_session(target_id)
# Only switch tab if we're not already on the target tab
if self.agent_focus is None or self.agent_focus.target_id != target_id:
self.logger.debug(
f'[on_NavigateToUrlEvent] Switching to target tab {target_id[-4:]} (current: {self.agent_focus.target_id[-4:] if self.agent_focus else "none"})'
)
# Activate target (bring to foreground)
await self.event_bus.dispatch(SwitchTabEvent(target_id=target_id))
# which does this for us:
# self.agent_focus = await self.get_or_create_cdp_session(target_id)
else:
self.logger.debug(f'[on_NavigateToUrlEvent] Already on target tab {target_id[-4:]}, skipping SwitchTabEvent')
assert self.agent_focus is not None and self.agent_focus.target_id == target_id, (
'Agent focus not updated to new target_id after SwitchTabEvent should have switched to it'
)
@@ -605,8 +627,8 @@ class BrowserSession(BaseModel):
session_id=self.agent_focus.session_id,
)
# Wait a bit to ensure page starts loading
await asyncio.sleep(0.5)
# # Wait a bit to ensure page starts loading
# await asyncio.sleep(0.5)
# Dispatch navigation complete
self.logger.debug(f'Dispatching NavigationCompleteEvent for {event.url} (tab #{target_id[-4:]})')
@@ -678,8 +700,8 @@ class BrowserSession(BaseModel):
"""Handle tab closure - update focus if needed."""
cdp_session = await self.get_or_create_cdp_session(target_id=None, focus=False)
await cdp_session.cdp_client.send.Target.closeTarget(params={'targetId': event.target_id})
await self.event_bus.dispatch(TabClosedEvent(target_id=event.target_id))
await cdp_session.cdp_client.send.Target.closeTarget(params={'targetId': event.target_id})
async def on_TabClosedEvent(self, event: TabClosedEvent) -> None:
"""Handle tab closure - update focus if needed."""
@@ -791,6 +813,8 @@ class BrowserSession(BaseModel):
assert self._cdp_client_root is not None, 'CDP client not initialized - browser may not be connected yet'
return self._cdp_client_root
@time_execution_async('get_or_create_cdp_session')
@observe_debug(ignore_input=True, ignore_output=True, name='get_or_create_cdp_session')
async def get_or_create_cdp_session(
self, target_id: TargetID | None = None, focus: bool = True, new_socket: bool | None = None
) -> CDPSession:
@@ -845,6 +869,8 @@ class BrowserSession(BaseModel):
cdp_url=self.cdp_url if should_use_new_socket else None,
)
self._cdp_session_pool[target_id] = session
# log length of _cdp_session_pool
self.logger.debug(f'[get_or_create_cdp_session] new _cdp_session_pool length: {len(self._cdp_session_pool)}')
# Only change agent focus if requested
if focus:
@@ -870,7 +896,7 @@ class BrowserSession(BaseModel):
return self.agent_focus.session_id if self.agent_focus else None
# ========== Helper Methods ==========
@observe_debug(ignore_input=True, ignore_output=True, name='get_browser_state_summary')
async def get_browser_state_summary(
self,
cache_clickable_elements_hashes: bool = True,
@@ -1321,6 +1347,7 @@ class BrowserSession(BaseModel):
except Exception as e:
self.logger.debug(f'Skipping proxy auth setup: {type(e).__name__}: {e}')
@observe_debug(ignore_input=True, ignore_output=True, name='get_tabs')
async def get_tabs(self) -> list[TabInfo]:
"""Get information about all open tabs using CDP Target.getTargetInfo for speed."""
tabs = []
@@ -1399,6 +1426,7 @@ class BrowserSession(BaseModel):
return target
return None
@observe_debug(ignore_input=True, ignore_output=True, name='get_current_page_url')
async def get_current_page_url(self) -> str:
"""Get the URL of the current page using CDP."""
target = await self.get_current_target_info()
@@ -1519,6 +1547,9 @@ class BrowserSession(BaseModel):
async def remove_highlights(self) -> None:
"""Remove highlights from the page using CDP."""
if not self.browser_profile.highlight_elements:
return
try:
# Get cached session
cdp_session = await self.get_or_create_cdp_session()

View File

@@ -126,17 +126,42 @@ class BrowserStateHistory:
class BrowserError(Exception):
"""Base class for all browser errors"""
"""Browser error with structured memory for LLM context management.
This exception class provides separate memory contexts for browser actions:
- short_term_memory: Immediate context shown once to the LLM for the next action
- long_term_memory: Persistent error information stored across steps
"""
message: str
short_term_memory: str | None = None
long_term_memory: str | None = None
details: dict[str, Any] | None = None
while_handling_event: BaseEvent[Any] | None = None
def __init__(self, message: str, details: dict[str, Any] | None = None, event: BaseEvent[Any] | None = None):
def __init__(
self,
message: str,
short_term_memory: str | None = None,
long_term_memory: str | None = None,
details: dict[str, Any] | None = None,
event: BaseEvent[Any] | None = None,
):
"""Initialize a BrowserError with structured memory contexts.
Args:
message: Technical error message for logging and debugging
short_term_memory: Context shown once to LLM (e.g., available actions, options)
long_term_memory: Persistent error info stored in agent memory
details: Additional metadata for debugging
event: The browser event that triggered this error
"""
self.message = message
super().__init__(message)
self.short_term_memory = short_term_memory
self.long_term_memory = long_term_memory
self.details = details
self.while_handling_event = event
super().__init__(message)
def __str__(self) -> str:
if self.details:

File diff suppressed because it is too large Load Diff

View File

@@ -16,6 +16,8 @@ from browser_use.dom.views import (
EnhancedDOMTreeNode,
SerializedDOMState,
)
from browser_use.observability import observe_debug
from browser_use.utils import time_execution_async
if TYPE_CHECKING:
from browser_use.browser.views import BrowserStateSummary, PageInfo
@@ -42,70 +44,7 @@ class DOMWatchdog(BaseWatchdog):
async def on_TabCreatedEvent(self, event: TabCreatedEvent) -> None:
    """Inject the DOM-service init script into a newly created tab.

    The script runs before any page JS and:
      * spoofs ``navigator.permissions.query`` for the ``notifications``
        permission so it reports ``Notification.permission`` instead of prompting,
      * wraps ``EventTarget.prototype.addEventListener`` so listeners registered
        by page JS can later be enumerated via ``window.getEventListenersForNode``.

    The script is a no-op inside Chrome's PDF viewer (detected via the
    full-width ``embed[type="application/pdf"]``), and injection failures caused
    by the Page CDP domain not being attached yet are tolerated.

    Args:
        event: Tab-creation event (unused beyond triggering the injection).
    """
    # self.logger.debug('Setting up init scripts in browser')
    self.logger.debug('💉 Injecting DOM Service init script to track event listeners added to DOM elements by JS...')
    init_script = """
    // check to make sure we're not inside the PDF viewer
    window.isPdfViewer = !!document?.body?.querySelector('body > embed[type="application/pdf"][width="100%"]')
    if (!window.isPdfViewer) {

        // Permissions
        const originalQuery = window.navigator.permissions.query;
        window.navigator.permissions.query = (parameters) => (
            parameters.name === 'notifications' ?
                Promise.resolve({ state: Notification.permission }) :
                originalQuery(parameters)
        );
        (() => {
            if (window._eventListenerTrackerInitialized) return;
            window._eventListenerTrackerInitialized = true;
            const originalAddEventListener = EventTarget.prototype.addEventListener;
            const eventListenersMap = new WeakMap();
            EventTarget.prototype.addEventListener = function(type, listener, options) {
                if (typeof listener === "function") {
                    let listeners = eventListenersMap.get(this);
                    if (!listeners) {
                        listeners = [];
                        eventListenersMap.set(this, listeners);
                    }
                    listeners.push({
                        type,
                        listener,
                        listenerPreview: listener.toString().slice(0, 100),
                        options
                    });
                }
                return originalAddEventListener.call(this, type, listener, options);
            };
            window.getEventListenersForNode = (node) => {
                const listeners = eventListenersMap.get(node) || [];
                return listeners.map(({ type, listenerPreview, options }) => ({
                    type,
                    listenerPreview,
                    options
                }));
            };
        })();
    }
    """
    # Try to inject the script, but don't fail if the Page domain isn't ready yet
    # This can happen when a new tab is created and the CDP session isn't fully attached
    try:
        await self.browser_session._cdp_add_init_script(init_script)
    except Exception as e:
        if "'Page.addScriptToEvaluateOnNewDocument' wasn't found" in str(e):
            self.logger.debug(f'Page domain not ready for new tab, skipping init script injection: {e}')
            # The script will be injected when the page actually navigates
        else:
            # Re-raise other errors
            raise
    return None
def _get_recent_events_str(self, limit: int = 10) -> str | None:
"""Get the most recent events from the event bus as JSON.
@@ -164,10 +103,10 @@ class DOMWatchdog(BaseWatchdog):
self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Got page URL: {page_url}')
if self.browser_session.agent_focus:
self.logger.debug(
f'📍 Current page URL: {page_url}, target_id: {self.browser_session.agent_focus.target_id}, session_id: {self.browser_session.agent_focus.session_id}'
f'Current page URL: {page_url}, target_id: {self.browser_session.agent_focus.target_id}, session_id: {self.browser_session.agent_focus.session_id}'
)
else:
self.logger.debug(f'📍 Current page URL: {page_url}, no cdp_session attached')
self.logger.debug(f'Current page URL: {page_url}, no cdp_session attached')
# check if we should skip DOM tree build for pointless pages
not_a_meaningful_website = page_url.lower().split(':', 1)[0] not in ('http', 'https')
@@ -243,65 +182,73 @@ class DOMWatchdog(BaseWatchdog):
recent_events=self._get_recent_events_str() if event.include_recent_events else None,
)
# Normal path: Build DOM tree if requested
if event.include_dom:
self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: 🌳 Building DOM tree...')
# Execute DOM building and screenshot capture in parallel
dom_task = None
screenshot_task = None
# Start DOM building task if requested
if event.include_dom:
self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: 🌳 Starting DOM tree build task...')
# Build the DOM directly using the internal method
previous_state = (
self.browser_session._cached_browser_state_summary.dom_state
if self.browser_session._cached_browser_state_summary
else None
)
dom_task = asyncio.create_task(self._build_dom_tree_without_highlights(previous_state))
# Start clean screenshot task if requested (without JS highlights)
if event.include_screenshot:
self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: 📸 Starting clean screenshot task...')
screenshot_task = asyncio.create_task(self._capture_clean_screenshot())
# Wait for both tasks to complete
content = None
screenshot_b64 = None
if dom_task:
try:
# Call the DOM building method directly
self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: Starting _build_dom_tree...')
content = await self._build_dom_tree(previous_state)
self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ _build_dom_tree completed')
content = await dom_task
self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ DOM tree build completed')
except Exception as e:
self.logger.warning(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: DOM build failed: {e}, using minimal state')
content = SerializedDOMState(_root=None, selector_map={})
if not content:
# Fallback to minimal DOM state
self.logger.warning('DOM build returned no content, using minimal state')
content = SerializedDOMState(_root=None, selector_map={})
else:
# Skip DOM building if not requested
content = SerializedDOMState(_root=None, selector_map={})
# re-focus top-level page session context
assert self.browser_session.agent_focus is not None, 'No current target ID'
await self.browser_session.get_or_create_cdp_session(target_id=self.browser_session.agent_focus.target_id, focus=True)
# Get screenshot if requested
screenshot_b64 = None
if event.include_screenshot:
self.logger.debug(
f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: 📸 DOM watchdog requesting screenshot, include_screenshot={event.include_screenshot}'
)
if screenshot_task:
try:
# Check if handler is registered
handlers = self.event_bus.handlers.get('ScreenshotEvent', [])
handler_names = [getattr(h, '__name__', str(h)) for h in handlers]
self.logger.debug(f'📸 ScreenshotEvent handlers registered: {len(handlers)} - {handler_names}')
screenshot_event = self.event_bus.dispatch(ScreenshotEvent(full_page=False))
self.logger.debug('📸 Dispatched ScreenshotEvent, waiting for event to complete...')
# Wait for the event itself to complete (this waits for all handlers)
await screenshot_event
# Get the single handler result
screenshot_b64 = await screenshot_event.event_result(raise_if_any=True, raise_if_none=True)
except TimeoutError:
self.logger.warning('📸 Screenshot timed out after 6 seconds - no handler registered or slow page?')
screenshot_b64 = await screenshot_task
self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ Clean screenshot captured')
except Exception as e:
self.logger.warning(f'📸 Screenshot failed: {type(e).__name__}: {e}')
else:
self.logger.debug(f'📸 Skipping screenshot, include_screenshot={event.include_screenshot}')
self.logger.warning(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Clean screenshot failed: {e}')
screenshot_b64 = None
# Apply Python-based highlighting if both DOM and screenshot are available
if screenshot_b64 and content and content.selector_map and self.browser_session.browser_profile.highlight_elements:
try:
self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: 🎨 Applying Python-based highlighting...')
from browser_use.browser.python_highlights import create_highlighted_screenshot_async
# Get CDP session for viewport info
cdp_session = await self.browser_session.get_or_create_cdp_session()
start = time.time()
screenshot_b64 = await create_highlighted_screenshot_async(
screenshot_b64,
content.selector_map,
cdp_session,
self.browser_session.browser_profile.filter_highlight_ids,
)
self.logger.debug(
f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ Applied highlights to {len(content.selector_map)} elements in {time.time() - start:.2f}s'
)
except Exception as e:
self.logger.warning(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Python highlighting failed: {e}')
# Ensure we have valid content
if not content:
content = SerializedDOMState(_root=None, selector_map={})
# Tabs info already fetched at the beginning
@@ -452,7 +399,7 @@ class DOMWatchdog(BaseWatchdog):
self.logger.debug(f'🔍 DOMWatchdog._build_dom_tree: ✅ Selector maps updated, {len(self.selector_map)} elements')
# Inject highlighting for visual feedback if we have elements
if self.selector_map and self._dom_service:
if self.selector_map and self._dom_service and self.browser_session.browser_profile.highlight_elements:
try:
self.logger.debug('🔍 DOMWatchdog._build_dom_tree: Injecting highlighting script...')
from browser_use.dom.debug.highlights import inject_highlighting_script
@@ -463,6 +410,8 @@ class DOMWatchdog(BaseWatchdog):
)
except Exception as e:
self.logger.debug(f'🔍 DOMWatchdog._build_dom_tree: Failed to inject highlighting: {e}')
elif self.selector_map and self._dom_service and not self.browser_session.browser_profile.highlight_elements:
self.logger.debug('🔍 DOMWatchdog._build_dom_tree: Skipping highlighting injection - highlight_elements=False')
self.logger.debug('🔍 DOMWatchdog._build_dom_tree: ✅ COMPLETED DOM tree build')
return self.current_dom_state
@@ -477,6 +426,95 @@ class DOMWatchdog(BaseWatchdog):
)
raise
@time_execution_async('build_dom_tree_without_highlights')
@observe_debug(ignore_input=True, ignore_output=True, name='build_dom_tree_without_highlights')
async def _build_dom_tree_without_highlights(self, previous_state: SerializedDOMState | None = None) -> SerializedDOMState:
    """Build DOM tree without injecting JavaScript highlights (for parallel execution).

    Python-based highlighting is applied to the screenshot later, so this
    variant deliberately skips the JS highlight injection step.

    Args:
        previous_state: Previously cached serialized DOM state, passed through
            to the DOM service (presumably for diffing against the last
            snapshot — confirm against DomService.get_serialized_dom_tree).

    Returns:
        The freshly serialized DOM state; also cached on ``self.current_dom_state``.

    Raises:
        Any exception from the DOM service is re-raised after dispatching a
        ``BrowserErrorEvent`` with error_type ``'DOMBuildFailed'``.
    """
    try:
        self.logger.debug('🔍 DOMWatchdog._build_dom_tree_without_highlights: STARTING DOM tree build')
        # Create or reuse DOM service (lazily created once, reused across rebuilds)
        if self._dom_service is None:
            self._dom_service = DomService(
                browser_session=self.browser_session,
                logger=self.logger,
                cross_origin_iframes=self.browser_session.browser_profile.cross_origin_iframes,
            )
        # Get serialized DOM tree using the service
        self.logger.debug('🔍 DOMWatchdog._build_dom_tree_without_highlights: Calling DomService.get_serialized_dom_tree...')
        start = time.time()
        self.current_dom_state, self.enhanced_dom_tree, timing_info = await self._dom_service.get_serialized_dom_tree(
            previous_cached_state=previous_state,
        )
        end = time.time()
        self.logger.debug(
            '🔍 DOMWatchdog._build_dom_tree_without_highlights: ✅ DomService.get_serialized_dom_tree completed'
        )
        self.logger.debug(f'Time taken to get DOM tree: {end - start} seconds')
        self.logger.debug(f'Timing breakdown: {timing_info}')
        # Update selector map for other watchdogs
        self.logger.debug('🔍 DOMWatchdog._build_dom_tree_without_highlights: Updating selector maps...')
        self.selector_map = self.current_dom_state.selector_map
        # Update BrowserSession's cached selector map
        if self.browser_session:
            self.browser_session.update_cached_selector_map(self.selector_map)
        self.logger.debug(
            f'🔍 DOMWatchdog._build_dom_tree_without_highlights: ✅ Selector maps updated, {len(self.selector_map)} elements'
        )
        # Skip JavaScript highlighting injection - Python highlighting will be applied later
        self.logger.debug('🔍 DOMWatchdog._build_dom_tree_without_highlights: ✅ COMPLETED DOM tree build (no JS highlights)')
        return self.current_dom_state
    except Exception as e:
        self.logger.error(f'Failed to build DOM tree without highlights: {e}')
        # Surface the failure on the event bus before re-raising so other
        # watchdogs can react to the broken DOM build.
        self.event_bus.dispatch(
            BrowserErrorEvent(
                error_type='DOMBuildFailed',
                message=str(e),
            )
        )
        raise
@time_execution_async('capture_clean_screenshot')
@observe_debug(ignore_input=True, ignore_output=True, name='capture_clean_screenshot')
async def _capture_clean_screenshot(self) -> str:
    """Capture a clean screenshot without JavaScript highlights.

    Re-focuses the agent's current CDP target, then dispatches a
    ``ScreenshotEvent`` (viewport only, not full page) on the event bus and
    awaits the single handler's result.

    Returns:
        The screenshot as a base64-encoded string.

    Raises:
        AssertionError: If no agent focus / current target is set.
        RuntimeError: If the screenshot handler returned None.
        TimeoutError: Propagated when the handler does not respond in time.
        Exception: Any other handler failure is logged and re-raised.
    """
    try:
        self.logger.debug('🔍 DOMWatchdog._capture_clean_screenshot: Capturing clean screenshot...')
        # Ensure we have a focused CDP session
        assert self.browser_session.agent_focus is not None, 'No current target ID'
        await self.browser_session.get_or_create_cdp_session(target_id=self.browser_session.agent_focus.target_id, focus=True)
        # Check if handler is registered (debug aid: an empty list means the event would hang/timeout)
        handlers = self.event_bus.handlers.get('ScreenshotEvent', [])
        handler_names = [getattr(h, '__name__', str(h)) for h in handlers]
        self.logger.debug(f'📸 ScreenshotEvent handlers registered: {len(handlers)} - {handler_names}')
        screenshot_event = self.event_bus.dispatch(ScreenshotEvent(full_page=False))
        self.logger.debug('📸 Dispatched ScreenshotEvent, waiting for event to complete...')
        # Wait for the event itself to complete (this waits for all handlers)
        await screenshot_event
        # Get the single handler result
        screenshot_b64 = await screenshot_event.event_result(raise_if_any=True, raise_if_none=True)
        if screenshot_b64 is None:
            raise RuntimeError('Screenshot handler returned None')
        self.logger.debug('🔍 DOMWatchdog._capture_clean_screenshot: ✅ Clean screenshot captured successfully')
        return str(screenshot_b64)
    except TimeoutError:
        self.logger.warning('📸 Clean screenshot timed out after 6 seconds - no handler registered or slow page?')
        raise
    except Exception as e:
        self.logger.warning(f'📸 Clean screenshot failed: {type(e).__name__}: {e}')
        raise
async def _wait_for_stable_network(self):
"""Wait for page stability - simplified for CDP-only branch."""
start_time = time.time()
@@ -496,6 +534,7 @@ class DOMWatchdog(BaseWatchdog):
elapsed = time.time() - start_time
self.logger.debug(f'✅ Page stability wait completed in {elapsed:.2f}s')
@observe_debug(ignore_input=True, ignore_output=True, name='get_page_info')
async def _get_page_info(self) -> 'PageInfo':
"""Get comprehensive page information using a single CDP call.

View File

@@ -111,9 +111,10 @@ class DownloadsWatchdog(BaseWatchdog):
# Check if auto-download is enabled
auto_download_enabled = self._is_auto_download_enabled()
if not auto_download_enabled:
self.logger.debug('[DownloadsWatchdog] Skipping PDF check - auto-download disabled')
return
# Note: Using network-based PDF detection that doesn't require JavaScript
target_id = event.target_id
self.logger.debug(f'[DownloadsWatchdog] Got target_id={target_id} for tab #{event.target_id[-4:]}')
@@ -552,8 +553,9 @@ class DownloadsWatchdog(BaseWatchdog):
del self._active_downloads[download_id]
async def check_for_pdf_viewer(self, target_id: TargetID) -> bool:
"""Check if the current target is Chrome's built-in PDF viewer.
"""Check if the current target is a PDF using network-based detection.
This method avoids JavaScript execution that can crash WebSocket connections.
Returns True if a PDF is detected and should be downloaded.
"""
self.logger.debug(f'[DownloadsWatchdog] Checking if target {target_id} is PDF viewer...')
@@ -575,98 +577,115 @@ class DownloadsWatchdog(BaseWatchdog):
return cached_result
try:
# Create a temporary CDP session for this target without switching focus
import asyncio
temp_session = await self.browser_session.get_or_create_cdp_session(target_id, focus=False)
result = await asyncio.wait_for(
temp_session.cdp_client.send.Runtime.evaluate(
params={
'expression': """
(() => {
// Check for Chrome's built-in PDF viewer (both old and new selectors)
const pdfEmbed = document.querySelector('embed[type="application/x-google-chrome-pdf"]') ||
document.querySelector('embed[type="application/pdf"]');
if (pdfEmbed) {
// For Chrome PDF viewer, use window.location.href not embed.src (which is often about:blank)
return {
isPdf: true,
url: window.location.href,
isChromePdfViewer: true
};
}
// Check for direct PDF navigation
if (document.contentType === 'application/pdf') {
return {
isPdf: true,
url: window.location.href,
isDirectPdf: true
};
}
// Also check if the URL ends with .pdf or has PDF in it
const url = window.location.href;
const isPdfUrl = url.toLowerCase().includes('.pdf');
if (isPdfUrl) {
return {
isPdf: true,
url: url,
isPdfUrl: true
};
}
// Check for PDF in iframe
const iframes = document.querySelectorAll('iframe');
for (const iframe of iframes) {
try {
const iframeDoc = iframe.contentDocument || iframe.contentWindow.document;
if (iframeDoc.contentType === 'application/pdf') {
return {
isPdf: true,
url: iframe.src,
isIframePdf: true
};
}
} catch (e) {
// Cross-origin iframe, skip
}
}
return { isPdf: false };
})()
""",
'returnByValue': True,
},
session_id=temp_session.session_id,
),
timeout=5.0, # 5 second timeout to prevent hanging
)
# No need to detach - session is cached
is_pdf_viewer = result.get('result', {}).get('value', {})
if is_pdf_viewer.get('isPdf', False):
self.logger.debug(
f'[DownloadsWatchdog] PDF detected: {is_pdf_viewer.get("url", "unknown")} '
f'(type: {"Chrome viewer" if is_pdf_viewer.get("isChromePdfViewer") else "direct PDF" if is_pdf_viewer.get("isDirectPdf") else "PDF URL" if is_pdf_viewer.get("isPdfUrl") else "iframe PDF"})'
)
# Method 1: Check URL patterns (fastest, most reliable)
url_is_pdf = self._check_url_for_pdf(page_url)
if url_is_pdf:
self.logger.debug(f'[DownloadsWatchdog] PDF detected via URL pattern: {page_url}')
self._pdf_viewer_cache[page_url] = True
return True
# Method 2: Check network response headers via CDP (safer than JavaScript)
header_is_pdf = await self._check_network_headers_for_pdf(target_id)
if header_is_pdf:
self.logger.debug(f'[DownloadsWatchdog] PDF detected via network headers: {page_url}')
self._pdf_viewer_cache[page_url] = True
return True
# Method 3: Check Chrome's PDF viewer specific URLs
chrome_pdf_viewer = self._is_chrome_pdf_viewer_url(page_url)
if chrome_pdf_viewer:
self.logger.debug(f'[DownloadsWatchdog] Chrome PDF viewer detected: {page_url}')
self._pdf_viewer_cache[page_url] = True
return True
# Not a PDF
self._pdf_viewer_cache[page_url] = False
return False
except TimeoutError:
self.logger.warning(f'[DownloadsWatchdog] ❌ PDF check timed out for target: {page_url}')
self._pdf_viewer_cache[page_url] = False
return False
except Exception as e:
self.logger.warning(f'[DownloadsWatchdog] ❌ Error checking for PDF viewer: {e}')
self._pdf_viewer_cache[page_url] = False
return False
def _check_url_for_pdf(self, url: str) -> bool:
"""Check if URL indicates a PDF file."""
if not url:
return False
url_lower = url.lower()
# Direct PDF file extensions
if url_lower.endswith('.pdf'):
return True
# PDF in path
if '.pdf' in url_lower:
return True
# PDF MIME type in URL parameters
if any(
param in url_lower
for param in [
'content-type=application/pdf',
'content-type=application%2fpdf',
'mimetype=application/pdf',
'type=application/pdf',
]
):
return True
return False
def _is_chrome_pdf_viewer_url(self, url: str) -> bool:
"""Check if this is Chrome's internal PDF viewer URL."""
if not url:
return False
url_lower = url.lower()
# Chrome PDF viewer uses chrome-extension:// URLs
if 'chrome-extension://' in url_lower and 'pdf' in url_lower:
return True
# Chrome PDF viewer internal URLs
if url_lower.startswith('chrome://') and 'pdf' in url_lower:
return True
return False
async def _check_network_headers_for_pdf(self, target_id: TargetID) -> bool:
    """Infer PDF via navigation history/URL; headers are not available post-navigation in this context.

    Args:
        target_id: CDP target to inspect. A non-focusing session is requested
            so the agent's currently focused tab is not disturbed.

    Returns:
        True when the current navigation-history entry's URL looks like a PDF
        (per ``self._check_url_for_pdf``); False otherwise, including on any
        CDP error or timeout — this is a best-effort check.
    """
    try:
        import asyncio

        # Get CDP session
        temp_session = await self.browser_session.get_or_create_cdp_session(target_id, focus=False)
        # Get navigation history to find the main resource (3s cap so a stuck
        # target cannot hang the PDF check)
        history = await asyncio.wait_for(
            temp_session.cdp_client.send.Page.getNavigationHistory(session_id=temp_session.session_id), timeout=3.0
        )
        current_entry = history.get('entries', [])
        if current_entry:
            current_index = history.get('currentIndex', 0)
            # Guard against an out-of-range currentIndex in the CDP response.
            if 0 <= current_index < len(current_entry):
                current_url = current_entry[current_index].get('url', '')
                # Check if the URL itself suggests PDF
                if self._check_url_for_pdf(current_url):
                    return True
        # Note: CDP doesn't easily expose response headers for completed navigations
        # For more complex cases, we'd need to set up Network.responseReceived listeners
        # before navigation, but that's overkill for most PDF detection cases
        return False
    except Exception as e:
        # Best-effort: any failure is logged at debug level and treated as "not a PDF".
        self.logger.debug(f'[DownloadsWatchdog] Network headers check failed (non-critical): {e}')
        return False
async def trigger_pdf_download(self, target_id: TargetID) -> str | None:
"""Trigger download of a PDF from Chrome's PDF viewer.

View File

@@ -100,7 +100,7 @@ async def inject_highlighting_script(dom_service: DomService, interactive_elemen
# Convert DOMSelectorMap to the format expected by the JavaScript
converted_elements = convert_dom_selector_map_to_highlight_format(interactive_elements)
logger.debug(f'📍 Creating CSP-safe highlighting for {len(converted_elements)} elements')
logger.debug(f'Creating CSP-safe highlighting for {len(converted_elements)} elements')
# ALWAYS remove any existing highlights first to prevent double-highlighting
await remove_highlighting_script(dom_service)

View File

@@ -20,7 +20,7 @@ class ClickableElementDetector:
# IFRAME elements should be interactive if they're large enough to potentially need scrolling
# Small iframes (< 100px width or height) are unlikely to have scrollable content
if node.tag_name and node.tag_name.upper() == 'IFRAME':
if node.tag_name and node.tag_name.upper() == 'IFRAME' or node.tag_name.upper() == 'FRAME':
if node.snapshot_node and node.snapshot_node.bounds:
width = node.snapshot_node.bounds.width
height = node.snapshot_node.bounds.height
@@ -94,14 +94,14 @@ class ClickableElementDetector:
# Skip properties we can't process
continue
# ENHANCED TAG CHECK: Include truly interactive elements
# ENHANCED TAG CHECK: Include truly interactive elements
# Note: 'label' removed - labels are handled by other attribute checks below - otherwise labels with a "for" attribute can destroy the real clickable element on apartments.com
interactive_tags = {
'button',
'input',
'select',
'textarea',
'a',
'label',
'details',
'summary',
'option',

View File

@@ -143,10 +143,10 @@ class DOMTreeSerializer:
if node.node_name.lower() in DISABLED_ELEMENTS:
return None
if node.node_name == 'IFRAME':
if node.node_name == 'IFRAME' or node.node_name == 'FRAME':
if node.content_document:
simplified = SimplifiedNode(original_node=node, children=[])
for child in node.content_document.children:
for child in node.content_document.children_nodes or []:
simplified_child = self._create_simplified_tree(child)
if simplified_child:
simplified.children.append(simplified_child)
@@ -159,7 +159,7 @@ class DOMTreeSerializer:
is_scrollable = node.is_actually_scrollable
# Include if interactive (regardless of visibility), or scrollable, or has children to process
should_include = (is_interactive and is_visible) or is_scrollable or node.children_and_shadow_roots
should_include = (is_interactive and is_visible) or is_scrollable or bool(node.children_and_shadow_roots)
if should_include:
simplified = SimplifiedNode(original_node=node, children=[])
@@ -435,7 +435,12 @@ class DOMTreeSerializer:
# Add element with interactive_index if clickable, scrollable, or iframe
is_any_scrollable = node.original_node.is_actually_scrollable or node.original_node.is_scrollable
should_show_scroll = node.original_node.should_show_scroll_info
if node.interactive_index is not None or is_any_scrollable or node.original_node.tag_name.upper() == 'IFRAME':
if (
node.interactive_index is not None
or is_any_scrollable
or node.original_node.tag_name.upper() == 'IFRAME'
or node.original_node.tag_name.upper() == 'FRAME'
):
next_depth += 1
# Build attributes string
@@ -453,6 +458,9 @@ class DOMTreeSerializer:
elif node.original_node.tag_name.upper() == 'IFRAME':
# Iframe element (not interactive)
line = f'{depth_str}|IFRAME|<{node.original_node.tag_name}'
elif node.original_node.tag_name.upper() == 'FRAME':
# Frame element (not interactive)
line = f'{depth_str}|FRAME|<{node.original_node.tag_name}'
else:
line = f'{depth_str}<{node.original_node.tag_name}'

View File

@@ -187,7 +187,7 @@ class DomService:
for frame in reversed(html_frames):
if (
frame.node_type == NodeType.ELEMENT_NODE
and frame.node_name.upper() == 'IFRAME'
and (frame.node_name.upper() == 'IFRAME' or frame.node_name.upper() == 'FRAME')
and frame.snapshot_node
and frame.snapshot_node.bounds
):
@@ -561,7 +561,11 @@ class DomService:
)
# Calculate new iframe offset for content documents, accounting for iframe scroll
if node['nodeName'].upper() == 'IFRAME' and snapshot_data and snapshot_data.bounds:
if (
(node['nodeName'].upper() == 'IFRAME' or node['nodeName'].upper() == 'FRAME')
and snapshot_data
and snapshot_data.bounds
):
if snapshot_data.bounds:
updated_html_frames.append(dom_tree_node)

View File

@@ -12,6 +12,7 @@ from cdp_use.cdp.target.types import SessionID, TargetID, TargetInfo
from uuid_extensions import uuid7str
from browser_use.dom.utils import cap_text_length
from browser_use.observability import observe_debug
# Serializer types
DEFAULT_INCLUDE_ATTRIBUTES = [
@@ -91,14 +92,28 @@ class SimplifiedNode:
is_new: bool = False
excluded_by_parent: bool = False # New field for bbox filtering
def _clean_original_node_json(self, node_json: dict) -> dict:
"""Recursively remove children_nodes and shadow_roots from original_node JSON."""
# Remove the fields we don't want in SimplifiedNode serialization
if 'children_nodes' in node_json:
del node_json['children_nodes']
if 'shadow_roots' in node_json:
del node_json['shadow_roots']
# Clean nested content_document if it exists
if node_json.get('content_document'):
node_json['content_document'] = self._clean_original_node_json(node_json['content_document'])
return node_json
def __json__(self) -> dict:
original_node_json = self.original_node.__json__()
del original_node_json['children_nodes']
del original_node_json['shadow_roots']
# Remove children_nodes and shadow_roots to avoid duplication with SimplifiedNode.children
cleaned_original_node_json = self._clean_original_node_json(original_node_json)
return {
'should_display': self.should_display,
'interactive_index': self.interactive_index,
'original_node': original_node_json,
'original_node': cleaned_original_node_json,
'children': [c.__json__() for c in self.children],
}
@@ -412,6 +427,25 @@ class EnhancedDOMTreeNode:
return f'<{self.tag_name}>{cap_text_length(self.get_all_children_text(), max_text_length) or ""}'
def get_meaningful_text_for_llm(self) -> str:
    """Return the text the LLM actually sees for this element.

    Matches the DOMTreeSerializer output exactly: the first non-empty
    attribute in priority order wins; otherwise the element's aggregated
    child text is used. The result is whitespace-stripped.
    """
    chosen = ''
    attributes = getattr(self, 'attributes', None)
    if attributes:
        # Priority order: value, aria-label, title, placeholder, alt, text content
        for name in ('value', 'aria-label', 'title', 'placeholder', 'alt'):
            candidate = attributes.get(name)
            if candidate:
                chosen = candidate
                break
    # Fallback to text content if no meaningful attributes
    if not chosen:
        chosen = self.get_all_children_text()
    return chosen.strip()
@property
def is_actually_scrollable(self) -> bool:
"""
@@ -677,6 +711,7 @@ class SerializedDOMState:
selector_map: DOMSelectorMap
@observe_debug(ignore_input=True, ignore_output=True, name='llm_representation')
def llm_representation(
self,
include_attributes: list[str] | None = None,

View File

@@ -379,6 +379,8 @@ class Registry(Generic[Context]):
raise RuntimeError(str(e)) from e
else:
raise RuntimeError(f'Error executing action {action_name}: {str(e)}') from e
except TimeoutError as e:
raise RuntimeError(f'Error executing action {action_name} due to timeout.') from e
except Exception as e:
raise RuntimeError(f'Error executing action {action_name}: {str(e)}') from e

View File

@@ -65,26 +65,19 @@ Context = TypeVar('Context')
T = TypeVar('T', bound=BaseModel)
def extract_llm_error_message(error: Exception) -> str:
    """Pull the LLM-facing message out of an exception.

    Exceptions raised deeper in the stack may wrap their user-facing message
    in ``<llm_error_msg>...</llm_error_msg>`` tags; when such tags are found,
    only the inner text (stripped) is returned. Otherwise the exception's
    full string form is returned unchanged.
    """
    import re

    raw_text = str(error)
    # DOTALL so the tagged message may span multiple lines.
    tagged = re.search(r'<llm_error_msg>(.*?)</llm_error_msg>', raw_text, re.DOTALL)
    return tagged.group(1).strip() if tagged else raw_text
def handle_browser_error(e: BrowserError) -> ActionResult:
    """Convert a structured BrowserError into an ActionResult for the agent.

    ``long_term_memory`` becomes the persistent error text; when
    ``short_term_memory`` is also set, it is surfaced once as extracted
    content. A BrowserError lacking ``long_term_memory`` is considered
    malformed and re-raised after a warning.
    """
    # Guard clause: without long_term_memory we cannot build a useful result.
    if e.long_term_memory is None:
        logger.warning(
            '⚠️ A BrowserError was raised without long_term_memory - always set long_term_memory when raising BrowserError to propagate right messages to LLM.'
        )
        raise e
    if e.short_term_memory is None:
        return ActionResult(error=e.long_term_memory)
    return ActionResult(
        extracted_content=e.short_term_memory, error=e.long_term_memory, include_extracted_content_only_once=True
    )
class Tools(Generic[Context]):
@@ -177,11 +170,10 @@ class Tools(Generic[Context]):
memory = f"Searched Google for '{params.query}'"
msg = f'🔍 {memory}'
logger.info(msg)
return ActionResult(extracted_content=memory, include_in_memory=True, long_term_memory=memory)
return ActionResult(extracted_content=memory, long_term_memory=memory)
except Exception as e:
logger.error(f'Failed to search Google: {e}')
clean_msg = extract_llm_error_message(e)
return ActionResult(error=f'Failed to search Google for "{params.query}": {clean_msg}')
return ActionResult(error=f'Failed to search Google for "{params.query}": {str(e)}')
@self.registry.action(
'Navigate to URL, set new_tab=True to open in new tab, False to navigate in current tab', param_model=GoToUrlAction
@@ -201,12 +193,11 @@ class Tools(Generic[Context]):
msg = f'🔗 {memory}'
logger.info(msg)
return ActionResult(extracted_content=msg, include_in_memory=True, long_term_memory=memory)
return ActionResult(extracted_content=msg, long_term_memory=memory)
except Exception as e:
error_msg = str(e)
# Always log the actual error first for debugging
browser_session.logger.error(f'❌ Navigation failed: {error_msg}')
clean_msg = extract_llm_error_message(e)
# Check if it's specifically a RuntimeError about CDP client
if isinstance(e, RuntimeError) and 'CDP client not initialized' in error_msg:
@@ -223,12 +214,12 @@ class Tools(Generic[Context]):
'net::',
]
):
site_unavailable_msg = f'Site unavailable: {params.url} - {error_msg}'
browser_session.logger.warning(f'⚠️ {site_unavailable_msg}')
site_unavailable_msg = f'Navigation failed - site unavailable: {params.url}'
browser_session.logger.warning(f'⚠️ {site_unavailable_msg} - {error_msg}')
return ActionResult(error=site_unavailable_msg)
else:
# Return error in ActionResult instead of re-raising
return ActionResult(error=f'Navigation failed: {clean_msg}')
return ActionResult(error=f'Navigation failed: {str(e)}')
@self.registry.action('Go back', param_model=NoParamsAction)
async def go_back(_: NoParamsAction, browser_session: BrowserSession):
@@ -241,8 +232,7 @@ class Tools(Generic[Context]):
return ActionResult(extracted_content=memory)
except Exception as e:
logger.error(f'Failed to dispatch GoBackEvent: {type(e).__name__}: {e}')
clean_msg = extract_llm_error_message(e)
error_msg = f'Failed to go back: {clean_msg}'
error_msg = f'Failed to go back: {str(e)}'
return ActionResult(error=error_msg)
@self.registry.action(
@@ -285,23 +275,18 @@ class Tools(Generic[Context]):
# Wait for handler to complete and get any exception or metadata
click_metadata = await event.event_result(raise_if_any=True, raise_if_none=False)
memory = f'Clicked element with index {params.index}'
if params.while_holding_ctrl:
memory += ' and opened in new tab'
msg = f'🖱️ {memory}'
logger.info(msg)
# Include click coordinates in metadata if available
return ActionResult(
extracted_content=memory,
include_in_memory=True,
long_term_memory=memory,
metadata=click_metadata if isinstance(click_metadata, dict) else None,
)
except Exception as e:
logger.error(f'Failed to execute ClickElementEvent: {type(e).__name__}: {e}')
clean_msg = extract_llm_error_message(e)
error_msg = f'Failed to click element {params.index}: {clean_msg}'
# If it's a select dropdown error, automatically get the dropdown options
if 'dropdown' in str(e) and node:
except BrowserError as e:
if 'Cannot click on <select> elements.' in str(e):
try:
return await get_dropdown_options(
params=GetDropdownOptionsAction(index=params.index), browser_session=browser_session
@@ -311,6 +296,9 @@ class Tools(Generic[Context]):
f'Failed to get dropdown options as shortcut during click_element_by_index on dropdown: {type(dropdown_error).__name__}: {dropdown_error}'
)
return handle_browser_error(e)
except Exception as e:
error_msg = f'Failed to click element {params.index}: {str(e)}'
return ActionResult(error=error_msg)
@self.registry.action(
@@ -336,10 +324,11 @@ class Tools(Generic[Context]):
# Include input coordinates in metadata if available
return ActionResult(
extracted_content=msg,
include_in_memory=True,
long_term_memory=f"Input '{params.text}' into element {params.index}.",
metadata=input_metadata if isinstance(input_metadata, dict) else None,
)
except BrowserError as e:
return handle_browser_error(e)
except Exception as e:
# Log the full error for debugging
logger.error(f'Failed to dispatch TypeTextEvent: {type(e).__name__}: {e}')
@@ -370,27 +359,28 @@ class Tools(Generic[Context]):
if not browser_session.is_local:
pass
else:
raise BrowserError(
f'File path {params.path} is not available. Must be in available_file_paths, downloaded_files, or a file managed by file_system.'
)
msg = f'File path {params.path} is not available. Upload files must be in available_file_paths, downloaded_files, or a file managed by file_system.'
logger.error(f'{msg}')
return ActionResult(error=msg)
else:
# If browser is remote, allow passing a remote-accessible absolute path
if not browser_session.is_local:
pass
else:
raise BrowserError(
f'File path {params.path} is not available. Must be in available_file_paths or downloaded_files.'
)
msg = f'File path {params.path} is not available. Upload files must be in available_file_paths, downloaded_files, or a file managed by file_system.'
raise BrowserError(message=msg, long_term_memory=msg)
# For local browsers, ensure the file exists on the local filesystem
if browser_session.is_local:
if not os.path.exists(params.path):
raise BrowserError(f'File {params.path} does not exist')
msg = f'File {params.path} does not exist'
return ActionResult(error=msg)
# Get the selector map to find the node
selector_map = await browser_session.get_selector_map()
if params.index not in selector_map:
raise BrowserError(f'Element with index {params.index} not found in selector map')
msg = f'Element with index {params.index} does not exist.'
return ActionResult(error=msg)
node = selector_map[params.index]
@@ -486,7 +476,6 @@ class Tools(Generic[Context]):
logger.info(f'📁 {msg}')
return ActionResult(
extracted_content=msg,
include_in_memory=True,
long_term_memory=f'Uploaded file {params.path} to element {params.index}',
)
except Exception as e:
@@ -499,12 +488,7 @@ class Tools(Generic[Context]):
async def switch_tab(params: SwitchTabAction, browser_session: BrowserSession):
# Dispatch switch tab event
try:
if params.tab_id:
target_id = await browser_session.get_target_id_from_tab_id(params.tab_id)
elif params.url:
target_id = await browser_session.get_target_id_from_url(params.url)
else:
target_id = await browser_session.get_most_recently_opened_target_id()
target_id = await browser_session.get_target_id_from_tab_id(params.tab_id)
event = browser_session.event_bus.dispatch(SwitchTabEvent(target_id=target_id))
await event
@@ -512,11 +496,10 @@ class Tools(Generic[Context]):
assert new_target_id, 'SwitchTabEvent did not return a TargetID for the new tab that was switched to'
memory = f'Switched to Tab with ID {new_target_id[-4:]}'
logger.info(f'🔄 {memory}')
return ActionResult(extracted_content=memory, include_in_memory=True, long_term_memory=memory)
return ActionResult(extracted_content=memory, long_term_memory=memory)
except Exception as e:
logger.error(f'Failed to switch tab: {type(e).__name__}: {e}')
clean_msg = extract_llm_error_message(e)
return ActionResult(error=f'Failed to switch to tab {params.tab_id or params.url}: {clean_msg}')
return ActionResult(error=f'Failed to switch to tab {params.tab_id}.')
@self.registry.action('Close an existing tab', param_model=CloseTabAction)
async def close_tab(params: CloseTabAction, browser_session: BrowserSession):
@@ -535,13 +518,11 @@ class Tools(Generic[Context]):
logger.info(f'🗑️ {memory}')
return ActionResult(
extracted_content=memory,
include_in_memory=True,
long_term_memory=memory,
)
except Exception as e:
logger.error(f'Failed to close tab: {e}')
clean_msg = extract_llm_error_message(e)
return ActionResult(error=f'Failed to close tab {params.tab_id}: {clean_msg}')
return ActionResult(error=f'Failed to close tab {params.tab_id}.')
# Content Actions
@@ -697,11 +678,10 @@ Provide the extracted information in a clear, structured format."""
msg = f'🔍 {long_term_memory}'
logger.info(msg)
return ActionResult(extracted_content=msg, include_in_memory=True, long_term_memory=long_term_memory)
return ActionResult(extracted_content=msg, long_term_memory=long_term_memory)
except Exception as e:
logger.error(f'Failed to dispatch ScrollEvent: {type(e).__name__}: {e}')
clean_msg = extract_llm_error_message(e)
error_msg = f'Failed to scroll: {clean_msg}'
error_msg = 'Failed to execute scroll action.'
return ActionResult(error=error_msg)
@self.registry.action(
@@ -717,11 +697,10 @@ Provide the extracted information in a clear, structured format."""
memory = f'Sent keys: {params.keys}'
msg = f'⌨️ {memory}'
logger.info(msg)
return ActionResult(extracted_content=memory, include_in_memory=True, long_term_memory=memory)
return ActionResult(extracted_content=memory, long_term_memory=memory)
except Exception as e:
logger.error(f'Failed to dispatch SendKeysEvent: {type(e).__name__}: {e}')
clean_msg = extract_llm_error_message(e)
error_msg = f'Failed to send keys: {clean_msg}'
error_msg = f'Failed to send keys: {str(e)}'
return ActionResult(error=error_msg)
@self.registry.action(
@@ -737,14 +716,13 @@ Provide the extracted information in a clear, structured format."""
memory = f'Scrolled to text: {text}'
msg = f'🔍 {memory}'
logger.info(msg)
return ActionResult(extracted_content=memory, include_in_memory=True, long_term_memory=memory)
return ActionResult(extracted_content=memory, long_term_memory=memory)
except Exception as e:
# Text not found
msg = f"Text '{text}' not found or not visible on page"
logger.info(msg)
return ActionResult(
extracted_content=msg,
include_in_memory=True,
long_term_memory=f"Tried scrolling to text '{text}' but it was not found",
)
@@ -762,7 +740,6 @@ Provide the extracted information in a clear, structured format."""
raise ValueError(f'Element index {params.index} not found in DOM')
# Dispatch GetDropdownOptionsEvent to the event handler
import json
event = browser_session.event_bus.dispatch(GetDropdownOptionsEvent(node=node))
dropdown_data = await event.event_result(timeout=3.0, raise_if_none=True, raise_if_any=True)
@@ -770,14 +747,10 @@ Provide the extracted information in a clear, structured format."""
if not dropdown_data:
raise ValueError('Failed to get dropdown options - no data returned')
# Extract the message from the returned data
msg = dropdown_data.get('message', '')
options_count = len(json.loads(dropdown_data.get('options', '[]'))) # Parse the string back to list to get count
# Use structured memory from the handler
return ActionResult(
extracted_content=msg,
include_in_memory=True,
long_term_memory=f'Found {options_count} dropdown options for index {params.index}',
extracted_content=dropdown_data['short_term_memory'],
long_term_memory=dropdown_data['long_term_memory'],
include_extracted_content_only_once=True,
)
@@ -801,14 +774,28 @@ Provide the extracted information in a clear, structured format."""
if not selection_data:
raise ValueError('Failed to select dropdown option - no data returned')
# Extract the message from the returned data
msg = selection_data.get('message', f'Selected option: {params.text}')
return ActionResult(
extracted_content=msg,
include_in_memory=True,
long_term_memory=f"Selected dropdown option '{params.text}' at index {params.index}",
)
# Check if the selection was successful
if selection_data.get('success') == 'true':
# Extract the message from the returned data
msg = selection_data.get('message', f'Selected option: {params.text}')
return ActionResult(
extracted_content=msg,
include_in_memory=True,
long_term_memory=f"Selected dropdown option '{params.text}' at index {params.index}",
)
else:
# Handle structured error response
# TODO: raise BrowserError instead of returning ActionResult
if 'short_term_memory' in selection_data and 'long_term_memory' in selection_data:
return ActionResult(
extracted_content=selection_data['short_term_memory'],
long_term_memory=selection_data['long_term_memory'],
include_extracted_content_only_once=True,
)
else:
# Fallback to regular error
error_msg = selection_data.get('error', f'Failed to select option: {params.text}')
return ActionResult(error=error_msg)
# File System Actions
@self.registry.action(
@@ -831,7 +818,7 @@ Provide the extracted information in a clear, structured format."""
else:
result = await file_system.write_file(file_name, content)
logger.info(f'💾 {result}')
return ActionResult(extracted_content=result, include_in_memory=True, long_term_memory=result)
return ActionResult(extracted_content=result, long_term_memory=result)
@self.registry.action(
'Replace old_str with new_str in file_name. old_str must exactly match the string to replace in original text. Recommended tool to mark completed items in todo.md or change specific contents in a file.'
@@ -839,7 +826,7 @@ Provide the extracted information in a clear, structured format."""
async def replace_file_str(file_name: str, old_str: str, new_str: str, file_system: FileSystem):
result = await file_system.replace_file_str(file_name, old_str, new_str)
logger.info(f'💾 {result}')
return ActionResult(extracted_content=result, include_in_memory=True, long_term_memory=result)
return ActionResult(extracted_content=result, long_term_memory=result)
@self.registry.action('Read file_name from file system')
async def read_file(file_name: str, available_file_paths: list[str], file_system: FileSystem):
@@ -866,7 +853,6 @@ Provide the extracted information in a clear, structured format."""
logger.info(f'💾 {memory}')
return ActionResult(
extracted_content=result,
include_in_memory=True,
long_term_memory=memory,
include_extracted_content_only_once=True,
)
@@ -1001,12 +987,16 @@ Provide the extracted information in a clear, structured format."""
sensitive_data=sensitive_data,
available_file_paths=available_file_paths,
)
except BrowserError as e:
logger.error(f'❌ Action {action_name} failed with BrowserError: {str(e)}')
result = handle_browser_error(e)
except TimeoutError as e:
logger.error(f'❌ Action {action_name} failed with TimeoutError: {str(e)}')
result = ActionResult(error=f'{action_name} was not executed due to timeout.')
except Exception as e:
# Log the original exception with traceback for observability
logger.error(f"Action '{action_name}' failed")
# Extract clean error message from llm_error_msg tags if present
clean_msg = extract_llm_error_message(e)
result = ActionResult(error=clean_msg)
logger.error(f"Action '{action_name}' failed with error: {str(e)}")
result = ActionResult(error=str(e))
if Laminar is not None:
Laminar.set_span_output(result)

View File

@@ -43,15 +43,10 @@ class StructuredOutputAction(BaseModel, Generic[T]):
class SwitchTabAction(BaseModel):
url: str | None = Field(
default=None,
description='URL or URL substring of the tab to switch to, if not provided, the tab_id or most recently opened tab will be used',
)
tab_id: str | None = Field(
default=None,
tab_id: str = Field(
min_length=4,
max_length=4,
description='exact 4 character Tab ID to match instead of URL, prefer using this if known',
description='Last 4 chars of TargetID',
) # last 4 chars of TargetID

View File

@@ -129,31 +129,6 @@
{
"tab": "Cloud",
"versions": [
{
"version": "v2",
"groups": [
{
"group": "Get Started",
"pages": [
"cloud/v2/quickstart",
"cloud/v2/python-quickstart",
"cloud/v2/node-quickstart"
]
},
{
"group": "Platform",
"pages": [
"cloud/v1/pricing",
"cloud/v1/n8n-browser-use-integration",
"cloud/v1/search"
]
},
{
"group": "REST API reference",
"openapi": "https://app.stainless.com/api/spec/documented/browser-use/openapi.documented.yml"
}
]
},
{
"version": "v1",
"groups": [
@@ -180,6 +155,27 @@
"openapi": "https://api.browser-use.com/api/v1/openapi.json"
}
]
},
{
"version": "v2",
"groups": [
{
"group": "Get Started",
"pages": [
"cloud/v2/quickstart",
"cloud/v2/python-quickstart",
"cloud/v2/node-quickstart"
]
},
{
"group": "Platform",
"pages": [
"cloud/v1/pricing",
"cloud/v1/n8n-browser-use-integration",
"cloud/v1/search"
]
}
]
}
]
}

View File

@@ -14,7 +14,7 @@ dependencies = [
"aiofiles>=24.1.0",
"aiohttp==3.12.15",
"anyio>=4.9.0",
"bubus>=1.5.4",
"bubus>=1.5.6",
"google-api-core>=2.25.0",
"httpx>=0.28.1",
"markdownify==1.1.0",

View File

@@ -17,11 +17,7 @@ import aiofiles
import yaml
from pydantic import BaseModel
from browser_use.agent.service import Agent
from browser_use.agent.views import AgentHistoryList
from browser_use.browser.profile import BrowserProfile
from browser_use.browser.session import BrowserSession
from browser_use.llm import ChatOpenAI
from browser_use import Agent, AgentHistoryList, BrowserProfile, BrowserSession, ChatOpenAI
from browser_use.llm.messages import UserMessage
# --- CONFIG ---

View File

@@ -185,11 +185,11 @@ class TestClickElementEvent:
# Verify the result structure
assert isinstance(result, ActionResult), 'Result should be an ActionResult instance'
assert result.error is None, f'Expected no error but got: {result.error}'
result_text = result.extracted_content or result.long_term_memory
# Core logic validation: Verify click was successful
assert result.extracted_content is not None
assert f'Clicked element with index {button_index}' in result.extracted_content, (
f'Expected click confirmation in result content, got: {result.extracted_content}'
assert result_text is not None
assert f'Clicked element with index {button_index}' in result_text, (
f'Expected click confirmation in result content, got: {result_text}'
)
# Note: The click action doesn't include button text in the result, only the index
@@ -260,7 +260,11 @@ class TestClickElementEvent:
# Verify the result
assert isinstance(result, ActionResult)
assert result.extracted_content is not None
result_text = result.extracted_content or result.long_term_memory
assert result_text is not None
assert f'Clicked element with index {link_index}' in result_text, (
f'Expected click confirmation in result content, got: {result_text}'
)
# Verify that a new tab was opened
tabs = await browser_session.get_tabs()

View File

@@ -104,7 +104,6 @@ class TestScrollActions:
assert result.extracted_content is not None
assert 'Scrolled down' in result.extracted_content
assert 'the page' in result.extracted_content
assert result.include_in_memory is True
# Test 2: Basic page scroll up
scroll_up_action = {'scroll': ScrollAction(down=False, num_pages=0.5)}
@@ -123,7 +122,7 @@ class TestScrollActions:
# This should fail with error about element not found
assert isinstance(result, ActionResult)
assert result.error is not None, 'Expected error for invalid element index'
assert 'Element index 999 not found' in result.error or 'Failed to scroll' in result.error
assert 'Element index 999 not found' in result.error or 'Failed to execute scroll' in result.error
# Test 4: Model parameter validation
scroll_with_index = ScrollAction(down=True, num_pages=1.0, frame_element_index=5)

View File

@@ -394,10 +394,10 @@ class TestScreenshotEventSystem:
# Test the NEW event-driven path: direct event dispatching
event = browser_session.event_bus.dispatch(ScreenshotEvent(full_page=False))
screenshot_result = (await event.event_result()) or {}
assert screenshot_result.get('screenshot')
assert isinstance(screenshot_result['screenshot'], str)
assert len(base64.b64decode(screenshot_result['screenshot'])) > 5000
screenshot_b64 = await event.event_result()
assert screenshot_b64 is not None
assert isinstance(screenshot_b64, str)
assert len(base64.b64decode(screenshot_b64)) > 5000
finally:
await browser_session.kill()