diff --git a/browser_use/agent/prompts.py b/browser_use/agent/prompts.py
index 33a545fb2..8c7aa7348 100644
--- a/browser_use/agent/prompts.py
+++ b/browser_use/agent/prompts.py
@@ -2,6 +2,7 @@ import importlib.resources
from datetime import datetime
from typing import TYPE_CHECKING, Literal, Optional
+from browser_use.dom.views import NodeType, SimplifiedNode
from browser_use.llm.messages import ContentPartImageParam, ContentPartTextParam, ImageURL, SystemMessage, UserMessage
from browser_use.observability import observe_debug
from browser_use.utils import is_new_tab_page
@@ -112,8 +113,93 @@ class AgentMessagePrompt:
self.sample_images = sample_images or []
assert self.browser_state
+ def _extract_page_statistics(self) -> dict[str, int]:
+ """Extract high-level page statistics from DOM tree for LLM context"""
+ stats = {
+ 'links': 0,
+ 'iframes': 0,
+ 'shadow_open': 0,
+ 'shadow_closed': 0,
+ 'scroll_containers': 0,
+ 'images': 0,
+ 'interactive_elements': 0,
+ 'total_elements': 0,
+ }
+ # Bail out with all-zero counters when there is no DOM state or no root node (reads the private _root attribute)
+ if not self.browser_state.dom_state or not self.browser_state.dom_state._root:
+ return stats
+
+ def traverse_node(node: SimplifiedNode) -> None:
+ """Recursively traverse simplified DOM tree to count elements"""
+ if not node or not node.original_node:
+ return
+
+ original = node.original_node
+ stats['total_elements'] += 1  # incremented before the node-type check, so every node kind is included
+
+ # Tag-based counters (links / iframes+frames / images) apply to element nodes only
+ if original.node_type == NodeType.ELEMENT_NODE:
+ tag = original.tag_name.lower() if original.tag_name else ''
+
+ if tag == 'a':
+ stats['links'] += 1
+ elif tag in ('iframe', 'frame'):
+ stats['iframes'] += 1
+ elif tag == 'img':
+ stats['images'] += 1
+
+ # Scroll containers are tallied separately from the tag counters above
+ if original.is_actually_scrollable:
+ stats['scroll_containers'] += 1
+
+ # A node counts as interactive when the simplified node carries an interactive_index
+ if node.interactive_index is not None:
+ stats['interactive_elements'] += 1
+ # NOTE(review): the any() below dereferences child.original_node without the None guard used at the top of traverse_node
+ # Each shadow host is counted exactly once, as either closed or open
+ if node.is_shadow_host:
+ # Closed when any direct child is a DOCUMENT_FRAGMENT_NODE whose shadow_root_type is 'closed' (case-insensitive)
+ has_closed_shadow = any(
+ child.original_node.node_type == NodeType.DOCUMENT_FRAGMENT_NODE
+ and child.original_node.shadow_root_type
+ and child.original_node.shadow_root_type.lower() == 'closed'
+ for child in node.children
+ )
+ if has_closed_shadow:
+ stats['shadow_closed'] += 1
+ else:
+ stats['shadow_open'] += 1
+
+ elif original.node_type == NodeType.DOCUMENT_FRAGMENT_NODE:
+ # Shadow-root fragment: deliberately a no-op branch
+ # The host element is already tallied as shadow_open/shadow_closed above, so counting here would double-count
+ pass
+
+ # Recurse into each child; NOTE(review): plain recursion, so depth is bounded by the interpreter's recursion limit
+ for child in node.children:
+ traverse_node(child)
+
+ traverse_node(self.browser_state.dom_state._root)
+ return stats
+
@observe_debug(ignore_input=True, ignore_output=True, name='_get_browser_state_description')
def _get_browser_state_description(self) -> str:
+ # Extract page statistics first
+ page_stats = self._extract_page_statistics()
+
+ # Format statistics for LLM
+ stats_text = ''
+ if page_stats['total_elements'] < 10:
+ stats_text += 'Page appears empty (SPA not loaded?) - '
+ stats_text += f'{page_stats["links"]} links, {page_stats["interactive_elements"]} interactive, '
+ stats_text += f'{page_stats["iframes"]} iframes, {page_stats["scroll_containers"]} scroll containers'
+ if page_stats['shadow_open'] > 0 or page_stats['shadow_closed'] > 0:
+ stats_text += f', {page_stats["shadow_open"]} shadow(open), {page_stats["shadow_closed"]} shadow(closed)'
+ if page_stats['images'] > 0:
+ stats_text += f', {page_stats["images"]} images'
+ stats_text += f', {page_stats["total_elements"]} total elements'
+ stats_text += '\n\n'
+
elements_text = self.browser_state.dom_state.llm_representation(include_attributes=self.include_attributes)
if len(elements_text) > self.max_clickable_elements_length:
@@ -122,9 +208,8 @@ class AgentMessagePrompt:
else:
truncated_text = ''
- has_content_above = (self.browser_state.pixels_above or 0) > 0
- has_content_below = (self.browser_state.pixels_below or 0) > 0
-
+ has_content_above = False
+ has_content_below = False
# Enhanced page information for the model
page_info_text = ''
if self.browser_state.page_info:
@@ -132,10 +217,11 @@ class AgentMessagePrompt:
# Compute page statistics dynamically
pages_above = pi.pixels_above / pi.viewport_height if pi.viewport_height > 0 else 0
pages_below = pi.pixels_below / pi.viewport_height if pi.viewport_height > 0 else 0
+ has_content_above = pages_above > 0
+ has_content_below = pages_below > 0
total_pages = pi.page_height / pi.viewport_height if pi.viewport_height > 0 else 0
current_page_position = pi.scroll_y / max(pi.page_height - pi.viewport_height, 1)
page_info_text = ''
- page_info_text += f'Viewport size: {pi.viewport_width}x{pi.viewport_height}px, Total page size: {pi.page_width}x{pi.page_height}px, '
page_info_text += f'{pages_above:.1f} pages above, '
page_info_text += f'{pages_below:.1f} pages below, '
page_info_text += f'{total_pages:.1f} total pages'
@@ -146,18 +232,14 @@ class AgentMessagePrompt:
if self.browser_state.page_info:
pi = self.browser_state.page_info
pages_above = pi.pixels_above / pi.viewport_height if pi.viewport_height > 0 else 0
- elements_text = f'... {self.browser_state.pixels_above} pixels above ({pages_above:.1f} pages) - scroll to see more or extract structured data if you are looking for specific information ...\n{elements_text}'
- else:
- elements_text = f'... {self.browser_state.pixels_above} pixels above - scroll to see more or extract structured data if you are looking for specific information ...\n{elements_text}'
+ elements_text = f'... {pages_above:.1f} pages above - scroll to see more or extract structured data if you are looking for specific information ...\n{elements_text}'
else:
elements_text = f'[Start of page]\n{elements_text}'
if has_content_below:
if self.browser_state.page_info:
pi = self.browser_state.page_info
pages_below = pi.pixels_below / pi.viewport_height if pi.viewport_height > 0 else 0
- elements_text = f'{elements_text}\n... {self.browser_state.pixels_below} pixels below ({pages_below:.1f} pages) - scroll to see more or extract structured data if you are looking for specific information ...'
- else:
- elements_text = f'{elements_text}\n... {self.browser_state.pixels_below} pixels below - scroll to see more or extract structured data if you are looking for specific information ...'
+ elements_text = f'{elements_text}\n... {pages_below:.1f} pages below - scroll to see more or extract structured data if you are looking for specific information ...'
else:
elements_text = f'{elements_text}\n[End of page]'
else:
@@ -190,7 +272,7 @@ class AgentMessagePrompt:
if self.include_recent_events and self.browser_state.recent_events:
recent_events_text = f'Recent browser events: {self.browser_state.recent_events}\n'
- browser_state = f"""{current_tab_text}
+ browser_state = f"""{stats_text}{current_tab_text}
Available tabs:
{tabs_text}
{page_info_text}
@@ -205,9 +287,6 @@ Available tabs:
else:
step_info_description = ''
- time_str = datetime.now().strftime('%Y-%m-%d %H:%M')
- step_info_description += f'Current date and time: {time_str}'
-
time_str = datetime.now().strftime('%Y-%m-%d')
step_info_description += f'Current date: {time_str}'
diff --git a/browser_use/browser/watchdogs/default_action_watchdog.py b/browser_use/browser/watchdogs/default_action_watchdog.py
index dc7fcd241..02d252ced 100644
--- a/browser_use/browser/watchdogs/default_action_watchdog.py
+++ b/browser_use/browser/watchdogs/default_action_watchdog.py
@@ -71,7 +71,7 @@ class DefaultActionWatchdog(BaseWatchdog):
msg = f'Downloaded file to {download_path}'
self.logger.info(f'š¾ {msg}')
else:
- msg = f'Clicked button with index {index_for_logging}: {element_node.get_all_children_text(max_depth=2)}'
+ msg = f'Clicked button {element_node.node_name}: {element_node.get_all_children_text(max_depth=2)}'
self.logger.debug(f'š±ļø {msg}')
self.logger.debug(f'Element xpath: {element_node.xpath}')
@@ -1912,7 +1912,7 @@ class DefaultActionWatchdog(BaseWatchdog):
self.logger.error(msg)
raise BrowserError(message=msg, long_term_memory=msg)
except Exception as e:
- msg = f'Failed to get dropdown options for element with index {index_for_logging}'
+ msg = 'Failed to get dropdown options'
error_msg = f'{msg}: {str(e)}'
self.logger.error(error_msg)
raise BrowserError(
diff --git a/browser_use/dom/serializer/serializer.py b/browser_use/dom/serializer/serializer.py
index 436faf20e..1b199965d 100644
--- a/browser_use/dom/serializer/serializer.py
+++ b/browser_use/dom/serializer/serializer.py
@@ -137,13 +137,16 @@ class DOMTreeSerializer:
return None
if node.node_type == NodeType.DOCUMENT_FRAGMENT_NODE:
- # Super simple pass-through for shadow DOM elements
+ # ENHANCED shadow DOM processing - always include shadow content
simplified = SimplifiedNode(original_node=node, children=[])
for child in node.children_and_shadow_roots:
simplified_child = self._create_simplified_tree(child, depth + 1)
if simplified_child:
simplified.children.append(simplified_child)
- return simplified
+
+ # Always return shadow DOM fragments, even if children seem empty
+ # Shadow DOM often contains the actual interactive content in SPAs
+ return simplified if simplified.children else SimplifiedNode(original_node=node, children=[])
elif node.node_type == NodeType.ELEMENT_NODE:
# Skip non-content elements
@@ -161,19 +164,26 @@ class DOMTreeSerializer:
is_visible = node.is_visible
is_scrollable = node.is_actually_scrollable
+ has_shadow_content = bool(node.children_and_shadow_roots)
- # Include if interactive (regardless of visibility), or scrollable, or has children to process
+ # ENHANCED SHADOW DOM DETECTION: Include shadow hosts even if not visible
+ is_shadow_host = any(child.node_type == NodeType.DOCUMENT_FRAGMENT_NODE for child in node.children_and_shadow_roots)
- if is_visible or is_scrollable or bool(node.children_and_shadow_roots):
- simplified = SimplifiedNode(original_node=node, children=[])
- # simplified._analysis = analysis # Store analysis for grouping
+ # Include if visible, scrollable, has children/shadow roots to process, or is a shadow host
+ if is_visible or is_scrollable or has_shadow_content or is_shadow_host:
+ simplified = SimplifiedNode(original_node=node, children=[], is_shadow_host=is_shadow_host)
- # Process children
+ # Process ALL children, including shadow roots
for child in node.children_and_shadow_roots:
simplified_child = self._create_simplified_tree(child, depth + 1)
if simplified_child:
simplified.children.append(simplified_child)
+ # SHADOW DOM SPECIAL CASE: return shadow hosts that ended up with children, even if the host is not visible
+ # Many component-based SPAs (web components, some React/Vue setups) render their content in shadow DOM
+ if is_shadow_host and simplified.children:
+ return simplified
+
# Return if meaningful or has meaningful children
if is_visible or is_scrollable or simplified.children:
return simplified
@@ -449,23 +459,34 @@ class DOMTreeSerializer:
# Build attributes string
attributes_html_str = DOMTreeSerializer._build_attributes_string(node.original_node, include_attributes, '')
- # Build the line
+ # Build the line with shadow host indicator
+ shadow_prefix = ''
+ if node.is_shadow_host:
+ # Check if any shadow children are closed
+ has_closed_shadow = any(
+ child.original_node.node_type == NodeType.DOCUMENT_FRAGMENT_NODE
+ and child.original_node.shadow_root_type
+ and child.original_node.shadow_root_type.lower() == 'closed'
+ for child in node.children
+ )
+ shadow_prefix = '|SHADOW(closed)|' if has_closed_shadow else '|SHADOW(open)|'
+
if should_show_scroll and node.interactive_index is None:
# Scrollable container but not clickable
- line = f'{depth_str}|SCROLL|<{node.original_node.tag_name}'
+ line = f'{depth_str}{shadow_prefix}|SCROLL|<{node.original_node.tag_name}'
elif node.interactive_index is not None:
# Clickable (and possibly scrollable)
new_prefix = '*' if node.is_new else ''
scroll_prefix = '|SCROLL+' if should_show_scroll else '['
- line = f'{depth_str}{new_prefix}{scroll_prefix}{node.interactive_index}]<{node.original_node.tag_name}'
+ line = f'{depth_str}{shadow_prefix}{new_prefix}{scroll_prefix}{node.interactive_index}]<{node.original_node.tag_name}'
elif node.original_node.tag_name.upper() == 'IFRAME':
# Iframe element (not interactive)
- line = f'{depth_str}|IFRAME|<{node.original_node.tag_name}'
+ line = f'{depth_str}{shadow_prefix}|IFRAME|<{node.original_node.tag_name}'
elif node.original_node.tag_name.upper() == 'FRAME':
# Frame element (not interactive)
- line = f'{depth_str}|FRAME|<{node.original_node.tag_name}'
+ line = f'{depth_str}{shadow_prefix}|FRAME|<{node.original_node.tag_name}'
else:
- line = f'{depth_str}<{node.original_node.tag_name}'
+ line = f'{depth_str}{shadow_prefix}<{node.original_node.tag_name}'
if attributes_html_str:
line += f' {attributes_html_str}'
@@ -480,6 +501,25 @@ class DOMTreeSerializer:
formatted_text.append(line)
+ elif node.original_node.node_type == NodeType.DOCUMENT_FRAGMENT_NODE:
+ # Shadow DOM representation - show clearly to LLM
+ if node.original_node.shadow_root_type and node.original_node.shadow_root_type.lower() == 'closed':
+ formatted_text.append(f'{depth_str}ā¼ Shadow Content (Closed)')
+ else:
+ formatted_text.append(f'{depth_str}ā¼ Shadow Content (Open)')
+
+ next_depth += 1
+
+ # Process shadow DOM children
+ for child in node.children:
+ child_text = DOMTreeSerializer.serialize_tree(child, include_attributes, next_depth)
+ if child_text:
+ formatted_text.append(child_text)
+
+ # Close shadow DOM indicator
+ if node.children: # Only show close if we had content
+ formatted_text.append(f'{depth_str}ā² Shadow Content End')
+
elif node.original_node.node_type == NodeType.TEXT_NODE:
# Include visible text
is_visible = node.original_node.snapshot_node and node.original_node.is_visible
@@ -492,11 +532,12 @@ class DOMTreeSerializer:
clean_text = node.original_node.node_value.strip()
formatted_text.append(f'{depth_str}{clean_text}')
- # Process children
- for child in node.children:
- child_text = DOMTreeSerializer.serialize_tree(child, include_attributes, next_depth)
- if child_text:
- formatted_text.append(child_text)
+ # Process children (for non-shadow elements)
+ if node.original_node.node_type != NodeType.DOCUMENT_FRAGMENT_NODE:
+ for child in node.children:
+ child_text = DOMTreeSerializer.serialize_tree(child, include_attributes, next_depth)
+ if child_text:
+ formatted_text.append(child_text)
return '\n'.join(formatted_text)
diff --git a/browser_use/dom/views.py b/browser_use/dom/views.py
index cbcaadaa1..061b070a8 100644
--- a/browser_use/dom/views.py
+++ b/browser_use/dom/views.py
@@ -19,6 +19,8 @@ DEFAULT_INCLUDE_ATTRIBUTES = [
'title',
'type',
'checked',
+ # 'class',
+ 'id',
'name',
'role',
'value',
@@ -51,6 +53,51 @@ DEFAULT_INCLUDE_ATTRIBUTES = [
'ax_name',
]
+STATIC_ATTRIBUTES = {  # attribute names kept when building the attributes string that feeds the element hash
+ 'class',
+ 'id',
+ 'name',
+ 'type',
+ 'placeholder',
+ 'aria-label',
+ 'title',
+ # 'aria-expanded',  # NOTE(review): left out, presumably because it changes at runtime - confirm
+ 'role',
+ 'data-testid',
+ 'data-test',
+ 'data-cy',
+ 'data-selenium',
+ 'for',
+ 'required',
+ 'disabled',
+ 'readonly',
+ 'checked',
+ 'selected',
+ 'multiple',
+ 'href',
+ 'target',
+ 'rel',
+ 'aria-describedby',
+ 'aria-labelledby',
+ 'aria-controls',
+ 'aria-owns',
+ 'aria-live',
+ 'aria-atomic',
+ 'aria-busy',
+ 'aria-disabled',
+ 'aria-hidden',
+ 'aria-pressed',
+ 'aria-checked',
+ 'aria-selected',
+ 'tabindex',
+ 'alt',
+ 'src',
+ 'lang',
+ 'itemscope',
+ 'itemtype',
+ 'itemprop',
+}
+
@dataclass
class CurrentPageTargets:
@@ -93,6 +140,7 @@ class SimplifiedNode:
ignored_by_paint_order: bool = False # More info in dom/serializer/paint_order.py
excluded_by_parent: bool = False # New field for bbox filtering
+ is_shadow_host: bool = False # New field for shadow DOM hosts
def _clean_original_node_json(self, node_json: dict) -> dict:
"""Recursively remove children_nodes and shadow_roots from original_node JSON."""
@@ -683,8 +731,9 @@ class EnhancedDOMTreeNode:
parent_branch_path = self._get_parent_branch_path()
parent_branch_path_string = '/'.join(parent_branch_path)
- # Get attributes hash
- attributes_string = ''.join(f'{key}={value}' for key, value in self.attributes.items())
+ attributes_string = ''.join(
+ f'{k}={v}' for k, v in sorted((k, v) for k, v in self.attributes.items() if k in STATIC_ATTRIBUTES)
+ )
# Combine both for final hash
combined_string = f'{parent_branch_path_string}|{attributes_string}'
diff --git a/browser_use/sync/service.py b/browser_use/sync/service.py
index b4eb24872..f046d2831 100644
--- a/browser_use/sync/service.py
+++ b/browser_use/sync/service.py
@@ -113,14 +113,14 @@ class CloudSync:
f'Failed to send sync event: POST {response.request.url} {response.status_code} - {response.text}'
)
except httpx.TimeoutException:
- logger.warning(f'Event send timed out after 10 seconds: {event}')
+ logger.debug(f'Event send timed out after 10 seconds: {event}')
except httpx.ConnectError as e:
# logger.warning(f'ā ļø Failed to connect to cloud service at {self.base_url}: {e}')
pass
except httpx.HTTPError as e:
- logger.warning(f'HTTP error sending event {event}: {type(e).__name__}: {e}')
+ logger.debug(f'HTTP error sending event {event}: {type(e).__name__}: {e}')
except Exception as e:
- logger.warning(f'Unexpected error sending event {event}: {type(e).__name__}: {e}')
+ logger.debug(f'Unexpected error sending event {event}: {type(e).__name__}: {e}')
async def _background_auth(self, agent_session_id: str) -> None:
"""Run authentication in background or show cloud URL if already authenticated"""
diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py
index 19c37cca2..9caf2d7b3 100644
--- a/browser_use/tools/service.py
+++ b/browser_use/tools/service.py
@@ -293,7 +293,7 @@ class Tools(Generic[Context]):
await event
# Wait for handler to complete and get any exception or metadata
click_metadata = await event.event_result(raise_if_any=True, raise_if_none=False)
- memory = f'Clicked element with index {params.index}'
+ memory = 'Clicked element'
if params.while_holding_ctrl:
memory += ' and opened in new tab'
diff --git a/examples/features/rerun_history.py b/examples/features/rerun_history.py
index dac51b4c1..3696d6fcd 100644
--- a/examples/features/rerun_history.py
+++ b/examples/features/rerun_history.py
@@ -24,43 +24,17 @@ from browser_use.llm.openai.chat import ChatOpenAI
async def main():
# Example task to demonstrate history saving and rerunning
- task = 'Go to GitHub and find the browser-use repository'
history_file = Path('agent_history.json')
+ task = 'Go to https://browser-use.github.io/stress-tests/challenges/ember-form.html and fill the form with example data.'
llm = ChatOpenAI(model='gpt-4.1-mini')
- # Step 1: Run agent and save history
- print('š Running agent and saving history...')
-
- agent = Agent(
- task=task,
- llm=llm,
- )
-
- # Run the agent
- history = await agent.run(max_steps=5)
-
- # Save the history for later rerun
+ agent = Agent(task=task, llm=llm, max_actions_per_step=1)
+ await agent.run(max_steps=5)
agent.save_history(history_file)
- print(f'✅ History saved to {history_file}')
- print(f'š Completed {len(history.history)} steps')
+ rerun_agent = Agent(task='', llm=llm)
- # Step 2: Load and rerun the history
- print('\nš Loading and rerunning history...')
-
- # Create new agent for rerunning (task can be empty since we're replaying)
- rerun_agent = Agent(
- task='',
- llm=llm,
- )
-
- # Load and rerun the saved history
- results = await rerun_agent.load_and_rerun(
- history_file=history_file,
- max_retries=3, # Retry failed actions up to 3 times
- skip_failures=True, # Continue even if some actions fail
- delay_between_actions=1.0, # Wait 1 second between actions
- )
+ await rerun_agent.load_and_rerun(history_file)
if __name__ == '__main__':
diff --git a/tests/ci/test_browser_event_ClickElementEvent.py b/tests/ci/test_browser_event_ClickElementEvent.py
index 08265ac98..6a8b62684 100644
--- a/tests/ci/test_browser_event_ClickElementEvent.py
+++ b/tests/ci/test_browser_event_ClickElementEvent.py
@@ -188,9 +188,7 @@ class TestClickElementEvent:
result_text = result.extracted_content or result.long_term_memory
# Core logic validation: Verify click was successful
assert result_text is not None
- assert f'Clicked element with index {button_index}' in result_text, (
- f'Expected click confirmation in result content, got: {result_text}'
- )
+ assert 'Clicked element' in result_text, f'Expected click confirmation in result content, got: {result_text}'
# Note: The click action doesn't include button text in the result, only the index
# Verify the click actually had an effect on the page using CDP
@@ -262,9 +260,7 @@ class TestClickElementEvent:
assert isinstance(result, ActionResult)
result_text = result.extracted_content or result.long_term_memory
assert result_text is not None
- assert f'Clicked element with index {link_index}' in result_text, (
- f'Expected click confirmation in result content, got: {result_text}'
- )
+ assert 'Clicked element' in result_text, f'Expected click confirmation in result content, got: {result_text}'
# Verify that a new tab was opened
tabs = await browser_session.get_tabs()