From 9618835c976498fd4d88354a202f7fff61180e16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Wed, 17 Sep 2025 09:48:17 -0700 Subject: [PATCH 01/15] fix element hash --- browser_use/dom/views.py | 15 ++++++++++++- examples/features/rerun_history.py | 35 +++++++----------------------- 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/browser_use/dom/views.py b/browser_use/dom/views.py index cbcaadaa1..49fbb8edf 100644 --- a/browser_use/dom/views.py +++ b/browser_use/dom/views.py @@ -684,7 +684,20 @@ class EnhancedDOMTreeNode: parent_branch_path_string = '/'.join(parent_branch_path) # Get attributes hash - attributes_string = ''.join(f'{key}={value}' for key, value in self.attributes.items()) + static_attributes = set( + [ + 'class', + 'id', + 'name', + 'type', + 'placeholder', + 'aria-label', + 'title', + 'value', + 'aria-expanded', + ] + ) + attributes_string = ''.join(f'{key}={value}' for key, value in self.attributes.items() if key in static_attributes) # Combine both for final hash combined_string = f'{parent_branch_path_string}|{attributes_string}' diff --git a/examples/features/rerun_history.py b/examples/features/rerun_history.py index dac51b4c1..0cd8a9f8f 100644 --- a/examples/features/rerun_history.py +++ b/examples/features/rerun_history.py @@ -24,42 +24,23 @@ from browser_use.llm.openai.chat import ChatOpenAI async def main(): # Example task to demonstrate history saving and rerunning - task = 'Go to GitHub and find the browser-use repository' history_file = Path('agent_history.json') + task = 'Go to GitHub and find the browser-use repository' llm = ChatOpenAI(model='gpt-4.1-mini') - # Step 1: Run agent and save history - print('šŸš€ Running agent and saving history...') + # agent = Agent(task=task, llm=llm) - agent = Agent( - task=task, - llm=llm, - ) + # await agent.run(max_steps=5) - # Run the agent - history = await agent.run(max_steps=5) + # agent.save_history(history_file) - # Save the history for later rerun - agent.save_history(history_file) + rerun_agent = Agent(task='', llm=llm) - print(f'āœ… History saved to {history_file}') - print(f'šŸ“Š Completed {len(history.history)} steps') - - # Step 2: Load and rerun the history - print('\nšŸ”„ Loading and rerunning history...') - - # Create new agent for rerunning (task can be empty since we're replaying) - rerun_agent = Agent( - task='', - llm=llm, - ) - - # Load and rerun the saved history results = await rerun_agent.load_and_rerun( history_file=history_file, - max_retries=3, # Retry failed actions up to 3 times - skip_failures=True, # Continue even if some actions fail - delay_between_actions=1.0, # Wait 1 second between actions + max_retries=3, + skip_failures=True, + delay_between_actions=1.0, ) From 40ce0d382dd08466e520946b447cb0c28d336dd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Wed, 17 Sep 2025 09:51:34 -0700 Subject: [PATCH 02/15] Enhance static attributes list in EnhancedDOMTreeNode for improved DOM serialization --- browser_use/dom/views.py | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/browser_use/dom/views.py b/browser_use/dom/views.py index 49fbb8edf..703102b0f 100644 --- a/browser_use/dom/views.py +++ b/browser_use/dom/views.py @@ -693,8 +693,41 @@ class EnhancedDOMTreeNode: 'placeholder', 'aria-label', 'title', - 'value', 'aria-expanded', + 'role', + 'data-testid', + 'data-test', + 'data-cy', + 'data-selenium', + 'for', + 'required', + 'disabled', + 'readonly', + 'checked', + 'selected', + 'multiple', + 'href', + 'target', + 'rel', + 'aria-describedby', + 'aria-labelledby', + 'aria-controls', + 'aria-owns', + 'aria-live', + 'aria-atomic', + 'aria-busy', + 'aria-disabled', + 'aria-hidden', + 'aria-pressed', + 'aria-checked', + 'aria-selected', + 'tabindex', + 'alt', + 'src', + 'lang', + 'itemscope', + 'itemtype', + 'itemprop', ] ) attributes_string = ''.join(f'{key}={value}' for key, value in self.attributes.items() if key in static_attributes) From a51bc0ab546098dc82c87cf7050a9378d666014b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Wed, 17 Sep 2025 09:52:47 -0700 Subject: [PATCH 03/15] Include exmaple --- examples/features/rerun_history.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/features/rerun_history.py b/examples/features/rerun_history.py index 0cd8a9f8f..81f170f1f 100644 --- a/examples/features/rerun_history.py +++ b/examples/features/rerun_history.py @@ -28,11 +28,11 @@ async def main(): task = 'Go to GitHub and find the browser-use repository' llm = ChatOpenAI(model='gpt-4.1-mini') - # agent = Agent(task=task, llm=llm) + agent = Agent(task=task, llm=llm) - # await agent.run(max_steps=5) + await agent.run(max_steps=5) - # agent.save_history(history_file) + agent.save_history(history_file) rerun_agent = Agent(task='', llm=llm) From 1be170faaab788110ef285c23f0319ae0d5cf2f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Wed, 17 Sep 2025 10:10:15 -0700 Subject: [PATCH 04/15] Fix logs --- browser_use/sync/service.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/browser_use/sync/service.py b/browser_use/sync/service.py index b4eb24872..f046d2831 100644 --- a/browser_use/sync/service.py +++ b/browser_use/sync/service.py @@ -113,14 +113,14 @@ class CloudSync: f'Failed to send sync event: POST {response.request.url} {response.status_code} - {response.text}' ) except httpx.TimeoutException: - logger.warning(f'Event send timed out after 10 seconds: {event}') + logger.debug(f'Event send timed out after 10 seconds: {event}') except httpx.ConnectError as e: # logger.warning(f'āš ļø Failed to connect to cloud service at {self.base_url}: {e}') pass except httpx.HTTPError as e: - logger.warning(f'HTTP error sending event {event}: {type(e).__name__}: {e}') + logger.debug(f'HTTP error sending event {event}: {type(e).__name__}: {e}') except Exception as e: - logger.warning(f'Unexpected error sending event {event}: {type(e).__name__}: {e}') + logger.debug(f'Unexpected error sending event {event}: {type(e).__name__}: {e}') async def _background_auth(self, agent_session_id: str) -> None: """Run authentication in background or show cloud URL if already authenticated""" From 3e9f891a67b97e94d0fa34c98dd6ac2feadc0db3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Wed, 17 Sep 2025 10:10:28 -0700 Subject: [PATCH 05/15] Different example --- examples/features/rerun_history.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/examples/features/rerun_history.py b/examples/features/rerun_history.py index 81f170f1f..3696d6fcd 100644 --- a/examples/features/rerun_history.py +++ b/examples/features/rerun_history.py @@ -25,23 +25,16 @@ from browser_use.llm.openai.chat import ChatOpenAI async def main(): # Example task to demonstrate history saving and rerunning history_file = Path('agent_history.json') - task = 'Go to GitHub and find the browser-use repository' + task = 'Go to https://browser-use.github.io/stress-tests/challenges/ember-form.html and fill the form with example data.' llm = ChatOpenAI(model='gpt-4.1-mini') - agent = Agent(task=task, llm=llm) - + agent = Agent(task=task, llm=llm, max_actions_per_step=1) await agent.run(max_steps=5) - agent.save_history(history_file) rerun_agent = Agent(task='', llm=llm) - results = await rerun_agent.load_and_rerun( - history_file=history_file, - max_retries=3, - skip_failures=True, - delay_between_actions=1.0, - ) + await rerun_agent.load_and_rerun(history_file) if __name__ == '__main__': From 48c75ffd3d28c0d17306a18e49025910d066a501 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Wed, 17 Sep 2025 11:04:23 -0700 Subject: [PATCH 06/15] Update browser_use/dom/views.py Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com> --- browser_use/dom/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/browser_use/dom/views.py b/browser_use/dom/views.py index 703102b0f..3053ecde6 100644 --- a/browser_use/dom/views.py +++ b/browser_use/dom/views.py @@ -730,7 +730,7 @@ class EnhancedDOMTreeNode: 'itemprop', ] ) - attributes_string = ''.join(f'{key}={value}' for key, value in self.attributes.items() if key in static_attributes) + attributes_string = ''.join(f"{k}={v}" for k, v in sorted((k, v) for k, v in self.attributes.items() if k in static_attributes)) # Combine both for final hash combined_string = f'{parent_branch_path_string}|{attributes_string}' From 5d6c199f4fa1a2463b5d84775e21bb5bbcd5db9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Wed, 17 Sep 2025 11:04:31 -0700 Subject: [PATCH 07/15] Update browser_use/dom/views.py Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com> --- browser_use/dom/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/browser_use/dom/views.py b/browser_use/dom/views.py index 3053ecde6..37d25fb0b 100644 --- a/browser_use/dom/views.py +++ b/browser_use/dom/views.py @@ -693,7 +693,7 @@ class EnhancedDOMTreeNode: 'placeholder', 'aria-label', 'title', - 'aria-expanded', + # 'aria-expanded', 'role', 'data-testid', 'data-test', From ca469caa317f30924582057ad6cfbc630f579d0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Wed, 17 Sep 2025 11:06:31 -0700 Subject: [PATCH 08/15] The __hash__ method inefficiently recreates the static_attributes set on every call. --- browser_use/dom/views.py | 96 ++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 47 deletions(-) diff --git a/browser_use/dom/views.py b/browser_use/dom/views.py index 37d25fb0b..80e54bde4 100644 --- a/browser_use/dom/views.py +++ b/browser_use/dom/views.py @@ -51,6 +51,53 @@ DEFAULT_INCLUDE_ATTRIBUTES = [ 'ax_name', ] +STATIC_ATTRIBUTES = set( + [ + 'class', + 'id', + 'name', + 'type', + 'placeholder', + 'aria-label', + 'title', + # 'aria-expanded', + 'role', + 'data-testid', + 'data-test', + 'data-cy', + 'data-selenium', + 'for', + 'required', + 'disabled', + 'readonly', + 'checked', + 'selected', + 'multiple', + 'href', + 'target', + 'rel', + 'aria-describedby', + 'aria-labelledby', + 'aria-controls', + 'aria-owns', + 'aria-live', + 'aria-atomic', + 'aria-busy', + 'aria-disabled', + 'aria-hidden', + 'aria-pressed', + 'aria-checked', + 'aria-selected', + 'tabindex', + 'alt', + 'src', + 'lang', + 'itemscope', + 'itemtype', + 'itemprop', + ] +) + @dataclass class CurrentPageTargets: @@ -683,54 +730,9 @@ class EnhancedDOMTreeNode: parent_branch_path = self._get_parent_branch_path() parent_branch_path_string = '/'.join(parent_branch_path) - # Get attributes hash - static_attributes = set( - [ - 'class', - 'id', - 'name', - 'type', - 'placeholder', - 'aria-label', - 'title', - # 'aria-expanded', - 'role', - 'data-testid', - 'data-test', - 'data-cy', - 'data-selenium', - 'for', - 'required', - 'disabled', - 'readonly', - 'checked', - 'selected', - 'multiple', - 'href', - 'target', - 'rel', - 'aria-describedby', - 'aria-labelledby', - 'aria-controls', - 'aria-owns', - 'aria-live', - 'aria-atomic', - 'aria-busy', - 'aria-disabled', - 'aria-hidden', - 'aria-pressed', - 'aria-checked', - 'aria-selected', - 'tabindex', - 'alt', - 'src', - 'lang', - 'itemscope', - 'itemtype', - 'itemprop', - ] + attributes_string = ''.join( + f'{k}={v}' for k, v in sorted((k, v) for k, v in self.attributes.items() if k in STATIC_ATTRIBUTES) ) - attributes_string = ''.join(f"{k}={v}" for k, v in sorted((k, v) for k, v in self.attributes.items() if k in static_attributes)) # Combine both for final hash combined_string = f'{parent_branch_path_string}|{attributes_string}' From 30eb8fe36f70dc8bfed122f55778ebdb473082a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Wed, 17 Sep 2025 11:07:05 -0700 Subject: [PATCH 09/15] Linter --- browser_use/dom/views.py | 90 ++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 46 deletions(-) diff --git a/browser_use/dom/views.py b/browser_use/dom/views.py index 80e54bde4..854565879 100644 --- a/browser_use/dom/views.py +++ b/browser_use/dom/views.py @@ -51,52 +51,50 @@ DEFAULT_INCLUDE_ATTRIBUTES = [ 'ax_name', ] -STATIC_ATTRIBUTES = set( - [ - 'class', - 'id', - 'name', - 'type', - 'placeholder', - 'aria-label', - 'title', - # 'aria-expanded', - 'role', - 'data-testid', - 'data-test', - 'data-cy', - 'data-selenium', - 'for', - 'required', - 'disabled', - 'readonly', - 'checked', - 'selected', - 'multiple', - 'href', - 'target', - 'rel', - 'aria-describedby', - 'aria-labelledby', - 'aria-controls', - 'aria-owns', - 'aria-live', - 'aria-atomic', - 'aria-busy', - 'aria-disabled', - 'aria-hidden', - 'aria-pressed', - 'aria-checked', - 'aria-selected', - 'tabindex', - 'alt', - 'src', - 'lang', - 'itemscope', - 'itemtype', - 'itemprop', - ] -) +STATIC_ATTRIBUTES = { + 'class', + 'id', + 'name', + 'type', + 'placeholder', + 'aria-label', + 'title', + # 'aria-expanded', + 'role', + 'data-testid', + 'data-test', + 'data-cy', + 'data-selenium', + 'for', + 'required', + 'disabled', + 'readonly', + 'checked', + 'selected', + 'multiple', + 'href', + 'target', + 'rel', + 'aria-describedby', + 'aria-labelledby', + 'aria-controls', + 'aria-owns', + 'aria-live', + 'aria-atomic', + 'aria-busy', + 'aria-disabled', + 'aria-hidden', + 'aria-pressed', + 'aria-checked', + 'aria-selected', + 'tabindex', + 'alt', + 'src', + 'lang', + 'itemscope', + 'itemtype', + 'itemprop', +} @dataclass From 9b6ae5900696ad60b2f403adc496436db4826140 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Thu, 11 Sep 2025 23:35:52 -0700 Subject: [PATCH 10/15] Fix pixel below --- browser_use/agent/prompts.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/browser_use/agent/prompts.py b/browser_use/agent/prompts.py index 33a545fb2..d3f3017dd 100644 --- a/browser_use/agent/prompts.py +++ b/browser_use/agent/prompts.py @@ -122,9 +122,8 @@ class AgentMessagePrompt: else: truncated_text = '' - has_content_above = (self.browser_state.pixels_above or 0) > 0 - has_content_below = (self.browser_state.pixels_below or 0) > 0 - + has_content_above = False + has_content_below = False # Enhanced page information for the model page_info_text = '' if self.browser_state.page_info: @@ -132,6 +131,8 @@ class AgentMessagePrompt: # Compute page statistics dynamically pages_above = pi.pixels_above / pi.viewport_height if pi.viewport_height > 0 else 0 pages_below = pi.pixels_below / pi.viewport_height if pi.viewport_height > 0 else 0 + has_content_above = pages_above > 0 + has_content_below = pages_below > 0 total_pages = pi.page_height / pi.viewport_height if pi.viewport_height > 0 else 0 current_page_position = pi.scroll_y / max(pi.page_height - pi.viewport_height, 1) page_info_text = '' @@ -146,18 +147,18 @@ class AgentMessagePrompt: if self.browser_state.page_info: pi = self.browser_state.page_info pages_above = pi.pixels_above / pi.viewport_height if pi.viewport_height > 0 else 0 - elements_text = f'... {self.browser_state.pixels_above} pixels above ({pages_above:.1f} pages) - scroll to see more or extract structured data if you are looking for specific information ...\n{elements_text}' + elements_text = f'... {pages_above:.1f} pages above - scroll to see more or extract structured data if you are looking for specific information ...\n{elements_text}' else: - elements_text = f'... {self.browser_state.pixels_above} pixels above - scroll to see more or extract structured data if you are looking for specific information ...\n{elements_text}' + elements_text = f'... {pages_above:.1f} pages above - scroll to see more or extract structured data if you are looking for specific information ...\n{elements_text}' else: elements_text = f'[Start of page]\n{elements_text}' if has_content_below: if self.browser_state.page_info: pi = self.browser_state.page_info pages_below = pi.pixels_below / pi.viewport_height if pi.viewport_height > 0 else 0 - elements_text = f'{elements_text}\n... {self.browser_state.pixels_below} pixels below ({pages_below:.1f} pages) - scroll to see more or extract structured data if you are looking for specific information ...' + elements_text = f'{elements_text}\n... {pages_below:.1f} pages below - scroll to see more or extract structured data if you are looking for specific information ...' else: - elements_text = f'{elements_text}\n... {self.browser_state.pixels_below} pixels below - scroll to see more or extract structured data if you are looking for specific information ...' + elements_text = f'{elements_text}\n... {pages_below:.1f} pages below - scroll to see more or extract structured data if you are looking for specific information ...' else: elements_text = f'{elements_text}\n[End of page]' else: From d1f33c0c5e36a70ae25c37874aeff16712d2f29d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sun, 14 Sep 2025 20:12:48 -0700 Subject: [PATCH 11/15] Remove index return to avoid wrong index clicks --- browser_use/browser/watchdogs/default_action_watchdog.py | 2 +- browser_use/tools/service.py | 2 +- tests/ci/test_browser_event_ClickElementEvent.py | 8 ++------ 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/browser_use/browser/watchdogs/default_action_watchdog.py b/browser_use/browser/watchdogs/default_action_watchdog.py index dc7fcd241..180a5dedb 100644 --- a/browser_use/browser/watchdogs/default_action_watchdog.py +++ b/browser_use/browser/watchdogs/default_action_watchdog.py @@ -1912,7 +1912,7 @@ class DefaultActionWatchdog(BaseWatchdog): self.logger.error(msg) raise BrowserError(message=msg, long_term_memory=msg) except Exception as e: - msg = f'Failed to get dropdown options for element with index {index_for_logging}' + msg = 'Failed to get dropdown options' error_msg = f'{msg}: {str(e)}' self.logger.error(error_msg) raise BrowserError( diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py index 19c37cca2..9caf2d7b3 100644 --- a/browser_use/tools/service.py +++ b/browser_use/tools/service.py @@ -293,7 +293,7 @@ class Tools(Generic[Context]): await event # Wait for handler to complete and get any exception or metadata click_metadata = await event.event_result(raise_if_any=True, raise_if_none=False) - memory = f'Clicked element with index {params.index}' + memory = 'Clicked element' if params.while_holding_ctrl: memory += ' and opened in new tab' diff --git a/tests/ci/test_browser_event_ClickElementEvent.py b/tests/ci/test_browser_event_ClickElementEvent.py index 08265ac98..6a8b62684 100644 --- a/tests/ci/test_browser_event_ClickElementEvent.py +++ b/tests/ci/test_browser_event_ClickElementEvent.py @@ -188,9 +188,7 @@ class TestClickElementEvent: result_text = result.extracted_content or result.long_term_memory # Core logic validation: Verify click was successful assert result_text is not None - assert f'Clicked element with index {button_index}' in result_text, ( - f'Expected click confirmation in result content, got: {result_text}' - ) + assert 'Clicked element' in result_text, f'Expected click confirmation in result content, got: {result_text}' # Note: The click action doesn't include button text in the result, only the index # Verify the click actually had an effect on the page using CDP @@ -262,9 +260,7 @@ class TestClickElementEvent: assert isinstance(result, ActionResult) result_text = result.extracted_content or result.long_term_memory assert result_text is not None - assert f'Clicked element with index {link_index}' in result_text, ( - f'Expected click confirmation in result content, got: {result_text}' - ) + assert 'Clicked element' in result_text, f'Expected click confirmation in result content, got: {result_text}' # Verify that a new tab was opened tabs = await browser_session.get_tabs() From a6211d2df9da6b9b7d627558e06ee18629df2123 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sun, 14 Sep 2025 20:10:24 -0700 Subject: [PATCH 12/15] Refine page info reporting by removing viewport size details and current time from step info --- browser_use/agent/prompts.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/browser_use/agent/prompts.py b/browser_use/agent/prompts.py index 33a545fb2..928ed05ba 100644 --- a/browser_use/agent/prompts.py +++ b/browser_use/agent/prompts.py @@ -135,7 +135,6 @@ class AgentMessagePrompt: total_pages = pi.page_height / pi.viewport_height if pi.viewport_height > 0 else 0 current_page_position = pi.scroll_y / max(pi.page_height - pi.viewport_height, 1) page_info_text = '' - page_info_text += f'Viewport size: {pi.viewport_width}x{pi.viewport_height}px, Total page size: {pi.page_width}x{pi.page_height}px, ' page_info_text += f'{pages_above:.1f} pages above, ' page_info_text += f'{pages_below:.1f} pages below, ' page_info_text += f'{total_pages:.1f} total pages' @@ -205,9 +204,6 @@ Available tabs: else: step_info_description = '' - time_str = datetime.now().strftime('%Y-%m-%d %H:%M') - step_info_description += f'Current date and time: {time_str}' - time_str = datetime.now().strftime('%Y-%m-%d') step_info_description += f'Current date: {time_str}' From f46e9d5eb7c1c1cf31b8eedfc3f3c530ef20adf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?= <36313686+gregpr07@users.noreply.github.com> Date: Sun, 14 Sep 2025 19:49:44 -0700 Subject: [PATCH 13/15] removed info about which element clicked --- browser_use/browser/watchdogs/default_action_watchdog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/browser_use/browser/watchdogs/default_action_watchdog.py b/browser_use/browser/watchdogs/default_action_watchdog.py index 180a5dedb..02d252ced 100644 --- a/browser_use/browser/watchdogs/default_action_watchdog.py +++ b/browser_use/browser/watchdogs/default_action_watchdog.py @@ -71,7 +71,7 @@ class DefaultActionWatchdog(BaseWatchdog): msg = f'Downloaded file to {download_path}' self.logger.info(f'šŸ’¾ {msg}') else: - msg = f'Clicked button with index {index_for_logging}: {element_node.get_all_children_text(max_depth=2)}' + msg = f'Clicked button {element_node.node_name}: {element_node.get_all_children_text(max_depth=2)}' self.logger.debug(f'šŸ–±ļø {msg}') self.logger.debug(f'Element xpath: {element_node.xpath}') From 00d0574b969040016fdb42f545dda5a24e561fca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Wed, 17 Sep 2025 11:18:01 -0700 Subject: [PATCH 14/15] Refactor AgentMessagePrompt to remove redundant elements_text assignments for pages above and below. This simplifies the code and improves readability. --- browser_use/agent/prompts.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/browser_use/agent/prompts.py b/browser_use/agent/prompts.py index d3f3017dd..6b755e9be 100644 --- a/browser_use/agent/prompts.py +++ b/browser_use/agent/prompts.py @@ -148,8 +148,6 @@ class AgentMessagePrompt: pi = self.browser_state.page_info pages_above = pi.pixels_above / pi.viewport_height if pi.viewport_height > 0 else 0 elements_text = f'... {pages_above:.1f} pages above - scroll to see more or extract structured data if you are looking for specific information ...\n{elements_text}' - else: - elements_text = f'... {pages_above:.1f} pages above - scroll to see more or extract structured data if you are looking for specific information ...\n{elements_text}' else: elements_text = f'[Start of page]\n{elements_text}' if has_content_below: @@ -157,8 +155,6 @@ class AgentMessagePrompt: pi = self.browser_state.page_info pages_below = pi.pixels_below / pi.viewport_height if pi.viewport_height > 0 else 0 elements_text = f'{elements_text}\n... {pages_below:.1f} pages below - scroll to see more or extract structured data if you are looking for specific information ...' - else: - elements_text = f'{elements_text}\n... {pages_below:.1f} pages below - scroll to see more or extract structured data if you are looking for specific information ...' else: elements_text = f'{elements_text}\n[End of page]' else: From 6244c97876baf8b12030b2964d490164541ead9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Wed, 17 Sep 2025 11:32:11 -0700 Subject: [PATCH 15/15] Include information about dom metadata in prompt --- browser_use/agent/prompts.py | 88 +++++++++++++++++++++++- browser_use/dom/serializer/serializer.py | 77 ++++++++++++++++----- browser_use/dom/views.py | 3 + 3 files changed, 149 insertions(+), 19 deletions(-) diff --git a/browser_use/agent/prompts.py b/browser_use/agent/prompts.py index 928ed05ba..bd9436df0 100644 --- a/browser_use/agent/prompts.py +++ b/browser_use/agent/prompts.py @@ -2,6 +2,7 @@ import importlib.resources from datetime import datetime from typing import TYPE_CHECKING, Literal, Optional +from browser_use.dom.views import NodeType, SimplifiedNode from browser_use.llm.messages import ContentPartImageParam, ContentPartTextParam, ImageURL, SystemMessage, UserMessage from browser_use.observability import observe_debug from browser_use.utils import is_new_tab_page @@ -112,8 +113,93 @@ class AgentMessagePrompt: self.sample_images = sample_images or [] assert self.browser_state + def _extract_page_statistics(self) -> dict[str, int]: + """Extract high-level page statistics from DOM tree for LLM context""" + stats = { + 'links': 0, + 'iframes': 0, + 'shadow_open': 0, + 'shadow_closed': 0, + 'scroll_containers': 0, + 'images': 0, + 'interactive_elements': 0, + 'total_elements': 0, + } + + if not self.browser_state.dom_state or not self.browser_state.dom_state._root: + return stats + + def traverse_node(node: SimplifiedNode) -> None: + """Recursively traverse simplified DOM tree to count elements""" + if not node or not node.original_node: + return + + original = node.original_node + stats['total_elements'] += 1 + + # Count by node type and tag + if original.node_type == NodeType.ELEMENT_NODE: + tag = original.tag_name.lower() if original.tag_name else '' + + if tag == 'a': + stats['links'] += 1 + elif tag in ('iframe', 'frame'): + stats['iframes'] += 1 + elif tag == 'img': + stats['images'] += 1 + + # Check if scrollable + if original.is_actually_scrollable: + stats['scroll_containers'] += 1 + + # Check if interactive + if node.interactive_index is not None: + stats['interactive_elements'] += 1 + + # Check if this element hosts shadow DOM + if node.is_shadow_host: + # Check if any shadow children are closed + has_closed_shadow = any( + child.original_node.node_type == NodeType.DOCUMENT_FRAGMENT_NODE + and child.original_node.shadow_root_type + and child.original_node.shadow_root_type.lower() == 'closed' + for child in node.children + ) + if has_closed_shadow: + stats['shadow_closed'] += 1 + else: + stats['shadow_open'] += 1 + + elif original.node_type == NodeType.DOCUMENT_FRAGMENT_NODE: + # Shadow DOM fragment - these are the actual shadow roots + # But don't double-count since we count them at the host level above + pass + + # Traverse children + for child in node.children: + traverse_node(child) + + traverse_node(self.browser_state.dom_state._root) + return stats + @observe_debug(ignore_input=True, ignore_output=True, name='_get_browser_state_description') def _get_browser_state_description(self) -> str: + # Extract page statistics first + page_stats = self._extract_page_statistics() + + # Format statistics for LLM + stats_text = '' + if page_stats['total_elements'] < 10: + stats_text += 'Page appears empty (SPA not loaded?) - ' + stats_text += f'{page_stats["links"]} links, {page_stats["interactive_elements"]} interactive, ' + stats_text += f'{page_stats["iframes"]} iframes, {page_stats["scroll_containers"]} scroll containers' + if page_stats['shadow_open'] > 0 or page_stats['shadow_closed'] > 0: + stats_text += f', {page_stats["shadow_open"]} shadow(open), {page_stats["shadow_closed"]} shadow(closed)' + if page_stats['images'] > 0: + stats_text += f', {page_stats["images"]} images' + stats_text += f', {page_stats["total_elements"]} total elements' + stats_text += '\n\n' + elements_text = self.browser_state.dom_state.llm_representation(include_attributes=self.include_attributes) if len(elements_text) > self.max_clickable_elements_length: @@ -189,7 +275,7 @@ class AgentMessagePrompt: if self.include_recent_events and self.browser_state.recent_events: recent_events_text = f'Recent browser events: {self.browser_state.recent_events}\n' - browser_state = f"""{current_tab_text} + browser_state = f"""{stats_text}{current_tab_text} Available tabs: {tabs_text} {page_info_text} diff --git a/browser_use/dom/serializer/serializer.py b/browser_use/dom/serializer/serializer.py index 436faf20e..1b199965d 100644 --- a/browser_use/dom/serializer/serializer.py +++ b/browser_use/dom/serializer/serializer.py @@ -137,13 +137,16 @@ class DOMTreeSerializer: return None if node.node_type == NodeType.DOCUMENT_FRAGMENT_NODE: - # Super simple pass-through for shadow DOM elements + # ENHANCED shadow DOM processing - always include shadow content simplified = SimplifiedNode(original_node=node, children=[]) for child in node.children_and_shadow_roots: simplified_child = self._create_simplified_tree(child, depth + 1) if simplified_child: simplified.children.append(simplified_child) - return simplified + + # Always return shadow DOM fragments, even if children seem empty + # Shadow DOM often contains the actual interactive content in SPAs + return simplified if simplified.children else SimplifiedNode(original_node=node, children=[]) elif node.node_type == NodeType.ELEMENT_NODE: # Skip non-content elements @@ -161,19 +164,26 @@ class DOMTreeSerializer: is_visible = node.is_visible is_scrollable = node.is_actually_scrollable + has_shadow_content = bool(node.children_and_shadow_roots) - # Include if interactive (regardless of visibility), or scrollable, or has children to process + # ENHANCED SHADOW DOM DETECTION: Include shadow hosts even if not visible + is_shadow_host = any(child.node_type == NodeType.DOCUMENT_FRAGMENT_NODE for child in node.children_and_shadow_roots) - if is_visible or is_scrollable or bool(node.children_and_shadow_roots): - simplified = SimplifiedNode(original_node=node, children=[]) - # simplified._analysis = analysis # Store analysis for grouping + # Include if interactive (regardless of visibility), scrollable, has children, or is shadow host + if is_visible or is_scrollable or has_shadow_content or is_shadow_host: + simplified = SimplifiedNode(original_node=node, children=[], is_shadow_host=is_shadow_host) - # Process children + # Process ALL children including shadow roots with enhanced logging for child in node.children_and_shadow_roots: simplified_child = self._create_simplified_tree(child, depth + 1) if simplified_child: simplified.children.append(simplified_child) + # SHADOW DOM SPECIAL CASE: Always include shadow hosts even if not visible + # Many SPA frameworks (React, Vue) render content in shadow DOM + if is_shadow_host and simplified.children: + return simplified + # Return if meaningful or has meaningful children if is_visible or is_scrollable or simplified.children: return simplified @@ -449,23 +459,34 @@ class DOMTreeSerializer: # Build attributes string attributes_html_str = DOMTreeSerializer._build_attributes_string(node.original_node, include_attributes, '') - # Build the line + # Build the line with shadow host indicator + shadow_prefix = '' + if node.is_shadow_host: + # Check if any shadow children are closed + has_closed_shadow = any( + child.original_node.node_type == NodeType.DOCUMENT_FRAGMENT_NODE + and child.original_node.shadow_root_type + and child.original_node.shadow_root_type.lower() == 'closed' + for child in node.children + ) + shadow_prefix = '|SHADOW(closed)|' if has_closed_shadow else '|SHADOW(open)|' + if should_show_scroll and node.interactive_index is None: # Scrollable container but not clickable - line = f'{depth_str}|SCROLL|<{node.original_node.tag_name}' + line = f'{depth_str}{shadow_prefix}|SCROLL|<{node.original_node.tag_name}' elif node.interactive_index is not None: # Clickable (and possibly scrollable) new_prefix = '*' if node.is_new else '' scroll_prefix = '|SCROLL+' if should_show_scroll else '[' - line = f'{depth_str}{new_prefix}{scroll_prefix}{node.interactive_index}]<{node.original_node.tag_name}' + line = f'{depth_str}{shadow_prefix}{new_prefix}{scroll_prefix}{node.interactive_index}]<{node.original_node.tag_name}' elif node.original_node.tag_name.upper() == 'IFRAME': # Iframe element (not interactive) - line = f'{depth_str}|IFRAME|<{node.original_node.tag_name}' + line = f'{depth_str}{shadow_prefix}|IFRAME|<{node.original_node.tag_name}' elif node.original_node.tag_name.upper() == 'FRAME': # Frame element (not interactive) - line = f'{depth_str}|FRAME|<{node.original_node.tag_name}' + line = f'{depth_str}{shadow_prefix}|FRAME|<{node.original_node.tag_name}' else: - line = f'{depth_str}<{node.original_node.tag_name}' + line = f'{depth_str}{shadow_prefix}<{node.original_node.tag_name}' if attributes_html_str: line += f' {attributes_html_str}' @@ -480,6 +501,25 @@ class DOMTreeSerializer: formatted_text.append(line) + elif node.original_node.node_type == NodeType.DOCUMENT_FRAGMENT_NODE: + # Shadow DOM representation - show clearly to LLM + if node.original_node.shadow_root_type and node.original_node.shadow_root_type.lower() == 'closed': + formatted_text.append(f'{depth_str}ā–¼ Shadow Content (Closed)') + else: + formatted_text.append(f'{depth_str}ā–¼ Shadow Content (Open)') + + next_depth += 1 + + # Process shadow DOM children + for child in node.children: + child_text = DOMTreeSerializer.serialize_tree(child, include_attributes, next_depth) + if child_text: + formatted_text.append(child_text) + + # Close shadow DOM indicator + if node.children: # Only show close if we had content + formatted_text.append(f'{depth_str}ā–² Shadow Content End') + elif node.original_node.node_type == NodeType.TEXT_NODE: # Include visible text is_visible = node.original_node.snapshot_node and node.original_node.is_visible @@ -492,11 +532,12 @@ class DOMTreeSerializer: clean_text = node.original_node.node_value.strip() formatted_text.append(f'{depth_str}{clean_text}') - # Process children - for child in node.children: - child_text = DOMTreeSerializer.serialize_tree(child, include_attributes, next_depth) - if child_text: - formatted_text.append(child_text) + # Process children (for non-shadow elements) + if node.original_node.node_type != NodeType.DOCUMENT_FRAGMENT_NODE: + for child in node.children: + child_text = DOMTreeSerializer.serialize_tree(child, include_attributes, next_depth) + if child_text: + formatted_text.append(child_text) return '\n'.join(formatted_text) diff --git a/browser_use/dom/views.py b/browser_use/dom/views.py index 854565879..061b070a8 100644 --- a/browser_use/dom/views.py +++ b/browser_use/dom/views.py @@ -19,6 +19,8 @@ DEFAULT_INCLUDE_ATTRIBUTES = [ 'title', 'type', 'checked', + # 'class', + 'id', 'name', 'role', 'value', @@ -138,6 +140,7 @@ class SimplifiedNode: ignored_by_paint_order: bool = False # More info in dom/serializer/paint_order.py excluded_by_parent: bool = False # New field for bbox filtering + is_shadow_host: bool = False # New field for shadow DOM hosts def _clean_original_node_json(self, node_json: dict) -> dict: """Recursively remove children_nodes and shadow_roots from original_node JSON."""