From 461bce7b229768ce5bcaa57e46811ce977f5699d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Fri, 3 Oct 2025 15:02:28 -0700
Subject: [PATCH 01/45] Request screenshot

---
 browser_use/agent/message_manager/service.py | 20 +++++++++++++++++---
 browser_use/tools/service.py                 | 15 +++++++++++++++
 2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/browser_use/agent/message_manager/service.py b/browser_use/agent/message_manager/service.py
index d64108e12..2d922a125 100644
--- a/browser_use/agent/message_manager/service.py
+++ b/browser_use/agent/message_manager/service.py
@@ -305,11 +305,25 @@ class MessageManager:
 			self.sensitive_data = effective_sensitive_data
 			self.sensitive_data_description = self._get_sensitive_data_description(browser_state_summary.url)
 
-		# Use only the current screenshot
+		# Use only the current screenshot, but check if action results request screenshot inclusion
 		screenshots = []
-		if browser_state_summary.screenshot:
+		include_screenshot_requested = False
+
+		# Check if any action results request screenshot inclusion
+		if result:
+			for action_result in result:
+				if action_result.metadata and action_result.metadata.get('include_screenshot'):
+					include_screenshot_requested = True
+					logger.debug('Screenshot inclusion requested by action result')
+					break
+
+		# Include screenshot if either use_vision is True, or if explicitly requested by an action
+		if (use_vision or include_screenshot_requested) and browser_state_summary.screenshot:
 			screenshots.append(browser_state_summary.screenshot)
 
+		# Override use_vision if screenshot was explicitly requested
+		effective_use_vision = use_vision or include_screenshot_requested
+
 		# Create single state message with all content
 		assert browser_state_summary
 		state_message = AgentMessagePrompt(
@@ -327,7 +341,7 @@ class MessageManager:
 			vision_detail_level=self.vision_detail_level,
 			include_recent_events=self.include_recent_events,
 			sample_images=self.sample_images,
-		).get_user_message(use_vision)
+		).get_user_message(effective_use_vision)
 
 		# Set the state message with caching enabled
 		self._set_message_with_type(state_message, 'state')
diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py
index 868aacf5b..d41d7823b 100644
--- a/browser_use/tools/service.py
+++ b/browser_use/tools/service.py
@@ -844,6 +844,21 @@ Note: For multiple pages (>=1.0), scrolls are performed one page at a time to en
 					long_term_memory=f"Tried scrolling to text '{text}' but it was not found",
 				)
 
+		@self.registry.action(
+			'Request to include a screenshot in your next browser state. Use this when you need visual confirmation or when the page contains complex visual information that is hard to understand from the DOM alone.'
+		)
+		async def take_screenshot():
+			"""Request that a screenshot be included in the next observation"""
+			memory = 'Requested screenshot for next observation'
+			msg = f'📸 {memory}'
+			logger.info(msg)
+
+			# Return flag in metadata to signal that screenshot should be included
+			return ActionResult(
+				extracted_content=memory,
+				metadata={'include_screenshot': True},
+			)
+
 		# Dropdown Actions
 
 		@self.registry.action(

From 889efd3ee29b977e73728f128803a069940777b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Fri, 3 Oct 2025 15:59:05 -0700
Subject: [PATCH 02/45] System prompt

---
 browser_use/agent/system_prompt.md             | 5 +++--
 browser_use/agent/system_prompt_flash.md       | 8 ++++++--
 browser_use/agent/system_prompt_no_thinking.md | 5 +++--
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/browser_use/agent/system_prompt.md b/browser_use/agent/system_prompt.md
index 2c96badef..3944bcf8d 100644
--- a/browser_use/agent/system_prompt.md
+++ b/browser_use/agent/system_prompt.md
@@ -20,7 +20,7 @@ At every step, your input will consist of:
 1. <agent_history>: A chronological event stream including your previous actions and their results.
 2. <agent_state>: Current <user_request>, summary of <file_system>, <todo_contents>, and <step_info>.
 3. <browser_state>: Current URL, open tabs, interactive elements indexed for actions, and visible page content.
-4. <browser_vision>: Screenshot of the browser with bounding boxes around interactive elements.
+4. <browser_vision>: Screenshot of the browser with bounding boxes around interactive elements. If you used take_screenshot before
 5. <read_state> This will be displayed only if your previous action was extract_structured_data or read_file. This data is only shown in the current step.
 </input>
 
@@ -66,8 +66,9 @@ Note that:
 </browser_state>
 
 <browser_vision>
-You will be provided with a screenshot of the current page with  bounding boxes around interactive elements. This is your GROUND TRUTH: reason about the image in your thinking to evaluate your progress.
+If you used take_screenshot before, you will be provided with a screenshot of the current page with  bounding boxes around interactive elements. This is your GROUND TRUTH: reason about the image in your thinking to evaluate your progress.
 If an interactive index inside your browser_state does not have text information, then the interactive index is written at the top center of it's element in the screenshot.
+Use take_screenshot if you are unsure or simply want more information. 
 </browser_vision>
 
 <browser_rules>
diff --git a/browser_use/agent/system_prompt_flash.md b/browser_use/agent/system_prompt_flash.md
index c8d3feaa3..32795764f 100644
--- a/browser_use/agent/system_prompt_flash.md
+++ b/browser_use/agent/system_prompt_flash.md
@@ -20,7 +20,7 @@ At every step, your input will consist of:
 1. <agent_history>: A chronological event stream including your previous actions and their results.
 2. <agent_state>: Current <user_request>, summary of <file_system>, <todo_contents>, and <step_info>.
 3. <browser_state>: Current URL, open tabs, interactive elements indexed for actions, and visible page content.
-4. <browser_vision>: Screenshot of the browser with bounding boxes around interactive elements.
+4. <browser_vision>: Screenshot of the browser with bounding boxes around interactive elements. If you used take_screenshot before
 5. <read_state> This will be displayed only if your previous action was extract_structured_data or read_file. This data is only shown in the current step.
 </input>
 
@@ -64,10 +64,14 @@ Note that:
 </browser_state>
 
 <browser_vision>
-You will be provided with a screenshot of the current page with  bounding boxes around interactive elements. This is your GROUND TRUTH: reason about the image in your thinking to evaluate your progress.
+If you used take_screenshot before, you will be provided with a screenshot of the current page with  bounding boxes around interactive elements. This is your GROUND TRUTH: reason about the image in your thinking to evaluate your progress.
 If an interactive index inside your browser_state does not have text information, then the interactive index is written at the top center of it's element in the screenshot.
+Use take_screenshot if you are unsure or simply want more information. 
 </browser_vision>
 
+
+
+
 <browser_rules>
 Strictly follow these rules while using the browser and navigating the web:
 - Only interact with elements that have a numeric [index] assigned.
diff --git a/browser_use/agent/system_prompt_no_thinking.md b/browser_use/agent/system_prompt_no_thinking.md
index f51bcd171..fdfaf57c3 100644
--- a/browser_use/agent/system_prompt_no_thinking.md
+++ b/browser_use/agent/system_prompt_no_thinking.md
@@ -20,7 +20,7 @@ At every step, your input will consist of:
 1. <agent_history>: A chronological event stream including your previous actions and their results.
 2. <agent_state>: Current <user_request>, summary of <file_system>, <todo_contents>, and <step_info>.
 3. <browser_state>: Current URL, open tabs, interactive elements indexed for actions, and visible page content.
-4. <browser_vision>: Screenshot of the browser with bounding boxes around interactive elements.
+4. <browser_vision>: Screenshot of the browser with bounding boxes around interactive elements. If you used take_screenshot before
 5. <read_state> This will be displayed only if your previous action was extract_structured_data or read_file. This data is only shown in the current step.
 </input>
 
@@ -66,8 +66,9 @@ Note that:
 </browser_state>
 
 <browser_vision>
-You will be provided with a screenshot of the current page with  bounding boxes around interactive elements. This is your GROUND TRUTH: reason about the image in your thinking to evaluate your progress.
+If you used take_screenshot before, you will be provided with a screenshot of the current page with  bounding boxes around interactive elements. This is your GROUND TRUTH: reason about the image in your thinking to evaluate your progress.
 If an interactive index inside your browser_state does not have text information, then the interactive index is written at the top center of it's element in the screenshot.
+Use take_screenshot if you are unsure or simply want more information. 
 </browser_vision>
 
 <browser_rules>

From e9e19f785de6fbd887b46c7ef9e378ffd8220b7c Mon Sep 17 00:00:00 2001
From: Prakhar Jain <prakharjain1114@gmail.com>
Date: Sat, 4 Oct 2025 20:46:41 +0530
Subject: [PATCH 03/45] added option to interactive elements

---
 browser_use/dom/serializer/serializer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/browser_use/dom/serializer/serializer.py b/browser_use/dom/serializer/serializer.py
index 1a515379d..e57cee5fc 100644
--- a/browser_use/dom/serializer/serializer.py
+++ b/browser_use/dom/serializer/serializer.py
@@ -677,7 +677,7 @@ class DOMTreeSerializer:
 		# 5. Keep if has role suggesting interactivity
 		if node.original_node.attributes:
 			role = node.original_node.attributes.get('role')
-			if role in ['button', 'link', 'checkbox', 'radio', 'tab', 'menuitem']:
+			if role in ['button', 'link', 'checkbox', 'radio', 'tab', 'menuitem', 'option']:
 				return False
 
 		# Default: exclude this child

From d3abbcb2c741645ba07bf3bbb2402ce0a45a2b2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sat, 4 Oct 2025 12:56:40 -0700
Subject: [PATCH 04/45] Include use_vision auto

---
 browser_use/agent/message_manager/service.py  | 24 ++++++++++++++-----
 browser_use/agent/service.py                  |  8 ++++---
 browser_use/agent/system_prompt.md            |  2 +-
 browser_use/agent/system_prompt_flash.md      |  2 +-
 .../agent/system_prompt_no_thinking.md        |  2 +-
 browser_use/agent/views.py                    |  4 ++--
 browser_use/telemetry/views.py                |  4 ++--
 docs/customize/agent/all-parameters.mdx       |  2 +-
 docs/customize/tools/available.mdx            |  3 +++
 9 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/browser_use/agent/message_manager/service.py b/browser_use/agent/message_manager/service.py
index 2d922a125..521e10c64 100644
--- a/browser_use/agent/message_manager/service.py
+++ b/browser_use/agent/message_manager/service.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import logging
-from typing import Literal
+from typing import Literal, Union
 
 from browser_use.agent.message_manager.views import (
 	HistoryItem,
@@ -285,7 +285,7 @@ class MessageManager:
 		model_output: AgentOutput | None = None,
 		result: list[ActionResult] | None = None,
 		step_info: AgentStepInfo | None = None,
-		use_vision=True,
+		use_vision: Union[bool, Literal['auto']] = 'auto',
 		page_filtered_actions: str | None = None,
 		sensitive_data=None,
 		available_file_paths: list[str] | None = None,  # Always pass current available_file_paths
@@ -317,12 +317,24 @@ class MessageManager:
 					logger.debug('Screenshot inclusion requested by action result')
 					break
 
-		# Include screenshot if either use_vision is True, or if explicitly requested by an action
-		if (use_vision or include_screenshot_requested) and browser_state_summary.screenshot:
+		# Handle different use_vision modes:
+		# - "auto": Only include screenshot if explicitly requested by action (e.g., take_screenshot)
+		# - True: Always include screenshot
+		# - False: Never include screenshot
+		include_screenshot = False
+		if use_vision is True:
+			# Always include screenshot when use_vision=True
+			include_screenshot = True
+		elif use_vision == 'auto':
+			# Only include screenshot if explicitly requested by action when use_vision="auto"
+			include_screenshot = include_screenshot_requested
+		# else: use_vision is False, never include screenshot (include_screenshot stays False)
+
+		if include_screenshot and browser_state_summary.screenshot:
 			screenshots.append(browser_state_summary.screenshot)
 
-		# Override use_vision if screenshot was explicitly requested
-		effective_use_vision = use_vision or include_screenshot_requested
+		# Use vision in the user message if screenshots are included
+		effective_use_vision = len(screenshots) > 0
 
 		# Create single state message with all content
 		assert browser_state_summary
diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py
index c9d314a4f..21c65edbc 100644
--- a/browser_use/agent/service.py
+++ b/browser_use/agent/service.py
@@ -9,7 +9,7 @@ import time
 from collections.abc import Awaitable, Callable
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Generic, Literal, TypeVar
+from typing import Any, Generic, Literal, TypeVar, Union
 from urllib.parse import urlparse
 
 from dotenv import load_dotenv
@@ -154,7 +154,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
 		register_should_stop_callback: Callable[[], Awaitable[bool]] | None = None,
 		# Agent settings
 		output_model_schema: type[AgentStructuredOutput] | None = None,
-		use_vision: bool = True,
+		use_vision: Union[bool, Literal['auto']] = 'auto',
 		save_conversation_path: str | Path | None = None,
 		save_conversation_path_encoding: str | None = 'utf-8',
 		max_failures: int = 3,
@@ -255,7 +255,9 @@ class Agent(Generic[Context, AgentStructuredOutput]):
 		elif controller is not None:
 			self.tools = controller
 		else:
-			self.tools = Tools(display_files_in_done_text=display_files_in_done_text)
+			# Exclude take_screenshot tool when use_vision=False
+			exclude_actions = ['take_screenshot'] if use_vision is False else []
+			self.tools = Tools(exclude_actions=exclude_actions, display_files_in_done_text=display_files_in_done_text)
 
 		# Structured output
 		self.output_model_schema = output_model_schema
diff --git a/browser_use/agent/system_prompt.md b/browser_use/agent/system_prompt.md
index 3944bcf8d..b48a68cb6 100644
--- a/browser_use/agent/system_prompt.md
+++ b/browser_use/agent/system_prompt.md
@@ -20,7 +20,7 @@ At every step, your input will consist of:
 1. <agent_history>: A chronological event stream including your previous actions and their results.
 2. <agent_state>: Current <user_request>, summary of <file_system>, <todo_contents>, and <step_info>.
 3. <browser_state>: Current URL, open tabs, interactive elements indexed for actions, and visible page content.
-4. <browser_vision>: Screenshot of the browser with bounding boxes around interactive elements. If you used take_screenshot before
+4. <browser_vision>: Screenshot of the browser with bounding boxes around interactive elements. If you used take_screenshot before, this will contain a screenshot.
 5. <read_state> This will be displayed only if your previous action was extract_structured_data or read_file. This data is only shown in the current step.
 </input>
 
diff --git a/browser_use/agent/system_prompt_flash.md b/browser_use/agent/system_prompt_flash.md
index 32795764f..9f14aad29 100644
--- a/browser_use/agent/system_prompt_flash.md
+++ b/browser_use/agent/system_prompt_flash.md
@@ -20,7 +20,7 @@ At every step, your input will consist of:
 1. <agent_history>: A chronological event stream including your previous actions and their results.
 2. <agent_state>: Current <user_request>, summary of <file_system>, <todo_contents>, and <step_info>.
 3. <browser_state>: Current URL, open tabs, interactive elements indexed for actions, and visible page content.
-4. <browser_vision>: Screenshot of the browser with bounding boxes around interactive elements. If you used take_screenshot before
+4. <browser_vision>: Screenshot of the browser with bounding boxes around interactive elements. If you used take_screenshot before, this will contain a screenshot.
 5. <read_state> This will be displayed only if your previous action was extract_structured_data or read_file. This data is only shown in the current step.
 </input>
 
diff --git a/browser_use/agent/system_prompt_no_thinking.md b/browser_use/agent/system_prompt_no_thinking.md
index fdfaf57c3..c905e5bb6 100644
--- a/browser_use/agent/system_prompt_no_thinking.md
+++ b/browser_use/agent/system_prompt_no_thinking.md
@@ -20,7 +20,7 @@ At every step, your input will consist of:
 1. <agent_history>: A chronological event stream including your previous actions and their results.
 2. <agent_state>: Current <user_request>, summary of <file_system>, <todo_contents>, and <step_info>.
 3. <browser_state>: Current URL, open tabs, interactive elements indexed for actions, and visible page content.
-4. <browser_vision>: Screenshot of the browser with bounding boxes around interactive elements. If you used take_screenshot before
+4. <browser_vision>: Screenshot of the browser with bounding boxes around interactive elements. If you used take_screenshot before, this will contain a screenshot.
 5. <read_state> This will be displayed only if your previous action was extract_structured_data or read_file. This data is only shown in the current step.
 </input>
 
diff --git a/browser_use/agent/views.py b/browser_use/agent/views.py
index daa6ee78d..c433935b3 100644
--- a/browser_use/agent/views.py
+++ b/browser_use/agent/views.py
@@ -5,7 +5,7 @@ import logging
 import traceback
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Generic, Literal
+from typing import Any, Generic, Literal, Union
 
 from openai import RateLimitError
 from pydantic import BaseModel, ConfigDict, Field, ValidationError, create_model, model_validator
@@ -33,7 +33,7 @@ logger = logging.getLogger(__name__)
 class AgentSettings(BaseModel):
 	"""Configuration options for the Agent"""
 
-	use_vision: bool = True
+	use_vision: Union[bool, Literal['auto']] = 'auto'
 	vision_detail_level: Literal['auto', 'low', 'high'] = 'auto'
 	save_conversation_path: str | Path | None = None
 	save_conversation_path_encoding: str | None = 'utf-8'
diff --git a/browser_use/telemetry/views.py b/browser_use/telemetry/views.py
index a5fd8ee83..48b3a8e6f 100644
--- a/browser_use/telemetry/views.py
+++ b/browser_use/telemetry/views.py
@@ -1,7 +1,7 @@
 from abc import ABC, abstractmethod
 from collections.abc import Sequence
 from dataclasses import asdict, dataclass
-from typing import Any
+from typing import Any, Literal, Union
 
 from browser_use.config import is_running_in_docker
 
@@ -29,7 +29,7 @@ class AgentTelemetryEvent(BaseTelemetryEvent):
 	model_provider: str
 	max_steps: int
 	max_actions_per_step: int
-	use_vision: bool
+	use_vision: Union[bool, Literal['auto']]
 	version: str
 	source: str
 	cdp_url: str | None
diff --git a/docs/customize/agent/all-parameters.mdx b/docs/customize/agent/all-parameters.mdx
index 05f6e217e..0f31dbb6e 100644
--- a/docs/customize/agent/all-parameters.mdx
+++ b/docs/customize/agent/all-parameters.mdx
@@ -13,7 +13,7 @@ mode: "wide"
 - `output_model_schema`: Pydantic model class for structured output validation. [Example](https://github.com/browser-use/browser-use/blob/main/examples/features/custom_output.py)
 
 ### Vision & Processing
-- `use_vision` (default: `True`): Enable/disable vision capabilities for processing screenshots
+- `use_vision` (default: `"auto"`): Vision mode - `"auto"` includes take_screenshot tool but only uses vision when requested, `True` always includes screenshots, `False` never includes screenshots and excludes take_screenshot tool
 - `vision_detail_level` (default: `'auto'`): Screenshot detail level - `'low'`, `'high'`, or `'auto'`
 - `page_extraction_llm`: Separate LLM model for page content extraction. You can choose a small & fast model because it only needs to extract text from the page (default: same as `llm`)
 
diff --git a/docs/customize/tools/available.mdx b/docs/customize/tools/available.mdx
index 1bd6f4dfe..c045f3dca 100644
--- a/docs/customize/tools/available.mdx
+++ b/docs/customize/tools/available.mdx
@@ -32,6 +32,9 @@ mode: "wide"
 ### Content Extraction
 - **`extract_structured_data`** - Extract data from webpages using LLM
 
+### Visual Analysis
+- **`take_screenshot`** - Request a screenshot in your next browser state for visual confirmation
+
 ### Form Controls
 - **`get_dropdown_options`** - Get dropdown option values
 - **`select_dropdown_option`** - Select dropdown options

From d01447499884d222c037204f90b0bfe2e1566f95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sat, 4 Oct 2025 12:59:11 -0700
Subject: [PATCH 05/45] Linter

---
 browser_use/agent/message_manager/service.py | 4 ++--
 browser_use/agent/service.py                 | 4 ++--
 browser_use/agent/views.py                   | 4 ++--
 browser_use/telemetry/views.py               | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/browser_use/agent/message_manager/service.py b/browser_use/agent/message_manager/service.py
index 521e10c64..b8abcfc3f 100644
--- a/browser_use/agent/message_manager/service.py
+++ b/browser_use/agent/message_manager/service.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import logging
-from typing import Literal, Union
+from typing import Literal
 
 from browser_use.agent.message_manager.views import (
 	HistoryItem,
@@ -285,7 +285,7 @@ class MessageManager:
 		model_output: AgentOutput | None = None,
 		result: list[ActionResult] | None = None,
 		step_info: AgentStepInfo | None = None,
-		use_vision: Union[bool, Literal['auto']] = 'auto',
+		use_vision: bool | Literal['auto'] = 'auto',
 		page_filtered_actions: str | None = None,
 		sensitive_data=None,
 		available_file_paths: list[str] | None = None,  # Always pass current available_file_paths
diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py
index 21c65edbc..6b0a2fa0d 100644
--- a/browser_use/agent/service.py
+++ b/browser_use/agent/service.py
@@ -9,7 +9,7 @@ import time
 from collections.abc import Awaitable, Callable
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Generic, Literal, TypeVar, Union
+from typing import Any, Generic, Literal, TypeVar
 from urllib.parse import urlparse
 
 from dotenv import load_dotenv
@@ -154,7 +154,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
 		register_should_stop_callback: Callable[[], Awaitable[bool]] | None = None,
 		# Agent settings
 		output_model_schema: type[AgentStructuredOutput] | None = None,
-		use_vision: Union[bool, Literal['auto']] = 'auto',
+		use_vision: bool | Literal['auto'] = 'auto',
 		save_conversation_path: str | Path | None = None,
 		save_conversation_path_encoding: str | None = 'utf-8',
 		max_failures: int = 3,
diff --git a/browser_use/agent/views.py b/browser_use/agent/views.py
index c433935b3..a8ae7f3c4 100644
--- a/browser_use/agent/views.py
+++ b/browser_use/agent/views.py
@@ -5,7 +5,7 @@ import logging
 import traceback
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Generic, Literal, Union
+from typing import Any, Generic, Literal
 
 from openai import RateLimitError
 from pydantic import BaseModel, ConfigDict, Field, ValidationError, create_model, model_validator
@@ -33,7 +33,7 @@ logger = logging.getLogger(__name__)
 class AgentSettings(BaseModel):
 	"""Configuration options for the Agent"""
 
-	use_vision: Union[bool, Literal['auto']] = 'auto'
+	use_vision: bool | Literal['auto'] = 'auto'
 	vision_detail_level: Literal['auto', 'low', 'high'] = 'auto'
 	save_conversation_path: str | Path | None = None
 	save_conversation_path_encoding: str | None = 'utf-8'
diff --git a/browser_use/telemetry/views.py b/browser_use/telemetry/views.py
index 48b3a8e6f..486842f8e 100644
--- a/browser_use/telemetry/views.py
+++ b/browser_use/telemetry/views.py
@@ -1,7 +1,7 @@
 from abc import ABC, abstractmethod
 from collections.abc import Sequence
 from dataclasses import asdict, dataclass
-from typing import Any, Literal, Union
+from typing import Any, Literal
 
 from browser_use.config import is_running_in_docker
 
@@ -29,7 +29,7 @@ class AgentTelemetryEvent(BaseTelemetryEvent):
 	model_provider: str
 	max_steps: int
 	max_actions_per_step: int
-	use_vision: Union[bool, Literal['auto']]
+	use_vision: bool | Literal['auto']
 	version: str
 	source: str
 	cdp_url: str | None

From 3bbdcb1e976dfcd0582970c6e766674612129813 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sat, 4 Oct 2025 13:05:30 -0700
Subject: [PATCH 06/45] bump-anthropic-version-for-linter

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index e369a33d4..42e24718d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,7 @@ dependencies = [
     "authlib>=1.6.0",
     "google-genai>=1.29.0,<2.0.0",
     "openai>=1.99.2,<2.0.0",
-    "anthropic>=0.58.2,<1.0.0",
+    "anthropic>=0.68.1,<1.0.0",
     "groq>=0.30.0",
     "ollama>=0.5.1",
     "google-api-python-client>=2.174.0",

From 50fb58284a472fcefeabd958c26af53081312ed4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 00:00:28 -0700
Subject: [PATCH 07/45] short-systemprompt

---
 browser_use/agent/system_prompt_flash.md | 168 -----------------------
 1 file changed, 168 deletions(-)

diff --git a/browser_use/agent/system_prompt_flash.md b/browser_use/agent/system_prompt_flash.md
index 9f14aad29..b1ee5da06 100644
--- a/browser_use/agent/system_prompt_flash.md
+++ b/browser_use/agent/system_prompt_flash.md
@@ -1,174 +1,7 @@
 You are an AI agent designed to operate in an iterative loop to automate browser tasks. Your ultimate goal is accomplishing the task provided in <user_request>.
 
-<intro>
-You excel at following tasks:
-1. Navigating complex websites and extracting precise information
-2. Automating form submissions and interactive web actions
-3. Gathering and saving information 
-4. Using your filesystem effectively to decide what to keep in your context
-5. Operate effectively in an agent loop
-6. Efficiently performing diverse web tasks
-</intro>
 
-<language_settings>
-- Default working language: **English**
-- Always respond in the same language as the user request
-</language_settings>
-
-<input>
-At every step, your input will consist of: 
-1. <agent_history>: A chronological event stream including your previous actions and their results.
-2. <agent_state>: Current <user_request>, summary of <file_system>, <todo_contents>, and <step_info>.
-3. <browser_state>: Current URL, open tabs, interactive elements indexed for actions, and visible page content.
-4. <browser_vision>: Screenshot of the browser with bounding boxes around interactive elements. If you used take_screenshot before, this will contain a screenshot.
-5. <read_state> This will be displayed only if your previous action was extract_structured_data or read_file. This data is only shown in the current step.
-</input>
-
-<agent_history>
-Agent history will be given as a list of step information as follows:
-
-<step_{{step_number}}>:
-Memory: Your memory / thinking of this step
-Action Results: Your actions and their results
-</step_{{step_number}}>
-
-and system messages wrapped in <sys> tag.
-</agent_history>
-
-<user_request>
-USER REQUEST: This is your ultimate objective and always remains visible.
-- This has the highest priority. Make the user happy.
-- If the user request is very specific - then carefully follow each step and dont skip or hallucinate steps.
-- If the task is open ended you can plan yourself how to get it done.
-</user_request>
-
-<browser_state>
-1. Browser State will be given as:
-
-Current URL: URL of the page you are currently viewing.
-Open Tabs: Open tabs with their ids.
 Interactive Elements: All interactive elements will be provided in format as [index]<type>text</type> where
-- index: Numeric identifier for interaction
-- type: HTML element type (button, input, etc.)
-- text: Element description
-
-Examples:
-[33]<div>User form</div>
-\t*[35]<button aria-label='Submit form'>Submit</button>
-
-Note that:
-- Only elements with numeric indexes in [] are interactive
-- (stacked) indentation (with \t) is important and means that the element is a (html) child of the element above (with a lower index)
-- Elements tagged with a star `*[` are the new interactive elements that appeared on the website since the last step - if url has not changed. Your previous actions caused that change. Think if you need to interact with them, e.g. after input_text you might need to select the right option from the list.
-- Pure text elements without [] are not interactive.
-</browser_state>
-
-<browser_vision>
-If you used take_screenshot before, you will be provided with a screenshot of the current page with  bounding boxes around interactive elements. This is your GROUND TRUTH: reason about the image in your thinking to evaluate your progress.
-If an interactive index inside your browser_state does not have text information, then the interactive index is written at the top center of it's element in the screenshot.
-Use take_screenshot if you are unsure or simply want more information. 
-</browser_vision>
-
-
-
-
-<browser_rules>
-Strictly follow these rules while using the browser and navigating the web:
-- Only interact with elements that have a numeric [index] assigned.
-- Only use indexes that are explicitly provided.
-- If research is needed, open a **new tab** instead of reusing the current one.
-- If the page changes after, for example, an input text action, analyse if you need to interact with new elements, e.g. selecting the right option from the list.
-- By default, only elements in the visible viewport are listed. Use scrolling tools if you suspect relevant content is offscreen which you need to interact with. Scroll ONLY if there are more pixels below or above the page.
-- You can scroll by a specific number of pages using the num_pages parameter (e.g., 0.5 for half page, 2.0 for two pages).
-- If a captcha appears, attempt solving it if possible. If not, use fallback strategies (e.g., alternative site, backtrack).
-- If expected elements are missing, try refreshing, scrolling, or navigating back.
-- If the page is not fully loaded, use the wait action.
-- You can call extract_structured_data on specific pages to gather structured semantic information from the entire page, including parts not currently visible.
-- Call extract_structured_data only if the information you are looking for is not visible in your <browser_state> otherwise always just use the needed text from the <browser_state>.
-- Calling the extract_structured_data tool is expensive! DO NOT query the same page with the same extract_structured_data query multiple times. Make sure that you are on the page with relevant information based on the screenshot before calling this tool.
-- If you fill an input field and your action sequence is interrupted, most often something changed e.g. suggestions popped up under the field.
-- If the action sequence was interrupted in previous step due to page changes, make sure to complete any remaining actions that were not executed. For example, if you tried to input text and click a search button but the click was not executed because the page changed, you should retry the click action in your next step.
-- If the <user_request> includes specific page information such as product type, rating, price, location, etc., try to apply filters to be more efficient.
-- The <user_request> is the ultimate goal. If the user specifies explicit steps, they have always the highest priority.
-- If you input_text into a field, you might need to press enter, click the search button, or select from dropdown for completion.
-- Don't login into a page if you don't have to. Don't login if you don't have the credentials. 
-- There are 2 types of tasks always first think which type of request you are dealing with:
-1. Very specific step by step instructions:
-- Follow them as very precise and don't skip steps. Try to complete everything as requested.
-2. Open ended tasks. Plan yourself, be creative in achieving them.
-- If you get stuck e.g. with logins or captcha in open-ended tasks you can re-evaluate the task and try alternative ways, e.g. sometimes accidentally login pops up, even though there some part of the page is accessible or you get some information via web search.
-- If you reach a PDF viewer, the file is automatically downloaded and you can see its path in <available_file_paths>. You can either read the file or scroll in the page to see more.
-</browser_rules>
-
-<file_system>
-- You have access to a persistent file system which you can use to track progress, store results, and manage long tasks.
-- Your file system is initialized with a `todo.md`: Use this to keep a checklist for known subtasks. Use `replace_file_str` tool to update markers in `todo.md` as first action whenever you complete an item. This file should guide your step-by-step execution when you have a long running task.
-- If you are writing a `csv` file, make sure to use double quotes if cell elements contain commas.
-- If the file is too large, you are only given a preview of your file. Use `read_file` to see the full content if necessary.
-- If exists, <available_file_paths> includes files you have downloaded or uploaded by the user. You can only read or upload these files but you don't have write access.
-- If the task is really long, initialize a `results.md` file to accumulate your results.
-- DO NOT use the file system if the task is less than 10 steps!
-</file_system>
-
-<task_completion_rules>
-You must call the `done` action in one of two cases:
-- When you have fully completed the USER REQUEST.
-- When you reach the final allowed step (`max_steps`), even if the task is incomplete.
-- If it is ABSOLUTELY IMPOSSIBLE to continue.
-
-The `done` action is your opportunity to terminate and share your findings with the user.
-- Set `success` to `true` only if the full USER REQUEST has been completed with no missing components.
-- If any part of the request is missing, incomplete, or uncertain, set `success` to `false`.
-- You can use the `text` field of the `done` action to communicate your findings and `files_to_display` to send file attachments to the user, e.g. `["results.md"]`.
-- Put ALL the relevant information you found so far in the `text` field when you call `done` action.
-- Combine `text` and `files_to_display` to provide a coherent reply to the user and fulfill the USER REQUEST.
-- You are ONLY ALLOWED to call `done` as a single action. Don't call it together with other actions.
-- If the user asks for specified format, such as "return JSON with following structure", "return a list of format...", MAKE sure to use the right format in your answer.
-- If the user asks for a structured output, your `done` action's schema will be modified. Take this schema into account when solving the task!
-</task_completion_rules>
-
-<action_rules>
-- You are allowed to use a maximum of {max_actions} actions per step.
-
-If you are allowed multiple actions, you can specify multiple actions in the list to be executed sequentially (one after another).
-- If the page changes after an action, the sequence is interrupted and you get the new state. You can see this in your agent history when this happens.
-</action_rules>
-
-<efficiency_guidelines>
-You can output multiple actions in one step. Try to be efficient where it makes sense. Do not predict actions which do not make sense for the current page.
-
-**Recommended Action Combinations:**
-- `input_text` + `click_element_by_index` → Fill form field and submit/search in one step
-- `input_text` + `input_text` → Fill multiple form fields
-- `click_element_by_index` + `click_element_by_index` → Navigate through multi-step flows (when the page does not navigate between clicks)
-- `scroll` with num_pages 10 + `extract_structured_data` → Scroll to the bottom of the page to load more content before extracting structured data
-- File operations + browser actions 
-
-Do not try multiple different paths in one step. Always have one clear goal per step. 
-Its important that you see in the next step if your action was successful, so do not chain actions which change the browser state multiple times, e.g. 
-- do not use click_element_by_index and then go_to_url, because you would not see if the click was successful or not. 
-- or do not use switch_tab and switch_tab together, because you would not see the state in between.
-- do not use input_text and then scroll, because you would not see if the input text was successful or not. 
-</efficiency_guidelines>
-
-<reasoning_rules>
-Be clear and concise in your decision-making. Exhibit the following reasoning patterns to successfully achieve the <user_request>:
-- Reason about <agent_history> to track progress and context toward <user_request>.
-- Analyze the most recent "Next Goal" and "Action Result" in <agent_history> and clearly state what you previously tried to achieve.
-- Analyze all relevant items in <agent_history>, <browser_state>, <read_state>, <file_system>, <read_state> and the screenshot to understand your state.
-- Explicitly judge success/failure/uncertainty of the last action. Never assume an action succeeded just because it appears to be executed in your last step in <agent_history>. For example, you might have "Action 1/1: Input '2025-05-05' into element 3." in your history even though inputting text failed. Always verify using <browser_vision> (screenshot) as the primary ground truth. If a screenshot is unavailable, fall back to <browser_state>. If the expected change is missing, mark the last action as failed (or uncertain) and plan a recovery.
-- If todo.md is empty and the task is multi-step, generate a stepwise plan in todo.md using file tools.
-- Analyze `todo.md` to guide and track your progress. 
-- If any todo.md items are finished, mark them as complete in the file.
-- Analyze whether you are stuck, e.g. when you repeat the same actions multiple times without any progress. Then consider alternative approaches e.g. scrolling for more context or send_keys to interact with keys directly or different pages.
-- Analyze the <read_state> where one-time information are displayed due to your previous action. Reason about whether you want to keep this information in memory and plan writing them into a file if applicable using the file tools.
-- If you see information relevant to <user_request>, plan saving the information into a file.
-- Before writing data into a file, analyze the <file_system> and check if the file already has some content to avoid overwriting.
-- Decide what concise, actionable context should be stored in memory to inform future reasoning.
-- When ready to finish, state you are preparing to call done and communicate completion/results to the user.
-- Before done, use read_file to verify file contents intended for user output.
-- Always reason about the <user_request>. Make sure to carefully analyze the specific steps and information required. E.g. specific filters, specific form fields, specific information to search. Make sure to always compare the current trajactory with the user request and think carefully if thats how the user requested it.
-</reasoning_rules>
 
 <output>
 You must respond with a valid JSON in this exact format:
@@ -177,5 +10,4 @@ You must respond with a valid JSON in this exact format:
   "action":[{{"go_to_url": {{ "url": "url_value"}}}}]
 }}
 
-Action list should NEVER be empty.
 </output>

From 9a655e56c51d6abb991dc6e749ba791940466698 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 11:09:15 -0700
Subject: [PATCH 08/45] Shorter tool description

---
 .gitignore                   |   2 +
 browser_use/tools/service.py | 123 ++++++++++-------------------------
 2 files changed, 37 insertions(+), 88 deletions(-)

diff --git a/.gitignore b/.gitignore
index 3f66345ea..b2c71bb34 100644
--- a/.gitignore
+++ b/.gitignore
@@ -63,3 +63,5 @@ screenshot.png
 
 all_github_issues_progress.md
 all_github_issues.md
+
+todo-input-token.md
diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py
index d41d7823b..5815b618d 100644
--- a/browser_use/tools/service.py
+++ b/browser_use/tools/service.py
@@ -115,7 +115,7 @@ class Tools(Generic[Context]):
 
 		# Basic Navigation Actions
 		@self.registry.action(
-			'Search a query with search engine which defaults to DuckDuckGo. Dont specify search_engine unless user asks for different search engine. Available search engines: duckduckgo, google, bing.',
+			'Search query (defaults DuckDuckGo). Options: duckduckgo, google, bing.',
 			param_model=SearchAction,
 		)
 		async def search(params: SearchAction, browser_session: BrowserSession):
@@ -158,7 +158,7 @@ class Tools(Generic[Context]):
 				return ActionResult(error=f'Failed to search {params.search_engine} for "{params.query}": {str(e)}')
 
 		@self.registry.action(
-			'Navigate to URL, optionally set new_tab=True to open in new tab, otherwise default is False.',
+			'Navigate to URL. Set new_tab=True to open in new tab.',
 			param_model=GoToUrlAction,
 		)
 		async def go_to_url(params: GoToUrlAction, browser_session: BrowserSession):
@@ -218,9 +218,7 @@ class Tools(Generic[Context]):
 				error_msg = f'Failed to go back: {str(e)}'
 				return ActionResult(error=error_msg)
 
-		@self.registry.action(
-			'Wait for x seconds (default 3) (max 30 seconds). This can be used to wait until the page is fully loaded.'
-		)
+		@self.registry.action('Wait x seconds (default 3, max 30). ')
 		async def wait(seconds: int = 3):
 			# Cap wait time at maximum 30 seconds
 			# Reduce the wait time by 3 seconds to account for the llm call which takes at least 3 seconds
@@ -236,7 +234,7 @@ class Tools(Generic[Context]):
 		# Element Interaction Actions
 
 		@self.registry.action(
-			'Click an element by index. Only indices from your browser_state are allowed. Never use an index that is not inside your current browser_state. Optionally set ctrl=True to open any resulting navigation in a new tab.',
+			'Click element by index from browser_state. Set ctrl=True to open in new tab.',
 			param_model=ClickElementAction,
 		)
 		async def click(params: ClickElementAction, browser_session: BrowserSession):
@@ -290,7 +288,7 @@ class Tools(Generic[Context]):
 				return ActionResult(error=error_msg)
 
 		@self.registry.action(
-			'Input text into an input interactive element. Only input text into indices that are inside your current browser_state and are valid input fields.',
+			'Input text into element by index from browser_state.',
 			param_model=InputTextAction,
 		)
 		async def input_text(
@@ -352,7 +350,7 @@ class Tools(Generic[Context]):
 				return ActionResult(error=error_msg)
 
 		@self.registry.action(
-			'Upload file to interactive element with file path. Only upload files to indices that are inside your current browser_state and are valid file upload fields.',
+			'Upload file to element by index from browser_state.',
 			param_model=UploadFileAction,
 		)
 		async def upload_file(
@@ -503,7 +501,7 @@ class Tools(Generic[Context]):
 
 		# Tab Management Actions
 
-		@self.registry.action('Switch to tab with tab_id.', param_model=SwitchTabAction)
+		@self.registry.action('Switch to tab by tab_id.', param_model=SwitchTabAction)
 		async def switch_tab(params: SwitchTabAction, browser_session: BrowserSession):
 			# Simple switch tab logic
 			try:
@@ -525,7 +523,7 @@ class Tools(Generic[Context]):
 				memory = f'Attempted to switch to tab #{params.tab_id}'
 				return ActionResult(extracted_content=memory, long_term_memory=memory)
 
-		@self.registry.action('Close an existing tab', param_model=CloseTabAction)
+		@self.registry.action('Close tab by tab_id.', param_model=CloseTabAction)
 		async def close_tab(params: CloseTabAction, browser_session: BrowserSession):
 			# Simple close tab logic
 			try:
@@ -557,20 +555,11 @@ class Tools(Generic[Context]):
 		# This action is temporarily disabled as it needs refactoring to use events
 
 		@self.registry.action(
-			"""This tool sends the markdown of the current page with the query to an LLM to extract structured, semantic data (e.g. product description, price, all information about XYZ) from the markdown of the current webpage based on a query.
-Only use when:
-- You are sure that you are on the right page for the query
-- You know exactly the information you need to extract from the page
-- You did not previously call this tool on the same page
-You can not use this tool to:
-- Get interactive elements like buttons, links, dropdowns, menus, etc.
-- If you previously asked extract_structured_data on the same page with the same query, you should not call it again.
-
-Set extract_links=True only if your query requires extracting links/URLs from the page.
-Use start_from_char to start extraction from a specific character position (use if extraction was previously truncated and you want more content).
-
-If this tool does not return the desired outcome, do not call it again, use scroll_to_text or scroll to find the desired information.
-""",
+			"""Extract semantic data from page markdown via LLM query (e.g. product info, prices).
+Use when: on right page, know what to extract, haven't used on same page before.
+Can't get: interactive elements (buttons, links, dropdowns).
+Set extract_links=True for URLs. Use start_from_char if truncated.
+If fails, use scroll_to_text or scroll instead.""",
 		)
 		async def extract_structured_data(
 			query: str,
@@ -690,11 +679,9 @@ You will be given a query and the markdown of a webpage that has been filtered t
 				raise RuntimeError(str(e))
 
 		@self.registry.action(
-			"""Scroll the page by specified number of pages (set down=True to scroll down, down=False to scroll up, num_pages=number of pages to scroll like 0.5 for half page, 10.0 for ten pages, etc.). 
-Default behavior is to scroll by one page. This is enough for most cases.
-Optionally, if there are multiple scroll containers, use frame_element_index parameter with an element inside the container you want to scroll in. For that you must use indices that exist in your browser_state (works well for dropdowns and custom UI components). 
-If you need to get to the bottom of the page, use a high number of pages at once like 10 to get to the bottom of the page.
-Note: For multiple pages (>=1.0), scrolls are performed one page at a time to ensure reliability. Page height is detected from viewport, fallback is 1000px per page.""",
+			"""Scroll page by num_pages (down=True for down, False for up). Default 1 page, use 0.5 for half, 10 for bottom.
+For specific containers, use frame_element_index from browser_state (works with dropdowns, custom UI).
+Multiple pages (>=1.0) scroll sequentially. Page height from viewport or 1000px fallback.""",
 			param_model=ScrollAction,
 		)
 		async def scroll(params: ScrollAction, browser_session: BrowserSession):
@@ -803,7 +790,7 @@ Note: For multiple pages (>=1.0), scrolls are performed one page at a time to en
 				return ActionResult(error=error_msg)
 
 		@self.registry.action(
-			'Send strings of special keys to use e.g. Escape, Backspace, Insert, PageDown, Delete, Enter, or Shortcuts such as `Control+o`, `Control+Shift+T`',
+			'Send special keys (Escape, Enter, PageDown) or shortcuts (Control+o, Control+Shift+T).',
 			param_model=SendKeysAction,
 		)
 		async def send_keys(params: SendKeysAction, browser_session: BrowserSession):
@@ -822,7 +809,7 @@ Note: For multiple pages (>=1.0), scrolls are performed one page at a time to en
 				return ActionResult(error=error_msg)
 
 		@self.registry.action(
-			description='Scroll to a text in the current page. This helps you to be efficient. Prefer this tool over scrolling step by step if you know what to scroll to.',
+			description='Scroll to text on page. Prefer over step-by-step scrolling when target known.',
 		)
 		async def scroll_to_text(text: str, browser_session: BrowserSession):  # type: ignore
 			# Dispatch scroll to text event
@@ -844,9 +831,7 @@ Note: For multiple pages (>=1.0), scrolls are performed one page at a time to en
 					long_term_memory=f"Tried scrolling to text '{text}' but it was not found",
 				)
 
-		@self.registry.action(
-			'Request to include a screenshot in your next browser state. Use this when you need visual confirmation or when the page contains complex visual information that is hard to understand from the DOM alone.'
-		)
+		@self.registry.action('Request screenshot in next browser state. Use for visual confirmation or complex visual content.')
 		async def take_screenshot():
 			"""Request that a screenshot be included in the next observation"""
 			memory = 'Requested screenshot for next observation'
@@ -862,7 +847,7 @@ Note: For multiple pages (>=1.0), scrolls are performed one page at a time to en
 		# Dropdown Actions
 
 		@self.registry.action(
-			'Get list of values for a dropdown input field. Only works on dropdown-style form elements (<select>, Semantic UI/aria-labeled select, etc.). Do not use this tool for none dropdown elements.',
+			'Get dropdown values (<select>, ARIA select). Only for dropdown elements.',
 			param_model=GetDropdownOptionsAction,
 		)
 		async def get_dropdown_options(params: GetDropdownOptionsAction, browser_session: BrowserSession):
@@ -888,7 +873,7 @@ Note: For multiple pages (>=1.0), scrolls are performed one page at a time to en
 			)
 
 		@self.registry.action(
-			'Select dropdown option by exact text from any dropdown type (native <select>, ARIA menus, or custom dropdowns). Searches target element and children to find selectable options.',
+			'Select dropdown option by exact text (native <select>, ARIA, custom). Searches element and children.',
 			param_model=SelectDropdownOptionAction,
 		)
 		async def select_dropdown_option(params: SelectDropdownOptionAction, browser_session: BrowserSession):
@@ -931,9 +916,7 @@ Note: For multiple pages (>=1.0), scrolls are performed one page at a time to en
 					return ActionResult(error=error_msg)
 
 		# File System Actions
-		@self.registry.action(
-			'Write or append content to file_name in file system. Allowed extensions are .md, .txt, .json, .csv, .pdf. For .pdf files, write the content in markdown format and it will automatically be converted to a properly formatted PDF document.'
-		)
+		@self.registry.action('Write/append to file (.md, .txt, .json, .csv, .pdf). PDF: write markdown, auto-converts to PDF.')
 		async def write_file(
 			file_name: str,
 			content: str,
@@ -954,14 +937,14 @@ Note: For multiple pages (>=1.0), scrolls are performed one page at a time to en
 			return ActionResult(extracted_content=result, long_term_memory=result)
 
 		@self.registry.action(
-			'Replace old_str with new_str in file_name. old_str must exactly match the string to replace in original text. Recommended tool to mark completed items in todo.md or change specific contents in a file.'
+			'Replace old_str with new_str in file. old_str must match exactly. For todo.md updates or specific edits.'
 		)
 		async def replace_file_str(file_name: str, old_str: str, new_str: str, file_system: FileSystem):
 			result = await file_system.replace_file_str(file_name, old_str, new_str)
 			logger.info(f'💾 {result}')
 			return ActionResult(extracted_content=result, long_term_memory=result)
 
-		@self.registry.action('Read file_name from file system')
+		@self.registry.action('Read file from file system.')
 		async def read_file(file_name: str, available_file_paths: list[str], file_system: FileSystem):
 			if available_file_paths and file_name in available_file_paths:
 				result = await file_system.read_file(file_name, external_file=True)
@@ -991,58 +974,22 @@ Note: For multiple pages (>=1.0), scrolls are performed one page at a time to en
 			)
 
 		@self.registry.action(
-			"""This JavaScript code gets executed with Runtime.evaluate and 'returnByValue': True, 'awaitPromise': True
+			"""Execute JavaScript with Runtime.evaluate (returnByValue:true, awaitPromise:true).
 
-SYNTAX RULES - FAILURE TO FOLLOW CAUSES "Uncaught at line 0" ERRORS:
-- ALWAYS wrap your code in IIFE: (function(){ ... })() or (async function(){ ... })() for async code
-- ALWAYS add try-catch blocks to prevent execution errors
-- ALWAYS use proper semicolons and valid JavaScript syntax
-- NEVER write multiline code without proper IIFE wrapping
-- ALWAYS validate elements exist before accessing them
+SYNTAX RULES - ALWAYS wrap in IIFE or get "Uncaught at line 0":
+- IIFE: (function(){ ... })() or async: (async function(){ ... })()
+- Add try-catch, proper semicolons, validate elements exist
 
-EXAMPLES:
-Use this tool when other tools do not work on the first try as expected or when a more general tool is needed, e.g. for filling a form all at once, hovering, dragging, extracting only links, extracting content from the page, press and hold, hovering, clicking on coordinates, zooming, use this if the user provides custom selectors which you can otherwise not interact with ....
-You can also use it to explore the website.
-- Write code to solve problems you could not solve with other tools.
-- Don't write comments in here, no human reads that.
-- Write only valid js code.
-- use this to e.g. extract + filter links, convert the page to json into the format you need etc...
+Use when other tools fail or need custom logic (forms, hover, drag, extract links, custom selectors, shadow DOM, React/Vue/Angular, etc.).
 
-
-- limit the output otherwise your context will explode
-- think if you deal with special elements like iframes / shadow roots etc
-- Adopt your strategy for React Native Web, React, Angular, Vue, MUI pages etc.
-- e.g. with  synthetic events, keyboard simulation, shadow DOM, etc.
-
-PROPER SYNTAX EXAMPLES:
+Examples:
 CORRECT: (function(){ try { const el = document.querySelector('#id'); return el ? el.value : 'not found'; } catch(e) { return 'Error: ' + e.message; } })()
 CORRECT: (async function(){ try { await new Promise(r => setTimeout(r, 100)); return 'done'; } catch(e) { return 'Error: ' + e.message; } })()
+WRONG: document.querySelector('#id').value (no IIFE)
 
-WRONG: const el = document.querySelector('#id'); el ? el.value : '';
-WRONG: document.querySelector('#id').value
-WRONG: Multiline code without IIFE wrapping
+Shadow DOM: (function(){ try { const hosts = document.querySelectorAll('*'); for (let host of hosts) { if (host.shadowRoot) { const el = host.shadowRoot.querySelector('#target'); if (el) return el.textContent; } } return 'Not found'; } catch(e) { return 'Error: ' + e.message; } })()
 
-SHADOW DOM ACCESS EXAMPLE:
-(function(){
-    try {
-        const hosts = document.querySelectorAll('*');
-        for (let host of hosts) {
-            if (host.shadowRoot) {
-                const el = host.shadowRoot.querySelector('#target');
-                if (el) return el.textContent;
-            }
-        }
-        return 'Not found';
-    } catch(e) {
-        return 'Error: ' + e.message;
-    }
-})()
-
-## Return values:
-- Async functions (with await, promises, timeouts) are automatically handled
-- Returns strings, numbers, booleans, and serialized objects/arrays
-- Use JSON.stringify() for complex objects: JSON.stringify(Array.from(document.querySelectorAll('a')).map(el => el.textContent.trim()))
-""",
+Returns strings, numbers, booleans, objects/arrays. Use JSON.stringify() for complex objects.""",
 		)
 		async def execute_js(code: str, browser_session: BrowserSession):
 			# Execute JavaScript with proper error handling and promise support
@@ -1207,7 +1154,7 @@ SHADOW DOM ACCESS EXAMPLE:
 			self.display_files_in_done_text = display_files_in_done_text
 
 			@self.registry.action(
-				'Complete task - with return text and if the task is finished (success=True) or not yet completely finished (success=False), because last step is reached',
+				'Complete task with return text. Set success=True if finished, False if not (e.g. max steps reached).',
 				param_model=StructuredOutputAction[output_model],
 			)
 			async def done(params: StructuredOutputAction):
@@ -1229,7 +1176,7 @@ SHADOW DOM ACCESS EXAMPLE:
 		else:
 
 			@self.registry.action(
-				'Complete task - provide a summary of results for the user. Set success=True if task completed successfully, false otherwise. Text should be your response to the user summarizing results. Include files you would like to display to the user in files_to_display.',
+				'Complete task with summary. Set success=True if completed successfully. Include files in files_to_display.',
 				param_model=DoneAction,
 			)
 			async def done(params: DoneAction, file_system: FileSystem):

From 22acf58424acd985a9932d9a2a1176823fa5bb00 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 11:22:54 -0700
Subject: [PATCH 09/45] Shorter tools

---
 browser_use/tools/service.py | 65 +++++++++++++-----------------------
 browser_use/tools/views.py   | 52 ++++++++++++-----------------
 2 files changed, 44 insertions(+), 73 deletions(-)

diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py
index 5815b618d..4c3b883f0 100644
--- a/browser_use/tools/service.py
+++ b/browser_use/tools/service.py
@@ -115,7 +115,7 @@ class Tools(Generic[Context]):
 
 		# Basic Navigation Actions
 		@self.registry.action(
-			'Search query (defaults DuckDuckGo). Options: duckduckgo, google, bing.',
+			'Search query.',
 			param_model=SearchAction,
 		)
 		async def search(params: SearchAction, browser_session: BrowserSession):
@@ -158,7 +158,7 @@ class Tools(Generic[Context]):
 				return ActionResult(error=f'Failed to search {params.search_engine} for "{params.query}": {str(e)}')
 
 		@self.registry.action(
-			'Navigate to URL. Set new_tab=True to open in new tab.',
+			'Navigate to URL.',
 			param_model=GoToUrlAction,
 		)
 		async def go_to_url(params: GoToUrlAction, browser_session: BrowserSession):
@@ -218,7 +218,7 @@ class Tools(Generic[Context]):
 				error_msg = f'Failed to go back: {str(e)}'
 				return ActionResult(error=error_msg)
 
-		@self.registry.action('Wait x seconds (default 3, max 30). ')
+		@self.registry.action('Wait for page load.')
 		async def wait(seconds: int = 3):
 			# Cap wait time at maximum 30 seconds
 			# Reduce the wait time by 3 seconds to account for the llm call which takes at least 3 seconds
@@ -234,7 +234,7 @@ class Tools(Generic[Context]):
 		# Element Interaction Actions
 
 		@self.registry.action(
-			'Click element by index from browser_state. Set ctrl=True to open in new tab.',
+			'Click element.',
 			param_model=ClickElementAction,
 		)
 		async def click(params: ClickElementAction, browser_session: BrowserSession):
@@ -288,7 +288,7 @@ class Tools(Generic[Context]):
 				return ActionResult(error=error_msg)
 
 		@self.registry.action(
-			'Input text into element by index from browser_state.',
+			'Input text.',
 			param_model=InputTextAction,
 		)
 		async def input_text(
@@ -350,7 +350,7 @@ class Tools(Generic[Context]):
 				return ActionResult(error=error_msg)
 
 		@self.registry.action(
-			'Upload file to element by index from browser_state.',
+			'Upload file.',
 			param_model=UploadFileAction,
 		)
 		async def upload_file(
@@ -501,7 +501,7 @@ class Tools(Generic[Context]):
 
 		# Tab Management Actions
 
-		@self.registry.action('Switch to tab by tab_id.', param_model=SwitchTabAction)
+		@self.registry.action('Switch tab.', param_model=SwitchTabAction)
 		async def switch_tab(params: SwitchTabAction, browser_session: BrowserSession):
 			# Simple switch tab logic
 			try:
@@ -523,7 +523,7 @@ class Tools(Generic[Context]):
 				memory = f'Attempted to switch to tab #{params.tab_id}'
 				return ActionResult(extracted_content=memory, long_term_memory=memory)
 
-		@self.registry.action('Close tab by tab_id.', param_model=CloseTabAction)
+		@self.registry.action('Close tab.', param_model=CloseTabAction)
 		async def close_tab(params: CloseTabAction, browser_session: BrowserSession):
 			# Simple close tab logic
 			try:
@@ -555,11 +555,7 @@ class Tools(Generic[Context]):
 		# This action is temporarily disabled as it needs refactoring to use events
 
 		@self.registry.action(
-			"""Extract semantic data from page markdown via LLM query (e.g. product info, prices).
-Use when: on right page, know what to extract, haven't used on same page before.
-Can't get: interactive elements (buttons, links, dropdowns).
-Set extract_links=True for URLs. Use start_from_char if truncated.
-If fails, use scroll_to_text or scroll instead.""",
+			"""Extract page data via LLM. Use when on right page, know what to extract. Can't get interactive elements.""",
 		)
 		async def extract_structured_data(
 			query: str,
@@ -679,9 +675,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 				raise RuntimeError(str(e))
 
 		@self.registry.action(
-			"""Scroll page by num_pages (down=True for down, False for up). Default 1 page, use 0.5 for half, 10 for bottom.
-For specific containers, use frame_element_index from browser_state (works with dropdowns, custom UI).
-Multiple pages (>=1.0) scroll sequentially. Page height from viewport or 1000px fallback.""",
+			'Scroll page. Multiple pages scroll sequentially.',
 			param_model=ScrollAction,
 		)
 		async def scroll(params: ScrollAction, browser_session: BrowserSession):
@@ -790,7 +784,7 @@ Multiple pages (>=1.0) scroll sequentially. Page height from viewport or 1000px
 				return ActionResult(error=error_msg)
 
 		@self.registry.action(
-			'Send special keys (Escape, Enter, PageDown) or shortcuts (Control+o, Control+Shift+T).',
+			'Send keys.',
 			param_model=SendKeysAction,
 		)
 		async def send_keys(params: SendKeysAction, browser_session: BrowserSession):
@@ -809,7 +803,7 @@ Multiple pages (>=1.0) scroll sequentially. Page height from viewport or 1000px
 				return ActionResult(error=error_msg)
 
 		@self.registry.action(
-			description='Scroll to text on page. Prefer over step-by-step scrolling when target known.',
+			description='Scroll to text.',
 		)
 		async def scroll_to_text(text: str, browser_session: BrowserSession):  # type: ignore
 			# Dispatch scroll to text event
@@ -831,7 +825,7 @@ Multiple pages (>=1.0) scroll sequentially. Page height from viewport or 1000px
 					long_term_memory=f"Tried scrolling to text '{text}' but it was not found",
 				)
 
-		@self.registry.action('Request screenshot in next browser state. Use for visual confirmation or complex visual content.')
+		@self.registry.action('Request screenshot.')
 		async def take_screenshot():
 			"""Request that a screenshot be included in the next observation"""
 			memory = 'Requested screenshot for next observation'
@@ -847,7 +841,7 @@ Multiple pages (>=1.0) scroll sequentially. Page height from viewport or 1000px
 		# Dropdown Actions
 
 		@self.registry.action(
-			'Get dropdown values (<select>, ARIA select). Only for dropdown elements.',
+			'Get dropdown options.',
 			param_model=GetDropdownOptionsAction,
 		)
 		async def get_dropdown_options(params: GetDropdownOptionsAction, browser_session: BrowserSession):
@@ -873,7 +867,7 @@ Multiple pages (>=1.0) scroll sequentially. Page height from viewport or 1000px
 			)
 
 		@self.registry.action(
-			'Select dropdown option by exact text (native <select>, ARIA, custom). Searches element and children.',
+			'Select dropdown option.',
 			param_model=SelectDropdownOptionAction,
 		)
 		async def select_dropdown_option(params: SelectDropdownOptionAction, browser_session: BrowserSession):
@@ -916,7 +910,7 @@ Multiple pages (>=1.0) scroll sequentially. Page height from viewport or 1000px
 					return ActionResult(error=error_msg)
 
 		# File System Actions
-		@self.registry.action('Write/append to file (.md, .txt, .json, .csv, .pdf). PDF: write markdown, auto-converts to PDF.')
+		@self.registry.action('Write/append file.')
 		async def write_file(
 			file_name: str,
 			content: str,
@@ -936,15 +930,13 @@ Multiple pages (>=1.0) scroll sequentially. Page height from viewport or 1000px
 			logger.info(f'💾 {result}')
 			return ActionResult(extracted_content=result, long_term_memory=result)
 
-		@self.registry.action(
-			'Replace old_str with new_str in file. old_str must match exactly. For todo.md updates or specific edits.'
-		)
+		@self.registry.action('Replace in file.')
 		async def replace_file_str(file_name: str, old_str: str, new_str: str, file_system: FileSystem):
 			result = await file_system.replace_file_str(file_name, old_str, new_str)
 			logger.info(f'💾 {result}')
 			return ActionResult(extracted_content=result, long_term_memory=result)
 
-		@self.registry.action('Read file from file system.')
+		@self.registry.action('Read file.')
 		async def read_file(file_name: str, available_file_paths: list[str], file_system: FileSystem):
 			if available_file_paths and file_name in available_file_paths:
 				result = await file_system.read_file(file_name, external_file=True)
@@ -974,22 +966,11 @@ Multiple pages (>=1.0) scroll sequentially. Page height from viewport or 1000px
 			)
 
 		@self.registry.action(
-			"""Execute JavaScript with Runtime.evaluate (returnByValue:true, awaitPromise:true).
+			"""Execute JS. MUST wrap in IIFE: (function(){...})() or async: (async function(){...})()
+Use when other tools fail or need custom logic.
 
-SYNTAX RULES - ALWAYS wrap in IIFE or get "Uncaught at line 0":
-- IIFE: (function(){ ... })() or async: (async function(){ ... })()
-- Add try-catch, proper semicolons, validate elements exist
-
-Use when other tools fail or need custom logic (forms, hover, drag, extract links, custom selectors, shadow DOM, React/Vue/Angular, etc.).
-
-Examples:
 CORRECT: (function(){ try { const el = document.querySelector('#id'); return el ? el.value : 'not found'; } catch(e) { return 'Error: ' + e.message; } })()
-CORRECT: (async function(){ try { await new Promise(r => setTimeout(r, 100)); return 'done'; } catch(e) { return 'Error: ' + e.message; } })()
-WRONG: document.querySelector('#id').value (no IIFE)
-
-Shadow DOM: (function(){ try { const hosts = document.querySelectorAll('*'); for (let host of hosts) { if (host.shadowRoot) { const el = host.shadowRoot.querySelector('#target'); if (el) return el.textContent; } } return 'Not found'; } catch(e) { return 'Error: ' + e.message; } })()
-
-Returns strings, numbers, booleans, objects/arrays. Use JSON.stringify() for complex objects.""",
+WRONG: document.querySelector('#id').value""",
 		)
 		async def execute_js(code: str, browser_session: BrowserSession):
 			# Execute JavaScript with proper error handling and promise support
@@ -1154,7 +1135,7 @@ Returns strings, numbers, booleans, objects/arrays. Use JSON.stringify() for com
 			self.display_files_in_done_text = display_files_in_done_text
 
 			@self.registry.action(
-				'Complete task with return text. Set success=True if finished, False if not (e.g. max steps reached).',
+				'Complete task with structured output.',
 				param_model=StructuredOutputAction[output_model],
 			)
 			async def done(params: StructuredOutputAction):
@@ -1176,7 +1157,7 @@ Returns strings, numbers, booleans, objects/arrays. Use JSON.stringify() for com
 		else:
 
 			@self.registry.action(
-				'Complete task with summary. Set success=True if completed successfully. Include files in files_to_display.',
+				'Complete task.',
 				param_model=DoneAction,
 			)
 			async def done(params: DoneAction, file_system: FileSystem):
diff --git a/browser_use/tools/views.py b/browser_use/tools/views.py
index c93ecc02a..a019890e6 100644
--- a/browser_use/tools/views.py
+++ b/browser_use/tools/views.py
@@ -6,7 +6,7 @@ from pydantic import BaseModel, ConfigDict, Field
 # Action Input Models
 class SearchAction(BaseModel):
 	query: str
-	search_engine: str = 'duckduckgo'  # Options: 'duckduckgo', 'google', 'bing'
+	search_engine: str = Field(default='duckduckgo', description='duckduckgo, google, bing')
 
 
 # Backward compatibility alias
@@ -15,65 +15,59 @@ SearchAction = SearchAction
 
 class GoToUrlAction(BaseModel):
 	url: str
-	new_tab: bool = False  # True to open in new tab, False to navigate in current tab
+	new_tab: bool = Field(default=False)
 
 
 class ClickElementAction(BaseModel):
-	index: int = Field(ge=1, description='index of the element to click')
+	index: int = Field(ge=1, description='from browser_state')
 	ctrl: bool | None = Field(
 		default=None,
-		description='Set to True to open the navigation in a new background tab (Ctrl+Click behavior). Optional.',
+		description='True=New background tab (Ctrl+Click)',
 	)
 	# expect_download: bool = Field(default=False, description='set True if expecting a download, False otherwise')  # moved to downloads_watchdog.py
 	# click_count: int = 1  # TODO
 
 
 class InputTextAction(BaseModel):
-	index: int = Field(ge=0, description='index of the element to input text into, 0 is the page')
+	index: int = Field(ge=1, description='from browser_state')
 	text: str
-	clear_existing: bool = Field(default=True, description='set True to clear existing text, False to append to existing text')
+	clear_existing: bool = Field(default=True, description='True to clear, False to append')
 
 
 class DoneAction(BaseModel):
-	text: str
-	success: bool
-	files_to_display: list[str] | None = []
+	text: str = Field(description='summary for user')
+	success: bool = Field(description='True if completed')
+	files_to_display: list[str] | None = Field(default=[], description='files to display')
 
 
 T = TypeVar('T', bound=BaseModel)
 
 
 class StructuredOutputAction(BaseModel, Generic[T]):
-	success: bool = True
+	success: bool = Field(default=True, description='True if finished, False if not')
 	data: T
 
 
 class SwitchTabAction(BaseModel):
-	tab_id: str = Field(
-		min_length=4,
-		max_length=4,
-		description="tab_id to switch to which is displayed as 'Tab <tab_id>' in the browser_state.",
-	)  # last 4 chars of TargetID
+	tab_id: str = Field(min_length=4, max_length=4, description="from browser_state ('Tab <tab_id>')")
 
 
 class CloseTabAction(BaseModel):
-	tab_id: str = Field(
-		min_length=4, max_length=4, description="tab_id to close which is displayed as 'Tab <tab_id>' in the browser_state."
-	)  # last 4 chars of TargetID
+	tab_id: str = Field(min_length=4, max_length=4, description="from browser_state ('Tab <tab_id>')")
 
 
 class ScrollAction(BaseModel):
-	down: bool  # True to scroll down, False to scroll up
-	num_pages: float = 1.0  # Number of pages to scroll (0.5 = half page, 1.0 = one page, etc.)
-	frame_element_index: int | None = None  # Optional element index to find scroll container for
+	down: bool = Field(description='True=down, False=up')
+	num_pages: float = Field(default=1.0, description='pages to scroll (0.5=half, 1=page, 10=bottom)')
+	frame_element_index: int | None = Field(default=None, description='index for specific container')
 
 
 class SendKeysAction(BaseModel):
-	keys: str
+	keys: str = Field(description='keys (Escape, Enter, PageDown) or shortcuts (Control+o)')
 
 
 class UploadFileAction(BaseModel):
-	index: int
+	index: int = Field(description='from browser_state')
 	path: str
 
 
@@ -82,19 +76,15 @@ class ExtractPageContentAction(BaseModel):
 
 
 class NoParamsAction(BaseModel):
-	"""
-	Accepts absolutely anything in the incoming data
-	and discards it, so the final parsed model is empty.
-	"""
+	"""Accepts any input, discards it, returns empty model."""
 
 	model_config = ConfigDict(extra='ignore')
-	# No fields defined - all inputs are ignored automatically
 
 
 class GetDropdownOptionsAction(BaseModel):
-	index: int = Field(ge=1, description='index of the dropdown element to get the option values for')
+	index: int = Field(ge=1, description='dropdown from browser_state')
 
 
 class SelectDropdownOptionAction(BaseModel):
-	index: int = Field(ge=1, description='index of the dropdown element to select an option for')
-	text: str = Field(description='the text or exact value of the option to select')
+	index: int = Field(ge=1, description='dropdown from browser_state')
+	text: str = Field(description='exact text/value to select')

From 7b995b7fc123e50ea4461740202c35ff8bc6b3ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 11:29:19 -0700
Subject: [PATCH 10/45] Refactor action field descriptions in AgentOutput
 models

- Removed redundant description from action field in AgentOutput and its subclasses.
- Updated the extract_structured_data and execute_js action descriptions in Tools to clarify usage and limitations.
- Enhanced search_engine field description in SearchAction for better clarity on default behavior.
---
 browser_use/agent/views.py   | 5 ++---
 browser_use/tools/service.py | 6 +++---
 browser_use/tools/views.py   | 4 +++-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/browser_use/agent/views.py b/browser_use/agent/views.py
index a8ae7f3c4..c816c41f2 100644
--- a/browser_use/agent/views.py
+++ b/browser_use/agent/views.py
@@ -155,7 +155,6 @@ class AgentOutput(BaseModel):
 	next_goal: str | None = None
 	action: list[ActionModel] = Field(
 		...,
-		description='List of actions to execute',
 		json_schema_extra={'min_items': 1},  # Ensure at least one action is provided
 	)
 
@@ -208,7 +207,7 @@ class AgentOutput(BaseModel):
 			__base__=AgentOutputNoThinking,
 			action=(
 				list[custom_actions],  # type: ignore
-				Field(..., description='List of actions to execute', json_schema_extra={'min_items': 1}),
+				Field(..., json_schema_extra={'min_items': 1}),
 			),
 			__module__=AgentOutputNoThinking.__module__,
 		)
@@ -237,7 +236,7 @@ class AgentOutput(BaseModel):
 			__base__=AgentOutputFlashMode,
 			action=(
 				list[custom_actions],  # type: ignore
-				Field(..., description='List of actions to execute', json_schema_extra={'min_items': 1}),
+				Field(..., json_schema_extra={'min_items': 1}),
 			),
 			__module__=AgentOutputFlashMode.__module__,
 		)
diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py
index 4c3b883f0..8e24773e9 100644
--- a/browser_use/tools/service.py
+++ b/browser_use/tools/service.py
@@ -555,7 +555,7 @@ class Tools(Generic[Context]):
 		# This action is temporarily disabled as it needs refactoring to use events
 
 		@self.registry.action(
-			"""Extract page data via LLM. Use when on right page, know what to extract. Can't get interactive elements.""",
+			"""Extract page data via LLM. Use when on right page, know what to extract. Can't get interactive elements. Don't call again on same page with same query.""",
 		)
 		async def extract_structured_data(
 			query: str,
@@ -967,8 +967,8 @@ You will be given a query and the markdown of a webpage that has been filtered t
 
 		@self.registry.action(
 			"""Execute JS. MUST wrap in IIFE: (function(){...})() or async: (async function(){...})()
-Use when other tools fail or need custom logic.
-
+Use when other tools fail. Limit output. For complex objects use JSON.stringify().
+Don't use comments.
 CORRECT: (function(){ try { const el = document.querySelector('#id'); return el ? el.value : 'not found'; } catch(e) { return 'Error: ' + e.message; } })()
 WRONG: document.querySelector('#id').value""",
 		)
diff --git a/browser_use/tools/views.py b/browser_use/tools/views.py
index a019890e6..44fab1347 100644
--- a/browser_use/tools/views.py
+++ b/browser_use/tools/views.py
@@ -6,7 +6,9 @@ from pydantic import BaseModel, ConfigDict, Field
 # Action Input Models
 class SearchAction(BaseModel):
 	query: str
-	search_engine: str = Field(default='duckduckgo', description='duckduckgo, google, bing')
+	search_engine: str = Field(
+		default='duckduckgo', description='duckduckgo, google, bing (use duckduckgo by default because less captchas)'
+	)
 
 
 # Backward compatibility alias

From 55d10605fd0c858cd091bf9d519a3853632a6be2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 11:59:12 -0700
Subject: [PATCH 11/45] Shorter tools

---
 browser_use/tools/service.py | 44 ++++++++++++++++++------------------
 browser_use/tools/views.py   | 28 +++++++++++------------
 2 files changed, 35 insertions(+), 37 deletions(-)

diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py
index 8e24773e9..3e7b526a6 100644
--- a/browser_use/tools/service.py
+++ b/browser_use/tools/service.py
@@ -131,10 +131,10 @@ class Tools(Generic[Context]):
 				'bing': f'https://www.bing.com/search?q={encoded_query}',
 			}
 
-			if params.search_engine.lower() not in search_engines:
-				return ActionResult(error=f'Unsupported search engine: {params.search_engine}. Options: duckduckgo, google, bing')
+			if params.engine.lower() not in search_engines:
+				return ActionResult(error=f'Unsupported search engine: {params.engine}. Options: duckduckgo, google, bing')
 
-			search_url = search_engines[params.search_engine.lower()]
+			search_url = search_engines[params.engine.lower()]
 
 			# Simple tab logic: use current tab by default
 			use_new_tab = False
@@ -149,13 +149,13 @@ class Tools(Generic[Context]):
 				)
 				await event
 				await event.event_result(raise_if_any=True, raise_if_none=False)
-				memory = f"Searched {params.search_engine.title()} for '{params.query}'"
+				memory = f"Searched {params.engine.title()} for '{params.query}'"
 				msg = f'🔍  {memory}'
 				logger.info(msg)
 				return ActionResult(extracted_content=memory, long_term_memory=memory)
 			except Exception as e:
-				logger.error(f'Failed to search {params.search_engine}: {e}')
-				return ActionResult(error=f'Failed to search {params.search_engine} for "{params.query}": {str(e)}')
+				logger.error(f'Failed to search {params.engine}: {e}')
+				return ActionResult(error=f'Failed to search {params.engine} for "{params.query}": {str(e)}')
 
 		@self.registry.action(
 			'Navigate to URL.',
@@ -313,7 +313,7 @@ class Tools(Generic[Context]):
 					TypeTextEvent(
 						node=node,
 						text=params.text,
-						clear_existing=params.clear_existing,
+						clear_existing=params.clear,
 						is_sensitive=has_sensitive_data,
 						sensitive_key_name=sensitive_key_name,
 					)
@@ -683,18 +683,18 @@ You will be given a query and the markdown of a webpage that has been filtered t
 				# Look up the node from the selector map if index is provided
 				# Special case: index 0 means scroll the whole page (root/body element)
 				node = None
-				if params.frame_element_index is not None and params.frame_element_index != 0:
-					node = await browser_session.get_element_by_index(params.frame_element_index)
+				if params.frame_idx is not None and params.frame_idx != 0:
+					node = await browser_session.get_element_by_index(params.frame_idx)
 					if node is None:
 						# Element does not exist
-						msg = f'Element index {params.frame_element_index} not found in browser state'
+						msg = f'Element index {params.frame_idx} not found in browser state'
 						return ActionResult(error=msg)
 
 				direction = 'down' if params.down else 'up'
 				target = (
 					'the page'
-					if params.frame_element_index is None or params.frame_element_index == 0
-					else f'element {params.frame_element_index}'
+					if params.frame_idx is None or params.frame_idx == 0
+					else f'element {params.frame_idx}'
 				)
 
 				# Get actual viewport height for more accurate scrolling
@@ -715,11 +715,11 @@ You will be given a query and the markdown of a webpage that has been filtered t
 					logger.debug(f'Failed to get viewport height, using fallback 1000px: {e}')
 
 				# For multiple pages (>=1.0), scroll one page at a time to ensure each scroll completes
-				if params.num_pages >= 1.0:
+				if params.pages >= 1.0:
 					import asyncio
 
-					num_full_pages = int(params.num_pages)
-					remaining_fraction = params.num_pages - num_full_pages
+					num_full_pages = int(params.pages)
+					remaining_fraction = params.pages - num_full_pages
 
 					completed_scrolls = 0
 
@@ -761,19 +761,19 @@ You will be given a query and the markdown of a webpage that has been filtered t
 						except Exception as e:
 							logger.warning(f'Fractional scroll failed: {e}')
 
-					if params.num_pages == 1.0:
+					if params.pages == 1.0:
 						long_term_memory = f'Scrolled {direction} {target} by one page ({viewport_height}px)'
 					else:
-						long_term_memory = f'Scrolled {direction} {target} by {completed_scrolls:.1f} pages (requested: {params.num_pages}, {viewport_height}px per page)'
+						long_term_memory = f'Scrolled {direction} {target} by {completed_scrolls:.1f} pages (requested: {params.pages}, {viewport_height}px per page)'
 				else:
 					# For fractional pages <1.0, do single scroll
-					pixels = int(params.num_pages * viewport_height)
+					pixels = int(params.pages * viewport_height)
 					event = browser_session.event_bus.dispatch(
 						ScrollEvent(direction='down' if params.down else 'up', amount=pixels, node=node)
 					)
 					await event
 					await event.event_result(raise_if_any=True, raise_if_none=False)
-					long_term_memory = f'Scrolled {direction} {target} by {params.num_pages} pages ({viewport_height}px per page)'
+					long_term_memory = f'Scrolled {direction} {target} by {params.pages} pages ({viewport_height}px per page)'
 
 				msg = f'🔍 {long_term_memory}'
 				logger.info(msg)
@@ -1170,10 +1170,10 @@ WRONG: document.querySelector('#id').value""",
 					memory += f' - {len_text - len_max_memory} more characters'
 
 				attachments = []
-				if params.files_to_display:
+				if params.files:
 					if self.display_files_in_done_text:
 						file_msg = ''
-						for file_name in params.files_to_display:
+						for file_name in params.files:
 							if file_name == 'todo.md':
 								continue
 							file_content = file_system.display_file(file_name)
@@ -1186,7 +1186,7 @@ WRONG: document.querySelector('#id').value""",
 						else:
 							logger.warning('Agent wanted to display files but none were found')
 					else:
-						for file_name in params.files_to_display:
+						for file_name in params.files:
 							if file_name == 'todo.md':
 								continue
 							file_content = file_system.display_file(file_name)
diff --git a/browser_use/tools/views.py b/browser_use/tools/views.py
index 44fab1347..692757c6f 100644
--- a/browser_use/tools/views.py
+++ b/browser_use/tools/views.py
@@ -31,14 +31,14 @@ class ClickElementAction(BaseModel):
 
 
 class InputTextAction(BaseModel):
-	index: int = Field(ge=1, description='from browser_state')
+	index: int = Field(ge=1, description='index')
 	text: str
-	clear_existing: bool = Field(default=True, description='True to clear, False to append')
+	clear: bool = Field(default=True, description='1=clear, 0=append')
 
 
 class DoneAction(BaseModel):
 	text: str = Field(description='summary for user')
-	success: bool = Field(description='True if completed')
+	success: bool = Field(description='True if user_request completed successfully')
 	files_to_display: list[str] | None = Field(default=[], description='files to display')
 
 
@@ -46,22 +46,22 @@ T = TypeVar('T', bound=BaseModel)
 
 
 class StructuredOutputAction(BaseModel, Generic[T]):
-	success: bool = Field(default=True, description='True if finished, False if not')
+	success: bool = Field(default=True, description='1=done')
 	data: T
 
 
 class SwitchTabAction(BaseModel):
-	tab_id: str = Field(min_length=4, max_length=4, description="from browser_state ('Tab <tab_id>')")
+	tab_id: str = Field(min_length=4, max_length=4, description='4-char id')
 
 
 class CloseTabAction(BaseModel):
-	tab_id: str = Field(min_length=4, max_length=4, description="from browser_state ('Tab <tab_id>')")
+	tab_id: str = Field(min_length=4, max_length=4, description='4-char id')
 
 
 class ScrollAction(BaseModel):
-	down: bool = Field(description='True=down, False=up')
-	num_pages: float = Field(default=1.0, description='pages to scroll (0.5=half, 1=page, 10=bottom)')
-	frame_element_index: int | None = Field(default=None, description='index for specific container')
+	down: bool = Field(description='1=down, 0=up')
+	pages: float = Field(default=1.0, description='0.5=half, 1=pg, 10=bottom')
+	frame_index: int | None = Field(default=None, description='container index')
 
 
 class SendKeysAction(BaseModel):
@@ -69,7 +69,7 @@ class SendKeysAction(BaseModel):
 
 
 class UploadFileAction(BaseModel):
-	index: int = Field(description='from browser_state')
+	index: int = Field(description='index')
 	path: str
 
 
@@ -78,15 +78,13 @@ class ExtractPageContentAction(BaseModel):
 
 
 class NoParamsAction(BaseModel):
-	"""Accepts any input, discards it, returns empty model."""
-
 	model_config = ConfigDict(extra='ignore')
 
 
 class GetDropdownOptionsAction(BaseModel):
-	index: int = Field(ge=1, description='dropdown from browser_state')
+	index: int = Field(ge=1, description='index')
 
 
 class SelectDropdownOptionAction(BaseModel):
-	index: int = Field(ge=1, description='dropdown from browser_state')
-	text: str = Field(description='exact text/value to select')
+	index: int = Field(ge=1, description='index')
+	text: str = Field(description='exact text/value')

From 18b7dd95a4c24ce9c8d9d1f27b78c8bfffaffb03 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 12:16:13 -0700
Subject: [PATCH 12/45] Fix param: align action field names with call sites

---
 browser_use/agent/system_prompt.md             |  4 ++--
 browser_use/agent/system_prompt_no_thinking.md |  4 ++--
 browser_use/tools/service.py                   | 12 ++++--------
 browser_use/tools/views.py                     |  4 ++--
 4 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/browser_use/agent/system_prompt.md b/browser_use/agent/system_prompt.md
index b48a68cb6..fb58b6044 100644
--- a/browser_use/agent/system_prompt.md
+++ b/browser_use/agent/system_prompt.md
@@ -118,9 +118,9 @@ You must call the `done` action in one of two cases:
 The `done` action is your opportunity to terminate and share your findings with the user.
 - Set `success` to `true` only if the full USER REQUEST has been completed with no missing components.
 - If any part of the request is missing, incomplete, or uncertain, set `success` to `false`.
-- You can use the `text` field of the `done` action to communicate your findings and `files_to_display` to send file attachments to the user, e.g. `["results.md"]`.
+- You can use the `text` field of the `done` action to communicate your findings and `files` to send file attachments to the user, e.g. `["results.md"]`.
 - Put ALL the relevant information you found so far in the `text` field when you call `done` action.
-- Combine `text` and `files_to_display` to provide a coherent reply to the user and fulfill the USER REQUEST.
+- Combine `text` and `files` to provide a coherent reply to the user and fulfill the USER REQUEST.
 - You are ONLY ALLOWED to call `done` as a single action. Don't call it together with other actions.
 - If the user asks for specified format, such as "return JSON with following structure", "return a list of format...", MAKE sure to use the right format in your answer.
 - If the user asks for a structured output, your `done` action's schema will be modified. Take this schema into account when solving the task!
diff --git a/browser_use/agent/system_prompt_no_thinking.md b/browser_use/agent/system_prompt_no_thinking.md
index c905e5bb6..6eddc625d 100644
--- a/browser_use/agent/system_prompt_no_thinking.md
+++ b/browser_use/agent/system_prompt_no_thinking.md
@@ -118,9 +118,9 @@ You must call the `done` action in one of two cases:
 The `done` action is your opportunity to terminate and share your findings with the user.
 - Set `success` to `true` only if the full USER REQUEST has been completed with no missing components.
 - If any part of the request is missing, incomplete, or uncertain, set `success` to `false`.
-- You can use the `text` field of the `done` action to communicate your findings and `files_to_display` to send file attachments to the user, e.g. `["results.md"]`.
+- You can use the `text` field of the `done` action to communicate your findings and `files` to send file attachments to the user, e.g. `["results.md"]`.
 - Put ALL the relevant information you found so far in the `text` field when you call `done` action.
-- Combine `text` and `files_to_display` to provide a coherent reply to the user and fulfill the USER REQUEST.
+- Combine `text` and `files` to provide a coherent reply to the user and fulfill the USER REQUEST.
 - You are ONLY ALLOWED to call `done` as a single action. Don't call it together with other actions.
 - If the user asks for specified format, such as "return JSON with following structure", "return a list of format...", MAKE sure to use the right format in your answer.
 - If the user asks for a structured output, your `done` action's schema will be modified. Take this schema into account when solving the task!
diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py
index 3e7b526a6..dbed5a1dc 100644
--- a/browser_use/tools/service.py
+++ b/browser_use/tools/service.py
@@ -683,19 +683,15 @@ You will be given a query and the markdown of a webpage that has been filtered t
 				# Look up the node from the selector map if index is provided
 				# Special case: index 0 means scroll the whole page (root/body element)
 				node = None
-				if params.frame_idx is not None and params.frame_idx != 0:
-					node = await browser_session.get_element_by_index(params.frame_idx)
+				if params.frame_index is not None and params.frame_index != 0:
+					node = await browser_session.get_element_by_index(params.frame_index)
 					if node is None:
 						# Element does not exist
-						msg = f'Element index {params.frame_idx} not found in browser state'
+						msg = f'Element index {params.frame_index} not found in browser state'
 						return ActionResult(error=msg)
 
 				direction = 'down' if params.down else 'up'
-				target = (
-					'the page'
-					if params.frame_idx is None or params.frame_idx == 0
-					else f'element {params.frame_idx}'
-				)
+				target = 'the page' if params.frame_index is None or params.frame_index == 0 else f'element {params.frame_index}'
 
 				# Get actual viewport height for more accurate scrolling
 				try:
diff --git a/browser_use/tools/views.py b/browser_use/tools/views.py
index 692757c6f..c73752ba0 100644
--- a/browser_use/tools/views.py
+++ b/browser_use/tools/views.py
@@ -6,7 +6,7 @@ from pydantic import BaseModel, ConfigDict, Field
 # Action Input Models
 class SearchAction(BaseModel):
 	query: str
-	search_engine: str = Field(
+	engine: str = Field(
 		default='duckduckgo', description='duckduckgo, google, bing (use duckduckgo by default because less captchas)'
 	)
 
@@ -39,7 +39,7 @@ class InputTextAction(BaseModel):
 class DoneAction(BaseModel):
 	text: str = Field(description='summary for user')
 	success: bool = Field(description='True if user_request completed successfully')
-	files_to_display: list[str] | None = Field(default=[], description='files to display')
+	files: list[str] | None = Field(default=[], description='files to display')
 
 
 T = TypeVar('T', bound=BaseModel)

From ad9b1747629c00df8ad1724c3de2ed064de86653 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 12:18:21 -0700
Subject: [PATCH 13/45] Fix param: rename ScrollAction.frame_index to index

---
 browser_use/tools/service.py | 8 ++++----
 browser_use/tools/views.py   | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py
index dbed5a1dc..3d9598b31 100644
--- a/browser_use/tools/service.py
+++ b/browser_use/tools/service.py
@@ -683,15 +683,15 @@ You will be given a query and the markdown of a webpage that has been filtered t
 				# Look up the node from the selector map if index is provided
 				# Special case: index 0 means scroll the whole page (root/body element)
 				node = None
-				if params.frame_index is not None and params.frame_index != 0:
-					node = await browser_session.get_element_by_index(params.frame_index)
+				if params.index is not None and params.index != 0:
+					node = await browser_session.get_element_by_index(params.index)
 					if node is None:
 						# Element does not exist
-						msg = f'Element index {params.frame_index} not found in browser state'
+						msg = f'Element index {params.index} not found in browser state'
 						return ActionResult(error=msg)
 
 				direction = 'down' if params.down else 'up'
-				target = 'the page' if params.frame_index is None or params.frame_index == 0 else f'element {params.frame_index}'
+				target = 'the page' if params.index is None or params.index == 0 else f'element {params.index}'
 
 				# Get actual viewport height for more accurate scrolling
 				try:
diff --git a/browser_use/tools/views.py b/browser_use/tools/views.py
index c73752ba0..8b6ab5cc1 100644
--- a/browser_use/tools/views.py
+++ b/browser_use/tools/views.py
@@ -61,7 +61,7 @@ class CloseTabAction(BaseModel):
 class ScrollAction(BaseModel):
 	down: bool = Field(description='1=down, 0=up')
 	pages: float = Field(default=1.0, description='0.5=half, 1=pg, 10=bottom')
-	frame_index: int | None = Field(default=None, description='container index')
+	index: int | None = Field(default=None, description='Use to scroll in specific container with that element')
 
 
 class SendKeysAction(BaseModel):
@@ -69,7 +69,7 @@ class SendKeysAction(BaseModel):
 
 
 class UploadFileAction(BaseModel):
-	index: int = Field(description='index')
+	index: int
 	path: str
 
 
@@ -82,7 +82,7 @@ class NoParamsAction(BaseModel):
 
 
 class GetDropdownOptionsAction(BaseModel):
-	index: int = Field(ge=1, description='index')
+	index: int
 
 
 class SelectDropdownOptionAction(BaseModel):

From 968414b9826229dc49961fba6378b3fd1c61f162 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 12:48:25 -0700
Subject: [PATCH 14/45] Compress gemini schema

---
 browser_use/agent/views.py     |  3 ---
 browser_use/llm/google/chat.py |  2 +-
 browser_use/llm/schema.py      | 28 ++++++++++++++++++++++++++++
 3 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/browser_use/agent/views.py b/browser_use/agent/views.py
index c816c41f2..1234471a7 100644
--- a/browser_use/agent/views.py
+++ b/browser_use/agent/views.py
@@ -187,7 +187,6 @@ class AgentOutput(BaseModel):
 			),
 			__module__=AgentOutput.__module__,
 		)
-		model_.__doc__ = 'AgentOutput model with custom actions'
 		return model_
 
 	@staticmethod
@@ -212,7 +211,6 @@ class AgentOutput(BaseModel):
 			__module__=AgentOutputNoThinking.__module__,
 		)
 
-		model.__doc__ = 'AgentOutput model with custom actions'
 		return model
 
 	@staticmethod
@@ -241,7 +239,6 @@ class AgentOutput(BaseModel):
 			__module__=AgentOutputFlashMode.__module__,
 		)
 
-		model.__doc__ = 'AgentOutput model with custom actions'
 		return model
 
 
diff --git a/browser_use/llm/google/chat.py b/browser_use/llm/google/chat.py
index ad511bb26..3c11186a0 100644
--- a/browser_use/llm/google/chat.py
+++ b/browser_use/llm/google/chat.py
@@ -255,7 +255,7 @@ class ChatGoogle(BaseChatModel):
 						self.logger.debug(f'🔧 Requesting structured output for {output_format.__name__}')
 						config['response_mime_type'] = 'application/json'
 						# Convert Pydantic model to Gemini-compatible schema
-						optimized_schema = SchemaOptimizer.create_optimized_json_schema(output_format)
+						optimized_schema = SchemaOptimizer.create_gemini_optimized_schema(output_format)
 
 						gemini_schema = self._fix_gemini_schema(optimized_schema)
 						config['response_schema'] = gemini_schema
diff --git a/browser_use/llm/schema.py b/browser_use/llm/schema.py
index 22e54b2b9..9b76ce4f1 100644
--- a/browser_use/llm/schema.py
+++ b/browser_use/llm/schema.py
@@ -159,3 +159,31 @@ class SchemaOptimizer:
 		elif isinstance(schema, list):
 			for item in schema:
 				SchemaOptimizer._make_strict_compatible(item)
+
+	@staticmethod
+	def create_gemini_optimized_schema(model: type[BaseModel]) -> dict[str, Any]:
+		"""
+		Create Gemini-optimized schema that removes 'required' arrays to save tokens.
+		Gemini can infer required fields from context since all fields are required.
+
+		Args:
+			model: The Pydantic model to optimize
+
+		Returns:
+			Optimized schema without required arrays
+		"""
+		# Start with standard optimized schema
+		schema = SchemaOptimizer.create_optimized_json_schema(model)
+
+		def remove_required_arrays(obj: Any) -> Any:
+			"""Recursively remove 'required' arrays"""
+			if isinstance(obj, dict):
+				# Remove 'required' key
+				result = {k: v for k, v in obj.items() if k != 'required'}
+				# Recursively process nested structures
+				return {k: remove_required_arrays(v) for k, v in result.items()}
+			elif isinstance(obj, list):
+				return [remove_required_arrays(item) for item in obj]
+			return obj
+
+		return remove_required_arrays(schema)

From 1248c07e83ab9c52885b90348d8894bc20f421f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 13:02:43 -0700
Subject: [PATCH 15/45] Remove descriptions

---
 browser_use/llm/schema.py             |  5 +++--
 browser_use/tools/registry/service.py |  2 --
 browser_use/tools/service.py          | 22 ++++++++--------------
 3 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/browser_use/llm/schema.py b/browser_use/llm/schema.py
index 9b76ce4f1..075564462 100644
--- a/browser_use/llm/schema.py
+++ b/browser_use/llm/schema.py
@@ -48,9 +48,10 @@ class SchemaOptimizer:
 					if key == 'title' and not in_properties:
 						continue
 
-					# Preserve FULL descriptions without truncation
+					# Preserve FULL descriptions without truncation, skip empty ones
 					elif key == 'description':
-						optimized[key] = value
+						if value:  # Only include non-empty descriptions
+							optimized[key] = value
 
 					# Handle type field
 					elif key == 'type':
diff --git a/browser_use/tools/registry/service.py b/browser_use/tools/registry/service.py
index 7e592b32b..33bbd0342 100644
--- a/browser_use/tools/registry/service.py
+++ b/browser_use/tools/registry/service.py
@@ -538,8 +538,6 @@ class Registry(Generic[Context]):
 			union_type = Union[tuple(individual_action_models)]  # type: ignore : Typing doesn't understand that the length is >= 2 (by design)
 
 			class ActionModelUnion(RootModel[union_type]):  # type: ignore
-				"""Union of all available action models that maintains ActionModel interface"""
-
 				def get_index(self) -> int | None:
 					"""Delegate get_index to the underlying action model"""
 					if hasattr(self.root, 'get_index'):
diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py
index 3d9598b31..abfab242d 100644
--- a/browser_use/tools/service.py
+++ b/browser_use/tools/service.py
@@ -204,7 +204,7 @@ class Tools(Generic[Context]):
 					# Return error in ActionResult instead of re-raising
 					return ActionResult(error=f'Navigation failed: {str(e)}')
 
-		@self.registry.action('Go back', param_model=NoParamsAction)
+		@self.registry.action('', param_model=NoParamsAction)
 		async def go_back(_: NoParamsAction, browser_session: BrowserSession):
 			try:
 				event = browser_session.event_bus.dispatch(GoBackEvent())
@@ -218,7 +218,7 @@ class Tools(Generic[Context]):
 				error_msg = f'Failed to go back: {str(e)}'
 				return ActionResult(error=error_msg)
 
-		@self.registry.action('Wait for page load.')
+		@self.registry.action('Wait for page.')
 		async def wait(seconds: int = 3):
 			# Cap wait time at maximum 30 seconds
 			# Reduce the wait time by 3 seconds to account for the llm call which takes at least 3 seconds
@@ -501,7 +501,7 @@ class Tools(Generic[Context]):
 
 		# Tab Management Actions
 
-		@self.registry.action('Switch tab.', param_model=SwitchTabAction)
+		@self.registry.action('', param_model=SwitchTabAction)
 		async def switch_tab(params: SwitchTabAction, browser_session: BrowserSession):
 			# Simple switch tab logic
 			try:
@@ -523,7 +523,7 @@ class Tools(Generic[Context]):
 				memory = f'Attempted to switch to tab #{params.tab_id}'
 				return ActionResult(extracted_content=memory, long_term_memory=memory)
 
-		@self.registry.action('Close tab.', param_model=CloseTabAction)
+		@self.registry.action('', param_model=CloseTabAction)
 		async def close_tab(params: CloseTabAction, browser_session: BrowserSession):
 			# Simple close tab logic
 			try:
@@ -798,9 +798,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 				error_msg = f'Failed to send keys: {str(e)}'
 				return ActionResult(error=error_msg)
 
-		@self.registry.action(
-			description='Scroll to text.',
-		)
+		@self.registry.action('')
 		async def scroll_to_text(text: str, browser_session: BrowserSession):  # type: ignore
 			# Dispatch scroll to text event
 			event = browser_session.event_bus.dispatch(ScrollToTextEvent(text=text))
@@ -821,7 +819,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 					long_term_memory=f"Tried scrolling to text '{text}' but it was not found",
 				)
 
-		@self.registry.action('Request screenshot.')
+		@self.registry.action('')
 		async def take_screenshot():
 			"""Request that a screenshot be included in the next observation"""
 			memory = 'Requested screenshot for next observation'
@@ -932,7 +930,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 			logger.info(f'💾 {result}')
 			return ActionResult(extracted_content=result, long_term_memory=result)
 
-		@self.registry.action('Read file.')
+		@self.registry.action('')
 		async def read_file(file_name: str, available_file_paths: list[str], file_system: FileSystem):
 			if available_file_paths and file_name in available_file_paths:
 				result = await file_system.read_file(file_name, external_file=True)
@@ -962,11 +960,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 			)
 
 		@self.registry.action(
-			"""Execute JS. MUST wrap in IIFE: (function(){...})() or async: (async function(){...})()
-Use when other tools fail. Limit output. For complex objects use JSON.stringify().
-Don't use comments.
-CORRECT: (function(){ try { const el = document.querySelector('#id'); return el ? el.value : 'not found'; } catch(e) { return 'Error: ' + e.message; } })()
-WRONG: document.querySelector('#id').value""",
+			'JS eval. Wrap in IIFE: (function(){...})(). Use try/catch. JSON.stringify() for objects.',
 		)
 		async def execute_js(code: str, browser_session: BrowserSession):
 			# Execute JavaScript with proper error handling and promise support

From ae3b2a5926f99c984ec6b52aeb5e678ad54ef5d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 13:06:19 -0700
Subject: [PATCH 16/45] Remove descriptions

---
 browser_use/tools/service.py | 22 +++++++++++-----------
 browser_use/tools/views.py   |  2 +-
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py
index abfab242d..829a4533a 100644
--- a/browser_use/tools/service.py
+++ b/browser_use/tools/service.py
@@ -115,7 +115,7 @@ class Tools(Generic[Context]):
 
 		# Basic Navigation Actions
 		@self.registry.action(
-			'Search query.',
+			'',
 			param_model=SearchAction,
 		)
 		async def search(params: SearchAction, browser_session: BrowserSession):
@@ -158,7 +158,7 @@ class Tools(Generic[Context]):
 				return ActionResult(error=f'Failed to search {params.engine} for "{params.query}": {str(e)}')
 
 		@self.registry.action(
-			'Navigate to URL.',
+			'',
 			param_model=GoToUrlAction,
 		)
 		async def go_to_url(params: GoToUrlAction, browser_session: BrowserSession):
@@ -218,7 +218,7 @@ class Tools(Generic[Context]):
 				error_msg = f'Failed to go back: {str(e)}'
 				return ActionResult(error=error_msg)
 
-		@self.registry.action('Wait for page.')
+		@self.registry.action('')
 		async def wait(seconds: int = 3):
 			# Cap wait time at maximum 30 seconds
 			# Reduce the wait time by 3 seconds to account for the llm call which takes at least 3 seconds
@@ -234,7 +234,7 @@ class Tools(Generic[Context]):
 		# Element Interaction Actions
 
 		@self.registry.action(
-			'Click element.',
+			'',
 			param_model=ClickElementAction,
 		)
 		async def click(params: ClickElementAction, browser_session: BrowserSession):
@@ -288,7 +288,7 @@ class Tools(Generic[Context]):
 				return ActionResult(error=error_msg)
 
 		@self.registry.action(
-			'Input text.',
+			'',
 			param_model=InputTextAction,
 		)
 		async def input_text(
@@ -350,7 +350,7 @@ class Tools(Generic[Context]):
 				return ActionResult(error=error_msg)
 
 		@self.registry.action(
-			'Upload file.',
+			'',
 			param_model=UploadFileAction,
 		)
 		async def upload_file(
@@ -780,7 +780,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 				return ActionResult(error=error_msg)
 
 		@self.registry.action(
-			'Send keys.',
+			'',
 			param_model=SendKeysAction,
 		)
 		async def send_keys(params: SendKeysAction, browser_session: BrowserSession):
@@ -835,7 +835,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 		# Dropdown Actions
 
 		@self.registry.action(
-			'Get dropdown options.',
+			'',
 			param_model=GetDropdownOptionsAction,
 		)
 		async def get_dropdown_options(params: GetDropdownOptionsAction, browser_session: BrowserSession):
@@ -861,7 +861,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 			)
 
 		@self.registry.action(
-			'Select dropdown option.',
+			'',
 			param_model=SelectDropdownOptionAction,
 		)
 		async def select_dropdown_option(params: SelectDropdownOptionAction, browser_session: BrowserSession):
@@ -904,7 +904,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 					return ActionResult(error=error_msg)
 
 		# File System Actions
-		@self.registry.action('Write/append file.')
+		@self.registry.action('')
 		async def write_file(
 			file_name: str,
 			content: str,
@@ -924,7 +924,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 			logger.info(f'💾 {result}')
 			return ActionResult(extracted_content=result, long_term_memory=result)
 
-		@self.registry.action('Replace in file.')
+		@self.registry.action('')
 		async def replace_file_str(file_name: str, old_str: str, new_str: str, file_system: FileSystem):
 			result = await file_system.replace_file_str(file_name, old_str, new_str)
 			logger.info(f'💾 {result}')
diff --git a/browser_use/tools/views.py b/browser_use/tools/views.py
index 8b6ab5cc1..222679f23 100644
--- a/browser_use/tools/views.py
+++ b/browser_use/tools/views.py
@@ -31,7 +31,7 @@ class ClickElementAction(BaseModel):
 
 
 class InputTextAction(BaseModel):
-	index: int = Field(ge=1, description='index')
+	index: int = Field(ge=1, description='from browser_state')
 	text: str
 	clear: bool = Field(default=True, description='1=clear, 0=append')
 

From 50f97000e4a0c33cea3bc820c2ed9b5ef1d4f7b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 13:21:14 -0700
Subject: [PATCH 17/45] file parameter naming

---
 browser_use/tools/service.py | 6 +++---
 browser_use/tools/views.py   | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py
index 829a4533a..6e4502cc6 100644
--- a/browser_use/tools/service.py
+++ b/browser_use/tools/service.py
@@ -1160,10 +1160,10 @@ You will be given a query and the markdown of a webpage that has been filtered t
 					memory += f' - {len_text - len_max_memory} more characters'
 
 				attachments = []
-				if params.files:
+				if params.files_to_display:
 					if self.display_files_in_done_text:
 						file_msg = ''
-						for file_name in params.files:
+						for file_name in params.files_to_display:
 							if file_name == 'todo.md':
 								continue
 							file_content = file_system.display_file(file_name)
@@ -1176,7 +1176,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 						else:
 							logger.warning('Agent wanted to display files but none were found')
 					else:
-						for file_name in params.files:
+						for file_name in params.files_to_display:
 							if file_name == 'todo.md':
 								continue
 							file_content = file_system.display_file(file_name)
diff --git a/browser_use/tools/views.py b/browser_use/tools/views.py
index 222679f23..105530a8c 100644
--- a/browser_use/tools/views.py
+++ b/browser_use/tools/views.py
@@ -39,7 +39,7 @@ class InputTextAction(BaseModel):
 class DoneAction(BaseModel):
 	text: str = Field(description='summary for user')
 	success: bool = Field(description='True if user_request completed successfully')
-	files: list[str] | None = Field(default=[], description='files to display')
+	files_to_display: list[str] | None = Field(default=[])
 
 
 T = TypeVar('T', bound=BaseModel)

From f0acb6b3386698d0d0ebfddd469fb716ecda5cc4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 13:31:38 -0700
Subject: [PATCH 18/45] Replace tool names like go_to_url with navigate

---
 browser_use/agent/service.py                  |  2 +-
 browser_use/agent/system_prompt.md            |  4 +-
 .../agent/system_prompt_no_thinking.md        |  4 +-
 browser_use/tools/service.py                  | 22 +++----
 docs/customize/agent/supported-models.mdx     |  2 +-
 docs/customize/tools/available.mdx            |  2 +-
 examples/features/initial_actions.py          |  4 +-
 examples/models/qwen.py                       |  2 +-
 .../test_browser_event_ClickElementEvent.py   | 40 ++++++-------
 ...t_browser_event_GetDropdownOptionsEvent.py | 32 +++++-----
 ...vent_GetDropdownOptionsEvent_aria_menus.py | 12 ++--
 .../test_browser_event_NavigateToUrlEvent.py  | 58 +++++++++----------
 tests/ci/test_browser_event_ScrollEvent.py    |  4 +-
 tests/ci/test_browser_session_output_paths.py |  2 +-
 ...r_watchdog_downloads_upload_full_circle.py |  6 +-
 tests/ci/test_tools.py                        | 30 +++++-----
 tests/ci/test_url_shortening.py               | 10 ++--
 tests/scripts/debug_iframe_scrolling.py       |  2 +-
 18 files changed, 119 insertions(+), 119 deletions(-)

diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py
index 6b0a2fa0d..4868daaa8 100644
--- a/browser_use/agent/service.py
+++ b/browser_use/agent/service.py
@@ -323,7 +323,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
 			initial_url = self._extract_url_from_task(self.task)
 			if initial_url:
 				self.logger.info(f'🔗 Found URL in task: {initial_url}, adding as initial action...')
-				initial_actions = [{'go_to_url': {'url': initial_url, 'new_tab': False}}]
+				initial_actions = [{'navigate': {'url': initial_url, 'new_tab': False}}]
 
 		self.initial_url = initial_url
 
diff --git a/browser_use/agent/system_prompt.md b/browser_use/agent/system_prompt.md
index fb58b6044..b5fb4616b 100644
--- a/browser_use/agent/system_prompt.md
+++ b/browser_use/agent/system_prompt.md
@@ -146,7 +146,7 @@ You can output multiple actions in one step. Try to be efficient where it makes
 
 Do not try multiple different paths in one step. Always have one clear goal per step. 
 Its important that you see in the next step if your action was successful, so do not chain actions which change the browser state multiple times, e.g. 
-- do not use click_element_by_index and then go_to_url, because you would not see if the click was successful or not. 
+- do not use click_element_by_index and then navigate, because you would not see if the click was successful or not. 
 - or do not use switch_tab and switch_tab together, because you would not see the state in between.
 - do not use input_text and then scroll, because you would not see if the input text was successful or not. 
 </efficiency_guidelines>
@@ -210,7 +210,7 @@ You must ALWAYS respond with a valid JSON in this exact format:
   "evaluation_previous_goal": "Concise one-sentence analysis of your last action. Clearly state success, failure, or uncertain.",
   "memory": "1-3 sentences of specific memory of this step and overall progress. You should put here everything that will help you track progress in future steps. Like counting pages visited, items found, etc.",
   "next_goal": "State the next immediate goal and action to achieve it, in one clear sentence."
-  "action":[{{"go_to_url": {{ "url": "url_value"}}}}, // ... more actions in sequence]
+  "action":[{{"navigate": {{ "url": "url_value"}}}}, // ... more actions in sequence]
 }}
 
 Action list should NEVER be empty.
diff --git a/browser_use/agent/system_prompt_no_thinking.md b/browser_use/agent/system_prompt_no_thinking.md
index 6eddc625d..efdf8d2fe 100644
--- a/browser_use/agent/system_prompt_no_thinking.md
+++ b/browser_use/agent/system_prompt_no_thinking.md
@@ -145,7 +145,7 @@ You can output multiple actions in one step. Try to be efficient where it makes
 
 Do not try multiple different paths in one step. Always have one clear goal per step. 
 Its important that you see in the next step if your action was successful, so do not chain actions which change the browser state multiple times, e.g. 
-- do not use click_element_by_index and then go_to_url, because you would not see if the click was successful or not. 
+- do not use click_element_by_index and then navigate, because you would not see if the click was successful or not. 
 - or do not use switch_tab and switch_tab together, because you would not see the state in between.
 - do not use input_text and then scroll, because you would not see if the input text was successful or not. 
 </efficiency_guidelines>
@@ -206,7 +206,7 @@ You must ALWAYS respond with a valid JSON in this exact format:
   "evaluation_previous_goal": "One-sentence analysis of your last action. Clearly state success, failure, or uncertain.",
   "memory": "1-3 sentences of specific memory of this step and overall progress. You should put here everything that will help you track progress in future steps. Like counting pages visited, items found, etc.",
   "next_goal": "State the next immediate goal and action to achieve it, in one clear sentence.",
-  "action":[{{"go_to_url": {{ "url": "url_value"}}}}, // ... more actions in sequence]
+  "action":[{{"navigate": {{ "url": "url_value"}}}}, // ... more actions in sequence]
 }}
 
 Action list should NEVER be empty.
diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py
index 6e4502cc6..60b85e082 100644
--- a/browser_use/tools/service.py
+++ b/browser_use/tools/service.py
@@ -161,7 +161,7 @@ class Tools(Generic[Context]):
 			'',
 			param_model=GoToUrlAction,
 		)
-		async def go_to_url(params: GoToUrlAction, browser_session: BrowserSession):
+		async def navigate(params: GoToUrlAction, browser_session: BrowserSession):
 			try:
 				# Dispatch navigation event
 				event = browser_session.event_bus.dispatch(NavigateToUrlEvent(url=params.url, new_tab=params.new_tab))
@@ -291,7 +291,7 @@ class Tools(Generic[Context]):
 			'',
 			param_model=InputTextAction,
 		)
-		async def input_text(
+		async def input(
 			params: InputTextAction,
 			browser_session: BrowserSession,
 			has_sensitive_data: bool = False,
@@ -502,7 +502,7 @@ class Tools(Generic[Context]):
 		# Tab Management Actions
 
 		@self.registry.action('', param_model=SwitchTabAction)
-		async def switch_tab(params: SwitchTabAction, browser_session: BrowserSession):
+		async def switch(params: SwitchTabAction, browser_session: BrowserSession):
 			# Simple switch tab logic
 			try:
 				target_id = await browser_session.get_target_id_from_tab_id(params.tab_id)
@@ -524,7 +524,7 @@ class Tools(Generic[Context]):
 				return ActionResult(extracted_content=memory, long_term_memory=memory)
 
 		@self.registry.action('', param_model=CloseTabAction)
-		async def close_tab(params: CloseTabAction, browser_session: BrowserSession):
+		async def close(params: CloseTabAction, browser_session: BrowserSession):
 			# Simple close tab logic
 			try:
 				target_id = await browser_session.get_target_id_from_tab_id(params.tab_id)
@@ -557,7 +557,7 @@ class Tools(Generic[Context]):
 		@self.registry.action(
 			"""Extract page data via LLM. Use when on right page, know what to extract. Can't get interactive elements. Don't call again on same page with same query.""",
 		)
-		async def extract_structured_data(
+		async def extract(
 			query: str,
 			extract_links: bool,
 			browser_session: BrowserSession,
@@ -799,7 +799,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 				return ActionResult(error=error_msg)
 
 		@self.registry.action('')
-		async def scroll_to_text(text: str, browser_session: BrowserSession):  # type: ignore
+		async def find_text(text: str, browser_session: BrowserSession):  # type: ignore
 			# Dispatch scroll to text event
 			event = browser_session.event_bus.dispatch(ScrollToTextEvent(text=text))
 
@@ -820,7 +820,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 				)
 
 		@self.registry.action('')
-		async def take_screenshot():
+		async def screenshot():
 			"""Request that a screenshot be included in the next observation"""
 			memory = 'Requested screenshot for next observation'
 			msg = f'📸 {memory}'
@@ -838,7 +838,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 			'',
 			param_model=GetDropdownOptionsAction,
 		)
-		async def get_dropdown_options(params: GetDropdownOptionsAction, browser_session: BrowserSession):
+		async def dropdown_options(params: GetDropdownOptionsAction, browser_session: BrowserSession):
 			"""Get all options from a native dropdown or ARIA menu"""
 			# Look up the node from the selector map
 			node = await browser_session.get_element_by_index(params.index)
@@ -864,7 +864,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 			'',
 			param_model=SelectDropdownOptionAction,
 		)
-		async def select_dropdown_option(params: SelectDropdownOptionAction, browser_session: BrowserSession):
+		async def select_dropdown(params: SelectDropdownOptionAction, browser_session: BrowserSession):
 			"""Select dropdown option by the text of the option you want to select"""
 			# Look up the node from the selector map
 			node = await browser_session.get_element_by_index(params.index)
@@ -925,7 +925,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 			return ActionResult(extracted_content=result, long_term_memory=result)
 
 		@self.registry.action('')
-		async def replace_file_str(file_name: str, old_str: str, new_str: str, file_system: FileSystem):
+		async def replace_file(file_name: str, old_str: str, new_str: str, file_system: FileSystem):
 			result = await file_system.replace_file_str(file_name, old_str, new_str)
 			logger.info(f'💾 {result}')
 			return ActionResult(extracted_content=result, long_term_memory=result)
@@ -962,7 +962,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 		@self.registry.action(
 			'JS eval. Wrap in IIFE: (function(){...})(). Use try/catch. JSON.stringify() for objects.',
 		)
-		async def execute_js(code: str, browser_session: BrowserSession):
+		async def evaluate(code: str, browser_session: BrowserSession):
 			# Execute JavaScript with proper error handling and promise support
 
 			cdp_session = await browser_session.get_or_create_cdp_session()
diff --git a/docs/customize/agent/supported-models.mdx b/docs/customize/agent/supported-models.mdx
index 35b67a7fa..740ed036b 100644
--- a/docs/customize/agent/supported-models.mdx
+++ b/docs/customize/agent/supported-models.mdx
@@ -215,7 +215,7 @@ llm = ChatOllama(model="llama3.1:8b")
 ## Qwen [example](https://github.com/browser-use/browser-use/blob/main/examples/models/qwen.py)
 
 Currently, only `qwen-vl-max` is recommended for Browser Use. Other Qwen models, including `qwen-max`, have issues with the action schema format.
-Smaller Qwen models may return incorrect action schema formats (e.g., `actions: [{"go_to_url": "google.com"}]` instead of `[{"go_to_url": {"url": "google.com"}}]`). If you want to use other models, add concrete examples of the correct action format to your prompt.
+Smaller Qwen models may return incorrect action schema formats (e.g., `actions: [{"navigate": "google.com"}]` instead of `[{"navigate": {"url": "google.com"}}]`). If you want to use other models, add concrete examples of the correct action format to your prompt.
 
 ```python
 from browser_use import Agent, ChatOpenAI
diff --git a/docs/customize/tools/available.mdx b/docs/customize/tools/available.mdx
index c045f3dca..488c79176 100644
--- a/docs/customize/tools/available.mdx
+++ b/docs/customize/tools/available.mdx
@@ -10,7 +10,7 @@ mode: "wide"
 
 ### Navigation & Browser Control
 - **`search`** - Search queries in Google
-- **`go_to_url`** - Navigate to URLs  
+- **`navigate`** - Navigate to URLs  
 - **`go_back`** - Go back in browser history
 - **`wait`** - Wait for specified seconds
 
diff --git a/examples/features/initial_actions.py b/examples/features/initial_actions.py
index 7e8d585c8..4a80f6f93 100644
--- a/examples/features/initial_actions.py
+++ b/examples/features/initial_actions.py
@@ -13,8 +13,8 @@ from browser_use import Agent, ChatOpenAI
 llm = ChatOpenAI(model='gpt-4.1-mini')
 
 initial_actions = [
-	{'go_to_url': {'url': 'https://www.google.com', 'new_tab': True}},
-	{'go_to_url': {'url': 'https://en.wikipedia.org/wiki/Randomness', 'new_tab': True}},
+	{'navigate': {'url': 'https://www.google.com', 'new_tab': True}},
+	{'navigate': {'url': 'https://en.wikipedia.org/wiki/Randomness', 'new_tab': True}},
 ]
 agent = Agent(
 	task='What theories are displayed on the page?',
diff --git a/examples/models/qwen.py b/examples/models/qwen.py
index 93e19aff2..fcb08e68a 100644
--- a/examples/models/qwen.py
+++ b/examples/models/qwen.py
@@ -13,7 +13,7 @@ base_url = 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1'
 
 # so far we only had success with qwen-vl-max
 # other models, even qwen-max, do not return the right output format. They confuse the action schema.
-# E.g. they return actions: [{"go_to_url": "google.com"}] instead of [{"go_to_url": {"url": "google.com"}}]
+# E.g. they return actions: [{"navigate": "google.com"}] instead of [{"navigate": {"url": "google.com"}}]
 # If you want to use smaller models and you see they mix up the action schema, add concrete examples to your prompt of the right format.
 llm = ChatOpenAI(model='qwen-vl-max', api_key=api_key, base_url=base_url)
 
diff --git a/tests/ci/test_browser_event_ClickElementEvent.py b/tests/ci/test_browser_event_ClickElementEvent.py
index 2399594d3..976c476ec 100644
--- a/tests/ci/test_browser_event_ClickElementEvent.py
+++ b/tests/ci/test_browser_event_ClickElementEvent.py
@@ -130,12 +130,12 @@ class TestClickElementEvent:
 		)
 
 		# Navigate to the clickable elements test page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/clickable', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/clickable', new_tab=False)}
 
 		from browser_use.agent.views import ActionModel
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 
@@ -218,12 +218,12 @@ class TestClickElementEvent:
 		)
 
 		# Navigate to the new tab test page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/newTab', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/newTab', new_tab=False)}
 
 		from browser_use.agent.views import ActionModel
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 		await asyncio.sleep(1)  # Wait for page to load
@@ -306,12 +306,12 @@ class TestClickElementEvent:
 		)
 
 		# Navigate to the comparison test page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/comparison', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/comparison', new_tab=False)}
 
 		from browser_use.agent.views import ActionModel
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 		await asyncio.sleep(1)
@@ -393,12 +393,12 @@ class TestClickElementEvent:
 		)
 
 		# Navigate to the page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/inline_offscreen', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/inline_offscreen', new_tab=False)}
 
 		from browser_use.agent.views import ActionModel
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 		await asyncio.sleep(0.5)
@@ -475,12 +475,12 @@ class TestClickElementEvent:
 		)
 
 		# Navigate to the page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/block_in_inline', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/block_in_inline', new_tab=False)}
 
 		from browser_use.agent.views import ActionModel
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 		await asyncio.sleep(0.5)
@@ -563,12 +563,12 @@ class TestClickElementEvent:
 		)
 
 		# Navigate to the page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/covered_element', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/covered_element', new_tab=False)}
 
 		from browser_use.agent.views import ActionModel
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 		await asyncio.sleep(0.5)
@@ -623,12 +623,12 @@ class TestClickElementEvent:
 		)
 
 		# Navigate to the page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/file_input', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/file_input', new_tab=False)}
 
 		from browser_use.agent.views import ActionModel
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 		await asyncio.sleep(0.5)
@@ -684,12 +684,12 @@ class TestClickElementEvent:
 		)
 
 		# Navigate to the page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/select_dropdown', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/select_dropdown', new_tab=False)}
 
 		from browser_use.agent.views import ActionModel
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 		await asyncio.sleep(0.5)
@@ -1081,12 +1081,12 @@ class TestClickElementEvent:
 			)
 
 			# Navigate to the file upload test page
-			goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/fileupload', new_tab=False)}
+			goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/fileupload', new_tab=False)}
 
 			from browser_use.agent.views import ActionModel
 
 			class GoToUrlActionModel(ActionModel):
-				go_to_url: GoToUrlAction | None = None
+				navigate: GoToUrlAction | None = None
 
 			await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 
@@ -1227,11 +1227,11 @@ class TestClickElementEvent:
 			)
 
 			# Navigate to the test page
-			goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/upload-test', new_tab=False)}
+			goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/upload-test', new_tab=False)}
 			from browser_use.agent.views import ActionModel
 
 			class GoToUrlActionModel(ActionModel):
-				go_to_url: GoToUrlAction | None = None
+				navigate: GoToUrlAction | None = None
 
 			await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 			await asyncio.sleep(0.5)
diff --git a/tests/ci/test_browser_event_GetDropdownOptionsEvent.py b/tests/ci/test_browser_event_GetDropdownOptionsEvent.py
index 3d3193e25..078410350 100644
--- a/tests/ci/test_browser_event_GetDropdownOptionsEvent.py
+++ b/tests/ci/test_browser_event_GetDropdownOptionsEvent.py
@@ -278,10 +278,10 @@ class TestGetDropdownOptionsEvent:
 	async def test_native_select_dropdown(self, tools, browser_session: BrowserSession, base_url):
 		"""Test get_dropdown_options with native HTML select element."""
 		# Navigate to the native dropdown test page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/native-dropdown', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/native-dropdown', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 
@@ -336,10 +336,10 @@ class TestGetDropdownOptionsEvent:
 	async def test_aria_menu_dropdown(self, tools, browser_session: BrowserSession, base_url):
 		"""Test get_dropdown_options with ARIA role='menu' element."""
 		# Navigate to the ARIA menu test page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/aria-menu', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/aria-menu', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 
@@ -398,10 +398,10 @@ class TestGetDropdownOptionsEvent:
 	async def test_custom_dropdown(self, tools, browser_session: BrowserSession, base_url):
 		"""Test get_dropdown_options with custom dropdown implementation."""
 		# Navigate to the custom dropdown test page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/custom-dropdown', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/custom-dropdown', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 
@@ -456,10 +456,10 @@ class TestGetDropdownOptionsEvent:
 	async def test_element_not_found_error(self, tools, browser_session: BrowserSession, base_url):
 		"""Test get_dropdown_options with invalid element index."""
 		# Navigate to any test page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/native-dropdown', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/native-dropdown', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 		await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0)
@@ -486,10 +486,10 @@ class TestSelectDropdownOptionEvent:
 	async def test_select_native_dropdown_option(self, tools, browser_session: BrowserSession, base_url):
 		"""Test select_dropdown_option with native HTML select element."""
 		# Navigate to the native dropdown test page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/native-dropdown', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/native-dropdown', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 		await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0)
@@ -534,10 +534,10 @@ class TestSelectDropdownOptionEvent:
 	async def test_select_aria_menu_option(self, tools, browser_session: BrowserSession, base_url):
 		"""Test select_dropdown_option with ARIA menu."""
 		# Navigate to the ARIA menu test page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/aria-menu', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/aria-menu', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 		await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0)
@@ -586,10 +586,10 @@ class TestSelectDropdownOptionEvent:
 	async def test_select_custom_dropdown_option(self, tools, browser_session: BrowserSession, base_url):
 		"""Test select_dropdown_option with custom dropdown."""
 		# Navigate to the custom dropdown test page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/custom-dropdown', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/custom-dropdown', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 		await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0)
@@ -634,10 +634,10 @@ class TestSelectDropdownOptionEvent:
 	async def test_select_invalid_option_error(self, tools, browser_session: BrowserSession, base_url):
 		"""Test select_dropdown_option with non-existent option text."""
 		# Navigate to the native dropdown test page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/native-dropdown', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/native-dropdown', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 		await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0)
diff --git a/tests/ci/test_browser_event_GetDropdownOptionsEvent_aria_menus.py b/tests/ci/test_browser_event_GetDropdownOptionsEvent_aria_menus.py
index 54a1a07a1..f74e656ca 100644
--- a/tests/ci/test_browser_event_GetDropdownOptionsEvent_aria_menus.py
+++ b/tests/ci/test_browser_event_GetDropdownOptionsEvent_aria_menus.py
@@ -152,10 +152,10 @@ class TestARIAMenuDropdown:
 	async def test_get_dropdown_options_with_aria_menu(self, tools, browser_session: BrowserSession, base_url):
 		"""Test that get_dropdown_options can retrieve options from ARIA menus."""
 		# Navigate to the ARIA menu test page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/aria-menu', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/aria-menu', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 
@@ -219,10 +219,10 @@ class TestARIAMenuDropdown:
 	async def test_select_dropdown_option_with_aria_menu(self, tools, browser_session: BrowserSession, base_url):
 		"""Test that select_dropdown_option can select an option from ARIA menus."""
 		# Navigate to the ARIA menu test page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/aria-menu', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/aria-menu', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 
@@ -289,10 +289,10 @@ class TestARIAMenuDropdown:
 	async def test_get_dropdown_options_with_nested_aria_menu(self, tools, browser_session: BrowserSession, base_url):
 		"""Test that get_dropdown_options can handle nested ARIA menus (like Sort submenu)."""
 		# Navigate to the ARIA menu test page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/aria-menu', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/aria-menu', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 
diff --git a/tests/ci/test_browser_event_NavigateToUrlEvent.py b/tests/ci/test_browser_event_NavigateToUrlEvent.py
index a6a008d4c..088f6776f 100644
--- a/tests/ci/test_browser_event_NavigateToUrlEvent.py
+++ b/tests/ci/test_browser_event_NavigateToUrlEvent.py
@@ -59,15 +59,15 @@ def tools():
 
 
 class TestNavigateToUrlEvent:
-	"""Test NavigateToUrlEvent and go_to_url action functionality."""
+	"""Test NavigateToUrlEvent and navigate action functionality."""
 
-	async def test_go_to_url_action(self, tools, browser_session: BrowserSession, base_url):
+	async def test_navigate_action(self, tools, browser_session: BrowserSession, base_url):
 		"""Test that GoToUrlAction navigates to the specified URL and test both state summary methods."""
 		# Test successful navigation to a valid page
-		action_data = {'go_to_url': GoToUrlAction(url=f'{base_url}/page1', new_tab=False)}
+		action_data = {'navigate': GoToUrlAction(url=f'{base_url}/page1', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		action_model = GoToUrlActionModel(**action_data)
 		result = await tools.act(action_model, browser_session)
@@ -77,14 +77,14 @@ class TestNavigateToUrlEvent:
 		assert result.extracted_content is not None
 		assert f'Navigated to {base_url}' in result.extracted_content
 
-	async def test_go_to_url_network_error(self, tools, browser_session: BrowserSession):
-		"""Test that go_to_url handles network errors gracefully instead of throwing hard errors."""
-		# Create action model for go_to_url with an invalid domain
-		action_data = {'go_to_url': GoToUrlAction(url='https://www.nonexistentdndbeyond.com/', new_tab=False)}
+	async def test_navigate_network_error(self, tools, browser_session: BrowserSession):
+		"""Test that navigate handles network errors gracefully instead of throwing hard errors."""
+		# Create action model for navigate with an invalid domain
+		action_data = {'navigate': GoToUrlAction(url='https://www.nonexistentdndbeyond.com/', new_tab=False)}
 
 		# Create the ActionModel instance
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		action_model = GoToUrlActionModel(**action_data)
 
@@ -123,17 +123,17 @@ class TestNavigateToUrlEvent:
 		current_url = await browser_session.get_current_page_url()
 		assert f'{base_url}/page1' in current_url
 
-	async def test_go_to_url_new_tab(self, tools, browser_session, base_url):
+	async def test_navigate_new_tab(self, tools, browser_session, base_url):
 		"""Test that GoToUrlAction with new_tab=True opens URL in a new tab."""
 		# Get initial tab count
 		initial_tabs = await browser_session.get_tabs()
 		initial_tab_count = len(initial_tabs)
 
 		# Navigate to URL in new tab
-		action_data = {'go_to_url': GoToUrlAction(url=f'{base_url}/page2', new_tab=True)}
+		action_data = {'navigate': GoToUrlAction(url=f'{base_url}/page2', new_tab=True)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		result = await tools.act(GoToUrlActionModel(**action_data), browser_session)
 		await asyncio.sleep(0.5)
@@ -155,15 +155,15 @@ class TestNavigateToUrlEvent:
 	async def test_navigate_javascript_url(self, tools, browser_session, base_url):
 		"""Test that javascript: URLs are handled appropriately."""
 		# Navigate to a normal page first
-		action_data = {'go_to_url': GoToUrlAction(url=f'{base_url}/page1', new_tab=False)}
+		action_data = {'navigate': GoToUrlAction(url=f'{base_url}/page1', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**action_data), browser_session)
 
 		# Try to navigate to javascript: URL (should be handled gracefully)
-		js_action = {'go_to_url': GoToUrlAction(url='javascript:alert("test")', new_tab=False)}
+		js_action = {'navigate': GoToUrlAction(url='javascript:alert("test")', new_tab=False)}
 		result = await tools.act(GoToUrlActionModel(**js_action), browser_session)
 
 		# Should either succeed or fail gracefully
@@ -174,10 +174,10 @@ class TestNavigateToUrlEvent:
 		# Create a simple data URL
 		data_url = 'data:text/html,<html><head><title>Data URL Test</title></head><body><h1>Data URL Content</h1></body></html>'
 
-		action_data = {'go_to_url': GoToUrlAction(url=data_url, new_tab=False)}
+		action_data = {'navigate': GoToUrlAction(url=data_url, new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		result = await tools.act(GoToUrlActionModel(**action_data), browser_session)
 
@@ -210,10 +210,10 @@ class TestNavigateToUrlEvent:
 		)
 
 		# Navigate to page with hash
-		action_data = {'go_to_url': GoToUrlAction(url=f'{base_url}/page-with-anchors#section1', new_tab=False)}
+		action_data = {'navigate': GoToUrlAction(url=f'{base_url}/page-with-anchors#section1', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		result = await tools.act(GoToUrlActionModel(**action_data), browser_session)
 
@@ -247,10 +247,10 @@ class TestNavigateToUrlEvent:
 		)
 
 		# Navigate with query parameters
-		action_data = {'go_to_url': GoToUrlAction(url=f'{base_url}/search?q=test+query&page=1', new_tab=False)}
+		action_data = {'navigate': GoToUrlAction(url=f'{base_url}/search?q=test+query&page=1', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		result = await tools.act(GoToUrlActionModel(**action_data), browser_session)
 
@@ -267,19 +267,19 @@ class TestNavigateToUrlEvent:
 	async def test_navigate_multiple_tabs(self, tools, browser_session, base_url):
 		"""Test navigating in multiple tabs sequentially."""
 		# Navigate to first page in current tab
-		action1 = {'go_to_url': GoToUrlAction(url=f'{base_url}/page1', new_tab=False)}
+		action1 = {'navigate': GoToUrlAction(url=f'{base_url}/page1', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**action1), browser_session)
 
 		# Open second page in new tab
-		action2 = {'go_to_url': GoToUrlAction(url=f'{base_url}/page2', new_tab=True)}
+		action2 = {'navigate': GoToUrlAction(url=f'{base_url}/page2', new_tab=True)}
 		await tools.act(GoToUrlActionModel(**action2), browser_session)
 
 		# Open home page in yet another new tab
-		action3 = {'go_to_url': GoToUrlAction(url=base_url, new_tab=True)}
+		action3 = {'navigate': GoToUrlAction(url=base_url, new_tab=True)}
 		await tools.act(GoToUrlActionModel(**action3), browser_session)
 
 		# Should have 3 tabs now
@@ -296,10 +296,10 @@ class TestNavigateToUrlEvent:
 		# Using a private IP that's unlikely to respond
 		timeout_url = 'http://192.0.2.1:8080/timeout'
 
-		action_data = {'go_to_url': GoToUrlAction(url=timeout_url, new_tab=False)}
+		action_data = {'navigate': GoToUrlAction(url=timeout_url, new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		# This should complete without hanging indefinitely
 		result = await tools.act(GoToUrlActionModel(**action_data), browser_session)
@@ -317,10 +317,10 @@ class TestNavigateToUrlEvent:
 		)
 
 		# Navigate to redirect URL
-		action_data = {'go_to_url': GoToUrlAction(url=f'{base_url}/redirect', new_tab=False)}
+		action_data = {'navigate': GoToUrlAction(url=f'{base_url}/redirect', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		result = await tools.act(GoToUrlActionModel(**action_data), browser_session)
 
diff --git a/tests/ci/test_browser_event_ScrollEvent.py b/tests/ci/test_browser_event_ScrollEvent.py
index e8767384f..dd2c568f8 100644
--- a/tests/ci/test_browser_event_ScrollEvent.py
+++ b/tests/ci/test_browser_event_ScrollEvent.py
@@ -83,10 +83,10 @@ class TestScrollActions:
 		"""Test basic scroll action functionality."""
 
 		# Navigate to scrollable page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/scrollable', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/scrollable', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 
diff --git a/tests/ci/test_browser_session_output_paths.py b/tests/ci/test_browser_session_output_paths.py
index 9dfa13261..526cd039d 100644
--- a/tests/ci/test_browser_session_output_paths.py
+++ b/tests/ci/test_browser_session_output_paths.py
@@ -60,7 +60,7 @@ def interactive_llm(httpserver_url):
 			"next_goal": "Navigate to the URL",
 			"action": [
 				{{
-					"go_to_url": {{
+					"navigate": {{
 						"url": "{httpserver_url}",
 						"new_tab": false
 					}}
diff --git a/tests/ci/test_browser_watchdog_downloads_upload_full_circle.py b/tests/ci/test_browser_watchdog_downloads_upload_full_circle.py
index eb60f8d56..679ef14d4 100644
--- a/tests/ci/test_browser_watchdog_downloads_upload_full_circle.py
+++ b/tests/ci/test_browser_watchdog_downloads_upload_full_circle.py
@@ -162,10 +162,10 @@ class TestDownloadUploadFullCircle:
 
 				# Step 1: Navigate to download page
 				class GoToUrlActionModel(ActionModel):
-					go_to_url: GoToUrlAction | None = None
+					navigate: GoToUrlAction | None = None
 
 				result = await tools.act(
-					GoToUrlActionModel(go_to_url=GoToUrlAction(url=f'{base_url}/download-page', new_tab=False)), browser_session
+					GoToUrlActionModel(navigate=GoToUrlAction(url=f'{base_url}/download-page', new_tab=False)), browser_session
 				)
 				assert result.error is None, f'Navigation to download page failed: {result.error}'
 
@@ -228,7 +228,7 @@ class TestDownloadUploadFullCircle:
 				for i, tab in enumerate(tabs_before):
 					print(f'  Tab {i}: {tab.url}')
 				result = await tools.act(
-					GoToUrlActionModel(go_to_url=GoToUrlAction(url=f'{base_url}/upload-page', new_tab=True)), browser_session
+					GoToUrlActionModel(navigate=GoToUrlAction(url=f'{base_url}/upload-page', new_tab=True)), browser_session
 				)
 				assert result.error is None, f'Navigation to upload page failed: {result.error}'
 				print(f'✅ Navigation result: {result.extracted_content}')
diff --git a/tests/ci/test_tools.py b/tests/ci/test_tools.py
index e8625a0cf..2a86fac6c 100644
--- a/tests/ci/test_tools.py
+++ b/tests/ci/test_tools.py
@@ -96,7 +96,7 @@ class TestToolsIntegration:
 		"""Test that the registry contains the expected default actions."""
 		# Check that common actions are registered
 		common_actions = [
-			'go_to_url',
+			'navigate',
 			'search',
 			'click',
 			'input_text',
@@ -125,10 +125,10 @@ class TestToolsIntegration:
 			return ActionResult(extracted_content=f'Custom action executed with: {params.text} on {current_url}')
 
 		# Navigate to a page first
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/page1', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/page1', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 
@@ -209,10 +209,10 @@ class TestToolsIntegration:
 	async def test_go_back_action(self, tools, browser_session, base_url):
 		"""Test that go_back action navigates to the previous page."""
 		# Navigate to first page
-		goto_action1 = {'go_to_url': GoToUrlAction(url=f'{base_url}/page1', new_tab=False)}
+		goto_action1 = {'navigate': GoToUrlAction(url=f'{base_url}/page1', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action1), browser_session)
 
@@ -221,7 +221,7 @@ class TestToolsIntegration:
 		print(f'First page URL: {first_url}')
 
 		# Navigate to second page
-		goto_action2 = {'go_to_url': GoToUrlAction(url=f'{base_url}/page2', new_tab=False)}
+		goto_action2 = {'navigate': GoToUrlAction(url=f'{base_url}/page2', new_tab=False)}
 		await tools.act(GoToUrlActionModel(**goto_action2), browser_session)
 
 		# Verify we're on the second page
@@ -259,10 +259,10 @@ class TestToolsIntegration:
 
 		# Navigate to each page in sequence
 		for url in urls:
-			action_data = {'go_to_url': GoToUrlAction(url=url, new_tab=False)}
+			action_data = {'navigate': GoToUrlAction(url=url, new_tab=False)}
 
 			class GoToUrlActionModel(ActionModel):
-				go_to_url: GoToUrlAction | None = None
+				navigate: GoToUrlAction | None = None
 
 			await tools.act(GoToUrlActionModel(**action_data), browser_session)
 
@@ -293,7 +293,7 @@ class TestToolsIntegration:
 		assert 'scroll' not in excluded_tools.registry.registry.actions
 
 		# But other actions are still there
-		assert 'go_to_url' in excluded_tools.registry.registry.actions
+		assert 'navigate' in excluded_tools.registry.registry.actions
 		assert 'click' in excluded_tools.registry.registry.actions
 
 	async def test_search_action(self, tools, browser_session, base_url):
@@ -325,10 +325,10 @@ class TestToolsIntegration:
 			file_system = FileSystem(temp_dir)
 
 			# First navigate to a page
-			goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/page1', new_tab=False)}
+			goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/page1', new_tab=False)}
 
 			class GoToUrlActionModel(ActionModel):
-				go_to_url: GoToUrlAction | None = None
+				navigate: GoToUrlAction | None = None
 
 			await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 
@@ -392,10 +392,10 @@ class TestToolsIntegration:
 		)
 
 		# Navigate to the dropdown test page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/dropdown1', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/dropdown1', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 
@@ -519,10 +519,10 @@ class TestToolsIntegration:
 		)
 
 		# Navigate to the dropdown test page
-		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/dropdown2', new_tab=False)}
+		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/dropdown2', new_tab=False)}
 
 		class GoToUrlActionModel(ActionModel):
-			go_to_url: GoToUrlAction | None = None
+			navigate: GoToUrlAction | None = None
 
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 
diff --git a/tests/ci/test_url_shortening.py b/tests/ci/test_url_shortening.py
index 6ea45b15d..022e6de23 100644
--- a/tests/ci/test_url_shortening.py
+++ b/tests/ci/test_url_shortening.py
@@ -93,7 +93,7 @@ class TestUrlShorteningOutputProcessing:
 			'evaluation_previous_goal': 'Successfully processed the request',
 			'memory': f'Found useful info at {shortened_url}',
 			'next_goal': 'Complete the documentation review',
-			'action': [{'go_to_url': {'url': shortened_url, 'new_tab': False}}],
+			'action': [{'navigate': {'url': shortened_url, 'new_tab': False}}],
 		}
 
 		# Create properly typed AgentOutput with custom actions
@@ -109,7 +109,7 @@ class TestUrlShorteningOutputProcessing:
 		assert SUPER_LONG_URL in (agent_output.thinking or '')
 		assert SUPER_LONG_URL in (agent_output.memory or '')
 		action_data = agent_output.action[0].model_dump()
-		assert action_data['go_to_url']['url'] == SUPER_LONG_URL
+		assert action_data['navigate']['url'] == SUPER_LONG_URL
 
 
 class TestUrlShorteningEndToEnd:
@@ -137,7 +137,7 @@ class TestUrlShorteningEndToEnd:
 			'evaluation_previous_goal': 'Starting documentation extraction',
 			'memory': f'Target URL: {shortened_url}',
 			'next_goal': 'Extract API documentation',
-			'action': [{'go_to_url': {'url': shortened_url, 'new_tab': True}}],
+			'action': [{'navigate': {'url': shortened_url, 'new_tab': True}}],
 		}
 
 		# Create AgentOutput with custom actions
@@ -153,8 +153,8 @@ class TestUrlShorteningEndToEnd:
 		assert SUPER_LONG_URL in (agent_output.thinking or '')
 		assert SUPER_LONG_URL in (agent_output.memory or '')
 		action_data = agent_output.action[0].model_dump()
-		assert action_data['go_to_url']['url'] == SUPER_LONG_URL
-		assert action_data['go_to_url']['new_tab'] is True
+		assert action_data['navigate']['url'] == SUPER_LONG_URL
+		assert action_data['navigate']['new_tab'] is True
 
 		# Verify original shortened content is no longer present
 		assert shortened_url not in (agent_output.thinking or '')
diff --git a/tests/scripts/debug_iframe_scrolling.py b/tests/scripts/debug_iframe_scrolling.py
index 7b02fba66..62682bed3 100644
--- a/tests/scripts/debug_iframe_scrolling.py
+++ b/tests/scripts/debug_iframe_scrolling.py
@@ -38,7 +38,7 @@ async def debug_iframe_scrolling():
 			"next_goal": "Navigate to the iframe test page",
 			"action": [
 				{
-					"go_to_url": {
+					"navigate": {
 						"url": "https://browser-use.github.io/stress-tests/challenges/iframe-inception-level1.html",
 						"new_tab": false
 					}

From 0d731cfb26e6f6bd391ee7e5a0d378b746f2e1b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 13:32:16 -0700
Subject: [PATCH 19/45] Replace tool names like go_to_url with navigate

---
 browser_use/agent/system_prompt_flash.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/browser_use/agent/system_prompt_flash.md b/browser_use/agent/system_prompt_flash.md
index b1ee5da06..376f64609 100644
--- a/browser_use/agent/system_prompt_flash.md
+++ b/browser_use/agent/system_prompt_flash.md
@@ -7,7 +7,7 @@ Interactive Elements: All interactive elements will be provided in format as [in
 You must respond with a valid JSON in this exact format:
 {{
   "memory": "Up to 5 sentences of specific reasoning about: Was the previous step successful / failed? What do we need to remember from the current state for the task? Plan ahead what are the best next actions. What's the next immediate goal? Depending on the complexity think longer. For example if its opvious to click the start button just say: click start. But if you need to remember more about the step it could be: Step successful, need to remember A, B, C to visit later. Next click on A.",
-  "action":[{{"go_to_url": {{ "url": "url_value"}}}}]
+  "action":[{{"navigate": {{ "url": "url_value"}}}}]
 }}
 
 </output>

From 47bf973285674cdd4dc3860968bbe2d27123798c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 13:46:31 -0700
Subject: [PATCH 20/45] Default value for extract_links

---
 browser_use/tools/service.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py
index 60b85e082..37f16ace6 100644
--- a/browser_use/tools/service.py
+++ b/browser_use/tools/service.py
@@ -559,10 +559,10 @@ class Tools(Generic[Context]):
 		)
 		async def extract(
 			query: str,
-			extract_links: bool,
 			browser_session: BrowserSession,
 			page_extraction_llm: BaseChatModel,
 			file_system: FileSystem,
+			extract_links: bool = False,
 			start_from_char: int = 0,
 		):
 			# Constants

From 90d414042a2b6a11700fbc0486eaa5603e8967e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 15:09:35 -0700
Subject: [PATCH 21/45] Update docs

---
 browser_use/agent/system_prompt.md            | 42 +++++++++----------
 .../agent/system_prompt_no_thinking.md        | 36 ++++++++--------
 browser_use/agent/views.py                    |  6 +--
 browser_use/tools/service.py                  |  2 +-
 docs/customize/agent/prompting-guide.mdx      |  8 ++--
 docs/customize/tools/available.mdx            | 32 +++++++-------
 examples/file_system/file_system.py           |  2 +-
 7 files changed, 63 insertions(+), 65 deletions(-)

diff --git a/browser_use/agent/system_prompt.md b/browser_use/agent/system_prompt.md
index b5fb4616b..146adc23e 100644
--- a/browser_use/agent/system_prompt.md
+++ b/browser_use/agent/system_prompt.md
@@ -20,8 +20,8 @@ At every step, your input will consist of:
 1. <agent_history>: A chronological event stream including your previous actions and their results.
 2. <agent_state>: Current <user_request>, summary of <file_system>, <todo_contents>, and <step_info>.
 3. <browser_state>: Current URL, open tabs, interactive elements indexed for actions, and visible page content.
-4. <browser_vision>: Screenshot of the browser with bounding boxes around interactive elements. If you used take_screenshot before, this will contain a screenshot.
-5. <read_state> This will be displayed only if your previous action was extract_structured_data or read_file. This data is only shown in the current step.
+4. <browser_vision>: Screenshot of the browser with bounding boxes around interactive elements. If you used screenshot before, this will contain a screenshot.
+5. <read_state> This will be displayed only if your previous action was extract or read_file. This data is only shown in the current step.
 </input>
 
 <agent_history>
@@ -61,14 +61,14 @@ Examples:
 Note that:
 - Only elements with numeric indexes in [] are interactive
 - (stacked) indentation (with \t) is important and means that the element is a (html) child of the element above (with a lower index)
-- Elements tagged with a star `*[` are the new interactive elements that appeared on the website since the last step - if url has not changed. Your previous actions caused that change. Think if you need to interact with them, e.g. after input_text you might need to select the right option from the list.
+- Elements tagged with a star `*[` are the new interactive elements that appeared on the website since the last step - if url has not changed. Your previous actions caused that change. Think if you need to interact with them, e.g. after input you might need to select the right option from the list.
 - Pure text elements without [] are not interactive.
 </browser_state>
 
 <browser_vision>
-If you used take_screenshot before, you will be provided with a screenshot of the current page with  bounding boxes around interactive elements. This is your GROUND TRUTH: reason about the image in your thinking to evaluate your progress.
+If you used screenshot before, you will be provided with a screenshot of the current page with  bounding boxes around interactive elements. This is your GROUND TRUTH: reason about the image in your thinking to evaluate your progress.
 If an interactive index inside your browser_state does not have text information, then the interactive index is written at the top center of it's element in the screenshot.
-Use take_screenshot if you are unsure or simply want more information. 
+Use screenshot if you are unsure or simply want more information. 
 </browser_vision>
 
 <browser_rules>
@@ -78,18 +78,18 @@ Strictly follow these rules while using the browser and navigating the web:
 - If research is needed, open a **new tab** instead of reusing the current one.
 - If the page changes after, for example, an input text action, analyse if you need to interact with new elements, e.g. selecting the right option from the list.
 - By default, only elements in the visible viewport are listed. Use scrolling tools if you suspect relevant content is offscreen which you need to interact with. Scroll ONLY if there are more pixels below or above the page.
-- You can scroll by a specific number of pages using the num_pages parameter (e.g., 0.5 for half page, 2.0 for two pages).
+- You can scroll by a specific number of pages using the pages parameter (e.g., 0.5 for half page, 2.0 for two pages).
 - If a captcha appears, attempt solving it if possible. If not, use fallback strategies (e.g., alternative site, backtrack).
 - If expected elements are missing, try refreshing, scrolling, or navigating back.
 - If the page is not fully loaded, use the wait action.
-- You can call extract_structured_data on specific pages to gather structured semantic information from the entire page, including parts not currently visible.
-- Call extract_structured_data only if the information you are looking for is not visible in your <browser_state> otherwise always just use the needed text from the <browser_state>.
-- Calling the extract_structured_data tool is expensive! DO NOT query the same page with the same extract_structured_data query multiple times. Make sure that you are on the page with relevant information based on the screenshot before calling this tool.
+- You can call extract on specific pages to gather structured semantic information from the entire page, including parts not currently visible.
+- Call extract only if the information you are looking for is not visible in your <browser_state> otherwise always just use the needed text from the <browser_state>.
+- Calling the extract tool is expensive! DO NOT query the same page with the same extract query multiple times. Make sure that you are on the page with relevant information based on the screenshot before calling this tool.
 - If you fill an input field and your action sequence is interrupted, most often something changed e.g. suggestions popped up under the field.
 - If the action sequence was interrupted in previous step due to page changes, make sure to complete any remaining actions that were not executed. For example, if you tried to input text and click a search button but the click was not executed because the page changed, you should retry the click action in your next step.
 - If the <user_request> includes specific page information such as product type, rating, price, location, etc., try to apply filters to be more efficient.
 - The <user_request> is the ultimate goal. If the user specifies explicit steps, they have always the highest priority.
-- If you input_text into a field, you might need to press enter, click the search button, or select from dropdown for completion.
+- If you input into a field, you might need to press enter, click the search button, or select from dropdown for completion.
 - Don't login into a page if you don't have to. Don't login if you don't have the credentials. 
 - There are 2 types of tasks always first think which type of request you are dealing with:
 1. Very specific step by step instructions:
@@ -101,7 +101,7 @@ Strictly follow these rules while using the browser and navigating the web:
 
 <file_system>
 - You have access to a persistent file system which you can use to track progress, store results, and manage long tasks.
-- Your file system is initialized with a `todo.md`: Use this to keep a checklist for known subtasks. Use `replace_file_str` tool to update markers in `todo.md` as first action whenever you complete an item. This file should guide your step-by-step execution when you have a long running task.
+- Your file system is initialized with a `todo.md`: Use this to keep a checklist for known subtasks. Use `replace_file` tool to update markers in `todo.md` as first action whenever you complete an item. This file should guide your step-by-step execution when you have a long running task.
 - If you are writing a `csv` file, make sure to use double quotes if cell elements contain commas.
 - If the file is too large, you are only given a preview of your file. Use `read_file` to see the full content if necessary.
 - If exists, <available_file_paths> includes files you have downloaded or uploaded by the user. You can only read or upload these files but you don't have write access.
@@ -138,17 +138,17 @@ If you are allowed multiple actions, you can specify multiple actions in the lis
 You can output multiple actions in one step. Try to be efficient where it makes sense. Do not predict actions which do not make sense for the current page.
 
 **Recommended Action Combinations:**
-- `input_text` + `click_element_by_index` → Fill form field and submit/search in one step
-- `input_text` + `input_text` → Fill multiple form fields
-- `click_element_by_index` + `click_element_by_index` → Navigate through multi-step flows (when the page does not navigate between clicks)
-- `scroll` with num_pages 10 + `extract_structured_data` → Scroll to the bottom of the page to load more content before extracting structured data
-- File operations + browser actions 
+- `input` + `click` → Fill form field and submit/search in one step
+- `input` + `input` → Fill multiple form fields
+- `click` + `click` → Navigate through multi-step flows (when the page does not navigate between clicks)
+- `scroll` with pages 10 + `extract` → Scroll to the bottom of the page to load more content before extracting structured data
+- File operations + browser actions
 
-Do not try multiple different paths in one step. Always have one clear goal per step. 
-Its important that you see in the next step if your action was successful, so do not chain actions which change the browser state multiple times, e.g. 
-- do not use click_element_by_index and then navigate, because you would not see if the click was successful or not. 
-- or do not use switch_tab and switch_tab together, because you would not see the state in between.
-- do not use input_text and then scroll, because you would not see if the input text was successful or not. 
+Do not try multiple different paths in one step. Always have one clear goal per step.
+It's important that you see in the next step if your action was successful, so do not chain actions which change the browser state multiple times, e.g.
+- do not use `click` and then `navigate`, because you would not see if the click was successful or not.
+- or do not use `switch` and `switch` together, because you would not see the state in between.
+- do not use `input` and then `scroll`, because you would not see if the input was successful or not.
 </efficiency_guidelines>
 
 <reasoning_rules>
diff --git a/browser_use/agent/system_prompt_no_thinking.md b/browser_use/agent/system_prompt_no_thinking.md
index efdf8d2fe..d51da1b1e 100644
--- a/browser_use/agent/system_prompt_no_thinking.md
+++ b/browser_use/agent/system_prompt_no_thinking.md
@@ -20,8 +20,8 @@ At every step, your input will consist of:
 1. <agent_history>: A chronological event stream including your previous actions and their results.
 2. <agent_state>: Current <user_request>, summary of <file_system>, <todo_contents>, and <step_info>.
 3. <browser_state>: Current URL, open tabs, interactive elements indexed for actions, and visible page content.
-4. <browser_vision>: Screenshot of the browser with bounding boxes around interactive elements. If you used take_screenshot before, this will contain a screenshot.
-5. <read_state> This will be displayed only if your previous action was extract_structured_data or read_file. This data is only shown in the current step.
+4. <browser_vision>: Screenshot of the browser with bounding boxes around interactive elements. If you used the screenshot action before, this will contain a screenshot.
+5. <read_state> This will be displayed only if your previous action was extract or read_file. This data is only shown in the current step.
 </input>
 
 <agent_history>
@@ -61,14 +61,14 @@ Examples:
 Note that:
 - Only elements with numeric indexes in [] are interactive
 - (stacked) indentation (with \t) is important and means that the element is a (html) child of the element above (with a lower index)
-- Elements tagged with a star `*[` are the new interactive elements that appeared on the website since the last step - if url has not changed. Your previous actions caused that change. Think if you need to interact with them, e.g. after input_text you might need to select the right option from the list.
+- Elements tagged with a star `*[` are the new interactive elements that appeared on the website since the last step - if url has not changed. Your previous actions caused that change. Think if you need to interact with them, e.g. after input you might need to select the right option from the list.
 - Pure text elements without [] are not interactive.
 </browser_state>
 
 <browser_vision>
-If you used take_screenshot before, you will be provided with a screenshot of the current page with  bounding boxes around interactive elements. This is your GROUND TRUTH: reason about the image in your thinking to evaluate your progress.
+If you used screenshot before, you will be provided with a screenshot of the current page with  bounding boxes around interactive elements. This is your GROUND TRUTH: reason about the image in your thinking to evaluate your progress.
 If an interactive index inside your browser_state does not have text information, then the interactive index is written at the top center of it's element in the screenshot.
-Use take_screenshot if you are unsure or simply want more information. 
+Use screenshot if you are unsure or simply want more information. 
 </browser_vision>
 
 <browser_rules>
@@ -78,18 +78,18 @@ Strictly follow these rules while using the browser and navigating the web:
 - If research is needed, open a **new tab** instead of reusing the current one.
 - If the page changes after, for example, an input text action, analyse if you need to interact with new elements, e.g. selecting the right option from the list.
 - By default, only elements in the visible viewport are listed. Use scrolling tools if you suspect relevant content is offscreen which you need to interact with. Scroll ONLY if there are more pixels below or above the page.
-- You can scroll by a specific number of pages using the num_pages parameter (e.g., 0.5 for half page, 2.0 for two pages).
+- You can scroll by a specific number of pages using the pages parameter (e.g., 0.5 for half page, 2.0 for two pages).
 - If a captcha appears, attempt solving it if possible. If not, use fallback strategies (e.g., alternative site, backtrack).
 - If expected elements are missing, try refreshing, scrolling, or navigating back.
 - If the page is not fully loaded, use the wait action.
-- You can call extract_structured_data on specific pages to gather structured semantic information from the entire page, including parts not currently visible.
-- Call extract_structured_data only if the information you are looking for is not visible in your <browser_state> otherwise always just use the needed text from the <browser_state>.
-- Calling the extract_structured_data tool is expensive! DO NOT query the same page with the same extract_structured_data query multiple times. Make sure that you are on the page with relevant information based on the screenshot before calling this tool.
+- You can call extract on specific pages to gather structured semantic information from the entire page, including parts not currently visible.
+- Call extract only if the information you are looking for is not visible in your <browser_state> otherwise always just use the needed text from the <browser_state>.
+- Calling the extract tool is expensive! DO NOT query the same page with the same extract query multiple times. Make sure that you are on the page with relevant information based on the screenshot before calling this tool.
 - If you fill an input field and your action sequence is interrupted, most often something changed e.g. suggestions popped up under the field.
 - If the action sequence was interrupted in previous step due to page changes, make sure to complete any remaining actions that were not executed. For example, if you tried to input text and click a search button but the click was not executed because the page changed, you should retry the click action in your next step.
 - If the <user_request> includes specific page information such as product type, rating, price, location, etc., try to apply filters to be more efficient.
 - The <user_request> is the ultimate goal. If the user specifies explicit steps, they have always the highest priority.
-- If you input_text into a field, you might need to press enter, click the search button, or select from dropdown for completion.
+- If you input into a field, you might need to press enter, click the search button, or select from dropdown for completion.
 - Don't login into a page if you don't have to. Don't login if you don't have the credentials. 
 - There are 2 types of tasks always first think which type of request you are dealing with:
 1. Very specific step by step instructions:
@@ -101,7 +101,7 @@ Strictly follow these rules while using the browser and navigating the web:
 
 <file_system>
 - You have access to a persistent file system which you can use to track progress, store results, and manage long tasks.
-- Your file system is initialized with a `todo.md`: Use this to keep a checklist for known subtasks. Use `replace_file_str` tool to update markers in `todo.md` as first action whenever you complete an item. This file should guide your step-by-step execution when you have a long running task.
+- Your file system is initialized with a `todo.md`: Use this to keep a checklist for known subtasks. Use `replace_file` tool to update markers in `todo.md` as first action whenever you complete an item. This file should guide your step-by-step execution when you have a long running task.
 - If you are writing a `csv` file, make sure to use double quotes if cell elements contain commas.
 - If the file is too large, you are only given a preview of your file. Use `read_file` to see the full content if necessary.
 - If exists, <available_file_paths> includes files you have downloaded or uploaded by the user. You can only read or upload these files but you don't have write access.
@@ -137,17 +137,17 @@ If you are allowed multiple actions, you can specify multiple actions in the lis
 You can output multiple actions in one step. Try to be efficient where it makes sense. Do not predict actions which do not make sense for the current page.
 
 **Recommended Action Combinations:**
-- `input_text` + `click_element_by_index` → Fill form field and submit/search in one step
-- `input_text` + `input_text` → Fill multiple form fields
-- `click_element_by_index` + `click_element_by_index` → Navigate through multi-step flows (when the page does not navigate between clicks)
-- `scroll` with num_pages 10 + `extract_structured_data` → Scroll to the bottom of the page to load more content before extracting structured data
+- `input` + `click` → Fill form field and submit/search in one step
+- `input` + `input` → Fill multiple form fields
+- `click` + `click` → Navigate through multi-step flows (when the page does not navigate between clicks)
+- `scroll` with pages 10 + `extract` → Scroll to the bottom of the page to load more content before extracting structured data
 - File operations + browser actions 
 
 Do not try multiple different paths in one step. Always have one clear goal per step. 
 Its important that you see in the next step if your action was successful, so do not chain actions which change the browser state multiple times, e.g. 
-- do not use click_element_by_index and then navigate, because you would not see if the click was successful or not. 
-- or do not use switch_tab and switch_tab together, because you would not see the state in between.
-- do not use input_text and then scroll, because you would not see if the input text was successful or not. 
+- do not use `click` and then `navigate`, because you would not see if the click was successful or not.
+- or do not use `switch` and `switch` together, because you would not see the state in between.
+- do not use `input` and then `scroll`, because you would not see if the input was successful or not.
 </efficiency_guidelines>
 
 <reasoning_rules>
diff --git a/browser_use/agent/views.py b/browser_use/agent/views.py
index 1234471a7..5cb80eb86 100644
--- a/browser_use/agent/views.py
+++ b/browser_use/agent/views.py
@@ -327,12 +327,10 @@ class AgentHistory(BaseModel):
 		if self.model_output:
 			action_dump = [action.model_dump(exclude_none=True) for action in self.model_output.action]
 
-			# Filter sensitive data only from input_text action parameters if sensitive_data is provided
+			# Filter sensitive data only from input action parameters if sensitive_data is provided
 			if sensitive_data:
 				action_dump = [
-					self._filter_sensitive_data_from_dict(action, sensitive_data)
-					if action.get('name') == 'input_text'
-					else action
+					self._filter_sensitive_data_from_dict(action, sensitive_data) if action.get('name') == 'input' else action
 					for action in action_dump
 				]
 
diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py
index 37f16ace6..322834656 100644
--- a/browser_use/tools/service.py
+++ b/browser_use/tools/service.py
@@ -273,7 +273,7 @@ class Tools(Generic[Context]):
 			except BrowserError as e:
 				if 'Cannot click on <select> elements.' in str(e):
 					try:
-						return await get_dropdown_options(
+						return await dropdown_options(
 							params=GetDropdownOptionsAction(index=params.index), browser_session=browser_session
 						)
 					except Exception as dropdown_error:
diff --git a/docs/customize/agent/prompting-guide.mdx b/docs/customize/agent/prompting-guide.mdx
index f19d88e90..7d7f05fd7 100644
--- a/docs/customize/agent/prompting-guide.mdx
+++ b/docs/customize/agent/prompting-guide.mdx
@@ -12,7 +12,7 @@ Prompting can trasticly improve performance and solve existing limitations of th
 ```python
 task = """
 1. Go to https://quotes.toscrape.com/
-2. Use extract_structured_data action with the query "first 3 quotes with their authors"
+2. Use extract action with the query "first 3 quotes with their authors"
 3. Save results to quotes.csv using write_file action
 4. Do a google search for the first quote and find when it was written
 """
@@ -30,11 +30,11 @@ When you know exactly what the agent should do, reference actions by name:
 ```python
 task = """
 1. Use search action to find "Python tutorials"
-2. Use click_element_by_index to open first result in a new tab   
+2. Use click to open first result in a new tab
 3. Use scroll action to scroll down 2 pages
-4. Use extract_structured_data to extract the names of the first 5 items 
+4. Use extract to extract the names of the first 5 items
 5. Wait for 2 seconds if the page is not loaded, refresh it and wait 10 sec
-6. Use send_keys action with "Tab Tab ArrowDown Enter" 
+6. Use send_keys action with "Tab Tab ArrowDown Enter"
 """
 ```
 
diff --git a/docs/customize/tools/available.mdx b/docs/customize/tools/available.mdx
index 488c79176..5248a9f94 100644
--- a/docs/customize/tools/available.mdx
+++ b/docs/customize/tools/available.mdx
@@ -9,40 +9,40 @@ mode: "wide"
 
 
 ### Navigation & Browser Control
-- **`search`** - Search queries in Google
-- **`navigate`** - Navigate to URLs  
+- **`search`** - Search queries (DuckDuckGo, Google, Bing)
+- **`navigate`** - Navigate to URLs
 - **`go_back`** - Go back in browser history
 - **`wait`** - Wait for specified seconds
 
 ### Page Interaction
-- **`click_element_by_index`** - Click elements by their index
-- **`input_text`** - Input text into form fields
-- **`upload_file_to_element`** - Upload files to file inputs
+- **`click`** - Click elements by their index
+- **`input`** - Input text into form fields
+- **`upload_file`** - Upload files to file inputs
 - **`scroll`** - Scroll the page up/down
-- **`scroll_to_text`** - Scroll to specific text on page
+- **`find_text`** - Scroll to specific text on page
 - **`send_keys`** - Send special keys (Enter, Escape, etc.)
 
 ### JavaScript Execution
-- **`execute_js`** - Execute custom JavaScript code on the page (for advanced interactions, shadow DOM, custom selectors, data extraction)
+- **`evaluate`** - Execute custom JavaScript code on the page (for advanced interactions, shadow DOM, custom selectors, data extraction)
 
-### Tab Management  
-- **`switch_tab`** - Switch between browser tabs
-- **`close_tab`** - Close browser tabs
+### Tab Management
+- **`switch`** - Switch between browser tabs
+- **`close`** - Close browser tabs
 
 ### Content Extraction
-- **`extract_structured_data`** - Extract data from webpages using LLM
+- **`extract`** - Extract data from webpages using LLM
 
 ### Visual Analysis
-- **`take_screenshot`** - Request a screenshot in your next browser state for visual confirmation
+- **`screenshot`** - Request a screenshot in your next browser state for visual confirmation
 
 ### Form Controls
-- **`get_dropdown_options`** - Get dropdown option values
-- **`select_dropdown_option`** - Select dropdown options
+- **`dropdown_options`** - Get dropdown option values
+- **`select_dropdown`** - Select dropdown options
 
 ### File Operations
 - **`write_file`** - Write content to files
-- **`read_file`** - Read file contents  
-- **`replace_file_str`** - Replace text in files
+- **`read_file`** - Read file contents
+- **`replace_file`** - Replace text in files
 
 ### Task Completion
 - **`done`** - Complete the task (always available)
diff --git a/examples/file_system/file_system.py b/examples/file_system/file_system.py
index 4ea48b880..88e53efaf 100644
--- a/examples/file_system/file_system.py
+++ b/examples/file_system/file_system.py
@@ -24,7 +24,7 @@ Then, use append_file to add the first sentence of the article to "data.md"
 Then, read the file to see its content and make sure it's correct.
 Finally, share the file with me.
 
-NOTE: DO NOT USE extract_structured_data action - everything is visible in browser state.
+NOTE: DO NOT USE extract action - everything is visible in browser state.
 """.strip('\n')
 
 llm = ChatOpenAI(model='gpt-4.1-mini')

From 8daca53589725c9ba216a5c6176f66c08cf498b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 15:11:40 -0700
Subject: [PATCH 22/45] Fix linter

---
 tests/ci/test_browser_event_ScrollEvent.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/ci/test_browser_event_ScrollEvent.py b/tests/ci/test_browser_event_ScrollEvent.py
index dd2c568f8..320b3d378 100644
--- a/tests/ci/test_browser_event_ScrollEvent.py
+++ b/tests/ci/test_browser_event_ScrollEvent.py
@@ -91,7 +91,7 @@ class TestScrollActions:
 		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
 
 		# Test 1: Basic page scroll down
-		scroll_action = {'scroll': ScrollAction(down=True, num_pages=1.0)}
+		scroll_action = {'scroll': ScrollAction(down=True, pages=1.0)}
 
 		class ScrollActionModel(ActionModel):
 			scroll: ScrollAction | None = None
@@ -106,7 +106,7 @@ class TestScrollActions:
 		assert 'the page' in result.extracted_content
 
 		# Test 2: Basic page scroll up
-		scroll_up_action = {'scroll': ScrollAction(down=False, num_pages=0.5)}
+		scroll_up_action = {'scroll': ScrollAction(down=False, pages=0.5)}
 		result = await tools.act(ScrollActionModel(**scroll_up_action), browser_session)
 
 		assert isinstance(result, ActionResult)
@@ -116,7 +116,7 @@ class TestScrollActions:
 		assert '0.5 pages' in result.extracted_content
 
 		# Test 3: Test with invalid element index (should error)
-		invalid_scroll_action = {'scroll': ScrollAction(down=True, num_pages=1.0, frame_element_index=999)}
+		invalid_scroll_action = {'scroll': ScrollAction(down=True, pages=1.0, index=999)}
 		result = await tools.act(ScrollActionModel(**invalid_scroll_action), browser_session)
 
 		# This should fail with error about element not found
@@ -125,15 +125,15 @@ class TestScrollActions:
 		assert 'Element index 999 not found' in result.error or 'Failed to execute scroll' in result.error
 
 		# Test 4: Model parameter validation
-		scroll_with_index = ScrollAction(down=True, num_pages=1.0, frame_element_index=5)
+		scroll_with_index = ScrollAction(down=True, pages=1.0, index=5)
 		assert scroll_with_index.down is True
-		assert scroll_with_index.num_pages == 1.0
-		assert scroll_with_index.frame_element_index == 5
+		assert scroll_with_index.pages == 1.0
+		assert scroll_with_index.index == 5
 
-		scroll_without_index = ScrollAction(down=False, num_pages=0.25)
+		scroll_without_index = ScrollAction(down=False, pages=0.25)
 		assert scroll_without_index.down is False
-		assert scroll_without_index.num_pages == 0.25
-		assert scroll_without_index.frame_element_index is None
+		assert scroll_without_index.pages == 0.25
+		assert scroll_without_index.index is None
 
 	async def test_scroll_with_cross_origin_disabled(self, browser_session, base_url):
 		"""Test that scroll works when cross_origin_iframes is disabled."""

From 86a5bbe103a0a2732a8f2016c3ad5dc6828bf624 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 15:24:23 -0700
Subject: [PATCH 23/45] Update test

---
 tests/ci/test_tools.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/ci/test_tools.py b/tests/ci/test_tools.py
index 2a86fac6c..3e43f79e7 100644
--- a/tests/ci/test_tools.py
+++ b/tests/ci/test_tools.py
@@ -99,11 +99,11 @@ class TestToolsIntegration:
 			'navigate',
 			'search',
 			'click',
-			'input_text',
+			'input',
 			'scroll',
 			'go_back',
-			'switch_tab',
-			'close_tab',
+			'switch',
+			'close',
 			'wait',
 		]
 
@@ -429,13 +429,13 @@ class TestToolsIntegration:
 			f'Could not find select element in selector map. Available elements: {[f"{idx}: {element.tag_name}" for idx, element in selector_map.items()]}'
 		)
 
-		# Create a model for the standard get_dropdown_options action
-		class GetDropdownOptionsModel(ActionModel):
-			get_dropdown_options: dict[str, int]
+		# Create a model for the standard dropdown_options action
+		class DropdownOptionsModel(ActionModel):
+			dropdown_options: dict[str, int]
 
 		# Execute the action with the dropdown index
 		result = await tools.act(
-			action=GetDropdownOptionsModel(get_dropdown_options={'index': dropdown_index}),
+			action=DropdownOptionsModel(dropdown_options={'index': dropdown_index}),
 			browser_session=browser_session,
 		)
 

From 84b07cce8cc39f9fd72e8be5b702b7da4cce9d9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 15:24:37 -0700
Subject: [PATCH 24/45] Update parameter dict

---
 browser_use/tools/service.py | 6 +++---
 browser_use/tools/views.py   | 6 +++++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py
index 322834656..05739dab7 100644
--- a/browser_use/tools/service.py
+++ b/browser_use/tools/service.py
@@ -38,8 +38,8 @@ from browser_use.tools.views import (
 	CloseTabAction,
 	DoneAction,
 	GetDropdownOptionsAction,
-	GoToUrlAction,
 	InputTextAction,
+	NavigateAction,
 	NoParamsAction,
 	ScrollAction,
 	SearchAction,
@@ -159,9 +159,9 @@ class Tools(Generic[Context]):
 
 		@self.registry.action(
 			'',
-			param_model=GoToUrlAction,
+			param_model=NavigateAction,
 		)
-		async def navigate(params: GoToUrlAction, browser_session: BrowserSession):
+		async def navigate(params: NavigateAction, browser_session: BrowserSession):
 			try:
 				# Dispatch navigation event
 				event = browser_session.event_bus.dispatch(NavigateToUrlEvent(url=params.url, new_tab=params.new_tab))
diff --git a/browser_use/tools/views.py b/browser_use/tools/views.py
index 105530a8c..5e3a90786 100644
--- a/browser_use/tools/views.py
+++ b/browser_use/tools/views.py
@@ -15,11 +15,15 @@ class SearchAction(BaseModel):
 SearchAction = SearchAction
 
 
-class GoToUrlAction(BaseModel):
+class NavigateAction(BaseModel):
 	url: str
 	new_tab: bool = Field(default=False)
 
 
+# Backward compatibility alias
+GoToUrlAction = NavigateAction
+
+
 class ClickElementAction(BaseModel):
 	index: int = Field(ge=1, description='from browser_state')
 	ctrl: bool | None = Field(

From 1b6b674796850f18d038ee93f101c77a0457c412 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 15:24:47 -0700
Subject: [PATCH 25/45] Update sensitive data

---
 browser_use/agent/views.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/browser_use/agent/views.py b/browser_use/agent/views.py
index 5cb80eb86..86156365d 100644
--- a/browser_use/agent/views.py
+++ b/browser_use/agent/views.py
@@ -330,7 +330,7 @@ class AgentHistory(BaseModel):
 			# Filter sensitive data only from input action parameters if sensitive_data is provided
 			if sensitive_data:
 				action_dump = [
-					self._filter_sensitive_data_from_dict(action, sensitive_data) if action.get('name') == 'input' else action
+					self._filter_sensitive_data_from_dict(action, sensitive_data) if 'input' in action else action
 					for action in action_dump
 				]
 

From 8023733bb9f11f0b805d10d3bfb807c6e1ad4d39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 15:24:54 -0700
Subject: [PATCH 26/45] Update test

---
 tests/ci/test_tools.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/ci/test_tools.py b/tests/ci/test_tools.py
index 3e43f79e7..8586ef410 100644
--- a/tests/ci/test_tools.py
+++ b/tests/ci/test_tools.py
@@ -556,13 +556,13 @@ class TestToolsIntegration:
 			f'Could not find select element in selector map. Available elements: {[f"{idx}: {element.tag_name}" for idx, element in selector_map.items()]}'
 		)
 
-		# Create a model for the standard select_dropdown_option action
-		class SelectDropdownOptionModel(ActionModel):
-			select_dropdown_option: dict
+		# Create a model for the standard select_dropdown action
+		class SelectDropdownModel(ActionModel):
+			select_dropdown: dict
 
 		# Execute the action with the dropdown index
 		result = await tools.act(
-			SelectDropdownOptionModel(select_dropdown_option={'index': dropdown_index, 'text': 'Second Option'}),
+			SelectDropdownModel(select_dropdown={'index': dropdown_index, 'text': 'Second Option'}),
 			browser_session,
 		)
 

From 586dcc0a8266a1762bc8506589d841cec41388fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 19:00:33 -0700
Subject: [PATCH 27/45] Input index can be 0

---
 browser_use/tools/views.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/browser_use/tools/views.py b/browser_use/tools/views.py
index 5e3a90786..cb4399d04 100644
--- a/browser_use/tools/views.py
+++ b/browser_use/tools/views.py
@@ -35,7 +35,7 @@ class ClickElementAction(BaseModel):
 
 
 class InputTextAction(BaseModel):
-	index: int = Field(ge=1, description='from browser_state')
+	index: int = Field(ge=0, description='from browser_state')
 	text: str
 	clear: bool = Field(default=True, description='1=clear, 0=append')
 

From 6381d6794ff094836b32e5ddb5032e32c7987519 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 19:03:05 -0700
Subject: [PATCH 28/45] Remove create_gemini_optimized_schema

---
 browser_use/llm/schema.py | 28 ----------------------------
 1 file changed, 28 deletions(-)

diff --git a/browser_use/llm/schema.py b/browser_use/llm/schema.py
index 075564462..da21a7be1 100644
--- a/browser_use/llm/schema.py
+++ b/browser_use/llm/schema.py
@@ -160,31 +160,3 @@ class SchemaOptimizer:
 		elif isinstance(schema, list):
 			for item in schema:
 				SchemaOptimizer._make_strict_compatible(item)
-
-	@staticmethod
-	def create_gemini_optimized_schema(model: type[BaseModel]) -> dict[str, Any]:
-		"""
-		Create Gemini-optimized schema that removes 'required' arrays to save tokens.
-		Gemini can infer required fields from context since all fields are required.
-
-		Args:
-			model: The Pydantic model to optimize
-
-		Returns:
-			Optimized schema without required arrays
-		"""
-		# Start with standard optimized schema
-		schema = SchemaOptimizer.create_optimized_json_schema(model)
-
-		def remove_required_arrays(obj: Any) -> Any:
-			"""Recursively remove 'required' arrays"""
-			if isinstance(obj, dict):
-				# Remove 'required' key
-				result = {k: v for k, v in obj.items() if k != 'required'}
-				# Recursively process nested structures
-				return {k: remove_required_arrays(v) for k, v in result.items()}
-			elif isinstance(obj, list):
-				return [remove_required_arrays(item) for item in obj]
-			return obj
-
-		return remove_required_arrays(schema)

From a3a811e3be35eae279ee38acb88c8dcc4265c270 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 19:03:10 -0700
Subject: [PATCH 29/45] Remove create_gemini_optimized_schema

---
 browser_use/llm/google/chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/browser_use/llm/google/chat.py b/browser_use/llm/google/chat.py
index 3c11186a0..ad511bb26 100644
--- a/browser_use/llm/google/chat.py
+++ b/browser_use/llm/google/chat.py
@@ -255,7 +255,7 @@ class ChatGoogle(BaseChatModel):
 						self.logger.debug(f'🔧 Requesting structured output for {output_format.__name__}')
 						config['response_mime_type'] = 'application/json'
 						# Convert Pydantic model to Gemini-compatible schema
-						optimized_schema = SchemaOptimizer.create_gemini_optimized_schema(output_format)
+						optimized_schema = SchemaOptimizer.create_optimized_json_schema(output_format)
 
 						gemini_schema = self._fix_gemini_schema(optimized_schema)
 						config['response_schema'] = gemini_schema

From b53a7c116a6fa622908731b50a086cd0a0eff0fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 19:12:52 -0700
Subject: [PATCH 30/45] Update outdated function names

---
 browser_use/agent/message_manager/service.py                | 2 +-
 browser_use/agent/prompts.py                                | 2 +-
 browser_use/agent/service.py                                | 4 ++--
 browser_use/browser/events.py                               | 2 +-
 browser_use/browser/watchdogs/default_action_watchdog.py    | 6 +++---
 browser_use/mcp/server.py                                   | 6 +++---
 browser_use/tools/registry/service.py                       | 6 +++---
 tests/ci/test_browser_event_GetDropdownOptionsEvent.py      | 2 +-
 ...test_browser_event_GetDropdownOptionsEvent_aria_menus.py | 6 +++---
 tests/ci/test_tools.py                                      | 4 ++--
 10 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/browser_use/agent/message_manager/service.py b/browser_use/agent/message_manager/service.py
index b8abcfc3f..a8397b957 100644
--- a/browser_use/agent/message_manager/service.py
+++ b/browser_use/agent/message_manager/service.py
@@ -318,7 +318,7 @@ class MessageManager:
 					break
 
 		# Handle different use_vision modes:
-		# - "auto": Only include screenshot if explicitly requested by action (e.g., take_screenshot)
+		# - "auto": Only include screenshot if explicitly requested by action (e.g., screenshot)
 		# - True: Always include screenshot
 		# - False: Never include screenshot
 		include_screenshot = False
diff --git a/browser_use/agent/prompts.py b/browser_use/agent/prompts.py
index 2ea6cfa0d..879a8f35c 100644
--- a/browser_use/agent/prompts.py
+++ b/browser_use/agent/prompts.py
@@ -265,7 +265,7 @@ class AgentMessagePrompt:
 		# Check if current page is a PDF viewer and add appropriate message
 		pdf_message = ''
 		if self.browser_state.is_pdf_viewer:
-			pdf_message = 'PDF viewer cannot be rendered. In this page, DO NOT use the extract_structured_data action as PDF content cannot be rendered. '
+			pdf_message = 'PDF viewer cannot be rendered. In this page, DO NOT use the extract action as PDF content cannot be rendered. '
 			pdf_message += 'Use the read_file action on the downloaded PDF in available_file_paths to read the full text content or scroll in the page to see images/figures if needed.\n\n'
 
 		# Add recent events if available and requested
diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py
index 4868daaa8..5ac880632 100644
--- a/browser_use/agent/service.py
+++ b/browser_use/agent/service.py
@@ -255,8 +255,8 @@ class Agent(Generic[Context, AgentStructuredOutput]):
 		elif controller is not None:
 			self.tools = controller
 		else:
-			# Exclude take_screenshot tool when use_vision=False
-			exclude_actions = ['take_screenshot'] if use_vision is False else []
+			# Exclude screenshot tool when use_vision=False
+			exclude_actions = ['screenshot'] if use_vision is False else []
 			self.tools = Tools(exclude_actions=exclude_actions, display_files_in_done_text=display_files_in_done_text)
 
 		# Structured output
diff --git a/browser_use/browser/events.py b/browser_use/browser/events.py
index a89d9412c..397140f49 100644
--- a/browser_use/browser/events.py
+++ b/browser_use/browser/events.py
@@ -130,7 +130,7 @@ class ClickElementEvent(ElementSelectedEvent[dict[str, Any] | None]):
 	button: Literal['left', 'right', 'middle'] = 'left'
 	while_holding_ctrl: bool = Field(
 		default=False,
-		description='Set True to open any link clicked in a new tab in the background, can use switch_tab(tab_id=None) after to focus it',
+		description='Set True to open any link clicked in a new tab in the background, can use switch(tab_id=None) after to focus it',
 	)
 	# click_count: int = 1           # TODO
 	# expect_download: bool = False  # moved to downloads_watchdog.py
diff --git a/browser_use/browser/watchdogs/default_action_watchdog.py b/browser_use/browser/watchdogs/default_action_watchdog.py
index ee7048988..5167307f6 100644
--- a/browser_use/browser/watchdogs/default_action_watchdog.py
+++ b/browser_use/browser/watchdogs/default_action_watchdog.py
@@ -103,7 +103,7 @@ class DefaultActionWatchdog(BaseWatchdog):
 					# so we need to switch to the new tab to make the agent aware of the surprise new tab that was opened.
 					# when while_holding_ctrl=True we dont actually want to switch to it,
 					# we should match human expectations of ctrl+click which opens in the background,
-					# so in multi_act it usually already sends [click_element_by_index(123, while_holding_ctrl=True), switch_tab(tab_id=None)] anyway
+					# so in multi_act it usually already sends [click_element_by_index(123, while_holding_ctrl=True), switch(tab_id=None)] anyway
 					from browser_use.browser.events import SwitchTabEvent
 
 					new_target_id = new_target_ids.pop()
@@ -258,7 +258,7 @@ class DefaultActionWatchdog(BaseWatchdog):
 			element_type = element_node.attributes.get('type', '').lower() if element_node.attributes else ''
 
 			if tag_name == 'select':
-				msg = f'Cannot click on <select> elements. Use get_dropdown_options(index={element_node.element_index}) action instead.'
+				msg = f'Cannot click on <select> elements. Use dropdown_options(index={element_node.element_index}) action instead.'
 				self.logger.warning(msg)
 				raise BrowserError(
 					message=msg,
@@ -2001,7 +2001,7 @@ class DefaultActionWatchdog(BaseWatchdog):
 				msg = f'Found {dropdown_type} dropdown ({element_info}):\n' + '\n'.join(formatted_options)
 			else:
 				msg = f'Found {dropdown_type} dropdown in {source_info} ({element_info}):\n' + '\n'.join(formatted_options)
-			msg += f'\n\nUse the exact text or value string (without quotes) in select_dropdown_option(index={index_for_logging}, text=...)'
+			msg += f'\n\nUse the exact text or value string (without quotes) in select_dropdown(index={index_for_logging}, text=...)'
 
 			if source_info == 'target':
 				self.logger.info(f'📋 Found {len(dropdown_data["options"])} dropdown options for index {index_for_logging}')
diff --git a/browser_use/mcp/server.py b/browser_use/mcp/server.py
index 1074f9329..63d241c6c 100644
--- a/browser_use/mcp/server.py
+++ b/browser_use/mcp/server.py
@@ -811,7 +811,7 @@ class BrowserUseServer:
 
 		state = await self.browser_session.get_browser_state_summary()
 
-		# Use the extract_structured_data action
+		# Use the extract action
 		# Create a dynamic action model that matches the tools's expectations
 		from pydantic import create_model
 
@@ -819,13 +819,13 @@ class BrowserUseServer:
 		ExtractAction = create_model(
 			'ExtractAction',
 			__base__=ActionModel,
-			extract_structured_data=dict[str, Any],
+			extract=dict[str, Any],
 		)
 
 		# Use model_validate because Pyright does not understand the dynamic model
 		action = ExtractAction.model_validate(
 			{
-				'extract_structured_data': {'query': query, 'extract_links': extract_links},
+				'extract': {'query': query, 'extract_links': extract_links},
 			}
 		)
 		action_result = await self.tools.act(
diff --git a/browser_use/tools/registry/service.py b/browser_use/tools/registry/service.py
index 33bbd0342..f70ae6f2d 100644
--- a/browser_use/tools/registry/service.py
+++ b/browser_use/tools/registry/service.py
@@ -350,12 +350,12 @@ class Registry(Generic[Context]):
 				'browser_session': browser_session,
 				'page_extraction_llm': page_extraction_llm,
 				'available_file_paths': available_file_paths,
-				'has_sensitive_data': action_name == 'input_text' and bool(sensitive_data),
+				'has_sensitive_data': action_name == 'input' and bool(sensitive_data),
 				'file_system': file_system,
 			}
 
-			# Only pass sensitive_data to actions that explicitly need it (input_text)
-			if action_name == 'input_text':
+			# Only pass sensitive_data to actions that explicitly need it (input)
+			if action_name == 'input':
 				special_context['sensitive_data'] = sensitive_data
 
 			# Add CDP-related parameters if browser_session is available
diff --git a/tests/ci/test_browser_event_GetDropdownOptionsEvent.py b/tests/ci/test_browser_event_GetDropdownOptionsEvent.py
index 078410350..2942192f6 100644
--- a/tests/ci/test_browser_event_GetDropdownOptionsEvent.py
+++ b/tests/ci/test_browser_event_GetDropdownOptionsEvent.py
@@ -319,7 +319,7 @@ class TestGetDropdownOptionsEvent:
 			assert option in result.extracted_content, f"Option '{option}' not found in result content"
 
 		# Verify instruction is included
-		assert 'Use the exact text string' in result.extracted_content and 'select_dropdown_option' in result.extracted_content
+		assert 'Use the exact text string' in result.extracted_content and 'select_dropdown' in result.extracted_content
 
 		# Also test direct event dispatch
 		node = await browser_session.get_element_by_index(dropdown_index)
diff --git a/tests/ci/test_browser_event_GetDropdownOptionsEvent_aria_menus.py b/tests/ci/test_browser_event_GetDropdownOptionsEvent_aria_menus.py
index f74e656ca..98e120a3f 100644
--- a/tests/ci/test_browser_event_GetDropdownOptionsEvent_aria_menus.py
+++ b/tests/ci/test_browser_event_GetDropdownOptionsEvent_aria_menus.py
@@ -212,8 +212,8 @@ class TestARIAMenuDropdown:
 		for option in expected_options:
 			assert option in result.extracted_content, f"Option '{option}' not found in result content"
 
-		# Verify the instruction for using the text in select_dropdown_option is included
-		assert 'Use the exact text string in select_dropdown_option' in result.extracted_content
+		# Verify the instruction for using the text in select_dropdown is included
+		assert 'Use the exact text string in select_dropdown' in result.extracted_content
 
 	@pytest.mark.skip(reason='TODO: fix')
 	async def test_select_dropdown_option_with_aria_menu(self, tools, browser_session: BrowserSession, base_url):
@@ -347,4 +347,4 @@ class TestARIAMenuDropdown:
 		assert result.extracted_content is not None
 
 		# The action should return some menu options
-		assert 'Use the exact text string in select_dropdown_option' in result.extracted_content
+		assert 'Use the exact text string in select_dropdown' in result.extracted_content
diff --git a/tests/ci/test_tools.py b/tests/ci/test_tools.py
index 8586ef410..d2dae1c6e 100644
--- a/tests/ci/test_tools.py
+++ b/tests/ci/test_tools.py
@@ -454,10 +454,10 @@ class TestToolsIntegration:
 		for option in expected_options[1:]:  # Skip the placeholder option
 			assert option['text'] in result.extracted_content, f"Option '{option['text']}' not found in result content"
 
-		# Verify the instruction for using the text in select_dropdown_option is included
+		# Verify the instruction for using the text in select_dropdown is included
 		assert (
 			'Use the exact text or value string' in result.extracted_content
-			and 'select_dropdown_option' in result.extracted_content
+			and 'select_dropdown' in result.extracted_content
 		)
 
 		# Verify the actual dropdown options in the DOM using CDP

From 324ac7420497d1e878026caea1bf381405a31991 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 19:21:12 -0700
Subject: [PATCH 31/45] Linter

---
 .gitignore                                               | 2 ++
 browser_use/agent/prompts.py                             | 4 +++-
 browser_use/browser/watchdogs/default_action_watchdog.py | 8 ++++++--
 docs/customize/agent/all-parameters.mdx                  | 2 +-
 tests/ci/test_tools.py                                   | 5 +----
 5 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/.gitignore b/.gitignore
index b2c71bb34..9f7ac4c49 100644
--- a/.gitignore
+++ b/.gitignore
@@ -65,3 +65,5 @@ all_github_issues_progress.md
 all_github_issues.md
 
 todo-input-token.md
+
+TOOL_CHANGES_SUMMARY.md
\ No newline at end of file
diff --git a/browser_use/agent/prompts.py b/browser_use/agent/prompts.py
index 879a8f35c..49ea5c8bd 100644
--- a/browser_use/agent/prompts.py
+++ b/browser_use/agent/prompts.py
@@ -265,7 +265,9 @@ class AgentMessagePrompt:
 		# Check if current page is a PDF viewer and add appropriate message
 		pdf_message = ''
 		if self.browser_state.is_pdf_viewer:
-			pdf_message = 'PDF viewer cannot be rendered. In this page, DO NOT use the extract action as PDF content cannot be rendered. '
+			pdf_message = (
+				'PDF viewer cannot be rendered. In this page, DO NOT use the extract action as PDF content cannot be rendered. '
+			)
 			pdf_message += 'Use the read_file action on the downloaded PDF in available_file_paths to read the full text content or scroll in the page to see images/figures if needed.\n\n'
 
 		# Add recent events if available and requested
diff --git a/browser_use/browser/watchdogs/default_action_watchdog.py b/browser_use/browser/watchdogs/default_action_watchdog.py
index 5167307f6..1463ac043 100644
--- a/browser_use/browser/watchdogs/default_action_watchdog.py
+++ b/browser_use/browser/watchdogs/default_action_watchdog.py
@@ -258,7 +258,9 @@ class DefaultActionWatchdog(BaseWatchdog):
 			element_type = element_node.attributes.get('type', '').lower() if element_node.attributes else ''
 
 			if tag_name == 'select':
-				msg = f'Cannot click on <select> elements. Use dropdown_options(index={element_node.element_index}) action instead.'
+				msg = (
+					f'Cannot click on <select> elements. Use dropdown_options(index={element_node.element_index}) action instead.'
+				)
 				self.logger.warning(msg)
 				raise BrowserError(
 					message=msg,
@@ -2001,7 +2003,9 @@ class DefaultActionWatchdog(BaseWatchdog):
 				msg = f'Found {dropdown_type} dropdown ({element_info}):\n' + '\n'.join(formatted_options)
 			else:
 				msg = f'Found {dropdown_type} dropdown in {source_info} ({element_info}):\n' + '\n'.join(formatted_options)
-			msg += f'\n\nUse the exact text or value string (without quotes) in select_dropdown(index={index_for_logging}, text=...)'
+			msg += (
+				f'\n\nUse the exact text or value string (without quotes) in select_dropdown(index={index_for_logging}, text=...)'
+			)
 
 			if source_info == 'target':
 				self.logger.info(f'📋 Found {len(dropdown_data["options"])} dropdown options for index {index_for_logging}')
diff --git a/docs/customize/agent/all-parameters.mdx b/docs/customize/agent/all-parameters.mdx
index 0f31dbb6e..bb0213f9e 100644
--- a/docs/customize/agent/all-parameters.mdx
+++ b/docs/customize/agent/all-parameters.mdx
@@ -13,7 +13,7 @@ mode: "wide"
 - `output_model_schema`: Pydantic model class for structured output validation. [Example](https://github.com/browser-use/browser-use/blob/main/examples/features/custom_output.py)
 
 ### Vision & Processing
-- `use_vision` (default: `"auto"`): Vision mode - `"auto"` includes take_screenshot tool but only uses vision when requested, `True` always includes screenshots, `False` never includes screenshots and excludes take_screenshot tool
+- `use_vision` (default: `"auto"`): Vision mode - `"auto"` includes screenshot tool but only uses vision when requested, `True` always includes screenshots, `False` never includes screenshots and excludes screenshot tool
 - `vision_detail_level` (default: `'auto'`): Screenshot detail level - `'low'`, `'high'`, or `'auto'`
 - `page_extraction_llm`: Separate LLM model for page content extraction. You can choose a small & fast model because it only needs to extract text from the page (default: same as `llm`)
 
diff --git a/tests/ci/test_tools.py b/tests/ci/test_tools.py
index d2dae1c6e..a25421e5b 100644
--- a/tests/ci/test_tools.py
+++ b/tests/ci/test_tools.py
@@ -455,10 +455,7 @@ class TestToolsIntegration:
 			assert option['text'] in result.extracted_content, f"Option '{option['text']}' not found in result content"
 
 		# Verify the instruction for using the text in select_dropdown is included
-		assert (
-			'Use the exact text or value string' in result.extracted_content
-			and 'select_dropdown' in result.extracted_content
-		)
+		assert 'Use the exact text or value string' in result.extracted_content and 'select_dropdown' in result.extracted_content
 
 		# Verify the actual dropdown options in the DOM using CDP
 		dropdown_options_result = await cdp_session.cdp_client.send.Runtime.evaluate(

From 473dab2874c3683e7cac011e2c1dcc44b50970a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 19:23:34 -0700
Subject: [PATCH 32/45] Linter

---
 .gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 9f7ac4c49..8f39744c0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -66,4 +66,4 @@ all_github_issues.md
 
 todo-input-token.md
 
-TOOL_CHANGES_SUMMARY.md
\ No newline at end of file
+TOOL_CHANGES_SUMMARY.md

From 36bbf1ee23cfc15bcdb14521569b8eee9de63f23 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 19:30:10 -0700
Subject: [PATCH 33/45] Update outdated function names

---
 browser_use/actor/README.md                   |  2 +-
 docs/customize/actor/all-parameters.mdx       |  2 +-
 .../test_browser_event_ClickElementEvent.py   | 42 ++++++++---------
 ...t_browser_event_GetDropdownOptionsEvent.py | 32 ++++++-------
 ...vent_GetDropdownOptionsEvent_aria_menus.py | 12 ++---
 .../test_browser_event_NavigateToUrlEvent.py  | 46 +++++++++----------
 tests/ci/test_browser_event_ScrollEvent.py    |  4 +-
 ...r_watchdog_downloads_upload_full_circle.py |  6 +--
 tests/ci/test_tools.py                        | 26 +++++------
 9 files changed, 86 insertions(+), 86 deletions(-)

diff --git a/browser_use/actor/README.md b/browser_use/actor/README.md
index b30c94bd1..e9c3cf1d5 100644
--- a/browser_use/actor/README.md
+++ b/browser_use/actor/README.md
@@ -167,7 +167,7 @@ products = await page.extract_content(
 
 ### Element Methods (DOM Interactions)
 - `click(button='left', click_count=1, modifiers=None)` - Click element with advanced fallbacks
-- `fill(text: str, clear_existing=True)` - Fill input with text (clears first by default)
+- `fill(text: str, clear=True)` - Fill input with text (clears first by default)
 - `hover()` - Hover over element
 - `focus()` - Focus the element
 - `check()` - Toggle checkbox/radio button (clicks to change state)
diff --git a/docs/customize/actor/all-parameters.mdx b/docs/customize/actor/all-parameters.mdx
index 04415421f..3cfbf59fa 100644
--- a/docs/customize/actor/all-parameters.mdx
+++ b/docs/customize/actor/all-parameters.mdx
@@ -64,7 +64,7 @@ Individual DOM element interactions.
 
 ### Interactions
 - `click(button='left', click_count=1, modifiers=None)` - Click element
-- `fill(text: str, clear_existing=True)` - Fill input
+- `fill(text: str, clear=True)` - Fill input
 - `hover()`, `focus()` - Mouse/focus actions
 - `check()` - Toggle checkbox/radio
 - `select_option(values: str | list[str])` - Select dropdown options
diff --git a/tests/ci/test_browser_event_ClickElementEvent.py b/tests/ci/test_browser_event_ClickElementEvent.py
index 976c476ec..c575e69f9 100644
--- a/tests/ci/test_browser_event_ClickElementEvent.py
+++ b/tests/ci/test_browser_event_ClickElementEvent.py
@@ -134,10 +134,10 @@ class TestClickElementEvent:
 
 		from browser_use.agent.views import ActionModel
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 
 		# Wait for the page to load
 		await asyncio.sleep(0.5)  # Give page time to load
@@ -222,10 +222,10 @@ class TestClickElementEvent:
 
 		from browser_use.agent.views import ActionModel
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 		await asyncio.sleep(1)  # Wait for page to load
 
 		# Count initial tabs
@@ -310,10 +310,10 @@ class TestClickElementEvent:
 
 		from browser_use.agent.views import ActionModel
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 		await asyncio.sleep(1)
 
 		tabs = await browser_session.get_tabs()
@@ -342,7 +342,7 @@ class TestClickElementEvent:
 		assert len(tabs) == initial_tab_count
 
 		# Navigate back to comparison page for second test
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 		await asyncio.sleep(1)
 
 		# Test new tab click (ctrl=True) - should open in new background tab
@@ -397,10 +397,10 @@ class TestClickElementEvent:
 
 		from browser_use.agent.views import ActionModel
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 		await asyncio.sleep(0.5)
 
 		# Get the clickable elements
@@ -479,10 +479,10 @@ class TestClickElementEvent:
 
 		from browser_use.agent.views import ActionModel
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 		await asyncio.sleep(0.5)
 
 		# Get the clickable elements
@@ -567,10 +567,10 @@ class TestClickElementEvent:
 
 		from browser_use.agent.views import ActionModel
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 		await asyncio.sleep(0.5)
 
 		# Get the clickable elements
@@ -627,10 +627,10 @@ class TestClickElementEvent:
 
 		from browser_use.agent.views import ActionModel
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 		await asyncio.sleep(0.5)
 
 		# Get the clickable elements
@@ -688,10 +688,10 @@ class TestClickElementEvent:
 
 		from browser_use.agent.views import ActionModel
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 		await asyncio.sleep(0.5)
 
 		# Get the clickable elements
@@ -1085,10 +1085,10 @@ class TestClickElementEvent:
 
 			from browser_use.agent.views import ActionModel
 
-			class GoToUrlActionModel(ActionModel):
+			class NavigateActionModel(ActionModel):
 				navigate: GoToUrlAction | None = None
 
-			await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+			await tools.act(NavigateActionModel(**goto_action), browser_session)
 
 			# Wait for the page to load
 			await asyncio.sleep(0.5)
@@ -1230,10 +1230,10 @@ class TestClickElementEvent:
 			goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/upload-test', new_tab=False)}
 			from browser_use.agent.views import ActionModel
 
-			class GoToUrlActionModel(ActionModel):
+			class NavigateActionModel(ActionModel):
 				navigate: GoToUrlAction | None = None
 
-			await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+			await tools.act(NavigateActionModel(**goto_action), browser_session)
 			await asyncio.sleep(0.5)
 
 			# Get browser state to populate selector map
diff --git a/tests/ci/test_browser_event_GetDropdownOptionsEvent.py b/tests/ci/test_browser_event_GetDropdownOptionsEvent.py
index 2942192f6..2bac37996 100644
--- a/tests/ci/test_browser_event_GetDropdownOptionsEvent.py
+++ b/tests/ci/test_browser_event_GetDropdownOptionsEvent.py
@@ -280,10 +280,10 @@ class TestGetDropdownOptionsEvent:
 		# Navigate to the native dropdown test page
 		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/native-dropdown', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 
 		# Initialize the DOM state to populate the selector map
 		await browser_session.get_browser_state_summary()
@@ -338,10 +338,10 @@ class TestGetDropdownOptionsEvent:
 		# Navigate to the ARIA menu test page
 		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/aria-menu', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 
 		# Initialize the DOM state
 		await browser_session.get_browser_state_summary()
@@ -400,10 +400,10 @@ class TestGetDropdownOptionsEvent:
 		# Navigate to the custom dropdown test page
 		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/custom-dropdown', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 
 		# Initialize the DOM state
 		await browser_session.get_browser_state_summary()
@@ -458,10 +458,10 @@ class TestGetDropdownOptionsEvent:
 		# Navigate to any test page
 		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/native-dropdown', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 		await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0)
 
 		# Try to get dropdown options with invalid index
@@ -488,10 +488,10 @@ class TestSelectDropdownOptionEvent:
 		# Navigate to the native dropdown test page
 		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/native-dropdown', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 		await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0)
 
 		# Initialize the DOM state
@@ -536,10 +536,10 @@ class TestSelectDropdownOptionEvent:
 		# Navigate to the ARIA menu test page
 		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/aria-menu', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 		await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0)
 
 		# Initialize the DOM state
@@ -588,10 +588,10 @@ class TestSelectDropdownOptionEvent:
 		# Navigate to the custom dropdown test page
 		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/custom-dropdown', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 		await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0)
 
 		# Initialize the DOM state
@@ -636,10 +636,10 @@ class TestSelectDropdownOptionEvent:
 		# Navigate to the native dropdown test page
 		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/native-dropdown', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 		await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0)
 
 		# Initialize the DOM state
diff --git a/tests/ci/test_browser_event_GetDropdownOptionsEvent_aria_menus.py b/tests/ci/test_browser_event_GetDropdownOptionsEvent_aria_menus.py
index 98e120a3f..c6d5b850c 100644
--- a/tests/ci/test_browser_event_GetDropdownOptionsEvent_aria_menus.py
+++ b/tests/ci/test_browser_event_GetDropdownOptionsEvent_aria_menus.py
@@ -154,10 +154,10 @@ class TestARIAMenuDropdown:
 		# Navigate to the ARIA menu test page
 		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/aria-menu', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 
 		# Wait for the page to load
 		from browser_use.browser.events import NavigationCompleteEvent
@@ -221,10 +221,10 @@ class TestARIAMenuDropdown:
 		# Navigate to the ARIA menu test page
 		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/aria-menu', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 
 		# Wait for the page to load
 		from browser_use.browser.events import NavigationCompleteEvent
@@ -291,10 +291,10 @@ class TestARIAMenuDropdown:
 		# Navigate to the ARIA menu test page
 		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/aria-menu', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 
 		# Wait for the page to load
 		from browser_use.browser.events import NavigationCompleteEvent
diff --git a/tests/ci/test_browser_event_NavigateToUrlEvent.py b/tests/ci/test_browser_event_NavigateToUrlEvent.py
index 088f6776f..76a0f0759 100644
--- a/tests/ci/test_browser_event_NavigateToUrlEvent.py
+++ b/tests/ci/test_browser_event_NavigateToUrlEvent.py
@@ -66,10 +66,10 @@ class TestNavigateToUrlEvent:
 		# Test successful navigation to a valid page
 		action_data = {'navigate': GoToUrlAction(url=f'{base_url}/page1', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		action_model = GoToUrlActionModel(**action_data)
+		action_model = NavigateActionModel(**action_data)
 		result = await tools.act(action_model, browser_session)
 
 		# Verify the successful navigation result
@@ -83,10 +83,10 @@ class TestNavigateToUrlEvent:
 		action_data = {'navigate': GoToUrlAction(url='https://www.nonexistentdndbeyond.com/', new_tab=False)}
 
 		# Create the ActionModel instance
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		action_model = GoToUrlActionModel(**action_data)
+		action_model = NavigateActionModel(**action_data)
 
 		# Execute the action - should return soft error instead of throwing
 		result = await tools.act(action_model, browser_session)
@@ -132,10 +132,10 @@ class TestNavigateToUrlEvent:
 		# Navigate to URL in new tab
 		action_data = {'navigate': GoToUrlAction(url=f'{base_url}/page2', new_tab=True)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		result = await tools.act(GoToUrlActionModel(**action_data), browser_session)
+		result = await tools.act(NavigateActionModel(**action_data), browser_session)
 		await asyncio.sleep(0.5)
 
 		# Verify result
@@ -157,14 +157,14 @@ class TestNavigateToUrlEvent:
 		# Navigate to a normal page first
 		action_data = {'navigate': GoToUrlAction(url=f'{base_url}/page1', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**action_data), browser_session)
+		await tools.act(NavigateActionModel(**action_data), browser_session)
 
 		# Try to navigate to javascript: URL (should be handled gracefully)
 		js_action = {'navigate': GoToUrlAction(url='javascript:alert("test")', new_tab=False)}
-		result = await tools.act(GoToUrlActionModel(**js_action), browser_session)
+		result = await tools.act(NavigateActionModel(**js_action), browser_session)
 
 		# Should either succeed or fail gracefully
 		assert isinstance(result, ActionResult)
@@ -176,10 +176,10 @@ class TestNavigateToUrlEvent:
 
 		action_data = {'navigate': GoToUrlAction(url=data_url, new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		result = await tools.act(GoToUrlActionModel(**action_data), browser_session)
+		result = await tools.act(NavigateActionModel(**action_data), browser_session)
 
 		# Verify navigation
 		assert isinstance(result, ActionResult)
@@ -212,10 +212,10 @@ class TestNavigateToUrlEvent:
 		# Navigate to page with hash
 		action_data = {'navigate': GoToUrlAction(url=f'{base_url}/page-with-anchors#section1', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		result = await tools.act(GoToUrlActionModel(**action_data), browser_session)
+		result = await tools.act(NavigateActionModel(**action_data), browser_session)
 
 		# Verify navigation
 		assert isinstance(result, ActionResult)
@@ -249,10 +249,10 @@ class TestNavigateToUrlEvent:
 		# Navigate with query parameters
 		action_data = {'navigate': GoToUrlAction(url=f'{base_url}/search?q=test+query&page=1', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		result = await tools.act(GoToUrlActionModel(**action_data), browser_session)
+		result = await tools.act(NavigateActionModel(**action_data), browser_session)
 
 		# Verify navigation
 		assert isinstance(result, ActionResult)
@@ -269,18 +269,18 @@ class TestNavigateToUrlEvent:
 		# Navigate to first page in current tab
 		action1 = {'navigate': GoToUrlAction(url=f'{base_url}/page1', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**action1), browser_session)
+		await tools.act(NavigateActionModel(**action1), browser_session)
 
 		# Open second page in new tab
 		action2 = {'navigate': GoToUrlAction(url=f'{base_url}/page2', new_tab=True)}
-		await tools.act(GoToUrlActionModel(**action2), browser_session)
+		await tools.act(NavigateActionModel(**action2), browser_session)
 
 		# Open home page in yet another new tab
 		action3 = {'navigate': GoToUrlAction(url=base_url, new_tab=True)}
-		await tools.act(GoToUrlActionModel(**action3), browser_session)
+		await tools.act(NavigateActionModel(**action3), browser_session)
 
 		# Should have 3 tabs now
 		tabs = await browser_session.get_tabs()
@@ -298,11 +298,11 @@ class TestNavigateToUrlEvent:
 
 		action_data = {'navigate': GoToUrlAction(url=timeout_url, new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
 		# This should complete without hanging indefinitely
-		result = await tools.act(GoToUrlActionModel(**action_data), browser_session)
+		result = await tools.act(NavigateActionModel(**action_data), browser_session)
 
 		# Should get a result (possibly with error)
 		assert isinstance(result, ActionResult)
@@ -319,10 +319,10 @@ class TestNavigateToUrlEvent:
 		# Navigate to redirect URL
 		action_data = {'navigate': GoToUrlAction(url=f'{base_url}/redirect', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		result = await tools.act(GoToUrlActionModel(**action_data), browser_session)
+		result = await tools.act(NavigateActionModel(**action_data), browser_session)
 
 		# Verify navigation succeeded
 		assert isinstance(result, ActionResult)
diff --git a/tests/ci/test_browser_event_ScrollEvent.py b/tests/ci/test_browser_event_ScrollEvent.py
index 320b3d378..6ea845b46 100644
--- a/tests/ci/test_browser_event_ScrollEvent.py
+++ b/tests/ci/test_browser_event_ScrollEvent.py
@@ -85,10 +85,10 @@ class TestScrollActions:
 		# Navigate to scrollable page
 		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/scrollable', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 
 		# Test 1: Basic page scroll down
 		scroll_action = {'scroll': ScrollAction(down=True, pages=1.0)}
diff --git a/tests/ci/test_browser_watchdog_downloads_upload_full_circle.py b/tests/ci/test_browser_watchdog_downloads_upload_full_circle.py
index 679ef14d4..92b15750e 100644
--- a/tests/ci/test_browser_watchdog_downloads_upload_full_circle.py
+++ b/tests/ci/test_browser_watchdog_downloads_upload_full_circle.py
@@ -161,11 +161,11 @@ class TestDownloadUploadFullCircle:
 				base_url = f'http://{download_upload_server.host}:{download_upload_server.port}'
 
 				# Step 1: Navigate to download page
-				class GoToUrlActionModel(ActionModel):
+				class NavigateActionModel(ActionModel):
 					navigate: GoToUrlAction | None = None
 
 				result = await tools.act(
-					GoToUrlActionModel(navigate=GoToUrlAction(url=f'{base_url}/download-page', new_tab=False)), browser_session
+					NavigateActionModel(navigate=GoToUrlAction(url=f'{base_url}/download-page', new_tab=False)), browser_session
 				)
 				assert result.error is None, f'Navigation to download page failed: {result.error}'
 
@@ -228,7 +228,7 @@ class TestDownloadUploadFullCircle:
 				for i, tab in enumerate(tabs_before):
 					print(f'  Tab {i}: {tab.url}')
 				result = await tools.act(
-					GoToUrlActionModel(navigate=GoToUrlAction(url=f'{base_url}/upload-page', new_tab=True)), browser_session
+					NavigateActionModel(navigate=GoToUrlAction(url=f'{base_url}/upload-page', new_tab=True)), browser_session
 				)
 				assert result.error is None, f'Navigation to upload page failed: {result.error}'
 				print(f'✅ Navigation result: {result.extracted_content}')
diff --git a/tests/ci/test_tools.py b/tests/ci/test_tools.py
index a25421e5b..44aef7e69 100644
--- a/tests/ci/test_tools.py
+++ b/tests/ci/test_tools.py
@@ -127,10 +127,10 @@ class TestToolsIntegration:
 		# Navigate to a page first
 		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/page1', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 
 		# Create the custom action model
 		custom_action_data = {'custom_action': CustomParams(text='test_value')}
@@ -211,10 +211,10 @@ class TestToolsIntegration:
 		# Navigate to first page
 		goto_action1 = {'navigate': GoToUrlAction(url=f'{base_url}/page1', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action1), browser_session)
+		await tools.act(NavigateActionModel(**goto_action1), browser_session)
 
 		# Store the first page URL
 		first_url = await browser_session.get_current_page_url()
@@ -222,7 +222,7 @@ class TestToolsIntegration:
 
 		# Navigate to second page
 		goto_action2 = {'navigate': GoToUrlAction(url=f'{base_url}/page2', new_tab=False)}
-		await tools.act(GoToUrlActionModel(**goto_action2), browser_session)
+		await tools.act(NavigateActionModel(**goto_action2), browser_session)
 
 		# Verify we're on the second page
 		second_url = await browser_session.get_current_page_url()
@@ -261,10 +261,10 @@ class TestToolsIntegration:
 		for url in urls:
 			action_data = {'navigate': GoToUrlAction(url=url, new_tab=False)}
 
-			class GoToUrlActionModel(ActionModel):
+			class NavigateActionModel(ActionModel):
 				navigate: GoToUrlAction | None = None
 
-			await tools.act(GoToUrlActionModel(**action_data), browser_session)
+			await tools.act(NavigateActionModel(**action_data), browser_session)
 
 			# Verify current page
 			current_url = await browser_session.get_current_page_url()
@@ -327,10 +327,10 @@ class TestToolsIntegration:
 			# First navigate to a page
 			goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/page1', new_tab=False)}
 
-			class GoToUrlActionModel(ActionModel):
+			class NavigateActionModel(ActionModel):
 				navigate: GoToUrlAction | None = None
 
-			await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+			await tools.act(NavigateActionModel(**goto_action), browser_session)
 
 			success_done_message = 'Successfully completed task'
 
@@ -394,10 +394,10 @@ class TestToolsIntegration:
 		# Navigate to the dropdown test page
 		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/dropdown1', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 
 		# Wait for the page to load using CDP
 		cdp_session = browser_session.agent_focus
@@ -518,10 +518,10 @@ class TestToolsIntegration:
 		# Navigate to the dropdown test page
 		goto_action = {'navigate': GoToUrlAction(url=f'{base_url}/dropdown2', new_tab=False)}
 
-		class GoToUrlActionModel(ActionModel):
+		class NavigateActionModel(ActionModel):
 			navigate: GoToUrlAction | None = None
 
-		await tools.act(GoToUrlActionModel(**goto_action), browser_session)
+		await tools.act(NavigateActionModel(**goto_action), browser_session)
 
 		# Wait for the page to load using CDP
 		cdp_session = browser_session.agent_focus

From 6d941a5dccd346984188ea3a54416fc762520fcf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 19:32:39 -0700
Subject: [PATCH 34/45] Rename Clear existing to clear

---
 browser_use/actor/element.py                             | 4 ++--
 browser_use/browser/events.py                            | 2 +-
 browser_use/browser/watchdogs/default_action_watchdog.py | 6 +++---
 browser_use/tools/service.py                             | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/browser_use/actor/element.py b/browser_use/actor/element.py
index 7d368586c..daaa9d3ee 100644
--- a/browser_use/actor/element.py
+++ b/browser_use/actor/element.py
@@ -349,7 +349,7 @@ class Element:
 			# Extract key element info for error message
 			raise RuntimeError(f'Failed to click element: {e}')
 
-	async def fill(self, value: str, clear_existing: bool = True) -> None:
+	async def fill(self, value: str, clear: bool = True) -> None:
 		"""Fill the input element using proper CDP methods with improved focus handling."""
 		try:
 			# Use the existing CDP client and session
@@ -409,7 +409,7 @@ class Element:
 			)
 
 			# Step 2: Clear existing text if requested
-			if clear_existing and focused_successfully:
+			if clear and focused_successfully:
 				cleared_successfully = await self._clear_text_field(
 					object_id=object_id, cdp_client=cdp_client, session_id=session_id
 				)
diff --git a/browser_use/browser/events.py b/browser_use/browser/events.py
index 397140f49..ae7b8b1e5 100644
--- a/browser_use/browser/events.py
+++ b/browser_use/browser/events.py
@@ -143,7 +143,7 @@ class TypeTextEvent(ElementSelectedEvent[dict | None]):
 
 	node: 'EnhancedDOMTreeNode'
 	text: str
-	clear_existing: bool = True
+	clear: bool = True
 	is_sensitive: bool = False  # Flag to indicate if text contains sensitive data
 	sensitive_key_name: str | None = None  # Name of the sensitive key being typed (e.g., 'username', 'password')
 
diff --git a/browser_use/browser/watchdogs/default_action_watchdog.py b/browser_use/browser/watchdogs/default_action_watchdog.py
index 1463ac043..1ab26839b 100644
--- a/browser_use/browser/watchdogs/default_action_watchdog.py
+++ b/browser_use/browser/watchdogs/default_action_watchdog.py
@@ -144,7 +144,7 @@ class DefaultActionWatchdog(BaseWatchdog):
 					input_metadata = await self._input_text_element_node_impl(
 						element_node,
 						event.text,
-						clear_existing=event.clear_existing or (not event.text),
+						clear=event.clear or (not event.text),
 						is_sensitive=event.is_sensitive,
 					)
 					# Log with sensitive data protection
@@ -994,7 +994,7 @@ class DefaultActionWatchdog(BaseWatchdog):
 		return False
 
 	async def _input_text_element_node_impl(
-		self, element_node: EnhancedDOMTreeNode, text: str, clear_existing: bool = True, is_sensitive: bool = False
+		self, element_node: EnhancedDOMTreeNode, text: str, clear: bool = True, is_sensitive: bool = False
 	) -> dict | None:
 		"""
 		Input text into an element using pure CDP with improved focus fallbacks.
@@ -1057,7 +1057,7 @@ class DefaultActionWatchdog(BaseWatchdog):
 			)
 
 			# Step 2: Clear existing text if requested
-			if clear_existing and focused_successfully:
+			if clear and focused_successfully:
 				cleared_successfully = await self._clear_text_field(object_id=object_id, cdp_session=cdp_session)
 				if not cleared_successfully:
 					self.logger.warning('⚠️ Text field clearing failed, typing may append to existing text')
diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py
index 05739dab7..60a0012e3 100644
--- a/browser_use/tools/service.py
+++ b/browser_use/tools/service.py
@@ -313,7 +313,7 @@ class Tools(Generic[Context]):
 					TypeTextEvent(
 						node=node,
 						text=params.text,
-						clear_existing=params.clear,
+						clear=params.clear,
 						is_sensitive=has_sensitive_data,
 						sensitive_key_name=sensitive_key_name,
 					)

From 3eed1345c5816db11c12b8327084c9f6ec3f6236 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 19:41:29 -0700
Subject: [PATCH 35/45] Increase descriptions

---
 browser_use/tools/service.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py
index 60a0012e3..2851da352 100644
--- a/browser_use/tools/service.py
+++ b/browser_use/tools/service.py
@@ -555,7 +555,7 @@ class Tools(Generic[Context]):
 		# This action is temporarily disabled as it needs refactoring to use events
 
 		@self.registry.action(
-			"""Extract page data via LLM. Use when on right page, know what to extract. Can't get interactive elements. Don't call again on same page with same query.""",
+			"""LLM extracts structured data from page markdown. Use when: on right page, know what to extract, haven't called before on same page+query. Can't get interactive elements. Set extract_links=True for URLs. Use start_from_char if truncated. If fails, use find_text/scroll instead.""",
 		)
 		async def extract(
 			query: str,
@@ -675,7 +675,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 				raise RuntimeError(str(e))
 
 		@self.registry.action(
-			'Scroll page. Multiple pages scroll sequentially.',
+			"""Scroll by pages (down=True/False, pages=0.5-10.0, default 1.0). Use index for scroll containers (dropdowns/custom UI). High pages (10) reaches bottom. Multi-page scrolls sequentially. Viewport-based height, fallback 1000px/page.""",
 			param_model=ScrollAction,
 		)
 		async def scroll(params: ScrollAction, browser_session: BrowserSession):
@@ -960,7 +960,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 			)
 
 		@self.registry.action(
-			'JS eval. Wrap in IIFE: (function(){...})(). Use try/catch. JSON.stringify() for objects.',
+			"""Execute JS. MUST: wrap in IIFE (function(){...})() or (async function(){...})(), add try-catch, validate elements exist. Use for: hover, drag, custom selectors, forms, extract/filter links, iframes, shadow DOM, React/Vue/Angular. Limit output. Examples: (function(){try{const el=document.querySelector('#id');return el?el.value:'not found'}catch(e){return 'Error: '+e.message}})() ✓ | document.querySelector('#id').value ✗. Shadow: iterate hosts, check shadowRoot. Return JSON.stringify() for objects. Do not use comments""",
 		)
 		async def evaluate(code: str, browser_session: BrowserSession):
 			# Execute JavaScript with proper error handling and promise support

From fc36b6f8b481de4891d7a30677e9f6008fa55916 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 19:59:44 -0700
Subject: [PATCH 36/45] System prompt flash

---
 browser_use/agent/system_prompt_flash.md | 57 +++++++++++++++++++++---
 1 file changed, 52 insertions(+), 5 deletions(-)

diff --git a/browser_use/agent/system_prompt_flash.md b/browser_use/agent/system_prompt_flash.md
index 376f64609..41c75f41c 100644
--- a/browser_use/agent/system_prompt_flash.md
+++ b/browser_use/agent/system_prompt_flash.md
@@ -1,13 +1,60 @@
 You are an AI agent designed to operate in an iterative loop to automate browser tasks. Your ultimate goal is accomplishing the task provided in <user_request>.
 
 
-Interactive Elements: All interactive elements will be provided in format as [index]<type>text</type> where
+<language_settings>
+Default: English. Match user's language.
+</language_settings>
+
+
+<user_request>
+Ultimate objective. Specific tasks: follow each step. Open-ended: plan approach.
+</user_request>
+
+<browser_state>
+Elements: [index]<type>text</type>. Only [indexed] are interactive. Indentation=child. *[=new.
+</browser_state>
+
+
+
+
+<browser_rules>
+- Page changes: analyze new elements
+- Captcha: try solving, else use fallback (alt site, backtrack)
+- Don't login without credentials or if unnecessary. Stuck? Try alternatives (partial access, web search)
+- PDFs auto-download to available_file_paths. Read file or scroll viewer.
+- Track progress via agent_history. State what you tried last.
+- Judge last action success/fail/uncertain. Verify via screenshot (primary) or browser_state. Never assume success from history alone.
+- For very long tasks (>15 steps): create plan in todo.md, mark items complete as done.
+- Stuck? Try alternatives: scroll, send_keys, different pages.
+- Save relevant user_request info to files. Check file_system before writing to avoid overwrites.
+- Store concise, actionable context in memory.
+- Before done: verify file contents with read_file.
+- Compare trajectory with user_request carefully (filters, fields, info).
+</browser_rules>
+
+<file_system>
+Persistent file system for progress tracking. todo.md: checklist for subtasks, update with replace_file_str when completing items. CSV: use double quotes for commas. Large files: preview shown, use read_file for full content. available_file_paths: downloaded/user files (read/upload only). Long tasks: use results.md. DON'T use for tasks <10 steps.
+</file_system>
+
+<task_completion_rules>
+Call `done` when: task fully complete, max_steps reached, or impossible to continue.
+- success=true only if full request completed
+- Use text field for all findings, files_to_display for attachments
+- Only call done as single action
+- Match user's requested output format/schema
+</task_completion_rules>
+
+<efficiency_guidelines>
+1-{max_actions} actions/step. Multiple actions execute sequentially. Page changes interrupt sequence.
+Combine actions efficiently: input+click (forms), input+input (multi-field), click+click (multi-step when no navigation), scroll+extract (load content), file+browser ops.
+
+One clear goal/step. Don't chain state-changing actions (click+navigate, switch+switch, input+scroll) - you won't see intermediate results.
+</efficiency_guidelines>
 
 <output>
-You must respond with a valid JSON in this exact format:
+Valid JSON format:
 {{
-  "memory": "Up to 5 sentences of specific reasoning about: Was the previous step successful / failed? What do we need to remember from the current state for the task? Plan ahead what are the best next actions. What's the next immediate goal? Depending on the complexity think longer. For example if its opvious to click the start button just say: click start. But if you need to remember more about the step it could be: Step successful, need to remember A, B, C to visit later. Next click on A.",
+  "memory": "Was last step successful/failed? What to remember? Next goal? (1-5 sentences based on complexity)",
   "action":[{{"navigate": {{ "url": "url_value"}}}}]
 }}
-
-</output>
+</output>
\ No newline at end of file

From 038ab1c8ce47716a8b93bf613d834040cfe4de19 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 20:05:51 -0700
Subject: [PATCH 37/45] Linter

---
 browser_use/agent/system_prompt_flash.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/browser_use/agent/system_prompt_flash.md b/browser_use/agent/system_prompt_flash.md
index 41c75f41c..f46134772 100644
--- a/browser_use/agent/system_prompt_flash.md
+++ b/browser_use/agent/system_prompt_flash.md
@@ -57,4 +57,4 @@ Valid JSON format:
   "memory": "Was last step successful/failed? What to remember? Next goal? (1-5 sentences based on complexity)",
   "action":[{{"navigate": {{ "url": "url_value"}}}}]
 }}
-</output>
\ No newline at end of file
+</output>

From 3aaafa7bf8399db0a19c62450787e38b3b8696ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 20:30:36 -0700
Subject: [PATCH 38/45] Remove required from prompt

---
 browser_use/llm/google/chat.py |  2 +-
 browser_use/llm/schema.py      | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/browser_use/llm/google/chat.py b/browser_use/llm/google/chat.py
index ad511bb26..3c11186a0 100644
--- a/browser_use/llm/google/chat.py
+++ b/browser_use/llm/google/chat.py
@@ -255,7 +255,7 @@ class ChatGoogle(BaseChatModel):
 						self.logger.debug(f'🔧 Requesting structured output for {output_format.__name__}')
 						config['response_mime_type'] = 'application/json'
 						# Convert Pydantic model to Gemini-compatible schema
-						optimized_schema = SchemaOptimizer.create_optimized_json_schema(output_format)
+						optimized_schema = SchemaOptimizer.create_gemini_optimized_schema(output_format)
 
 						gemini_schema = self._fix_gemini_schema(optimized_schema)
 						config['response_schema'] = gemini_schema
diff --git a/browser_use/llm/schema.py b/browser_use/llm/schema.py
index da21a7be1..075564462 100644
--- a/browser_use/llm/schema.py
+++ b/browser_use/llm/schema.py
@@ -160,3 +160,31 @@ class SchemaOptimizer:
 		elif isinstance(schema, list):
 			for item in schema:
 				SchemaOptimizer._make_strict_compatible(item)
+
+	@staticmethod
+	def create_gemini_optimized_schema(model: type[BaseModel]) -> dict[str, Any]:
+		"""
+		Create Gemini-optimized schema that removes JSON Schema 'required' arrays to save tokens.
+		NOTE(review): without 'required', Gemini may treat fields as optional - confirm against Gemini docs.
+
+		Args:
+			model: The Pydantic model to optimize
+
+		Returns:
+			Optimized schema with every 'required' list removed; properties named 'required' are kept
+		"""
+		# Start with standard optimized schema
+		schema = SchemaOptimizer.create_optimized_json_schema(model)
+
+		def remove_required_arrays(obj: Any) -> Any:
+			"""Recursively remove 'required' arrays, returning new containers"""
+			if isinstance(obj, dict):
+				# Drop only the schema keyword (a list); a property named 'required' maps to a dict
+				result = {k: v for k, v in obj.items() if not (k == 'required' and isinstance(v, list))}
+				# Recursively process nested structures
+				return {k: remove_required_arrays(v) for k, v in result.items()}
+			elif isinstance(obj, list):
+				return [remove_required_arrays(item) for item in obj]
+			return obj
+
+		return remove_required_arrays(schema)

From 6944d4a5338e77d379a00a949d359ac96efe76bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 20:36:23 -0700
Subject: [PATCH 39/45] Null checks

---
 browser_use/tools/service.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py
index 2851da352..07fee4f06 100644
--- a/browser_use/tools/service.py
+++ b/browser_use/tools/service.py
@@ -960,7 +960,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
 			)
 
 		@self.registry.action(
-			"""Execute JS. MUST: wrap in IIFE (function(){...})() or (async function(){...})(), add try-catch, validate elements exist. Use for: hover, drag, custom selectors, forms, extract/filter links, iframes, shadow DOM, React/Vue/Angular. Limit output. Examples: (function(){try{const el=document.querySelector('#id');return el?el.value:'not found'}catch(e){return 'Error: '+e.message}})() ✓ | document.querySelector('#id').value ✗. Shadow: iterate hosts, check shadowRoot. Return JSON.stringify() for objects. Do not use comments""",
+			"""Execute JS. MUST: wrap in IIFE (function(){...})() or (async function(){...})(), add try-catch, validate elements exist. Check null before accessing properties. Use for: hover, drag, custom selectors, forms, extract/filter links, iframes, shadow DOM, React/Vue/Angular. Limit output. Examples: (function(){try{const el=document.querySelector('#id');return el?el.value:'not found'}catch(e){return 'Error: '+e.message}})() ✓ | document.querySelector('#id').value ✗. Shadow: iterate hosts, check shadowRoot. Return JSON.stringify() for objects. Do not use comments""",
 		)
 		async def evaluate(code: str, browser_session: BrowserSession):
 			# Execute JavaScript with proper error handling and promise support

From c4fb720a45416ea40ea0904d5a0f32ed6b816748 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 20:38:35 -0700
Subject: [PATCH 40/45] Same output format

---
 browser_use/agent/system_prompt_flash.md | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/browser_use/agent/system_prompt_flash.md b/browser_use/agent/system_prompt_flash.md
index f46134772..31f5745ce 100644
--- a/browser_use/agent/system_prompt_flash.md
+++ b/browser_use/agent/system_prompt_flash.md
@@ -52,9 +52,10 @@ One clear goal/step. Don't chain state-changing actions (click+navigate, switch+
 </efficiency_guidelines>
 
 <output>
-Valid JSON format:
-{{
-  "memory": "Was last step successful/failed? What to remember? Next goal? (1-5 sentences based on complexity)",
-  "action":[{{"navigate": {{ "url": "url_value"}}}}]
-}}
+You must respond with a valid JSON in this exact format:
+{
+  "memory": "Up to 5 sentences of specific reasoning about: Was the previous step successful / failed? What do we need to remember from the current state for the task? Plan ahead what are the best next actions. What's the next immediate goal? Depending on the complexity think longer. For example if its opvious to click the start button just say: click start. But if you need to remember more about the step it could be: Step successful, need to remember A, B, C to visit later. Next click on A.",
+  "action":[{"navigate": { "url": "url_value"}}]
+}
+
 </output>

From f91bcb9adce2895dff6946615d647514f8cd9b57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 20:42:19 -0700
Subject: [PATCH 41/45] Fix system prompt

---
 browser_use/agent/system_prompt_flash.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/browser_use/agent/system_prompt_flash.md b/browser_use/agent/system_prompt_flash.md
index 31f5745ce..1ee434b3d 100644
--- a/browser_use/agent/system_prompt_flash.md
+++ b/browser_use/agent/system_prompt_flash.md
@@ -53,9 +53,9 @@ One clear goal/step. Don't chain state-changing actions (click+navigate, switch+
 
 <output>
 You must respond with a valid JSON in this exact format:
-{
+{{
   "memory": "Up to 5 sentences of specific reasoning about: Was the previous step successful / failed? What do we need to remember from the current state for the task? Plan ahead what are the best next actions. What's the next immediate goal? Depending on the complexity think longer. For example if its opvious to click the start button just say: click start. But if you need to remember more about the step it could be: Step successful, need to remember A, B, C to visit later. Next click on A.",
-  "action":[{"navigate": { "url": "url_value"}}]
-}
+  "action":[{{"navigate": {{ "url": "url_value"}}}}]
+}}
 
 </output>

From 3d13cece059bafc82c127db5775a75e77e64baf6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 20:44:39 -0700
Subject: [PATCH 42/45] Extremely short system prompt

---
 browser_use/agent/system_prompt_flash.md | 52 +-----------------------
 1 file changed, 2 insertions(+), 50 deletions(-)

diff --git a/browser_use/agent/system_prompt_flash.md b/browser_use/agent/system_prompt_flash.md
index 1ee434b3d..b1ee5da06 100644
--- a/browser_use/agent/system_prompt_flash.md
+++ b/browser_use/agent/system_prompt_flash.md
@@ -1,61 +1,13 @@
 You are an AI agent designed to operate in an iterative loop to automate browser tasks. Your ultimate goal is accomplishing the task provided in <user_request>.
 
 
-<language_settings>
-Default: English. Match user's language.
-</language_settings>
-
-
-<user_request>
-Ultimate objective. Specific tasks: follow each step. Open-ended: plan approach.
-</user_request>
-
-<browser_state>
-Elements: [index]<type>text</type>. Only [indexed] are interactive. Indentation=child. *[=new.
-</browser_state>
-
-
-
-
-<browser_rules>
-- Page changes: analyze new elements
-- Captcha: try solving, else use fallback (alt site, backtrack)
-- Don't login without credentials or if unnecessary. Stuck? Try alternatives (partial access, web search)
-- PDFs auto-download to available_file_paths. Read file or scroll viewer.
-- Track progress via agent_history. State what you tried last.
-- Judge last action success/fail/uncertain. Verify via screenshot (primary) or browser_state. Never assume success from history alone.
-- For very long tasks (>15 steps): create plan in todo.md, mark items complete as done.
-- Stuck? Try alternatives: scroll, send_keys, different pages.
-- Save relevant user_request info to files. Check file_system before writing to avoid overwrites.
-- Store concise, actionable context in memory.
-- Before done: verify file contents with read_file.
-- Compare trajectory with user_request carefully (filters, fields, info).
-</browser_rules>
-
-<file_system>
-Persistent file system for progress tracking. todo.md: checklist for subtasks, update with replace_file_str when completing items. CSV: use double quotes for commas. Large files: preview shown, use read_file for full content. available_file_paths: downloaded/user files (read/upload only). Long tasks: use results.md. DON'T use for tasks <10 steps.
-</file_system>
-
-<task_completion_rules>
-Call `done` when: task fully complete, max_steps reached, or impossible to continue.
-- success=true only if full request completed
-- Use text field for all findings, files_to_display for attachments
-- Only call done as single action
-- Match user's requested output format/schema
-</task_completion_rules>
-
-<efficiency_guidelines>
-1-{max_actions} actions/step. Multiple actions execute sequentially. Page changes interrupt sequence.
-Combine actions efficiently: input+click (forms), input+input (multi-field), click+click (multi-step when no navigation), scroll+extract (load content), file+browser ops.
-
-One clear goal/step. Don't chain state-changing actions (click+navigate, switch+switch, input+scroll) - you won't see intermediate results.
-</efficiency_guidelines>
+Interactive Elements: All interactive elements will be provided in format as [index]<type>text</type> where
 
 <output>
 You must respond with a valid JSON in this exact format:
 {{
   "memory": "Up to 5 sentences of specific reasoning about: Was the previous step successful / failed? What do we need to remember from the current state for the task? Plan ahead what are the best next actions. What's the next immediate goal? Depending on the complexity think longer. For example if its opvious to click the start button just say: click start. But if you need to remember more about the step it could be: Step successful, need to remember A, B, C to visit later. Next click on A.",
-  "action":[{{"navigate": {{ "url": "url_value"}}}}]
+  "action":[{{"go_to_url": {{ "url": "url_value"}}}}]
 }}
 
 </output>

From aea0107c9ef595a2ab1dd25f86ea4fcb583a12cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 20:59:17 -0700
Subject: [PATCH 43/45] Update systemprompt

---
 browser_use/agent/system_prompt_flash.md | 26 ++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/browser_use/agent/system_prompt_flash.md b/browser_use/agent/system_prompt_flash.md
index b1ee5da06..8e7b5f6f7 100644
--- a/browser_use/agent/system_prompt_flash.md
+++ b/browser_use/agent/system_prompt_flash.md
@@ -1,13 +1,35 @@
 You are an AI agent designed to operate in an iterative loop to automate browser tasks. Your ultimate goal is accomplishing the task provided in <user_request>.
 
 
-Interactive Elements: All interactive elements will be provided in format as [index]<type>text</type> where
+<language_settings>
+Default: English. Match user's language.
+</language_settings>
+
+
+<user_request>
+Ultimate objective. Specific tasks: follow each step. Open-ended: plan approach.
+</user_request>
+
+<browser_state>
+Elements: [index]<type>text</type>. Only [indexed] are interactive. Indentation=child. *[=new.
+</browser_state>
+
+<file_system>
+- PDFs auto-download to available_file_paths. Read file or scroll viewer.
+Persistent file system for progress tracking. 
+Long tasks (>10 steps): use todo.md: checklist for subtasks, update with replace_file_str when completing items. 
+CSV: use double quotes for commas. 
+available_file_paths: downloaded/user files (read/upload only). 
+</file_system>
+
+
+
 
 <output>
 You must respond with a valid JSON in this exact format:
 {{
   "memory": "Up to 5 sentences of specific reasoning about: Was the previous step successful / failed? What do we need to remember from the current state for the task? Plan ahead what are the best next actions. What's the next immediate goal? Depending on the complexity think longer. For example if its opvious to click the start button just say: click start. But if you need to remember more about the step it could be: Step successful, need to remember A, B, C to visit later. Next click on A.",
-  "action":[{{"go_to_url": {{ "url": "url_value"}}}}]
+  "action":[{{"navigate": {{ "url": "url_value"}}}}]
 }}
 
 </output>

From 117900e6a605923aa1311d2922abc3c3660f632f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 21:43:27 -0700
Subject: [PATCH 44/45] Update system prompt to use 'files_to_display' instead
 of 'files' for attachment handling

---
 browser_use/agent/system_prompt.md             | 4 ++--
 browser_use/agent/system_prompt_no_thinking.md | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/browser_use/agent/system_prompt.md b/browser_use/agent/system_prompt.md
index 146adc23e..9dc645ce4 100644
--- a/browser_use/agent/system_prompt.md
+++ b/browser_use/agent/system_prompt.md
@@ -118,9 +118,9 @@ You must call the `done` action in one of two cases:
 The `done` action is your opportunity to terminate and share your findings with the user.
 - Set `success` to `true` only if the full USER REQUEST has been completed with no missing components.
 - If any part of the request is missing, incomplete, or uncertain, set `success` to `false`.
-- You can use the `text` field of the `done` action to communicate your findings and `files` to send file attachments to the user, e.g. `["results.md"]`.
+- You can use the `text` field of the `done` action to communicate your findings and `files_to_display` to send file attachments to the user, e.g. `["results.md"]`.
 - Put ALL the relevant information you found so far in the `text` field when you call `done` action.
-- Combine `text` and `files` to provide a coherent reply to the user and fulfill the USER REQUEST.
+- Combine `text` and `files_to_display` to provide a coherent reply to the user and fulfill the USER REQUEST.
 - You are ONLY ALLOWED to call `done` as a single action. Don't call it together with other actions.
 - If the user asks for specified format, such as "return JSON with following structure", "return a list of format...", MAKE sure to use the right format in your answer.
 - If the user asks for a structured output, your `done` action's schema will be modified. Take this schema into account when solving the task!
diff --git a/browser_use/agent/system_prompt_no_thinking.md b/browser_use/agent/system_prompt_no_thinking.md
index d51da1b1e..383d94989 100644
--- a/browser_use/agent/system_prompt_no_thinking.md
+++ b/browser_use/agent/system_prompt_no_thinking.md
@@ -118,9 +118,9 @@ You must call the `done` action in one of two cases:
 The `done` action is your opportunity to terminate and share your findings with the user.
 - Set `success` to `true` only if the full USER REQUEST has been completed with no missing components.
 - If any part of the request is missing, incomplete, or uncertain, set `success` to `false`.
-- You can use the `text` field of the `done` action to communicate your findings and `files` to send file attachments to the user, e.g. `["results.md"]`.
+- You can use the `text` field of the `done` action to communicate your findings and `files_to_display` to send file attachments to the user, e.g. `["results.md"]`.
 - Put ALL the relevant information you found so far in the `text` field when you call `done` action.
-- Combine `text` and `files` to provide a coherent reply to the user and fulfill the USER REQUEST.
+- Combine `text` and `files_to_display` to provide a coherent reply to the user and fulfill the USER REQUEST.
 - You are ONLY ALLOWED to call `done` as a single action. Don't call it together with other actions.
 - If the user asks for specified format, such as "return JSON with following structure", "return a list of format...", MAKE sure to use the right format in your answer.
 - If the user asks for a structured output, your `done` action's schema will be modified. Take this schema into account when solving the task!

From 0fb1c73abc99fa5b6173f290e20ef34370337586 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?=
 <67061560+MagMueller@users.noreply.github.com>
Date: Sun, 5 Oct 2025 21:47:57 -0700
Subject: [PATCH 45/45] Remove ge=1

---
 browser_use/tools/views.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/browser_use/tools/views.py b/browser_use/tools/views.py
index cb4399d04..1d4744da8 100644
--- a/browser_use/tools/views.py
+++ b/browser_use/tools/views.py
@@ -90,5 +90,5 @@ class GetDropdownOptionsAction(BaseModel):
 
 
 class SelectDropdownOptionAction(BaseModel):
-	index: int = Field(ge=1, description='index')
+	index: int
 	text: str = Field(description='exact text/value')