Refactoring and Bugfixes (#2525)

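- Rename functions for clarity (`add_state_message` -> `create_state_messages`, `_add_message_with_type` -> `_set_message_with_type`)
- Fix bug with page-filtered actions (they are now included in the state message instead of being added as a separate message)
- Introduce context messages (`context_messages` replaces `consistent_messages`)
- Include read state only when it has content
- Default `max_history_items` to `None` so as not to break the cache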
2026-05-13 17:56:35 +02:00 · 2025-08-05 15:38:51 +01:00
parent 3cf6811d54 6a15ed4c4e
commit 85153def49
6 changed files with 53 additions and 37 deletions
--- a/browser_use/agent/message_manager/service.py
+++ b/browser_use/agent/message_manager/service.py
@@ -126,7 +126,7 @@ class MessageManager:
 		self.last_input_messages = []
 		# Only initialize messages if state is empty
 		if len(self.state.history.get_messages()) == 0:
-			self._add_message_with_type(self.system_prompt, 'system')
+			self._set_message_with_type(self.system_prompt, 'system')

 	@property
 	def agent_history_description(self) -> str:
@@ -162,7 +162,6 @@ class MessageManager:
 		task_update_item = HistoryItem(system_message=f'User updated <user_request> to: {new_task}')
 		self.state.agent_history_items.append(task_update_item)

-	@observe_debug(ignore_input=True, ignore_output=True, name='update_agent_history_description')
 	def _update_agent_history_description(
 		self,
 		model_output: AgentOutput | None = None,
@@ -244,9 +243,9 @@ class MessageManager:

 		return ''

-	@observe_debug(ignore_input=True, ignore_output=True, name='add_state_message')
-	@time_execution_sync('--add_state_message')
-	def add_state_message(
+	@observe_debug(ignore_input=True, ignore_output=True, name='create_state_messages')
+	@time_execution_sync('--create_state_messages')
+	def create_state_messages(
 		self,
 		browser_state_summary: BrowserStateSummary,
 		model_output: AgentOutput | None = None,
@@ -257,8 +256,12 @@ class MessageManager:
 		sensitive_data=None,
 		available_file_paths: list[str] | None = None,  # Always pass current available_file_paths
 	) -> None:
-		"""Add browser state as human message"""
+		"""Create single state message with all content"""

+		# Clear contextual messages from previous steps to prevent accumulation
+		self.state.history.context_messages.clear()
+
+		# First, update the agent history items with the latest step results
 		self._update_agent_history_description(model_output, result, step_info)
 		if sensitive_data:
 			self.sensitive_data_description = self._get_sensitive_data_description(browser_state_summary.url)
@@ -268,7 +271,7 @@ class MessageManager:
 		if browser_state_summary.screenshot:
 			screenshots.append(browser_state_summary.screenshot)

-		# otherwise add state message and result to next message (which will not stay in memory)
+		# Create single state message with all content
 		assert browser_state_summary
 		state_message = AgentMessagePrompt(
 			browser_state_summary=browser_state_summary,
@@ -285,7 +288,8 @@ class MessageManager:
 			vision_detail_level=self.vision_detail_level,
 		).get_user_message(use_vision)

-		self._add_message_with_type(state_message, 'state')
+		# Set the state message with caching enabled
+		self._set_message_with_type(state_message, 'state')

 	def _log_history_lines(self) -> str:
 		"""Generate a formatted log string of message history for debugging / printing to terminal"""
@@ -333,9 +337,8 @@ class MessageManager:
 		self.last_input_messages = self.state.history.get_messages()
 		return self.last_input_messages

-	def _add_message_with_type(self, message: BaseMessage, message_type: Literal['system', 'state', 'consistent']) -> None:
-		"""Add message to history"""
-
+	def _set_message_with_type(self, message: BaseMessage, message_type: Literal['system', 'state']) -> None:
+		"""Replace a specific state message slot with a new message"""
 		# filter out sensitive data from the message
 		if self.sensitive_data:
 			message = self._filter_sensitive_data(message)
@@ -344,10 +347,16 @@ class MessageManager:
 			self.state.history.system_message = message
 		elif message_type == 'state':
 			self.state.history.state_message = message
-		elif message_type == 'consistent':
-			self.state.history.consistent_messages.append(message)
 		else:
-			raise ValueError(f'Invalid message type: {message_type}')
+			raise ValueError(f'Invalid state message type: {message_type}')
+
+	def _add_context_message(self, message: BaseMessage) -> None:
+		"""Add a contextual message specific to this step (e.g., validation errors, retry instructions, timeout warnings)"""
+		# filter out sensitive data from the message
+		if self.sensitive_data:
+			message = self._filter_sensitive_data(message)
+
+		self.state.history.context_messages.append(message)

 	@time_execution_sync('--filter_sensitive_data')
 	def _filter_sensitive_data(self, message: BaseMessage) -> BaseMessage:
--- a/browser_use/agent/message_manager/views.py
+++ b/browser_use/agent/message_manager/views.py
@@ -72,17 +72,17 @@ class MessageHistory(BaseModel):

 	system_message: BaseMessage | None = None
 	state_message: BaseMessage | None = None
-	consistent_messages: list[BaseMessage] = Field(default_factory=list)
+	context_messages: list[BaseMessage] = Field(default_factory=list)
 	model_config = ConfigDict(arbitrary_types_allowed=True)

 	def get_messages(self) -> list[BaseMessage]:
-		"""Get all messages"""
+		"""Get all messages in the correct order: system -> state -> contextual"""
 		messages = []
 		if self.system_message:
 			messages.append(self.system_message)
 		if self.state_message:
 			messages.append(self.state_message)
-		messages.extend(self.consistent_messages)
+		messages.extend(self.context_messages)

 		return messages

--- a/browser_use/agent/prompts.py
+++ b/browser_use/agent/prompts.py
@@ -218,6 +218,7 @@ Available tabs:

 	@observe_debug(ignore_input=True, ignore_output=True, name='get_user_message')
 	def get_user_message(self, use_vision: bool = True) -> UserMessage:
+		"""Get complete state as a single cached message"""
 		# Don't pass screenshot to model if page is a new tab page, step is 0, and there's only one tab
 		if (
 			is_new_tab_page(self.browser_state.url)
@@ -227,6 +228,7 @@ Available tabs:
 		):
 			use_vision = False

+		# Build complete state description
 		state_description = (
 			'<agent_history>\n'
 			+ (self.agent_history_description.strip('\n') if self.agent_history_description else '')
@@ -234,14 +236,15 @@ Available tabs:
 		)
 		state_description += '<agent_state>\n' + self._get_agent_state_description().strip('\n') + '\n</agent_state>\n'
 		state_description += '<browser_state>\n' + self._get_browser_state_description().strip('\n') + '\n</browser_state>\n'
-		state_description += (
-			'<read_state>\n'
-			+ (self.read_state_description.strip('\n') if self.read_state_description else '')
-			+ '\n</read_state>\n'
-		)
+		# Only add read_state if it has content
+		read_state_description = self.read_state_description.strip('\n').strip() if self.read_state_description else ''
+		if read_state_description:
+			state_description += '<read_state>\n' + read_state_description + '\n</read_state>\n'
+
 		if self.page_filtered_actions:
-			state_description += 'For this page, these additional actions are available:\n'
+			state_description += '<page_specific_actions>\n'
 			state_description += self.page_filtered_actions + '\n'
+			state_description += '</page_specific_actions>\n'

 		if use_vision is True and self.screenshots:
 			# Start with text description
--- a/browser_use/agent/service.py
+++ b/browser_use/agent/service.py
@@ -168,7 +168,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
 		max_actions_per_step: int = 10,
 		use_thinking: bool = True,
 		flash_mode: bool = False,
-		max_history_items: int = 40,
+		max_history_items: int | None = None,
 		page_extraction_llm: BaseChatModel | None = None,
 		planner_llm: BaseChatModel | None = None,  # Deprecated
 		planner_interval: int = 1,  # Deprecated
@@ -728,13 +728,9 @@ class Agent(Generic[Context, AgentStructuredOutput]):
 		# Get page-specific filtered actions
 		page_filtered_actions = self.controller.registry.get_prompt_description(current_page)

-		# If there are page-specific actions, add them as a special message for this step only
-		if page_filtered_actions:
-			page_action_message = f'For this page, these additional actions are available:\n{page_filtered_actions}'
-			self._message_manager._add_message_with_type(UserMessage(content=page_action_message), 'consistent')
-
-		self.logger.debug(f'💬 Step {self.state.n_steps}: Adding state message to context...')
-		self._message_manager.add_state_message(
+		# Page-specific actions will be included directly in the browser_state message
+		self.logger.debug(f'💬 Step {self.state.n_steps}: Creating state messages for context...')
+		self._message_manager.create_state_messages(
 			browser_state_summary=browser_state_summary,
 			model_output=self.state.last_model_output,
 			result=self.state.last_result,
@@ -814,11 +810,16 @@ class Agent(Generic[Context, AgentStructuredOutput]):
 		prefix = f'❌ Result failed {self.state.consecutive_failures + 1}/{self.settings.max_failures} times:\n '
 		self.state.consecutive_failures += 1

+		# TODO: figure out what to do here
 		if isinstance(error, (ValidationError, ValueError)):
 			self.logger.error(f'{prefix}{error_msg}')
+			# Add context message to help model fix validation errors
+			validation_hint = 'Your output format was invalid. Please follow the exact schema structure required for actions.'
+			# self._message_manager._add_context_message(UserMessage(content=validation_hint))
+
 			if 'Max token limit reached' in error_msg:
-				# TODO: figure out what to do here
-				pass
+				token_hint = 'Your response was too long. Keep your thinking and output concise.'
+				# self._message_manager._add_context_message(UserMessage(content=token_hint))
 		# Handle InterruptedError specially
 		elif isinstance(error, InterruptedError):
 			error_msg = 'The agent was interrupted mid-step' + (f' - {error}' if error else '')
@@ -828,6 +829,9 @@ class Agent(Generic[Context, AgentStructuredOutput]):
 			logger.debug(f'Model: {self.llm.model} failed')
 			error_msg += '\n\nReturn a valid JSON object with the required fields.'
 			logger.error(f'{prefix}{error_msg}')
+			# Add context message to help model fix parsing errors
+			parse_hint = 'Your response could not be parsed. Return a valid JSON object with the required fields.'
+			# self._message_manager._add_context_message(UserMessage(content=parse_hint))
 		else:
 			from anthropic import RateLimitError as AnthropicRateLimitError
 			from google.api_core.exceptions import ResourceExhausted
@@ -898,7 +902,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
 			msg += '\nIf the task is fully finished, set success in "done" to true.'
 			msg += '\nInclude everything you found out for the ultimate task in the done text.'
 			self.logger.info('Last step finishing up')
-			self._message_manager._add_message_with_type(UserMessage(content=msg), 'consistent')
+			self._message_manager._add_context_message(UserMessage(content=msg))
 			self.AgentOutput = self.DoneAgentOutput

 	async def _get_model_output_with_retry(self, input_messages: list[BaseMessage]) -> AgentOutput:
--- a/browser_use/agent/views.py
+++ b/browser_use/agent/views.py
@@ -54,7 +54,7 @@ class AgentSettings(BaseModel):
 	max_actions_per_step: int = 10
 	use_thinking: bool = True
 	flash_mode: bool = False  # If enabled, disables evaluation_previous_goal and next_goal, and sets use_thinking = False
-	max_history_items: int = 40
+	max_history_items: int | None = None

 	page_extraction_llm: BaseChatModel | None = None
 	planner_llm: BaseChatModel | None = None
--- a/browser_use/llm/tests/test_single_step.py
+++ b/browser_use/llm/tests/test_single_step.py
@@ -111,7 +111,7 @@ async def test_single_step_parametrized(llm_class, model_name):
 		# Create mock state message
 		mock_message = create_mock_state_message(temp_dir)

-		agent.message_manager._add_message_with_type(mock_message, 'state')
+		agent.message_manager._set_message_with_type(mock_message, 'state')

 		messages = agent.message_manager.get_messages()

@@ -152,7 +152,7 @@ async def test_single_step():
 			print(mock_message.content)
 			print('\n' + '=' * 50 + '\n')

-			agent.message_manager._add_message_with_type(mock_message, 'state')
+			agent.message_manager._set_message_with_type(mock_message, 'state')

 			messages = agent.message_manager.get_messages()