diff --git a/browser_use/agent/message_manager/service.py b/browser_use/agent/message_manager/service.py index 8126b1c4f..ac34d53fe 100644 --- a/browser_use/agent/message_manager/service.py +++ b/browser_use/agent/message_manager/service.py @@ -126,7 +126,7 @@ class MessageManager: self.last_input_messages = [] # Only initialize messages if state is empty if len(self.state.history.get_messages()) == 0: - self._add_message_with_type(self.system_prompt, 'system') + self._set_message_with_type(self.system_prompt, 'system') @property def agent_history_description(self) -> str: @@ -162,7 +162,6 @@ class MessageManager: task_update_item = HistoryItem(system_message=f'User updated to: {new_task}') self.state.agent_history_items.append(task_update_item) - @observe_debug(ignore_input=True, ignore_output=True, name='update_agent_history_description') def _update_agent_history_description( self, model_output: AgentOutput | None = None, @@ -244,9 +243,9 @@ class MessageManager: return '' - @observe_debug(ignore_input=True, ignore_output=True, name='add_state_message') - @time_execution_sync('--add_state_message') - def add_state_message( + @observe_debug(ignore_input=True, ignore_output=True, name='create_state_messages') + @time_execution_sync('--create_state_messages') + def create_state_messages( self, browser_state_summary: BrowserStateSummary, model_output: AgentOutput | None = None, @@ -257,8 +256,12 @@ class MessageManager: sensitive_data=None, available_file_paths: list[str] | None = None, # Always pass current available_file_paths ) -> None: - """Add browser state as human message""" + """Create single state message with all content""" + # Clear contextual messages from previous steps to prevent accumulation + self.state.history.context_messages.clear() + + # First, update the agent history items with the latest step results self._update_agent_history_description(model_output, result, step_info) if sensitive_data: self.sensitive_data_description = self._get_sensitive_data_description(browser_state_summary.url) @@ -268,7 +271,7 @@ class MessageManager: if browser_state_summary.screenshot: screenshots.append(browser_state_summary.screenshot) - # otherwise add state message and result to next message (which will not stay in memory) + # Create single state message with all content assert browser_state_summary state_message = AgentMessagePrompt( browser_state_summary=browser_state_summary, @@ -285,7 +288,8 @@ class MessageManager: vision_detail_level=self.vision_detail_level, ).get_user_message(use_vision) - self._add_message_with_type(state_message, 'state') + # Set the state message with caching enabled + self._set_message_with_type(state_message, 'state') def _log_history_lines(self) -> str: """Generate a formatted log string of message history for debugging / printing to terminal""" @@ -333,9 +337,8 @@ class MessageManager: self.last_input_messages = self.state.history.get_messages() return self.last_input_messages - def _add_message_with_type(self, message: BaseMessage, message_type: Literal['system', 'state', 'consistent']) -> None: - """Add message to history""" - + def _set_message_with_type(self, message: BaseMessage, message_type: Literal['system', 'state']) -> None: + """Replace a specific state message slot with a new message""" # filter out sensitive data from the message if self.sensitive_data: message = self._filter_sensitive_data(message) @@ -344,10 +347,16 @@ class MessageManager: self.state.history.system_message = message elif message_type == 'state': self.state.history.state_message = message - elif message_type == 'consistent': - self.state.history.consistent_messages.append(message) else: - raise ValueError(f'Invalid message type: {message_type}') + raise ValueError(f'Invalid state message type: {message_type}') + + def _add_context_message(self, message: BaseMessage) -> None: + """Add a contextual message specific to this step (e.g., validation errors, retry instructions, timeout warnings)""" + # filter out sensitive data from the message + if self.sensitive_data: + message = self._filter_sensitive_data(message) + + self.state.history.context_messages.append(message) @time_execution_sync('--filter_sensitive_data') def _filter_sensitive_data(self, message: BaseMessage) -> BaseMessage: diff --git a/browser_use/agent/message_manager/views.py b/browser_use/agent/message_manager/views.py index 587a692a1..00926abd5 100644 --- a/browser_use/agent/message_manager/views.py +++ b/browser_use/agent/message_manager/views.py @@ -72,17 +72,17 @@ class MessageHistory(BaseModel): system_message: BaseMessage | None = None state_message: BaseMessage | None = None - consistent_messages: list[BaseMessage] = Field(default_factory=list) + context_messages: list[BaseMessage] = Field(default_factory=list) model_config = ConfigDict(arbitrary_types_allowed=True) def get_messages(self) -> list[BaseMessage]: - """Get all messages""" + """Get all messages in the correct order: system -> state -> contextual""" messages = [] if self.system_message: messages.append(self.system_message) if self.state_message: messages.append(self.state_message) - messages.extend(self.consistent_messages) + messages.extend(self.context_messages) return messages diff --git a/browser_use/agent/prompts.py b/browser_use/agent/prompts.py index 36802cea6..064ebd887 100644 --- a/browser_use/agent/prompts.py +++ b/browser_use/agent/prompts.py @@ -218,6 +218,7 @@ Available tabs: @observe_debug(ignore_input=True, ignore_output=True, name='get_user_message') def get_user_message(self, use_vision: bool = True) -> UserMessage: + """Get complete state as a single cached message""" # Don't pass screenshot to model if page is a new tab page, step is 0, and there's only one tab if ( is_new_tab_page(self.browser_state.url) @@ -227,6 +228,7 @@ Available tabs: ): use_vision = False + # Build complete state description state_description = ( '\n' + (self.agent_history_description.strip('\n') if self.agent_history_description else '') @@ -234,14 +236,15 @@ Available tabs: ) state_description += '\n' + self._get_agent_state_description().strip('\n') + '\n\n' state_description += '\n' + self._get_browser_state_description().strip('\n') + '\n\n' - state_description += ( - '\n' - + (self.read_state_description.strip('\n') if self.read_state_description else '') - + '\n\n' - ) + # Only add read_state if it has content + read_state_description = self.read_state_description.strip('\n').strip() if self.read_state_description else '' + if read_state_description: + state_description += '\n' + read_state_description + '\n\n' + if self.page_filtered_actions: - state_description += 'For this page, these additional actions are available:\n' + state_description += '\n' state_description += self.page_filtered_actions + '\n' + state_description += '\n' if use_vision is True and self.screenshots: # Start with text description diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index 6778e5f9f..41a2813ca 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -168,7 +168,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): max_actions_per_step: int = 10, use_thinking: bool = True, flash_mode: bool = False, - max_history_items: int = 40, + max_history_items: int | None = None, page_extraction_llm: BaseChatModel | None = None, planner_llm: BaseChatModel | None = None, # Deprecated planner_interval: int = 1, # Deprecated @@ -728,13 +728,9 @@ class Agent(Generic[Context, AgentStructuredOutput]): # Get page-specific filtered actions page_filtered_actions = self.controller.registry.get_prompt_description(current_page) - # If there are page-specific actions, add them as a special message for this step only - if page_filtered_actions: - page_action_message = f'For this page, these additional actions are available:\n{page_filtered_actions}' - self._message_manager._add_message_with_type(UserMessage(content=page_action_message), 'consistent') - - self.logger.debug(f'💬 Step {self.state.n_steps}: Adding state message to context...') - self._message_manager.add_state_message( + # Page-specific actions will be included directly in the browser_state message + self.logger.debug(f'💬 Step {self.state.n_steps}: Creating state messages for context...') + self._message_manager.create_state_messages( browser_state_summary=browser_state_summary, model_output=self.state.last_model_output, result=self.state.last_result, @@ -814,11 +810,16 @@ class Agent(Generic[Context, AgentStructuredOutput]): prefix = f'❌ Result failed {self.state.consecutive_failures + 1}/{self.settings.max_failures} times:\n ' self.state.consecutive_failures += 1 + # TODO: figure out what to do here if isinstance(error, (ValidationError, ValueError)): self.logger.error(f'{prefix}{error_msg}') + # Add context message to help model fix validation errors + validation_hint = 'Your output format was invalid. Please follow the exact schema structure required for actions.' + # self._message_manager._add_context_message(UserMessage(content=validation_hint)) + if 'Max token limit reached' in error_msg: - # TODO: figure out what to do here - pass + token_hint = 'Your response was too long. Keep your thinking and output concise.' + # self._message_manager._add_context_message(UserMessage(content=token_hint)) # Handle InterruptedError specially elif isinstance(error, InterruptedError): error_msg = 'The agent was interrupted mid-step' + (f' - {error}' if error else '') @@ -828,6 +829,9 @@ class Agent(Generic[Context, AgentStructuredOutput]): logger.debug(f'Model: {self.llm.model} failed') error_msg += '\n\nReturn a valid JSON object with the required fields.' logger.error(f'{prefix}{error_msg}') + # Add context message to help model fix parsing errors + parse_hint = 'Your response could not be parsed. Return a valid JSON object with the required fields.' + # self._message_manager._add_context_message(UserMessage(content=parse_hint)) else: from anthropic import RateLimitError as AnthropicRateLimitError from google.api_core.exceptions import ResourceExhausted @@ -898,7 +902,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): msg += '\nIf the task is fully finished, set success in "done" to true.' msg += '\nInclude everything you found out for the ultimate task in the done text.' self.logger.info('Last step finishing up') - self._message_manager._add_message_with_type(UserMessage(content=msg), 'consistent') + self._message_manager._add_context_message(UserMessage(content=msg)) self.AgentOutput = self.DoneAgentOutput async def _get_model_output_with_retry(self, input_messages: list[BaseMessage]) -> AgentOutput: diff --git a/browser_use/agent/views.py b/browser_use/agent/views.py index e0a82621f..4af745599 100644 --- a/browser_use/agent/views.py +++ b/browser_use/agent/views.py @@ -54,7 +54,7 @@ class AgentSettings(BaseModel): max_actions_per_step: int = 10 use_thinking: bool = True flash_mode: bool = False # If enabled, disables evaluation_previous_goal and next_goal, and sets use_thinking = False - max_history_items: int = 40 + max_history_items: int | None = None page_extraction_llm: BaseChatModel | None = None planner_llm: BaseChatModel | None = None diff --git a/browser_use/llm/tests/test_single_step.py b/browser_use/llm/tests/test_single_step.py index 10c0c0a94..9ecafed32 100644 --- a/browser_use/llm/tests/test_single_step.py +++ b/browser_use/llm/tests/test_single_step.py @@ -111,7 +111,7 @@ async def test_single_step_parametrized(llm_class, model_name): # Create mock state message mock_message = create_mock_state_message(temp_dir) - agent.message_manager._add_message_with_type(mock_message, 'state') + agent.message_manager._set_message_with_type(mock_message, 'state') messages = agent.message_manager.get_messages() @@ -152,7 +152,7 @@ async def test_single_step(): print(mock_message.content) print('\n' + '=' * 50 + '\n') - agent.message_manager._add_message_with_type(mock_message, 'state') + agent.message_manager._set_message_with_type(mock_message, 'state') messages = agent.message_manager.get_messages()