Refactoring and Bugfixes (#2525)

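- Give functions clearer names (e.g. `add_state_message` -> `create_state_messages`, `_add_message_with_type` -> `_set_message_with_type`)
- Fix bug with page-filtered actions
- Introduce context messages
- Include `<read_state>` only when it has content
- Default `max_history_items` to None so the cache is not broken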
This commit is contained in:
Mert Unsal
2025-08-05 15:38:51 +01:00
committed by GitHub
6 changed files with 53 additions and 37 deletions

View File

@@ -126,7 +126,7 @@ class MessageManager:
self.last_input_messages = []
# Only initialize messages if state is empty
if len(self.state.history.get_messages()) == 0:
self._add_message_with_type(self.system_prompt, 'system')
self._set_message_with_type(self.system_prompt, 'system')
@property
def agent_history_description(self) -> str:
@@ -162,7 +162,6 @@ class MessageManager:
task_update_item = HistoryItem(system_message=f'User updated <user_request> to: {new_task}')
self.state.agent_history_items.append(task_update_item)
@observe_debug(ignore_input=True, ignore_output=True, name='update_agent_history_description')
def _update_agent_history_description(
self,
model_output: AgentOutput | None = None,
@@ -244,9 +243,9 @@ class MessageManager:
return ''
@observe_debug(ignore_input=True, ignore_output=True, name='add_state_message')
@time_execution_sync('--add_state_message')
def add_state_message(
@observe_debug(ignore_input=True, ignore_output=True, name='create_state_messages')
@time_execution_sync('--create_state_messages')
def create_state_messages(
self,
browser_state_summary: BrowserStateSummary,
model_output: AgentOutput | None = None,
@@ -257,8 +256,12 @@ class MessageManager:
sensitive_data=None,
available_file_paths: list[str] | None = None, # Always pass current available_file_paths
) -> None:
"""Add browser state as human message"""
"""Create single state message with all content"""
# Clear contextual messages from previous steps to prevent accumulation
self.state.history.context_messages.clear()
# First, update the agent history items with the latest step results
self._update_agent_history_description(model_output, result, step_info)
if sensitive_data:
self.sensitive_data_description = self._get_sensitive_data_description(browser_state_summary.url)
@@ -268,7 +271,7 @@ class MessageManager:
if browser_state_summary.screenshot:
screenshots.append(browser_state_summary.screenshot)
# otherwise add state message and result to next message (which will not stay in memory)
# Create single state message with all content
assert browser_state_summary
state_message = AgentMessagePrompt(
browser_state_summary=browser_state_summary,
@@ -285,7 +288,8 @@ class MessageManager:
vision_detail_level=self.vision_detail_level,
).get_user_message(use_vision)
self._add_message_with_type(state_message, 'state')
# Set the state message with caching enabled
self._set_message_with_type(state_message, 'state')
def _log_history_lines(self) -> str:
"""Generate a formatted log string of message history for debugging / printing to terminal"""
@@ -333,9 +337,8 @@ class MessageManager:
self.last_input_messages = self.state.history.get_messages()
return self.last_input_messages
def _add_message_with_type(self, message: BaseMessage, message_type: Literal['system', 'state', 'consistent']) -> None:
"""Add message to history"""
def _set_message_with_type(self, message: BaseMessage, message_type: Literal['system', 'state']) -> None:
"""Replace a specific state message slot with a new message"""
# filter out sensitive data from the message
if self.sensitive_data:
message = self._filter_sensitive_data(message)
@@ -344,10 +347,16 @@ class MessageManager:
self.state.history.system_message = message
elif message_type == 'state':
self.state.history.state_message = message
elif message_type == 'consistent':
self.state.history.consistent_messages.append(message)
else:
raise ValueError(f'Invalid message type: {message_type}')
raise ValueError(f'Invalid state message type: {message_type}')
def _add_context_message(self, message: BaseMessage) -> None:
"""Add a contextual message specific to this step (e.g., validation errors, retry instructions, timeout warnings)"""
# filter out sensitive data from the message
if self.sensitive_data:
message = self._filter_sensitive_data(message)
self.state.history.context_messages.append(message)
@time_execution_sync('--filter_sensitive_data')
def _filter_sensitive_data(self, message: BaseMessage) -> BaseMessage:

View File

@@ -72,17 +72,17 @@ class MessageHistory(BaseModel):
system_message: BaseMessage | None = None
state_message: BaseMessage | None = None
consistent_messages: list[BaseMessage] = Field(default_factory=list)
context_messages: list[BaseMessage] = Field(default_factory=list)
model_config = ConfigDict(arbitrary_types_allowed=True)
def get_messages(self) -> list[BaseMessage]:
"""Get all messages"""
"""Get all messages in the correct order: system -> state -> contextual"""
messages = []
if self.system_message:
messages.append(self.system_message)
if self.state_message:
messages.append(self.state_message)
messages.extend(self.consistent_messages)
messages.extend(self.context_messages)
return messages

View File

@@ -218,6 +218,7 @@ Available tabs:
@observe_debug(ignore_input=True, ignore_output=True, name='get_user_message')
def get_user_message(self, use_vision: bool = True) -> UserMessage:
"""Get complete state as a single cached message"""
# Don't pass screenshot to model if page is a new tab page, step is 0, and there's only one tab
if (
is_new_tab_page(self.browser_state.url)
@@ -227,6 +228,7 @@ Available tabs:
):
use_vision = False
# Build complete state description
state_description = (
'<agent_history>\n'
+ (self.agent_history_description.strip('\n') if self.agent_history_description else '')
@@ -234,14 +236,15 @@ Available tabs:
)
state_description += '<agent_state>\n' + self._get_agent_state_description().strip('\n') + '\n</agent_state>\n'
state_description += '<browser_state>\n' + self._get_browser_state_description().strip('\n') + '\n</browser_state>\n'
state_description += (
'<read_state>\n'
+ (self.read_state_description.strip('\n') if self.read_state_description else '')
+ '\n</read_state>\n'
)
# Only add read_state if it has content
read_state_description = self.read_state_description.strip('\n').strip() if self.read_state_description else ''
if read_state_description:
state_description += '<read_state>\n' + read_state_description + '\n</read_state>\n'
if self.page_filtered_actions:
state_description += 'For this page, these additional actions are available:\n'
state_description += '<page_specific_actions>\n'
state_description += self.page_filtered_actions + '\n'
state_description += '</page_specific_actions>\n'
if use_vision is True and self.screenshots:
# Start with text description

View File

@@ -168,7 +168,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
max_actions_per_step: int = 10,
use_thinking: bool = True,
flash_mode: bool = False,
max_history_items: int = 40,
max_history_items: int | None = None,
page_extraction_llm: BaseChatModel | None = None,
planner_llm: BaseChatModel | None = None, # Deprecated
planner_interval: int = 1, # Deprecated
@@ -728,13 +728,9 @@ class Agent(Generic[Context, AgentStructuredOutput]):
# Get page-specific filtered actions
page_filtered_actions = self.controller.registry.get_prompt_description(current_page)
# If there are page-specific actions, add them as a special message for this step only
if page_filtered_actions:
page_action_message = f'For this page, these additional actions are available:\n{page_filtered_actions}'
self._message_manager._add_message_with_type(UserMessage(content=page_action_message), 'consistent')
self.logger.debug(f'💬 Step {self.state.n_steps}: Adding state message to context...')
self._message_manager.add_state_message(
# Page-specific actions will be included directly in the browser_state message
self.logger.debug(f'💬 Step {self.state.n_steps}: Creating state messages for context...')
self._message_manager.create_state_messages(
browser_state_summary=browser_state_summary,
model_output=self.state.last_model_output,
result=self.state.last_result,
@@ -814,11 +810,16 @@ class Agent(Generic[Context, AgentStructuredOutput]):
prefix = f'❌ Result failed {self.state.consecutive_failures + 1}/{self.settings.max_failures} times:\n '
self.state.consecutive_failures += 1
# TODO: figure out what to do here
if isinstance(error, (ValidationError, ValueError)):
self.logger.error(f'{prefix}{error_msg}')
# Add context message to help model fix validation errors
validation_hint = 'Your output format was invalid. Please follow the exact schema structure required for actions.'
# self._message_manager._add_context_message(UserMessage(content=validation_hint))
if 'Max token limit reached' in error_msg:
# TODO: figure out what to do here
pass
token_hint = 'Your response was too long. Keep your thinking and output concise.'
# self._message_manager._add_context_message(UserMessage(content=token_hint))
# Handle InterruptedError specially
elif isinstance(error, InterruptedError):
error_msg = 'The agent was interrupted mid-step' + (f' - {error}' if error else '')
@@ -828,6 +829,9 @@ class Agent(Generic[Context, AgentStructuredOutput]):
logger.debug(f'Model: {self.llm.model} failed')
error_msg += '\n\nReturn a valid JSON object with the required fields.'
logger.error(f'{prefix}{error_msg}')
# Add context message to help model fix parsing errors
parse_hint = 'Your response could not be parsed. Return a valid JSON object with the required fields.'
# self._message_manager._add_context_message(UserMessage(content=parse_hint))
else:
from anthropic import RateLimitError as AnthropicRateLimitError
from google.api_core.exceptions import ResourceExhausted
@@ -898,7 +902,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
msg += '\nIf the task is fully finished, set success in "done" to true.'
msg += '\nInclude everything you found out for the ultimate task in the done text.'
self.logger.info('Last step finishing up')
self._message_manager._add_message_with_type(UserMessage(content=msg), 'consistent')
self._message_manager._add_context_message(UserMessage(content=msg))
self.AgentOutput = self.DoneAgentOutput
async def _get_model_output_with_retry(self, input_messages: list[BaseMessage]) -> AgentOutput:

View File

@@ -54,7 +54,7 @@ class AgentSettings(BaseModel):
max_actions_per_step: int = 10
use_thinking: bool = True
flash_mode: bool = False # If enabled, disables evaluation_previous_goal and next_goal, and sets use_thinking = False
max_history_items: int = 40
max_history_items: int | None = None
page_extraction_llm: BaseChatModel | None = None
planner_llm: BaseChatModel | None = None

View File

@@ -111,7 +111,7 @@ async def test_single_step_parametrized(llm_class, model_name):
# Create mock state message
mock_message = create_mock_state_message(temp_dir)
agent.message_manager._add_message_with_type(mock_message, 'state')
agent.message_manager._set_message_with_type(mock_message, 'state')
messages = agent.message_manager.get_messages()
@@ -152,7 +152,7 @@ async def test_single_step():
print(mock_message.content)
print('\n' + '=' * 50 + '\n')
agent.message_manager._add_message_with_type(mock_message, 'state')
agent.message_manager._set_message_with_type(mock_message, 'state')
messages = agent.message_manager.get_messages()