From eb833d34d90eb6997f338a298fef9528ae2db12c Mon Sep 17 00:00:00 2001
From: mertunsall <mertunsal1905@gmail.com>
Date: Wed, 23 Jul 2025 19:08:50 +0200
Subject: [PATCH 1/7] agent: split state prompt into cached history messages and per-step state messages

---
 browser_use/agent/message_manager/service.py | 132 ++++++++++++-------
 browser_use/agent/message_manager/views.py   |  20 ++-
 browser_use/agent/prompts.py                 |  76 +++++++----
 browser_use/agent/service.py                 |  12 +-
 browser_use/controller/service.py            |  18 +--
 browser_use/llm/tests/test_single_step.py    |   5 +-
 6 files changed, 169 insertions(+), 94 deletions(-)
diff --git a/browser_use/agent/message_manager/service.py b/browser_use/agent/message_manager/service.py
index 9275b754f..0d37c2f56 100644
--- a/browser_use/agent/message_manager/service.py
+++ b/browser_use/agent/message_manager/service.py
@@ -20,6 +20,7 @@ from browser_use.llm.messages import (
 	BaseMessage,
 	ContentPartTextParam,
 	SystemMessage,
+	UserMessage,
 )
 from browser_use.observability import observe_debug
 from browser_use.utils import match_url_with_domain_pattern, time_execution_sync
@@ -127,43 +128,15 @@ class MessageManager:
 		self.last_input_messages = []
 		# Only initialize messages if state is empty
 		if len(self.state.history.get_messages()) == 0:
-			self._add_message_with_type(self.system_prompt, 'system')
+			self._set_state_message(self.system_prompt, 'system')
 
-	@property
-	def agent_history_description(self) -> str:
-		"""Build agent history description from list of items, respecting max_history_items limit"""
-		if self.max_history_items is None:
-			# Include all items
-			return '\n'.join(item.to_string() for item in self.state.agent_history_items)
-
-		total_items = len(self.state.agent_history_items)
-
-		# If we have fewer items than the limit, just return all items
-		if total_items <= self.max_history_items:
-			return '\n'.join(item.to_string() for item in self.state.agent_history_items)
-
-		# We have more items than the limit, so we need to omit some
-		omitted_count = total_items - self.max_history_items
-
-		# Show first item + omitted message + most recent (max_history_items - 1) items
-		# The omitted message doesn't count against the limit, only real history items do
-		recent_items_count = self.max_history_items - 1  # -1 for first item
-
-		items_to_include = [
-			self.state.agent_history_items[0].to_string(),  # Keep first item (initialization)
-			f'<sys>[... {omitted_count} previous steps omitted...]</sys>',
-		]
-		# Add most recent items
-		items_to_include.extend([item.to_string() for item in self.state.agent_history_items[-recent_items_count:]])
-
-		return '\n'.join(items_to_include)
+	# Removed agent_history_description property - now using separate cached history messages
 
 	def add_new_task(self, new_task: str) -> None:
 		self.task = new_task
 		task_update_item = HistoryItem(system_message=f'User updated <user_request> to: {new_task}')
 		self.state.agent_history_items.append(task_update_item)
 
-	@observe_debug(ignore_input=True, ignore_output=True, name='update_agent_history_description')
 	def _update_agent_history_description(
 		self,
 		model_output: AgentOutput | None = None,
@@ -245,9 +218,9 @@ class MessageManager:
 
 		return ''
 
-	@observe_debug(ignore_input=True, ignore_output=True, name='add_state_message')
-	@time_execution_sync('--add_state_message')
-	def add_state_message(
+	@observe_debug(ignore_input=True, ignore_output=True, name='create_state_messages')
+	@time_execution_sync('--create_state_messages')
+	def create_state_messages(
 		self,
 		browser_state_summary: BrowserStateSummary,
 		model_output: AgentOutput | None = None,
@@ -259,8 +232,12 @@ class MessageManager:
 		agent_history_list: AgentHistoryList | None = None,  # Pass AgentHistoryList from agent
 		available_file_paths: list[str] | None = None,  # Always pass current available_file_paths
 	) -> None:
-		"""Add browser state as human message"""
+		"""Reconstruct all state messages from scratch for optimal caching"""
 
+		# Clear contextual messages from previous steps to prevent accumulation
+		self.state.history.context_messages.clear()
+
+		# First, update the agent history items with the latest step results
 		self._update_agent_history_description(model_output, result, step_info)
 		if sensitive_data:
 			self.sensitive_data_description = self._get_sensitive_data_description(browser_state_summary.url)
@@ -276,12 +253,11 @@ class MessageManager:
 		if browser_state_summary.screenshot:
 			screenshots.append(browser_state_summary.screenshot)
 
-		# otherwise add state message and result to next message (which will not stay in memory)
+		# Create the message prompt helper
 		assert browser_state_summary
-		state_message = AgentMessagePrompt(
+		message_prompt = AgentMessagePrompt(
 			browser_state_summary=browser_state_summary,
 			file_system=self.file_system,
-			agent_history_description=self.agent_history_description,
 			read_state_description=self.state.read_state_description,
 			task=self.task,
 			include_attributes=self.include_attributes,
@@ -290,9 +266,28 @@ class MessageManager:
 			sensitive_data=self.sensitive_data_description,
 			available_file_paths=available_file_paths,
 			screenshots=screenshots,
-		).get_user_message(use_vision)
+		)
 
-		self._add_message_with_type(state_message, 'state')
+		# Fully reconstruct all messages every time for optimal caching:
+
+		# 1. Rebuild cached history messages from history items
+		self._rebuild_history_messages()
+
+		# 2. Create fresh agent state message (not cached - changes every step)
+		agent_state_message = message_prompt.get_agent_state_message()
+
+		# 3. Create fresh browser state message with vision (not cached - changes every step)
+		browser_state_message = message_prompt.get_browser_state_message(use_vision)
+
+		# 4. Create fresh read state message (cached - only changes when new content extracted)
+		read_state_message = message_prompt.get_read_state_message()
+
+		# Replace all state messages
+		self._set_state_message(agent_state_message, 'agent_state')
+		self._set_state_message(browser_state_message, 'browser_state')
+		# read state message is optional
+		if read_state_message:
+			self._set_state_message(read_state_message, 'read_state')
 
 	def _log_history_lines(self) -> str:
 		"""Generate a formatted log string of message history for debugging / printing to terminal"""
@@ -340,21 +335,66 @@ class MessageManager:
 		self.last_input_messages = self.state.history.get_messages()
 		return self.last_input_messages
 
-	def _add_message_with_type(self, message: BaseMessage, message_type: Literal['system', 'state', 'consistent']) -> None:
-		"""Add message to history"""
+	def _create_history_message_from_item(self, item: HistoryItem) -> UserMessage:
+		"""Convert a HistoryItem into a cached UserMessage"""
+		content = item.to_string()
+		return UserMessage(content=content, cache=True)
 
+	def _rebuild_history_messages(self) -> None:
+		"""Rebuild the history messages from agent_history_items, respecting max_history_items limit"""
+		history_items = self.state.agent_history_items
+
+		if self.max_history_items is None:
+			# Include all items
+			items_to_include = history_items
+		else:
+			total_items = len(history_items)
+
+			# If we have fewer items than the limit, just return all items
+			if total_items <= self.max_history_items:
+				items_to_include = history_items
+			else:
+				# We have more items than the limit, so we need to omit some
+				omitted_count = total_items - self.max_history_items
+				recent_items_count = self.max_history_items - 1  # -1 for first item
+
+				# Include first item + most recent items
+				items_to_include = [history_items[0]] + history_items[-recent_items_count:]
+
+				# Create an omitted message and insert it between first and recent items
+				if omitted_count > 0:
+					omitted_item = HistoryItem(system_message=f'[... {omitted_count} previous steps omitted...]')
+					items_to_include = [history_items[0], omitted_item] + history_items[-recent_items_count:]
+
+		# Convert items to cached messages
+		self.state.history.history_messages = [self._create_history_message_from_item(item) for item in items_to_include]
+
+	def _set_state_message(
+		self, message: BaseMessage, message_type: Literal['system', 'agent_state', 'browser_state', 'read_state']
+	) -> None:
+		"""Replace a specific state message slot with a new message"""
 		# filter out sensitive data from the message
 		if self.sensitive_data:
 			message = self._filter_sensitive_data(message)
 
 		if message_type == 'system':
 			self.state.history.system_message = message
-		elif message_type == 'state':
-			self.state.history.state_message = message
-		elif message_type == 'consistent':
-			self.state.history.consistent_messages.append(message)
+		elif message_type == 'agent_state':
+			self.state.history.agent_state_message = message
+		elif message_type == 'browser_state':
+			self.state.history.browser_state_message = message
+		elif message_type == 'read_state':
+			self.state.history.read_state_message = message
 		else:
-			raise ValueError(f'Invalid message type: {message_type}')
+			raise ValueError(f'Invalid state message type: {message_type}')
+
+	def _add_context_message(self, message: BaseMessage) -> None:
+		"""Add a contextual message that persists across steps (e.g., page-specific actions, final step warnings)"""
+		# filter out sensitive data from the message
+		if self.sensitive_data:
+			message = self._filter_sensitive_data(message)
+
+		self.state.history.context_messages.append(message)
 
 	@time_execution_sync('--filter_sensitive_data')
 	def _filter_sensitive_data(self, message: BaseMessage) -> BaseMessage:
diff --git a/browser_use/agent/message_manager/views.py b/browser_use/agent/message_manager/views.py
index 587a692a1..f5b2d8885 100644
--- a/browser_use/agent/message_manager/views.py
+++ b/browser_use/agent/message_manager/views.py
@@ -71,18 +71,26 @@ class MessageHistory(BaseModel):
 	"""History of messages"""
 
 	system_message: BaseMessage | None = None
-	state_message: BaseMessage | None = None
-	consistent_messages: list[BaseMessage] = Field(default_factory=list)
+	history_messages: list[BaseMessage] = Field(default_factory=list)
+	agent_state_message: BaseMessage | None = None
+	browser_state_message: BaseMessage | None = None
+	read_state_message: BaseMessage | None = None
+	context_messages: list[BaseMessage] = Field(default_factory=list)
 	model_config = ConfigDict(arbitrary_types_allowed=True)
 
 	def get_messages(self) -> list[BaseMessage]:
-		"""Get all messages"""
+		"""Get all messages in the correct order: system -> history -> agent_state -> browser_state -> read_state -> contextual"""
 		messages = []
 		if self.system_message:
 			messages.append(self.system_message)
-		if self.state_message:
-			messages.append(self.state_message)
-		messages.extend(self.consistent_messages)
+		messages.extend(self.history_messages)
+		if self.agent_state_message:
+			messages.append(self.agent_state_message)
+		if self.browser_state_message:
+			messages.append(self.browser_state_message)
+		if self.read_state_message:
+			messages.append(self.read_state_message)
+		messages.extend(self.context_messages)
 
 		return messages
 
diff --git a/browser_use/agent/prompts.py b/browser_use/agent/prompts.py
index 2b8a8bbdb..95b86bfe8 100644
--- a/browser_use/agent/prompts.py
+++ b/browser_use/agent/prompts.py
@@ -79,7 +79,6 @@ class AgentMessagePrompt:
 		self,
 		browser_state_summary: 'BrowserStateSummary',
 		file_system: 'FileSystem',
-		agent_history_description: str | None = None,
 		read_state_description: str | None = None,
 		task: str | None = None,
 		include_attributes: list[str] | None = None,
@@ -92,7 +91,6 @@ class AgentMessagePrompt:
 	):
 		self.browser_state: 'BrowserStateSummary' = browser_state_summary
 		self.file_system: 'FileSystem | None' = file_system
-		self.agent_history_description: str | None = agent_history_description
 		self.read_state_description: str | None = read_state_description
 		self.task: str | None = task
 		self.include_attributes = include_attributes
@@ -104,7 +102,6 @@ class AgentMessagePrompt:
 		self.screenshots = screenshots or []
 		assert self.browser_state
 
-	@observe_debug(ignore_input=True, ignore_output=True, name='_deduplicate_screenshots')
 	def _deduplicate_screenshots(self, screenshots: list[str]) -> list[str]:
 		"""
 		Remove consecutive duplicate screenshots, keeping only the most recent of each.
@@ -134,7 +131,6 @@ class AgentMessagePrompt:
 
 		return unique_screenshots
 
-	@observe_debug(ignore_input=True, ignore_output=True, name='_get_browser_state_description')
 	def _get_browser_state_description(self) -> str:
 		elements_text = self.browser_state.element_tree.clickable_elements_to_string(include_attributes=self.include_attributes)
 
@@ -237,8 +233,13 @@ Interactive elements from top layer of the current page inside the viewport{trun
 			agent_state += '<available_file_paths>\n' + '\n'.join(self.available_file_paths) + '\n</available_file_paths>\n'
 		return agent_state
 
-	@observe_debug(ignore_input=True, ignore_output=True, name='get_user_message')
-	def get_user_message(self, use_vision: bool = True) -> UserMessage:
+	def get_agent_state_message(self) -> UserMessage:
+		"""Get agent state as a separate message (not cached - changes every step)"""
+		agent_state_content = '<agent_state>\n' + self._get_agent_state_description().strip('\n') + '\n</agent_state>'
+		return UserMessage(content=agent_state_content, cache=False)
+
+	def get_browser_state_message(self, use_vision: bool = True) -> UserMessage:
+		"""Get browser state as a separate message, optionally with screenshots"""
 		# Don't pass screenshot to model if page is a new tab page, step is 0, and there's only one tab
 		if (
 			is_new_tab_page(self.browser_state.url)
@@ -248,25 +249,16 @@ Interactive elements from top layer of the current page inside the viewport{trun
 		):
 			use_vision = False
 
-		state_description = (
-			'<agent_history>\n'
-			+ (self.agent_history_description.strip('\n') if self.agent_history_description else '')
-			+ '\n</agent_history>\n'
-		)
-		state_description += '<agent_state>\n' + self._get_agent_state_description().strip('\n') + '\n</agent_state>\n'
-		state_description += '<browser_state>\n' + self._get_browser_state_description().strip('\n') + '\n</browser_state>\n'
-		state_description += (
-			'<read_state>\n'
-			+ (self.read_state_description.strip('\n') if self.read_state_description else '')
-			+ '\n</read_state>\n'
-		)
+		browser_state_content = '<browser_state>\n' + self._get_browser_state_description().strip('\n') + '\n</browser_state>'
+
 		if self.page_filtered_actions:
-			state_description += 'For this page, these additional actions are available:\n'
-			state_description += self.page_filtered_actions + '\n'
+			browser_state_content += '\n<page_specific_actions>\n'
+			browser_state_content += self.page_filtered_actions + '\n'
+			browser_state_content += '</page_specific_actions>\n'
 
 		if use_vision is True and self.screenshots:
 			# Start with text description
-			content_parts: list[ContentPartTextParam | ContentPartImageParam] = [ContentPartTextParam(text=state_description)]
+			content_parts: list[ContentPartTextParam | ContentPartImageParam] = [ContentPartTextParam(text=browser_state_content)]
 
 			# Deduplicate screenshots, keeping only the most recent of each unique image
 			unique_screenshots = self._deduplicate_screenshots(self.screenshots)
@@ -292,6 +284,44 @@ Interactive elements from top layer of the current page inside the viewport{trun
 					)
 				)
 
-			return UserMessage(content=content_parts)
+			return UserMessage(content=content_parts, cache=False)
 
-		return UserMessage(content=state_description)
+		return UserMessage(content=browser_state_content, cache=False)
+
+	def get_read_state_message(self) -> UserMessage | None:
+		"""Get read state as a separate cached message"""
+		if not self.read_state_description:
+			return None
+		if not self.read_state_description.strip('\n').strip():
+			return None
+
+		read_state_content = '<read_state>\n' + self.read_state_description.strip('\n') + '\n</read_state>'
+		return UserMessage(content=read_state_content, cache=False)
+
+	@observe_debug(ignore_input=True, ignore_output=True, name='get_user_message')
+	def get_user_message(self, use_vision: bool = True) -> UserMessage:
+		"""
+		DEPRECATED: This method is kept for backward compatibility but should not be used.
+		Use get_agent_state_message(), get_browser_state_message(), and get_read_state_message() instead.
+		"""
+		# For backward compatibility, return all messages concatenated in a single message
+		agent_state_message = self.get_agent_state_message()
+		browser_state_message = self.get_browser_state_message(use_vision)
+		read_state_message = self.get_read_state_message()
+
+		# Flatten the three message objects into a single list of content parts,
+		# handling both raw-text (`str`) and already-structured (`list[...]`) cases.
+		content_parts: list[ContentPartTextParam | ContentPartImageParam] = []
+
+		for msg in (agent_state_message, browser_state_message, read_state_message):
+			if msg is None:
+				continue
+
+			if isinstance(msg.content, list):
+				# Message is already a list of content parts – reuse as-is.
+				content_parts.extend(msg.content)
+			else:
+				# Wrap plain text in a ContentPartTextParam.
+				content_parts.append(ContentPartTextParam(text=str(msg.content)))
+
+		return UserMessage(content=content_parts, cache=True)
diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py
index 6f5cbd9a6..df6b7fbcf 100644
--- a/browser_use/agent/service.py
+++ b/browser_use/agent/service.py
@@ -710,13 +710,9 @@ class Agent(Generic[Context, AgentStructuredOutput]):
 		# Get page-specific filtered actions
 		page_filtered_actions = self.controller.registry.get_prompt_description(current_page)
 
-		# If there are page-specific actions, add them as a special message for this step only
-		if page_filtered_actions:
-			page_action_message = f'For this page, these additional actions are available:\n{page_filtered_actions}'
-			self._message_manager._add_message_with_type(UserMessage(content=page_action_message), 'consistent')
-
-		self.logger.debug(f'💬 Step {self.state.n_steps + 1}: Adding state message to context...')
-		self._message_manager.add_state_message(
+		# Page-specific actions will be included directly in the browser_state message
+		self.logger.debug(f'💬 Step {self.state.n_steps + 1}: Creating state messages for context...')
+		self._message_manager.create_state_messages(
 			browser_state_summary=browser_state_summary,
 			model_output=self.state.last_model_output,
 			result=self.state.last_result,
@@ -881,7 +877,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
 			msg += '\nIf the task is fully finished, set success in "done" to true.'
 			msg += '\nInclude everything you found out for the ultimate task in the done text.'
 			self.logger.info('Last step finishing up')
-			self._message_manager._add_message_with_type(UserMessage(content=msg), 'consistent')
+			self._message_manager._add_context_message(UserMessage(content=msg))
 			self.AgentOutput = self.DoneAgentOutput
 
 	async def _get_model_output_with_retry(self, input_messages: list[BaseMessage]) -> AgentOutput:
diff --git a/browser_use/controller/service.py b/browser_use/controller/service.py
index c5f1e210b..975234cfb 100644
--- a/browser_use/controller/service.py
+++ b/browser_use/controller/service.py
@@ -4,7 +4,10 @@ import json
 import logging
 import os
 import re
-from typing import Generic, TypeVar, cast
+from typing import TYPE_CHECKING, Generic, TypeVar, cast
+
+if TYPE_CHECKING:
+	from browser_use.browser.types import Page
 
 try:
 	from lmnr import Laminar  # type: ignore
@@ -15,7 +18,6 @@ from pydantic import BaseModel
 
 from browser_use.agent.views import ActionModel, ActionResult
 from browser_use.browser import BrowserSession
-from browser_use.browser.types import Page
 from browser_use.browser.views import BrowserError
 from browser_use.controller.registry.service import Registry
 from browser_use.controller.views import (
@@ -296,7 +298,7 @@ Only use this for specific queries for information retrieval from the page. Don'
 		async def extract_structured_data(
 			query: str,
 			extract_links: bool,
-			page: Page,
+			page: 'Page',
 			page_extraction_llm: BaseChatModel,
 			file_system: FileSystem,
 		):
@@ -609,7 +611,7 @@ Explain the content of the page and that the requested information is not availa
 			'Send strings of special keys to use Playwright page.keyboard.press - examples include Escape, Backspace, Insert, PageDown, Delete, Enter, or Shortcuts such as `Control+o`, `Control+Shift+T`',
 			param_model=SendKeysAction,
 		)
-		async def send_keys(params: SendKeysAction, page: Page):
+		async def send_keys(params: SendKeysAction, page: 'Page'):
 			try:
 				await page.keyboard.press(params.keys)
 			except Exception as e:
@@ -630,7 +632,7 @@ Explain the content of the page and that the requested information is not availa
 		@self.registry.action(
 			description='Scroll to a text in the current page',
 		)
-		async def scroll_to_text(text: str, page: Page):  # type: ignore
+		async def scroll_to_text(text: str, page: 'Page'):  # type: ignore
 			try:
 				# Try different locator strategies
 				locators = [
@@ -913,7 +915,7 @@ Explain the content of the page and that the requested information is not availa
 				raise BrowserError(msg)
 
 		@self.registry.action('Google Sheets: Get the contents of the entire sheet', domains=['https://docs.google.com'])
-		async def read_sheet_contents(page: Page):
+		async def read_sheet_contents(page: 'Page'):
 			# select all cells
 			await page.keyboard.press('Enter')
 			await page.keyboard.press('Escape')
@@ -979,7 +981,7 @@ Explain the content of the page and that the requested information is not availa
 			)
 
 		@self.registry.action('Google Sheets: Select a specific cell or range of cells', domains=['https://docs.google.com'])
-		async def select_cell_or_range(cell_or_range: str, page: Page):
+		async def select_cell_or_range(cell_or_range: str, page: 'Page'):
 			await page.keyboard.press('Enter')  # make sure we dont delete current cell contents if we were last editing
 			await page.keyboard.press('Escape')  # to clear current focus (otherwise select range popup is additive)
 			await asyncio.sleep(0.1)
@@ -1003,7 +1005,7 @@ Explain the content of the page and that the requested information is not availa
 			'Google Sheets: Fallback method to type text into (only one) currently selected cell',
 			domains=['https://docs.google.com'],
 		)
-		async def fallback_input_into_single_selected_cell(text: str, page: Page):
+		async def fallback_input_into_single_selected_cell(text: str, page: 'Page'):
 			await page.keyboard.type(text, delay=0.1)
 			await page.keyboard.press('Enter')  # make sure to commit the input so it doesn't get overwritten by the next action
 			await page.keyboard.press('ArrowUp')
diff --git a/browser_use/llm/tests/test_single_step.py b/browser_use/llm/tests/test_single_step.py
index 10c0c0a94..3613b5e07 100644
--- a/browser_use/llm/tests/test_single_step.py
+++ b/browser_use/llm/tests/test_single_step.py
@@ -70,7 +70,6 @@ def create_mock_state_message(temp_dir: str):
 	agent_prompt = AgentMessagePrompt(
 		browser_state_summary=mock_browser_state,
 		file_system=mock_file_system,  # Now using actual FileSystem instance
-		agent_history_description='',  # Empty history
 		read_state_description='',  # Empty read state
 		task='Click the button on the page',
 		include_attributes=['id'],
@@ -111,7 +110,7 @@ async def test_single_step_parametrized(llm_class, model_name):
 		# Create mock state message
 		mock_message = create_mock_state_message(temp_dir)
 
-		agent.message_manager._add_message_with_type(mock_message, 'state')
+		agent.message_manager._set_state_message(mock_message, 'agent_state')
 
 		messages = agent.message_manager.get_messages()
 
@@ -152,7 +151,7 @@ async def test_single_step():
 			print(mock_message.content)
 			print('\n' + '=' * 50 + '\n')
 
-			agent.message_manager._add_message_with_type(mock_message, 'state')
+			agent.message_manager._set_state_message(mock_message, 'agent_state')
 
 			messages = agent.message_manager.get_messages()
 

From fd3d04537a6fdedd00a993fb5bacd9170ad06792 Mon Sep 17 00:00:00 2001
From: mertunsall <mertunsal1905@gmail.com>
Date: Wed, 23 Jul 2025 19:24:20 +0200
Subject: [PATCH 2/7] fix: restore module-level Page import and correct read-state docstring

---
 browser_use/agent/prompts.py      |  2 +-
 browser_use/controller/service.py | 18 ++++++++----------
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/browser_use/agent/prompts.py b/browser_use/agent/prompts.py
index 95b86bfe8..aaf1ebf0b 100644
--- a/browser_use/agent/prompts.py
+++ b/browser_use/agent/prompts.py
@@ -289,7 +289,7 @@ Interactive elements from top layer of the current page inside the viewport{trun
 		return UserMessage(content=browser_state_content, cache=False)
 
 	def get_read_state_message(self) -> UserMessage | None:
-		"""Get read state as a separate cached message"""
+		"""Get read state as a separate message"""
 		if not self.read_state_description:
 			return None
 		if not self.read_state_description.strip('\n').strip():
diff --git a/browser_use/controller/service.py b/browser_use/controller/service.py
index 975234cfb..c5f1e210b 100644
--- a/browser_use/controller/service.py
+++ b/browser_use/controller/service.py
@@ -4,10 +4,7 @@ import json
 import logging
 import os
 import re
-from typing import TYPE_CHECKING, Generic, TypeVar, cast
-
-if TYPE_CHECKING:
-	from browser_use.browser.types import Page
+from typing import Generic, TypeVar, cast
 
 try:
 	from lmnr import Laminar  # type: ignore
@@ -18,6 +15,7 @@ from pydantic import BaseModel
 
 from browser_use.agent.views import ActionModel, ActionResult
 from browser_use.browser import BrowserSession
+from browser_use.browser.types import Page
 from browser_use.browser.views import BrowserError
 from browser_use.controller.registry.service import Registry
 from browser_use.controller.views import (
@@ -298,7 +296,7 @@ Only use this for specific queries for information retrieval from the page. Don'
 		async def extract_structured_data(
 			query: str,
 			extract_links: bool,
-			page: 'Page',
+			page: Page,
 			page_extraction_llm: BaseChatModel,
 			file_system: FileSystem,
 		):
@@ -611,7 +609,7 @@ Explain the content of the page and that the requested information is not availa
 			'Send strings of special keys to use Playwright page.keyboard.press - examples include Escape, Backspace, Insert, PageDown, Delete, Enter, or Shortcuts such as `Control+o`, `Control+Shift+T`',
 			param_model=SendKeysAction,
 		)
-		async def send_keys(params: SendKeysAction, page: 'Page'):
+		async def send_keys(params: SendKeysAction, page: Page):
 			try:
 				await page.keyboard.press(params.keys)
 			except Exception as e:
@@ -632,7 +630,7 @@ Explain the content of the page and that the requested information is not availa
 		@self.registry.action(
 			description='Scroll to a text in the current page',
 		)
-		async def scroll_to_text(text: str, page: 'Page'):  # type: ignore
+		async def scroll_to_text(text: str, page: Page):  # type: ignore
 			try:
 				# Try different locator strategies
 				locators = [
@@ -915,7 +913,7 @@ Explain the content of the page and that the requested information is not availa
 				raise BrowserError(msg)
 
 		@self.registry.action('Google Sheets: Get the contents of the entire sheet', domains=['https://docs.google.com'])
-		async def read_sheet_contents(page: 'Page'):
+		async def read_sheet_contents(page: Page):
 			# select all cells
 			await page.keyboard.press('Enter')
 			await page.keyboard.press('Escape')
@@ -981,7 +979,7 @@ Explain the content of the page and that the requested information is not availa
 			)
 
 		@self.registry.action('Google Sheets: Select a specific cell or range of cells', domains=['https://docs.google.com'])
-		async def select_cell_or_range(cell_or_range: str, page: 'Page'):
+		async def select_cell_or_range(cell_or_range: str, page: Page):
 			await page.keyboard.press('Enter')  # make sure we dont delete current cell contents if we were last editing
 			await page.keyboard.press('Escape')  # to clear current focus (otherwise select range popup is additive)
 			await asyncio.sleep(0.1)
@@ -1005,7 +1003,7 @@ Explain the content of the page and that the requested information is not availa
 			'Google Sheets: Fallback method to type text into (only one) currently selected cell',
 			domains=['https://docs.google.com'],
 		)
-		async def fallback_input_into_single_selected_cell(text: str, page: 'Page'):
+		async def fallback_input_into_single_selected_cell(text: str, page: Page):
 			await page.keyboard.type(text, delay=0.1)
 			await page.keyboard.press('Enter')  # make sure to commit the input so it doesn't get overwritten by the next action
 			await page.keyboard.press('ArrowUp')

From d7046e442cf5b56d782183e5791403bad817c089 Mon Sep 17 00:00:00 2001
From: mertunsall <mertunsal1905@gmail.com>
Date: Thu, 24 Jul 2025 18:43:27 +0200
Subject: [PATCH 3/7] examples: add alphabet_earnings example and rename file_system agent dir

---
 examples/file_system/alphabet_earnings.py | 56 +++++++++++++++++++++++
 examples/file_system/file_system.py       |  2 +-
 2 files changed, 57 insertions(+), 1 deletion(-)
 create mode 100644 examples/file_system/alphabet_earnings.py

diff --git a/examples/file_system/alphabet_earnings.py b/examples/file_system/alphabet_earnings.py
new file mode 100644
index 000000000..7a8bd6fba
--- /dev/null
+++ b/examples/file_system/alphabet_earnings.py
@@ -0,0 +1,56 @@
+import asyncio
+import os
+import pathlib
+import shutil
+
+from dotenv import load_dotenv
+
+from browser_use import Agent
+from browser_use.browser import BrowserProfile, BrowserSession
+from browser_use.llm import ChatOpenAI
+
+load_dotenv()
+
+''
+SCRIPT_DIR = pathlib.Path(os.path.dirname(os.path.abspath(__file__)))
+agent_dir = SCRIPT_DIR / 'alphabet_earnings'
+agent_dir.mkdir(exist_ok=True)
+
+try:
+	from lmnr import Laminar
+
+	Laminar.initialize(project_api_key=os.getenv('LMNR_PROJECT_API_KEY'))
+except Exception as e:
+	print(f'Error initializing Laminar: {e}')
+
+llm = ChatOpenAI(
+	model='o4-mini',
+)
+
+browser_session = BrowserSession(
+	browser_profile=BrowserProfile(downloads_path=str(agent_dir / 'downloads')),
+)
+
+task = """
+Go to https://abc.xyz/assets/cc/27/3ada14014efbadd7a58472f1f3f4/2025q2-alphabet-earnings-release.pdf.
+Read the PDF and save 3 interesting data points in "alphabet_earnings.pdf" and share it with me!
+""".strip('\n')
+
+agent = Agent(
+	task=task,
+	llm=llm,
+	browser_session=browser_session,
+	file_system_path=str(agent_dir / 'fs'),
+	flash_mode=True,
+)
+
+
+async def main():
+	agent_history = await agent.run()
+	input('Press Enter to clean the file system...')
+	# clean the file system
+	shutil.rmtree(str(agent_dir / 'fs'))
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
diff --git a/examples/file_system/file_system.py b/examples/file_system/file_system.py
index 78eda7ca3..c54a52881 100644
--- a/examples/file_system/file_system.py
+++ b/examples/file_system/file_system.py
@@ -12,7 +12,7 @@ load_dotenv()
 
 ''
 SCRIPT_DIR = pathlib.Path(os.path.dirname(os.path.abspath(__file__)))
-agent_dir = SCRIPT_DIR / 'test_no_thinking'
+agent_dir = SCRIPT_DIR / 'file_system'
 agent_dir.mkdir(exist_ok=True)
 conversation_dir = agent_dir / 'conversations' / 'conversation'
 print(f'Agent logs directory: {agent_dir}')

From a6ac62ff5ebc7835211ea4d9849560fc9ba2d045 Mon Sep 17 00:00:00 2001
From: mertunsall <mertunsal1905@gmail.com>
Date: Tue, 5 Aug 2025 13:45:53 +0100
Subject: [PATCH 4/7] format: fix indentation of vision_detail_level argument

---
 browser_use/agent/message_manager/service.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/browser_use/agent/message_manager/service.py b/browser_use/agent/message_manager/service.py
index 3aceba15c..ad3cd0f26 100644
--- a/browser_use/agent/message_manager/service.py
+++ b/browser_use/agent/message_manager/service.py
@@ -258,7 +258,7 @@ class MessageManager:
 			sensitive_data=self.sensitive_data_description,
 			available_file_paths=available_file_paths,
 			screenshots=screenshots,
-      vision_detail_level=self.vision_detail_level,
+			vision_detail_level=self.vision_detail_level,
 		)
 
 		# Fully reconstruct all messages every time for optimal caching:

From bc9a3a5fbb97cb5122432cad007ff4e6bc4ac654 Mon Sep 17 00:00:00 2001
From: mertunsall <mertunsal1905@gmail.com>
Date: Tue, 5 Aug 2025 15:07:50 +0100
Subject: [PATCH 5/7] Merge history, agent, browser and read state into a single cached state message

---
 browser_use/agent/message_manager/service.py | 59 +++++-----------
 browser_use/agent/message_manager/views.py   | 16 ++---
 browser_use/agent/prompts.py                 | 73 ++++++--------------
 browser_use/llm/tests/test_single_step.py    |  5 +-
 4 files changed, 47 insertions(+), 106 deletions(-)

diff --git a/browser_use/agent/message_manager/service.py b/browser_use/agent/message_manager/service.py
index ad3cd0f26..9dcf60c26 100644
--- a/browser_use/agent/message_manager/service.py
+++ b/browser_use/agent/message_manager/service.py
@@ -19,7 +19,6 @@ from browser_use.llm.messages import (
 	BaseMessage,
 	ContentPartTextParam,
 	SystemMessage,
-	UserMessage,
 )
 from browser_use.observability import observe_debug
 from browser_use.utils import match_url_with_domain_pattern, time_execution_sync
@@ -129,7 +128,10 @@ class MessageManager:
 		if len(self.state.history.get_messages()) == 0:
 			self._set_state_message(self.system_prompt, 'system')
 
-	# Removed agent_history_description property - now using separate cached history messages
+	@property
+	def agent_history_description(self) -> str:
+		"""Build agent history description from list of items, respecting max_history_items limit"""
+		return self._get_history_description()
 
 	def add_new_task(self, new_task: str) -> None:
 		self.task = new_task
@@ -230,7 +232,7 @@ class MessageManager:
 		sensitive_data=None,
 		available_file_paths: list[str] | None = None,  # Always pass current available_file_paths
 	) -> None:
-		"""Reconstruct all state messages from scratch for optimal caching"""
+		"""Create single state message with all content"""
 
 		# Clear contextual messages from previous steps to prevent accumulation
 		self.state.history.context_messages.clear()
@@ -245,11 +247,12 @@ class MessageManager:
 		if browser_state_summary.screenshot:
 			screenshots.append(browser_state_summary.screenshot)
 
-		# Create the message prompt helper
+		# Create the message prompt helper with history description
 		assert browser_state_summary
 		message_prompt = AgentMessagePrompt(
 			browser_state_summary=browser_state_summary,
 			file_system=self.file_system,
+			agent_history_description=self.agent_history_description,
 			read_state_description=self.state.read_state_description,
 			task=self.task,
 			include_attributes=self.include_attributes,
@@ -261,26 +264,11 @@ class MessageManager:
 			vision_detail_level=self.vision_detail_level,
 		)
 
-		# Fully reconstruct all messages every time for optimal caching:
+		# Create single state message with all content
+		state_message = message_prompt.get_user_message(use_vision)
 
-		# 1. Rebuild cached history messages from history items
-		self._rebuild_history_messages()
-
-		# 2. Create fresh agent state message (not cached - changes every step)
-		agent_state_message = message_prompt.get_agent_state_message()
-
-		# 3. Create fresh browser state message with vision (not cached - changes every step)
-		browser_state_message = message_prompt.get_browser_state_message(use_vision)
-
-		# 4. Create fresh read state message (cached - only changes when new content extracted)
-		read_state_message = message_prompt.get_read_state_message()
-
-		# Replace all state messages
-		self._set_state_message(agent_state_message, 'agent_state')
-		self._set_state_message(browser_state_message, 'browser_state')
-		# read state message is optional
-		if read_state_message:
-			self._set_state_message(read_state_message, 'read_state')
+		# Set the state message with caching enabled
+		self._set_state_message(state_message, 'state')
 
 	def _log_history_lines(self) -> str:
 		"""Generate a formatted log string of message history for debugging / printing to terminal"""
@@ -328,13 +316,8 @@ class MessageManager:
 		self.last_input_messages = self.state.history.get_messages()
 		return self.last_input_messages
 
-	def _create_history_message_from_item(self, item: HistoryItem) -> UserMessage:
-		"""Convert a HistoryItem into a cached UserMessage"""
-		content = item.to_string()
-		return UserMessage(content=content, cache=True)
-
-	def _rebuild_history_messages(self) -> None:
-		"""Rebuild the history messages from agent_history_items, respecting max_history_items limit"""
+	def _get_history_description(self) -> str:
+		"""Build agent history description from list of items, respecting max_history_items limit"""
 		history_items = self.state.agent_history_items
 
 		if self.max_history_items is None:
@@ -359,12 +342,10 @@ class MessageManager:
 					omitted_item = HistoryItem(system_message=f'[... {omitted_count} previous steps omitted...]')
 					items_to_include = [history_items[0], omitted_item] + history_items[-recent_items_count:]
 
-		# Convert items to cached messages
-		self.state.history.history_messages = [self._create_history_message_from_item(item) for item in items_to_include]
+		# Convert items to strings
+		return '\n'.join(item.to_string() for item in items_to_include)
 
-	def _set_state_message(
-		self, message: BaseMessage, message_type: Literal['system', 'agent_state', 'browser_state', 'read_state']
-	) -> None:
+	def _set_state_message(self, message: BaseMessage, message_type: Literal['system', 'state']) -> None:
 		"""Replace a specific state message slot with a new message"""
 		# filter out sensitive data from the message
 		if self.sensitive_data:
@@ -372,12 +353,8 @@ class MessageManager:
 
 		if message_type == 'system':
 			self.state.history.system_message = message
-		elif message_type == 'agent_state':
-			self.state.history.agent_state_message = message
-		elif message_type == 'browser_state':
-			self.state.history.browser_state_message = message
-		elif message_type == 'read_state':
-			self.state.history.read_state_message = message
+		elif message_type == 'state':
+			self.state.history.state_message = message
 		else:
 			raise ValueError(f'Invalid state message type: {message_type}')
 
diff --git a/browser_use/agent/message_manager/views.py b/browser_use/agent/message_manager/views.py
index f5b2d8885..00926abd5 100644
--- a/browser_use/agent/message_manager/views.py
+++ b/browser_use/agent/message_manager/views.py
@@ -71,25 +71,17 @@ class MessageHistory(BaseModel):
 	"""History of messages"""
 
 	system_message: BaseMessage | None = None
-	history_messages: list[BaseMessage] = Field(default_factory=list)
-	agent_state_message: BaseMessage | None = None
-	browser_state_message: BaseMessage | None = None
-	read_state_message: BaseMessage | None = None
+	state_message: BaseMessage | None = None
 	context_messages: list[BaseMessage] = Field(default_factory=list)
 	model_config = ConfigDict(arbitrary_types_allowed=True)
 
 	def get_messages(self) -> list[BaseMessage]:
-		"""Get all messages in the correct order: system -> history -> agent_state -> browser_state -> read_state -> contextual"""
+		"""Get all messages in the correct order: system -> state -> contextual"""
 		messages = []
 		if self.system_message:
 			messages.append(self.system_message)
-		messages.extend(self.history_messages)
-		if self.agent_state_message:
-			messages.append(self.agent_state_message)
-		if self.browser_state_message:
-			messages.append(self.browser_state_message)
-		if self.read_state_message:
-			messages.append(self.read_state_message)
+		if self.state_message:
+			messages.append(self.state_message)
 		messages.extend(self.context_messages)
 
 		return messages
diff --git a/browser_use/agent/prompts.py b/browser_use/agent/prompts.py
index 7d3d29cd0..064ebd887 100644
--- a/browser_use/agent/prompts.py
+++ b/browser_use/agent/prompts.py
@@ -81,6 +81,7 @@ class AgentMessagePrompt:
 		self,
 		browser_state_summary: 'BrowserStateSummary',
 		file_system: 'FileSystem',
+		agent_history_description: str | None = None,
 		read_state_description: str | None = None,
 		task: str | None = None,
 		include_attributes: list[str] | None = None,
@@ -94,6 +95,7 @@ class AgentMessagePrompt:
 	):
 		self.browser_state: 'BrowserStateSummary' = browser_state_summary
 		self.file_system: 'FileSystem | None' = file_system
+		self.agent_history_description: str | None = agent_history_description
 		self.read_state_description: str | None = read_state_description
 		self.task: str | None = task
 		self.include_attributes = include_attributes
@@ -214,13 +216,9 @@ Available tabs:
 			agent_state += '<available_file_paths>\n' + '\n'.join(self.available_file_paths) + '\n</available_file_paths>\n'
 		return agent_state
 
-	def get_agent_state_message(self) -> UserMessage:
-		"""Get agent state as a separate message (not cached - changes every step)"""
-		agent_state_content = '<agent_state>\n' + self._get_agent_state_description().strip('\n') + '\n</agent_state>'
-		return UserMessage(content=agent_state_content, cache=False)
-
-	def get_browser_state_message(self, use_vision: bool = True) -> UserMessage:
-		"""Get browser state as a separate message, optionally with screenshots"""
+	@observe_debug(ignore_input=True, ignore_output=True, name='get_user_message')
+	def get_user_message(self, use_vision: bool = True) -> UserMessage:
+		"""Get complete state as a single cached message"""
 		# Don't pass screenshot to model if page is a new tab page, step is 0, and there's only one tab
 		if (
 			is_new_tab_page(self.browser_state.url)
@@ -230,16 +228,27 @@ Available tabs:
 		):
 			use_vision = False
 
-		browser_state_content = '<browser_state>\n' + self._get_browser_state_description().strip('\n') + '\n</browser_state>'
+		# Build complete state description
+		state_description = (
+			'<agent_history>\n'
+			+ (self.agent_history_description.strip('\n') if self.agent_history_description else '')
+			+ '\n</agent_history>\n'
+		)
+		state_description += '<agent_state>\n' + self._get_agent_state_description().strip('\n') + '\n</agent_state>\n'
+		state_description += '<browser_state>\n' + self._get_browser_state_description().strip('\n') + '\n</browser_state>\n'
+		# Only add read_state if it has content
+		read_state_description = self.read_state_description.strip('\n').strip() if self.read_state_description else ''
+		if read_state_description:
+			state_description += '<read_state>\n' + read_state_description + '\n</read_state>\n'
 
 		if self.page_filtered_actions:
-			browser_state_content += '\n<page_specific_actions>\n'
-			browser_state_content += self.page_filtered_actions + '\n'
-			browser_state_content += '</page_specific_actions>\n'
+			state_description += '<page_specific_actions>\n'
+			state_description += self.page_filtered_actions + '\n'
+			state_description += '</page_specific_actions>\n'
 
 		if use_vision is True and self.screenshots:
 			# Start with text description
-			content_parts: list[ContentPartTextParam | ContentPartImageParam] = [ContentPartTextParam(text=browser_state_content)]
+			content_parts: list[ContentPartTextParam | ContentPartImageParam] = [ContentPartTextParam(text=state_description)]
 
 			# Add screenshots with labels
 			for i, screenshot in enumerate(self.screenshots):
@@ -265,42 +274,4 @@ Available tabs:
 
 			return UserMessage(content=content_parts, cache=True)
 
-		return UserMessage(content=browser_state_content, cache=True)
-
-	def get_read_state_message(self) -> UserMessage | None:
-		"""Get read state as a separate message"""
-		if not self.read_state_description:
-			return None
-		if not self.read_state_description.strip('\n').strip():
-			return None
-
-		read_state_content = '<read_state>\n' + self.read_state_description.strip('\n') + '\n</read_state>'
-		return UserMessage(content=read_state_content, cache=False)
-
-	@observe_debug(ignore_input=True, ignore_output=True, name='get_user_message')
-	def get_user_message(self, use_vision: bool = True) -> UserMessage:
-		"""
-		DEPRECATED: This method is kept for backward compatibility but should not be used.
-		Use get_agent_state_message(), get_browser_state_message(), and get_read_state_message() instead.
-		"""
-		# For backward compatibility, return all messages concatenated in a single message
-		agent_state_message = self.get_agent_state_message()
-		browser_state_message = self.get_browser_state_message(use_vision)
-		read_state_message = self.get_read_state_message()
-
-		# Flatten the three message objects into a single list of content parts,
-		# handling both raw-text (`str`) and already-structured (`list[...]`) cases.
-		content_parts: list[ContentPartTextParam | ContentPartImageParam] = []
-
-		for msg in (agent_state_message, browser_state_message, read_state_message):
-			if msg is None:
-				continue
-
-			if isinstance(msg.content, list):
-				# Message is already a list of content parts – reuse as-is.
-				content_parts.extend(msg.content)
-			else:
-				# Wrap plain text in a ContentPartTextParam.
-				content_parts.append(ContentPartTextParam(text=str(msg.content)))
-
-		return UserMessage(content=content_parts, cache=True)
+		return UserMessage(content=state_description, cache=True)
diff --git a/browser_use/llm/tests/test_single_step.py b/browser_use/llm/tests/test_single_step.py
index 3613b5e07..7e7513653 100644
--- a/browser_use/llm/tests/test_single_step.py
+++ b/browser_use/llm/tests/test_single_step.py
@@ -70,6 +70,7 @@ def create_mock_state_message(temp_dir: str):
 	agent_prompt = AgentMessagePrompt(
 		browser_state_summary=mock_browser_state,
 		file_system=mock_file_system,  # Now using actual FileSystem instance
+		agent_history_description='',  # Empty history
 		read_state_description='',  # Empty read state
 		task='Click the button on the page',
 		include_attributes=['id'],
@@ -110,7 +111,7 @@ async def test_single_step_parametrized(llm_class, model_name):
 		# Create mock state message
 		mock_message = create_mock_state_message(temp_dir)
 
-		agent.message_manager._set_state_message(mock_message, 'agent_state')
+		agent.message_manager._set_state_message(mock_message, 'state')
 
 		messages = agent.message_manager.get_messages()
 
@@ -151,7 +152,7 @@ async def test_single_step():
 			print(mock_message.content)
 			print('\n' + '=' * 50 + '\n')
 
-			agent.message_manager._set_state_message(mock_message, 'agent_state')
+			agent.message_manager._set_state_message(mock_message, 'state')
 
 			messages = agent.message_manager.get_messages()
 

From 10581b6e77bf4e7775332611de6fc26e693537fd Mon Sep 17 00:00:00 2001
From: mertunsall <mertunsal1905@gmail.com>
Date: Tue, 5 Aug 2025 15:10:23 +0100
Subject: [PATCH 6/7] Rename _set_state_message to _set_message_with_type

---
 browser_use/agent/message_manager/service.py | 6 +++---
 browser_use/llm/tests/test_single_step.py    | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/browser_use/agent/message_manager/service.py b/browser_use/agent/message_manager/service.py
index 9dcf60c26..c12094e1d 100644
--- a/browser_use/agent/message_manager/service.py
+++ b/browser_use/agent/message_manager/service.py
@@ -126,7 +126,7 @@ class MessageManager:
 		self.last_input_messages = []
 		# Only initialize messages if state is empty
 		if len(self.state.history.get_messages()) == 0:
-			self._set_state_message(self.system_prompt, 'system')
+			self._set_message_with_type(self.system_prompt, 'system')
 
 	@property
 	def agent_history_description(self) -> str:
@@ -268,7 +268,7 @@ class MessageManager:
 		state_message = message_prompt.get_user_message(use_vision)
 
 		# Set the state message with caching enabled
-		self._set_state_message(state_message, 'state')
+		self._set_message_with_type(state_message, 'state')
 
 	def _log_history_lines(self) -> str:
 		"""Generate a formatted log string of message history for debugging / printing to terminal"""
@@ -345,7 +345,7 @@ class MessageManager:
 		# Convert items to strings
 		return '\n'.join(item.to_string() for item in items_to_include)
 
-	def _set_state_message(self, message: BaseMessage, message_type: Literal['system', 'state']) -> None:
+	def _set_message_with_type(self, message: BaseMessage, message_type: Literal['system', 'state']) -> None:
 		"""Replace a specific state message slot with a new message"""
 		# filter out sensitive data from the message
 		if self.sensitive_data:
diff --git a/browser_use/llm/tests/test_single_step.py b/browser_use/llm/tests/test_single_step.py
index 7e7513653..9ecafed32 100644
--- a/browser_use/llm/tests/test_single_step.py
+++ b/browser_use/llm/tests/test_single_step.py
@@ -111,7 +111,7 @@ async def test_single_step_parametrized(llm_class, model_name):
 		# Create mock state message
 		mock_message = create_mock_state_message(temp_dir)
 
-		agent.message_manager._set_state_message(mock_message, 'state')
+		agent.message_manager._set_message_with_type(mock_message, 'state')
 
 		messages = agent.message_manager.get_messages()
 
@@ -152,7 +152,7 @@ async def test_single_step():
 			print(mock_message.content)
 			print('\n' + '=' * 50 + '\n')
 
-			agent.message_manager._set_state_message(mock_message, 'state')
+			agent.message_manager._set_message_with_type(mock_message, 'state')
 
 			messages = agent.message_manager.get_messages()
 

From 6a15ed4c4efa1814ac1e69302636531b909c05fe Mon Sep 17 00:00:00 2001
From: mertunsall <mertunsal1905@gmail.com>
Date: Tue, 5 Aug 2025 15:21:52 +0100
Subject: [PATCH 7/7] Inline history description into agent_history_description; default max_history_items to None

---
 browser_use/agent/message_manager/service.py | 66 +++++++++-----------
 browser_use/agent/service.py                 | 14 ++++-
 browser_use/agent/views.py                   |  2 +-
 3 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/browser_use/agent/message_manager/service.py b/browser_use/agent/message_manager/service.py
index c12094e1d..ac34d53fe 100644
--- a/browser_use/agent/message_manager/service.py
+++ b/browser_use/agent/message_manager/service.py
@@ -131,7 +131,31 @@ class MessageManager:
 	@property
 	def agent_history_description(self) -> str:
 		"""Build agent history description from list of items, respecting max_history_items limit"""
-		return self._get_history_description()
+		if self.max_history_items is None:
+			# Include all items
+			return '\n'.join(item.to_string() for item in self.state.agent_history_items)
+
+		total_items = len(self.state.agent_history_items)
+
+		# If we have fewer items than the limit, just return all items
+		if total_items <= self.max_history_items:
+			return '\n'.join(item.to_string() for item in self.state.agent_history_items)
+
+		# We have more items than the limit, so we need to omit some
+		omitted_count = total_items - self.max_history_items
+
+		# Show first item + omitted message + most recent (max_history_items - 1) items
+		# The omitted message doesn't count against the limit, only real history items do
+		recent_items_count = self.max_history_items - 1  # -1 for first item
+
+		items_to_include = [
+			self.state.agent_history_items[0].to_string(),  # Keep first item (initialization)
+			f'<sys>[... {omitted_count} previous steps omitted...]</sys>',
+		]
+		# Add most recent items; slice from an explicit start index because `[-0:]` would return the whole list
+		items_to_include.extend(item.to_string() for item in self.state.agent_history_items[total_items - recent_items_count :])
+
+		return '\n'.join(items_to_include)
 
 	def add_new_task(self, new_task: str) -> None:
 		self.task = new_task
@@ -247,9 +271,9 @@ class MessageManager:
 		if browser_state_summary.screenshot:
 			screenshots.append(browser_state_summary.screenshot)
 
-		# Create the message prompt helper with history description
+		# Create single state message with all content
 		assert browser_state_summary
-		message_prompt = AgentMessagePrompt(
+		state_message = AgentMessagePrompt(
 			browser_state_summary=browser_state_summary,
 			file_system=self.file_system,
 			agent_history_description=self.agent_history_description,
@@ -262,10 +286,7 @@ class MessageManager:
 			available_file_paths=available_file_paths,
 			screenshots=screenshots,
 			vision_detail_level=self.vision_detail_level,
-		)
-
-		# Create single state message with all content
-		state_message = message_prompt.get_user_message(use_vision)
+		).get_user_message(use_vision)
 
 		# Set the state message with caching enabled
 		self._set_message_with_type(state_message, 'state')
@@ -316,35 +337,6 @@ class MessageManager:
 		self.last_input_messages = self.state.history.get_messages()
 		return self.last_input_messages
 
-	def _get_history_description(self) -> str:
-		"""Build agent history description from list of items, respecting max_history_items limit"""
-		history_items = self.state.agent_history_items
-
-		if self.max_history_items is None:
-			# Include all items
-			items_to_include = history_items
-		else:
-			total_items = len(history_items)
-
-			# If we have fewer items than the limit, just return all items
-			if total_items <= self.max_history_items:
-				items_to_include = history_items
-			else:
-				# We have more items than the limit, so we need to omit some
-				omitted_count = total_items - self.max_history_items
-				recent_items_count = self.max_history_items - 1  # -1 for first item
-
-				# Include first item + most recent items
-				items_to_include = [history_items[0]] + history_items[-recent_items_count:]
-
-				# Create an omitted message and insert it between first and recent items
-				if omitted_count > 0:
-					omitted_item = HistoryItem(system_message=f'[... {omitted_count} previous steps omitted...]')
-					items_to_include = [history_items[0], omitted_item] + history_items[-recent_items_count:]
-
-		# Convert items to strings
-		return '\n'.join(item.to_string() for item in items_to_include)
-
 	def _set_message_with_type(self, message: BaseMessage, message_type: Literal['system', 'state']) -> None:
 		"""Replace a specific state message slot with a new message"""
 		# filter out sensitive data from the message
@@ -359,7 +351,7 @@ class MessageManager:
 			raise ValueError(f'Invalid state message type: {message_type}')
 
 	def _add_context_message(self, message: BaseMessage) -> None:
-		"""Add a contextual message that persists across steps (e.g., page-specific actions, final step warnings)"""
+		"""Add a contextual message specific to this step (e.g., validation errors, retry instructions, timeout warnings)"""
 		# filter out sensitive data from the message
 		if self.sensitive_data:
 			message = self._filter_sensitive_data(message)
diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py
index d687a8620..41a2813ca 100644
--- a/browser_use/agent/service.py
+++ b/browser_use/agent/service.py
@@ -168,7 +168,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
 		max_actions_per_step: int = 10,
 		use_thinking: bool = True,
 		flash_mode: bool = False,
-		max_history_items: int = 40,
+		max_history_items: int | None = None,
 		page_extraction_llm: BaseChatModel | None = None,
 		planner_llm: BaseChatModel | None = None,  # Deprecated
 		planner_interval: int = 1,  # Deprecated
@@ -810,11 +810,16 @@ class Agent(Generic[Context, AgentStructuredOutput]):
 		prefix = f'❌ Result failed {self.state.consecutive_failures + 1}/{self.settings.max_failures} times:\n '
 		self.state.consecutive_failures += 1
 
+		# TODO: decide how recovery hints should reach the model; the context-message calls below are not enabled yet
 		if isinstance(error, (ValidationError, ValueError)):
 			self.logger.error(f'{prefix}{error_msg}')
+			# TODO: once context messages are enabled here, send a hint such as
+			#   'Your output format was invalid. Please follow the exact schema structure required for actions.'
+			# through self._message_manager._add_context_message(UserMessage(content=...))
+
 			if 'Max token limit reached' in error_msg:
-				# TODO: figure out what to do here
-				pass
+				# TODO: hint 'Your response was too long. Keep your thinking and output concise.' (not sent yet)
+				pass
 		# Handle InterruptedError specially
 		elif isinstance(error, InterruptedError):
 			error_msg = 'The agent was interrupted mid-step' + (f' - {error}' if error else '')
@@ -824,6 +829,9 @@ class Agent(Generic[Context, AgentStructuredOutput]):
 			logger.debug(f'Model: {self.llm.model} failed')
 			error_msg += '\n\nReturn a valid JSON object with the required fields.'
 			logger.error(f'{prefix}{error_msg}')
+			# TODO: once context messages are enabled here, send a hint such as
+			#   'Your response could not be parsed. Return a valid JSON object with the required fields.'
+			# through self._message_manager._add_context_message(UserMessage(content=...))
 		else:
 			from anthropic import RateLimitError as AnthropicRateLimitError
 			from google.api_core.exceptions import ResourceExhausted
diff --git a/browser_use/agent/views.py b/browser_use/agent/views.py
index e0a82621f..4af745599 100644
--- a/browser_use/agent/views.py
+++ b/browser_use/agent/views.py
@@ -54,7 +54,7 @@ class AgentSettings(BaseModel):
 	max_actions_per_step: int = 10
 	use_thinking: bool = True
 	flash_mode: bool = False  # If enabled, disables evaluation_previous_goal and next_goal, and sets use_thinking = False
-	max_history_items: int = 40
+	max_history_items: int | None = None
 
 	page_extraction_llm: BaseChatModel | None = None
 	planner_llm: BaseChatModel | None = None