browser-use/browser_use/agent/message_manager/service.py

from __future__ import annotations

import logging
from typing import Literal

from browser_use.agent.message_manager.views import (
	HistoryItem,
)
from browser_use.agent.prompts import AgentMessagePrompt
from browser_use.agent.views import (
	ActionResult,
	AgentOutput,
	AgentStepInfo,
	MessageCompactionSettings,
	MessageManagerState,
)
from browser_use.browser.views import BrowserStateSummary
from browser_use.filesystem.file_system import FileSystem
from browser_use.llm.base import BaseChatModel
from browser_use.llm.messages import (
	BaseMessage,
	ContentPartImageParam,
	ContentPartTextParam,
	SystemMessage,
	UserMessage,
)
from browser_use.observability import observe_debug
from browser_use.utils import (
	collect_sensitive_data_values,
	match_url_with_domain_pattern,
	redact_sensitive_string,
	time_execution_sync,
)

logger = logging.getLogger(__name__)


# ========== Logging Helper Functions ==========
# These functions are used ONLY for formatting debug log output.
# They do NOT affect the actual message content sent to the LLM.
# All logging functions start with _log_ for easy identification.


def _log_get_message_emoji(message: BaseMessage) -> str:
	"""Get emoji for a message type - used only for logging display"""
	emoji_map = {
		'UserMessage': '💬',
		'SystemMessage': '🧠',
		'AssistantMessage': '🔨',
	}
	return emoji_map.get(message.__class__.__name__, '🎮')


def _log_format_message_line(message: BaseMessage, content: str, is_last_message: bool, terminal_width: int) -> list[str]:
	"""Format a single message for logging display"""
	try:
		lines = []

		# Get emoji and token info
		emoji = _log_get_message_emoji(message)
		# token_str = str(message.metadata.tokens).rjust(4)
		# TODO: fix the token count
		token_str = '??? (TODO)'
		prefix = f'{emoji}[{token_str}]: '

		# Calculate available width (emoji=2 visual cols + [token]: =8 chars)
		content_width = terminal_width - 10

		# Handle last message wrapping
		if is_last_message and len(content) > content_width:
			# Find a good break point
			break_point = content.rfind(' ', 0, content_width)
			if break_point > content_width * 0.7:  # Keep at least 70% of line
				first_line = content[:break_point]
				rest = content[break_point + 1 :]
			else:
				# No good break point, just truncate
				first_line = content[:content_width]
				rest = content[content_width:]

			lines.append(prefix + first_line)

			# Second line with 10-space indent
			if rest:
				if len(rest) > terminal_width - 10:
					rest = rest[: terminal_width - 10]
				lines.append(' ' * 10 + rest)
		else:
			# Single line - truncate if needed
			if len(content) > content_width:
				content = content[:content_width]
			lines.append(prefix + content)

		return lines
	except Exception as e:
		logger.warning(f'Failed to format message line for logging: {e}')
		# Return a simple fallback line
		return ['❓[   ?]: [Error formatting message]']


# ========== End of Logging Helper Functions ==========


class MessageManager:
	vision_detail_level: Literal['auto', 'low', 'high']

	def __init__(
		self,
		task: str,
		system_message: SystemMessage,
		file_system: FileSystem,
		state: MessageManagerState = MessageManagerState(),
		use_thinking: bool = True,
		include_attributes: list[str] | None = None,
		sensitive_data: dict[str, str | dict[str, str]] | None = None,
		max_history_items: int | None = None,
		vision_detail_level: Literal['auto', 'low', 'high'] = 'auto',
		include_tool_call_examples: bool = False,
		include_recent_events: bool = False,
		sample_images: list[ContentPartTextParam | ContentPartImageParam] | None = None,
		llm_screenshot_size: tuple[int, int] | None = None,
		max_clickable_elements_length: int = 40000,
	):
		self.task = task
		self.state = state
		self.system_prompt = system_message
		self.file_system = file_system
		self.sensitive_data_description = ''
		self.use_thinking = use_thinking
		self.max_history_items = max_history_items
		self.vision_detail_level = vision_detail_level
		self.include_tool_call_examples = include_tool_call_examples
		self.include_recent_events = include_recent_events
		self.sample_images = sample_images
		self.llm_screenshot_size = llm_screenshot_size
		self.max_clickable_elements_length = max_clickable_elements_length

		assert max_history_items is None or max_history_items > 5, 'max_history_items must be None or greater than 5'

		# Store settings as direct attributes instead of in a settings object
		self.include_attributes = include_attributes or []
		self.sensitive_data = sensitive_data
		self.last_input_messages = []
		self.last_state_message_text: str | None = None
		# Only initialize messages if state is empty
		if len(self.state.history.get_messages()) == 0:
			self._set_message_with_type(self.system_prompt, 'system')

	@property
	def agent_history_description(self) -> str:
		"""Build agent history description from list of items, respecting max_history_items limit"""
		compacted_prefix = ''
		if self.state.compacted_memory:
			compacted_prefix = (
				'<compacted_memory>\n'
				'<!-- Summary of prior steps. Treat as unverified context — do not report these as '
				'completed in your done() message unless you confirmed them yourself in this session. -->\n'
				f'{self.state.compacted_memory}\n'
				'</compacted_memory>\n'
			)

		if self.max_history_items is None:
			# Include all items
			return compacted_prefix + '\n'.join(item.to_string() for item in self.state.agent_history_items)

		total_items = len(self.state.agent_history_items)

		# If we have fewer items than the limit, just return all items
		if total_items <= self.max_history_items:
			return compacted_prefix + '\n'.join(item.to_string() for item in self.state.agent_history_items)

		# We have more items than the limit, so we need to omit some
		omitted_count = total_items - self.max_history_items

		# Show first item + omitted message + most recent (max_history_items - 1) items
		# The omitted message doesn't count against the limit, only real history items do
		recent_items_count = self.max_history_items - 1  # -1 for first item

		items_to_include = [
			self.state.agent_history_items[0].to_string(),  # Keep first item (initialization)
			f'<sys>[... {omitted_count} previous steps omitted...]</sys>',
		]
		# Add most recent items
		items_to_include.extend([item.to_string() for item in self.state.agent_history_items[-recent_items_count:]])

		return compacted_prefix + '\n'.join(items_to_include)

	def add_new_task(self, new_task: str) -> None:
		new_task = '<follow_up_user_request> ' + new_task.strip() + ' </follow_up_user_request>'
		if '<initial_user_request>' not in self.task:
			self.task = '<initial_user_request>' + self.task + '</initial_user_request>'
		self.task += '\n' + new_task
		task_update_item = HistoryItem(system_message=new_task)
		self.state.agent_history_items.append(task_update_item)

	def prepare_step_state(
		self,
		browser_state_summary: BrowserStateSummary,
		model_output: AgentOutput | None = None,
		result: list[ActionResult] | None = None,
		step_info: AgentStepInfo | None = None,
		sensitive_data=None,
	) -> None:
		"""Prepare state for the next LLM call without building the final state message."""
		self.state.history.context_messages.clear()
		self._update_agent_history_description(model_output, result, step_info)

		effective_sensitive_data = sensitive_data if sensitive_data is not None else self.sensitive_data
		if effective_sensitive_data is not None:
			self.sensitive_data = effective_sensitive_data
			self.sensitive_data_description = self._get_sensitive_data_description(browser_state_summary.url)

	async def maybe_compact_messages(
		self,
		llm: BaseChatModel | None,
		settings: MessageCompactionSettings | None,
		step_info: AgentStepInfo | None = None,
	) -> bool:
		"""Summarize older history into a compact memory block.

		Step interval is the primary trigger; char count is a minimum floor.
		"""
		if not settings or not settings.enabled:
			return False
		if llm is None:
			return False
		if step_info is None:
			return False

		# Step cadence gate
		steps_since = step_info.step_number - (self.state.last_compaction_step or 0)
		if steps_since < settings.compact_every_n_steps:
			return False

		# Char floor gate
		history_items = self.state.agent_history_items
		full_history_text = '\n'.join(item.to_string() for item in history_items).strip()
		trigger_char_count = settings.trigger_char_count or 40000
		if len(full_history_text) < trigger_char_count:
			return False

		logger.debug(f'Compacting message history (items={len(history_items)}, chars={len(full_history_text)})')

		# Build compaction input
		compaction_sections = []
		if self.state.compacted_memory:
			compaction_sections.append(
				f'<previous_compacted_memory>\n{self.state.compacted_memory}\n</previous_compacted_memory>'
			)
		compaction_sections.append(f'<agent_history>\n{full_history_text}\n</agent_history>')
		if settings.include_read_state and self.state.read_state_description:
			compaction_sections.append(f'<read_state>\n{self.state.read_state_description}\n</read_state>')
		compaction_input = '\n\n'.join(compaction_sections)

		if self.sensitive_data:
			filtered = self._filter_sensitive_data(UserMessage(content=compaction_input))
			compaction_input = filtered.text

		system_prompt = (
			'You are summarizing an agent run for prompt compaction.\n'
			'Capture task requirements, key facts, decisions, partial progress, errors, and next steps.\n'
			'Preserve important entities, values, URLs, and file paths.\n'
			'CRITICAL: Only mark a step as completed if you see explicit success confirmation in the history. '
			'If a step was started but not explicitly confirmed complete, mark it as "IN-PROGRESS". '
			'Never infer completion from context — only report what was confirmed.\n'
			'Return plain text only. Do not include tool calls or JSON.'
		)
		if settings.summary_max_chars:
			system_prompt += f' Keep under {settings.summary_max_chars} characters if possible.'

		messages = [SystemMessage(content=system_prompt), UserMessage(content=compaction_input)]
		try:
			response = await llm.ainvoke(messages)
			summary = (response.completion or '').strip()
		except Exception as e:
			logger.warning(f'Failed to compact messages: {e}')
			return False

		if not summary:
			return False

		if settings.summary_max_chars and len(summary) > settings.summary_max_chars:
			summary = summary[: settings.summary_max_chars].rstrip() + '…'

		self.state.compacted_memory = summary
		self.state.compaction_count += 1
		self.state.last_compaction_step = step_info.step_number

		# Keep first item + most recent items
		keep_last = max(0, settings.keep_last_items)
		if len(history_items) > keep_last + 1:
			if keep_last == 0:
				self.state.agent_history_items = [history_items[0]]
			else:
				self.state.agent_history_items = [history_items[0]] + history_items[-keep_last:]

		logger.debug(f'Compaction complete (summary_chars={len(summary)}, history_items={len(self.state.agent_history_items)})')

		return True

	def _update_agent_history_description(
		self,
		model_output: AgentOutput | None = None,
		result: list[ActionResult] | None = None,
		step_info: AgentStepInfo | None = None,
	) -> None:
		"""Update the agent history description"""

		if result is None:
			result = []
		step_number = step_info.step_number if step_info else None

		self.state.read_state_description = ''
		self.state.read_state_images = []  # Clear images from previous step

		action_results = ''
		read_state_idx = 0

		for idx, action_result in enumerate(result):
			if action_result.include_extracted_content_only_once and action_result.extracted_content:
				self.state.read_state_description += (
					f'<read_state_{read_state_idx}>\n{action_result.extracted_content}\n</read_state_{read_state_idx}>\n'
				)
				read_state_idx += 1
				logger.debug(f'Added extracted_content to read_state_description: {action_result.extracted_content}')

			# Store images for one-time inclusion in the next message
			if action_result.images:
				self.state.read_state_images.extend(action_result.images)
				logger.debug(f'Added {len(action_result.images)} image(s) to read_state_images')

			if action_result.long_term_memory:
				action_results += f'{action_result.long_term_memory}\n'
				logger.debug(f'Added long_term_memory to action_results: {action_result.long_term_memory}')
			elif action_result.extracted_content and not action_result.include_extracted_content_only_once:
				action_results += f'{action_result.extracted_content}\n'
				logger.debug(f'Added extracted_content to action_results: {action_result.extracted_content}')

			if action_result.error:
				if len(action_result.error) > 200:
					error_text = action_result.error[:100] + '......' + action_result.error[-100:]
				else:
					error_text = action_result.error
				action_results += f'{error_text}\n'
				logger.debug(f'Added error to action_results: {error_text}')

		# Simple 60k character limit for read_state_description
		MAX_CONTENT_SIZE = 60000
		if len(self.state.read_state_description) > MAX_CONTENT_SIZE:
			self.state.read_state_description = (
				self.state.read_state_description[:MAX_CONTENT_SIZE] + '\n... [Content truncated at 60k characters]'
			)
			logger.debug(f'Truncated read_state_description to {MAX_CONTENT_SIZE} characters')

		self.state.read_state_description = self.state.read_state_description.strip('\n')

		if action_results:
			action_results = f'Result\n{action_results}'
		action_results = action_results.strip('\n') if action_results else None

		# Simple 60k character limit for action_results
		if action_results and len(action_results) > MAX_CONTENT_SIZE:
			action_results = action_results[:MAX_CONTENT_SIZE] + '\n... [Content truncated at 60k characters]'
			logger.debug(f'Truncated action_results to {MAX_CONTENT_SIZE} characters')

		# Build the history item
		if model_output is None:
			# Add history item for initial actions (step 0) or errors (step > 0)
			if step_number is not None:
				if step_number == 0 and action_results:
					# Step 0 with initial action results
					history_item = HistoryItem(step_number=step_number, action_results=action_results)
					self.state.agent_history_items.append(history_item)
				elif step_number > 0:
					# Error case for steps > 0
					history_item = HistoryItem(step_number=step_number, error='Agent failed to output in the right format.')
					self.state.agent_history_items.append(history_item)
		else:
			history_item = HistoryItem(
				step_number=step_number,
				evaluation_previous_goal=model_output.current_state.evaluation_previous_goal,
				memory=model_output.current_state.memory,
				next_goal=model_output.current_state.next_goal,
				action_results=action_results,
			)
			self.state.agent_history_items.append(history_item)

	def _get_sensitive_data_description(self, current_page_url) -> str:
		sensitive_data = self.sensitive_data
		if not sensitive_data:
			return ''

		# Collect placeholders for sensitive data
		placeholders: set[str] = set()

		for key, value in sensitive_data.items():
			if isinstance(value, dict):
				# New format: {domain: {key: value}}
				if current_page_url and match_url_with_domain_pattern(current_page_url, key, True):
					placeholders.update(value.keys())
			else:
				# Old format: {key: value}
				placeholders.add(key)

		if placeholders:
			placeholder_list = sorted(list(placeholders))
			# Format as bullet points for clarity
			formatted_placeholders = '\n'.join(f'  - {p}' for p in placeholder_list)

			info = 'SENSITIVE DATA - Use these placeholders for secure input:\n'
			info += f'{formatted_placeholders}\n\n'
			info += 'IMPORTANT: When entering sensitive values, you MUST wrap the placeholder name in <secret> tags.\n'
			info += f'Example: To enter the value for "{placeholder_list[0]}", use: <secret>{placeholder_list[0]}</secret>\n'
			info += 'The system will automatically replace these tags with the actual secret values.'
			return info

		return ''

	@observe_debug(ignore_input=True, ignore_output=True, name='create_state_messages')
	@time_execution_sync('--create_state_messages')
	def create_state_messages(
		self,
		browser_state_summary: BrowserStateSummary,
		model_output: AgentOutput | None = None,
		result: list[ActionResult] | None = None,
		step_info: AgentStepInfo | None = None,
		use_vision: bool | Literal['auto'] = True,
		page_filtered_actions: str | None = None,
		sensitive_data=None,
		available_file_paths: list[str] | None = None,  # Always pass current available_file_paths
		unavailable_skills_info: str | None = None,  # Information about skills that cannot be used yet
		plan_description: str | None = None,  # Rendered plan for injection into agent state
		skip_state_update: bool = False,
	) -> None:
		"""Create single state message with all content"""

		if not skip_state_update:
			self.prepare_step_state(
				browser_state_summary=browser_state_summary,
				model_output=model_output,
				result=result,
				step_info=step_info,
				sensitive_data=sensitive_data,
			)

		# Use only the current screenshot, but check if action results request screenshot inclusion
		screenshots = []
		include_screenshot_requested = False

		# Check if any action results request screenshot inclusion
		if result:
			for action_result in result:
				if action_result.metadata and action_result.metadata.get('include_screenshot'):
					include_screenshot_requested = True
					logger.debug('Screenshot inclusion requested by action result')
					break

		# Handle different use_vision modes:
		# - "auto": Only include screenshot if explicitly requested by action (e.g., screenshot)
		# - True: Always include screenshot
		# - False: Never include screenshot
		include_screenshot = False
		if use_vision is True:
			# Always include screenshot when use_vision=True
			include_screenshot = True
		elif use_vision == 'auto':
			# Only include screenshot if explicitly requested by action when use_vision="auto"
			include_screenshot = include_screenshot_requested
		# else: use_vision is False, never include screenshot (include_screenshot stays False)

		if include_screenshot and browser_state_summary.screenshot:
			screenshots.append(browser_state_summary.screenshot)

		# Use vision in the user message if screenshots are included
		effective_use_vision = len(screenshots) > 0

		# Create single state message with all content
		assert browser_state_summary
		state_message = AgentMessagePrompt(
			browser_state_summary=browser_state_summary,
			file_system=self.file_system,
			agent_history_description=self.agent_history_description,
			read_state_description=self.state.read_state_description,
			task=self.task,
			include_attributes=self.include_attributes,
			step_info=step_info,
			page_filtered_actions=page_filtered_actions,
			max_clickable_elements_length=self.max_clickable_elements_length,
			sensitive_data=self.sensitive_data_description,
			available_file_paths=available_file_paths,
			screenshots=screenshots,
			vision_detail_level=self.vision_detail_level,
			include_recent_events=self.include_recent_events,
			sample_images=self.sample_images,
			read_state_images=self.state.read_state_images,
			llm_screenshot_size=self.llm_screenshot_size,
			unavailable_skills_info=unavailable_skills_info,
			plan_description=plan_description,
		).get_user_message(effective_use_vision)

		# Store state message text for history
		self.last_state_message_text = state_message.text

		# Set the state message with caching enabled
		self._set_message_with_type(state_message, 'state')

	def _log_history_lines(self) -> str:
		"""Generate a formatted log string of message history for debugging / printing to terminal"""
		# TODO: fix logging

		# try:
		# 	total_input_tokens = 0
		# 	message_lines = []
		# 	terminal_width = shutil.get_terminal_size((80, 20)).columns

		# 	for i, m in enumerate(self.state.history.messages):
		# 		try:
		# 			total_input_tokens += m.metadata.tokens
		# 			is_last_message = i == len(self.state.history.messages) - 1

		# 			# Extract content for logging
		# 			content = _log_extract_message_content(m.message, is_last_message, m.metadata)

		# 			# Format the message line(s)
		# 			lines = _log_format_message_line(m, content, is_last_message, terminal_width)
		# 			message_lines.extend(lines)
		# 		except Exception as e:
		# 			logger.warning(f'Failed to format message {i} for logging: {e}')
		# 			# Add a fallback line for this message
		# 			message_lines.append('❓[   ?]: [Error formatting this message]')

		# 	# Build final log message
		# 	return (
		# 		f'📜 LLM Message history ({len(self.state.history.messages)} messages, {total_input_tokens} tokens):\n'
		# 		+ '\n'.join(message_lines)
		# 	)
		# except Exception as e:
		# 	logger.warning(f'Failed to generate history log: {e}')
		# 	# Return a minimal fallback message
		# 	return f'📜 LLM Message history (error generating log: {e})'

		return ''

	@time_execution_sync('--get_messages')
	def get_messages(self) -> list[BaseMessage]:
		"""Get current message list, potentially trimmed to max tokens"""

		# Log message history for debugging
		logger.debug(self._log_history_lines())
		self.last_input_messages = self.state.history.get_messages()
		return self.last_input_messages

	def _set_message_with_type(self, message: BaseMessage, message_type: Literal['system', 'state']) -> None:
		"""Replace a specific state message slot with a new message"""
		# System messages don't need filtering - they only contain instructions/placeholders
		# State messages need filtering - they include agent_history_description which contains
		# action results with real sensitive values (after placeholder replacement during execution)
		if message_type == 'system':
			self.state.history.system_message = message
		elif message_type == 'state':
			if self.sensitive_data:
				message = self._filter_sensitive_data(message)
			self.state.history.state_message = message
		else:
			raise ValueError(f'Invalid state message type: {message_type}')

	def _add_context_message(self, message: BaseMessage) -> None:
		"""Add a contextual message specific to this step (e.g., validation errors, retry instructions, timeout warnings)"""
		# Context messages typically contain error messages and validation info, not action results
		# with sensitive data, so filtering is not needed here
		self.state.history.context_messages.append(message)

	@time_execution_sync('--filter_sensitive_data')
	def _filter_sensitive_data(self, message: BaseMessage) -> BaseMessage:
		"""Filter out sensitive data from the message"""

		def replace_sensitive(value: str) -> str:
			if not self.sensitive_data:
				return value

			sensitive_values = collect_sensitive_data_values(self.sensitive_data)

			# If there are no valid sensitive data entries, just return the original value
			if not sensitive_values:
				logger.warning('No valid entries found in sensitive_data dictionary')
				return value

			return redact_sensitive_string(value, sensitive_values)

		if isinstance(message.content, str):
			message.content = replace_sensitive(message.content)
		elif isinstance(message.content, list):
			for i, item in enumerate(message.content):
				if isinstance(item, ContentPartTextParam):
					item.text = replace_sensitive(item.text)
					message.content[i] = item
		return message