browser-use/browser_use/agent/views.py

from __future__ import annotations

import hashlib
import json
import logging
import re
import traceback
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Generic, Literal

from pydantic import BaseModel, ConfigDict, Field, ValidationError, create_model, model_validator
from typing_extensions import TypeVar
from uuid_extensions import uuid7str

from browser_use.agent.message_manager.views import MessageManagerState
from browser_use.browser.views import BrowserStateHistory
from browser_use.dom.views import DEFAULT_INCLUDE_ATTRIBUTES, DOMInteractedElement, DOMSelectorMap

# from browser_use.dom.history_tree_processor.service import (
# 	DOMElementNode,
# 	DOMHistoryElement,
# 	HistoryTreeProcessor,
# )
# from browser_use.dom.views import SelectorMap
from browser_use.filesystem.file_system import FileSystemState
from browser_use.llm.base import BaseChatModel
from browser_use.tokens.views import UsageSummary
from browser_use.tools.registry.views import ActionModel
from browser_use.utils import collect_sensitive_data_values, redact_sensitive_string

logger = logging.getLogger(__name__)


class MessageCompactionSettings(BaseModel):
	"""Summarizes older history into a compact memory block to reduce prompt size."""

	enabled: bool = True
	compact_every_n_steps: int = 25
	trigger_char_count: int | None = None  # Min char floor; set via trigger_token_count if preferred
	trigger_token_count: int | None = None  # Alternative to trigger_char_count (~4 chars/token)
	chars_per_token: float = 4.0
	keep_last_items: int = 6
	summary_max_chars: int = 6000
	include_read_state: bool = False
	compaction_llm: BaseChatModel | None = None

	@model_validator(mode='after')
	def _resolve_trigger_threshold(self) -> MessageCompactionSettings:
		if self.trigger_char_count is not None and self.trigger_token_count is not None:
			raise ValueError('Set trigger_char_count or trigger_token_count, not both.')
		if self.trigger_token_count is not None:
			self.trigger_char_count = int(self.trigger_token_count * self.chars_per_token)
		elif self.trigger_char_count is None:
			self.trigger_char_count = 40000  # ~10k tokens
		return self


class AgentSettings(BaseModel):
	"""Configuration options for the Agent"""

	use_vision: bool | Literal['auto'] = True
	vision_detail_level: Literal['auto', 'low', 'high'] = 'auto'
	save_conversation_path: str | Path | None = None
	save_conversation_path_encoding: str | None = 'utf-8'
	max_failures: int = 5
	generate_gif: bool | str = False
	override_system_message: str | None = None
	extend_system_message: str | None = None
	include_attributes: list[str] | None = DEFAULT_INCLUDE_ATTRIBUTES
	max_actions_per_step: int = 5
	use_thinking: bool = True
	flash_mode: bool = False  # If enabled, disables evaluation_previous_goal and next_goal, and sets use_thinking = False
	use_judge: bool = True
	ground_truth: str | None = None  # Ground truth answer or criteria for judge validation
	max_history_items: int | None = None
	message_compaction: MessageCompactionSettings | None = None
	enable_planning: bool = True
	planning_replan_on_stall: int = 3  # consecutive failures before replan nudge; 0 = disabled
	planning_exploration_limit: int = 5  # steps without a plan before nudge; 0 = disabled

	page_extraction_llm: BaseChatModel | None = None
	calculate_cost: bool = False
	include_tool_call_examples: bool = False
	llm_timeout: int = 60  # Timeout in seconds for LLM calls (auto-detected: 30s for gemini, 90s for o3, 60s default)
	step_timeout: int = 180  # Timeout in seconds for each step
	final_response_after_failure: bool = True  # If True, attempt one final recovery call after max_failures

	# Loop detection settings
	loop_detection_window: int = 20  # Rolling window size for action similarity tracking
	loop_detection_enabled: bool = True  # Whether to enable loop detection nudges
	max_clickable_elements_length: int = 40000  # Max characters for clickable elements in prompt


class PageFingerprint(BaseModel):
	"""Lightweight fingerprint of the browser page state."""

	model_config = ConfigDict(frozen=True)

	url: str
	element_count: int
	text_hash: str  # First 16 chars of SHA-256 of the DOM text representation

	@staticmethod
	def from_browser_state(url: str, dom_text: str, element_count: int) -> PageFingerprint:
		text_hash = hashlib.sha256(dom_text.encode('utf-8', errors='replace')).hexdigest()[:16]
		return PageFingerprint(url=url, element_count=element_count, text_hash=text_hash)


def _normalize_action_for_hash(action_name: str, params: dict[str, Any]) -> str:
	"""Normalize action parameters for similarity hashing.

	For search actions: strip minor keyword variations by sorting tokens.
	For click actions: hash by element type + rough text content, ignoring index.
	For navigate: hash by URL domain only.
	For others: hash by action_name + sorted params.
	"""
	if action_name == 'search':
		query = str(params.get('query', ''))
		# Normalize search: lowercase, sort tokens, collapse whitespace
		tokens = sorted(set(re.sub(r'[^\w\s]', ' ', query.lower()).split()))
		engine = params.get('engine', 'google')
		return f'search|{engine}|{"|".join(tokens)}'

	if action_name in ('click', 'input'):
		# For element-interaction actions, we only use the index (element identity).
		# Two clicks on the same element index are the same action.
		index = params.get('index')
		if action_name == 'input':
			text = str(params.get('text', ''))
			# Normalize input text: lowercase, strip whitespace
			return f'input|{index}|{text.strip().lower()}'
		return f'click|{index}'

	if action_name == 'navigate':
		url = str(params.get('url', ''))
		# Hash by full URL — navigating to different paths is genuine exploration,
		# only repeated navigation to the exact same URL is a loop signal.
		return f'navigate|{url}'

	if action_name == 'scroll':
		direction = 'down' if params.get('down', True) else 'up'
		index = params.get('index')
		return f'scroll|{direction}|{index}'

	# Default: hash by action name + sorted params (excluding None values)
	filtered = {k: v for k, v in sorted(params.items()) if v is not None}
	return f'{action_name}|{json.dumps(filtered, sort_keys=True, default=str)}'


def compute_action_hash(action_name: str, params: dict[str, Any]) -> str:
	"""Compute a stable hash string for an action based on type + normalized parameters."""
	normalized = _normalize_action_for_hash(action_name, params)
	return hashlib.sha256(normalized.encode('utf-8')).hexdigest()[:12]


class ActionLoopDetector(BaseModel):
	"""Tracks action repetition and page stagnation to detect behavioral loops.

	This is a soft detection system — it generates context messages for the LLM
	but never blocks actions. The agent can still repeat if it wants to.
	"""

	model_config = ConfigDict(arbitrary_types_allowed=True)

	# Rolling window of recent action hashes
	window_size: int = 20
	recent_action_hashes: list[str] = Field(default_factory=list)

	# Page fingerprint tracking for stagnation detection
	recent_page_fingerprints: list[PageFingerprint] = Field(default_factory=list)

	# Current repetition state
	max_repetition_count: int = 0  # Highest count of any single hash in the window
	most_repeated_hash: str | None = None
	consecutive_stagnant_pages: int = 0  # How many consecutive steps had the same page fingerprint

	def record_action(self, action_name: str, params: dict[str, Any]) -> None:
		"""Record an action and update repetition statistics."""
		h = compute_action_hash(action_name, params)
		self.recent_action_hashes.append(h)
		# Trim to window size
		if len(self.recent_action_hashes) > self.window_size:
			self.recent_action_hashes = self.recent_action_hashes[-self.window_size :]
		self._update_repetition_stats()

	def record_page_state(self, url: str, dom_text: str, element_count: int) -> None:
		"""Record the current page fingerprint and update stagnation count."""
		fp = PageFingerprint.from_browser_state(url, dom_text, element_count)
		if self.recent_page_fingerprints and self.recent_page_fingerprints[-1] == fp:
			self.consecutive_stagnant_pages += 1
		else:
			self.consecutive_stagnant_pages = 0
		self.recent_page_fingerprints.append(fp)
		# Keep only last few fingerprints (no need for a large window)
		if len(self.recent_page_fingerprints) > 5:
			self.recent_page_fingerprints = self.recent_page_fingerprints[-5:]

	def _update_repetition_stats(self) -> None:
		"""Recompute max_repetition_count from the current window."""
		if not self.recent_action_hashes:
			self.max_repetition_count = 0
			self.most_repeated_hash = None
			return
		counts: dict[str, int] = {}
		for h in self.recent_action_hashes:
			counts[h] = counts.get(h, 0) + 1
		self.most_repeated_hash = max(counts, key=lambda k: counts[k])
		self.max_repetition_count = counts[self.most_repeated_hash]

	def get_nudge_message(self) -> str | None:
		"""Return an escalating awareness nudge based on repetition severity, or None if no loop detected."""
		messages: list[str] = []

		# Action repetition nudges (escalating at 5, 8, 12)
		if self.max_repetition_count >= 12:
			messages.append(
				f'Heads up: you have repeated a similar action {self.max_repetition_count} times '
				f'in the last {len(self.recent_action_hashes)} actions. '
				'If you are making progress with each repetition, keep going. '
				'If not, a different approach might get you there faster.'
			)
		elif self.max_repetition_count >= 8:
			messages.append(
				f'Heads up: you have repeated a similar action {self.max_repetition_count} times '
				f'in the last {len(self.recent_action_hashes)} actions. '
				'Are you still making progress with each attempt? '
				'If so, carry on. Otherwise, it might be worth trying a different approach.'
			)
		elif self.max_repetition_count >= 5:
			messages.append(
				f'Heads up: you have repeated a similar action {self.max_repetition_count} times '
				f'in the last {len(self.recent_action_hashes)} actions. '
				'If this is intentional and making progress, carry on. '
				'If not, it might be worth reconsidering your approach.'
			)

		# Page stagnation nudge
		if self.consecutive_stagnant_pages >= 5:
			messages.append(
				f'The page content has not changed across {self.consecutive_stagnant_pages} consecutive actions. '
				'Your actions might not be having the intended effect. '
				'It could be worth trying a different element or approach.'
			)

		if messages:
			return '\n\n'.join(messages)
		return None


class AgentState(BaseModel):
	"""Holds all state information for an Agent"""

	model_config = ConfigDict(arbitrary_types_allowed=True)

	agent_id: str = Field(default_factory=uuid7str)
	n_steps: int = 1
	consecutive_failures: int = 0
	last_result: list[ActionResult] | None = None
	plan: list[PlanItem] | None = None
	current_plan_item_index: int = 0
	plan_generation_step: int | None = None
	last_model_output: AgentOutput | None = None

	# Pause/resume state (kept serialisable for checkpointing)
	paused: bool = False
	stopped: bool = False
	session_initialized: bool = False  # Track if session events have been dispatched
	follow_up_task: bool = False  # Track if the agent is a follow-up task

	message_manager_state: MessageManagerState = Field(default_factory=MessageManagerState)
	file_system_state: FileSystemState | None = None

	# Loop detection state
	loop_detector: ActionLoopDetector = Field(default_factory=ActionLoopDetector)


@dataclass
class AgentStepInfo:
	step_number: int
	max_steps: int

	def is_last_step(self) -> bool:
		"""Check if this is the last step"""
		return self.step_number >= self.max_steps - 1


class JudgementResult(BaseModel):
	"""LLM judgement of agent trace"""

	reasoning: str | None = Field(default=None, description='Explanation of the judgement')
	verdict: bool = Field(description='Whether the trace was successful or not')
	failure_reason: str | None = Field(
		default=None,
		description='Max 5 sentences explanation of why the task was not completed successfully in case of failure. If verdict is true, use an empty string.',
	)
	impossible_task: bool = Field(
		default=False,
		description='True if the task was impossible to complete due to vague instructions, broken website, inaccessible links, missing login credentials, or other insurmountable obstacles',
	)
	reached_captcha: bool = Field(
		default=False,
		description='True if the agent encountered captcha challenges during task execution',
	)


class ActionResult(BaseModel):
	"""Result of executing an action"""

	# For done action
	is_done: bool | None = False
	success: bool | None = None

	# For trace judgement
	judgement: JudgementResult | None = None

	# Error handling - always include in long term memory
	error: str | None = None

	# Files
	attachments: list[str] | None = None  # Files to display in the done message

	# Images (base64 encoded) - separate from text content for efficient handling
	images: list[dict[str, Any]] | None = None  # [{"name": "file.jpg", "data": "base64_string"}]

	# Always include in long term memory
	long_term_memory: str | None = None  # Memory of this action

	# if update_only_read_state is True we add the extracted_content to the agent context only once for the next step
	# if update_only_read_state is False we add the extracted_content to the agent long term memory if no long_term_memory is provided
	extracted_content: str | None = None
	include_extracted_content_only_once: bool = False  # Whether the extracted content should be used to update the read_state

	# Metadata for observability (e.g., click coordinates)
	metadata: dict | None = None

	# Deprecated
	include_in_memory: bool = False  # whether to include in extracted_content inside long_term_memory

	@model_validator(mode='after')
	def validate_success_requires_done(self):
		"""Ensure success=True can only be set when is_done=True"""
		if self.success is True and self.is_done is not True:
			raise ValueError(
				'success=True can only be set when is_done=True. '
				'For regular actions that succeed, leave success as None. '
				'Use success=False only for actions that fail.'
			)
		return self


class RerunSummaryAction(BaseModel):
	"""AI-generated summary for rerun completion"""

	summary: str = Field(description='Summary of what happened during the rerun')
	success: bool = Field(description='Whether the rerun completed successfully based on visual inspection')
	completion_status: Literal['complete', 'partial', 'failed'] = Field(
		description='Status of rerun completion: complete (all steps succeeded), partial (some steps succeeded), failed (task did not complete)'
	)


class StepMetadata(BaseModel):
	"""Metadata for a single step including timing and token information"""

	step_start_time: float
	step_end_time: float
	step_number: int
	step_interval: float | None = None

	@property
	def duration_seconds(self) -> float:
		"""Calculate step duration in seconds"""
		return self.step_end_time - self.step_start_time


class PlanItem(BaseModel):
	text: str
	status: Literal['pending', 'current', 'done', 'skipped'] = 'pending'


class AgentBrain(BaseModel):
	thinking: str | None = None
	evaluation_previous_goal: str
	memory: str
	next_goal: str


class AgentOutput(BaseModel):
	model_config = ConfigDict(arbitrary_types_allowed=True, extra='forbid')

	thinking: str | None = None
	evaluation_previous_goal: str | None = None
	memory: str | None = None
	next_goal: str | None = None
	current_plan_item: int | None = None
	plan_update: list[str] | None = None
	action: list[ActionModel] = Field(
		...,
		json_schema_extra={'min_items': 1},  # Ensure at least one action is provided
	)

	@classmethod
	def model_json_schema(cls, **kwargs):
		schema = super().model_json_schema(**kwargs)
		schema['required'] = ['evaluation_previous_goal', 'memory', 'next_goal', 'action']
		return schema

	@property
	def current_state(self) -> AgentBrain:
		"""For backward compatibility - returns an AgentBrain with the flattened properties"""
		return AgentBrain(
			thinking=self.thinking,
			evaluation_previous_goal=self.evaluation_previous_goal if self.evaluation_previous_goal else '',
			memory=self.memory if self.memory else '',
			next_goal=self.next_goal if self.next_goal else '',
		)

	@staticmethod
	def type_with_custom_actions(custom_actions: type[ActionModel]) -> type[AgentOutput]:
		"""Extend actions with custom actions"""

		model_ = create_model(
			'AgentOutput',
			__base__=AgentOutput,
			action=(
				list[custom_actions],  # type: ignore
				Field(..., description='List of actions to execute', json_schema_extra={'min_items': 1}),
			),
			__module__=AgentOutput.__module__,
		)
		return model_

	@staticmethod
	def type_with_custom_actions_no_thinking(custom_actions: type[ActionModel]) -> type[AgentOutput]:
		"""Extend actions with custom actions and exclude thinking field"""

		class AgentOutputNoThinking(AgentOutput):
			@classmethod
			def model_json_schema(cls, **kwargs):
				schema = super().model_json_schema(**kwargs)
				del schema['properties']['thinking']
				schema['required'] = ['evaluation_previous_goal', 'memory', 'next_goal', 'action']
				return schema

		model = create_model(
			'AgentOutput',
			__base__=AgentOutputNoThinking,
			action=(
				list[custom_actions],  # type: ignore
				Field(..., json_schema_extra={'min_items': 1}),
			),
			__module__=AgentOutputNoThinking.__module__,
		)

		return model

	@staticmethod
	def type_with_custom_actions_flash_mode(custom_actions: type[ActionModel]) -> type[AgentOutput]:
		"""Extend actions with custom actions for flash mode - memory and action fields only"""

		class AgentOutputFlashMode(AgentOutput):
			@classmethod
			def model_json_schema(cls, **kwargs):
				schema = super().model_json_schema(**kwargs)
				# Remove thinking, evaluation_previous_goal, next_goal, and plan fields
				del schema['properties']['thinking']
				del schema['properties']['evaluation_previous_goal']
				del schema['properties']['next_goal']
				schema['properties'].pop('current_plan_item', None)
				schema['properties'].pop('plan_update', None)
				# Update required fields to only include remaining properties
				schema['required'] = ['memory', 'action']
				return schema

		model = create_model(
			'AgentOutput',
			__base__=AgentOutputFlashMode,
			action=(
				list[custom_actions],  # type: ignore
				Field(..., json_schema_extra={'min_items': 1}),
			),
			__module__=AgentOutputFlashMode.__module__,
		)

		return model


class AgentHistory(BaseModel):
	"""History item for agent actions"""

	model_output: AgentOutput | None
	result: list[ActionResult]
	state: BrowserStateHistory
	metadata: StepMetadata | None = None
	state_message: str | None = None

	model_config = ConfigDict(arbitrary_types_allowed=True, protected_namespaces=())

	@staticmethod
	def get_interacted_element(model_output: AgentOutput, selector_map: DOMSelectorMap) -> list[DOMInteractedElement | None]:
		elements = []
		for action in model_output.action:
			index = action.get_index()
			if index is not None and index in selector_map:
				el = selector_map[index]
				elements.append(DOMInteractedElement.load_from_enhanced_dom_tree(el))
			else:
				elements.append(None)
		return elements

	def _filter_sensitive_data_from_string(self, value: str, sensitive_data: dict[str, str | dict[str, str]] | None) -> str:
		"""Filter out sensitive data from a string value"""
		if not sensitive_data:
			return value

		sensitive_values = collect_sensitive_data_values(sensitive_data)

		# If there are no valid sensitive data entries, just return the original value
		if not sensitive_values:
			return value

		return redact_sensitive_string(value, sensitive_values)

	def _filter_sensitive_data_from_dict(
		self, data: dict[str, Any], sensitive_data: dict[str, str | dict[str, str]] | None
	) -> dict[str, Any]:
		"""Recursively filter sensitive data from a dictionary"""
		if not sensitive_data:
			return data

		filtered_data = {}
		for key, value in data.items():
			if isinstance(value, str):
				filtered_data[key] = self._filter_sensitive_data_from_string(value, sensitive_data)
			elif isinstance(value, dict):
				filtered_data[key] = self._filter_sensitive_data_from_dict(value, sensitive_data)
			elif isinstance(value, list):
				filtered_data[key] = [
					self._filter_sensitive_data_from_string(item, sensitive_data)
					if isinstance(item, str)
					else self._filter_sensitive_data_from_dict(item, sensitive_data)
					if isinstance(item, dict)
					else item
					for item in value
				]
			else:
				filtered_data[key] = value
		return filtered_data

	def model_dump(self, sensitive_data: dict[str, str | dict[str, str]] | None = None, **kwargs) -> dict[str, Any]:
		"""Custom serialization handling circular references and filtering sensitive data"""

		# Handle action serialization
		model_output_dump = None
		if self.model_output:
			action_dump = [action.model_dump(exclude_none=True, mode='json') for action in self.model_output.action]

			# Filter sensitive data only from input action parameters if sensitive_data is provided
			if sensitive_data:
				action_dump = [
					self._filter_sensitive_data_from_dict(action, sensitive_data) if 'input' in action else action
					for action in action_dump
				]

			model_output_dump = {
				'evaluation_previous_goal': self.model_output.evaluation_previous_goal,
				'memory': self.model_output.memory,
				'next_goal': self.model_output.next_goal,
				'action': action_dump,  # This preserves the actual action data
			}
			# Only include thinking if it's present
			if self.model_output.thinking is not None:
				model_output_dump['thinking'] = self.model_output.thinking
			if self.model_output.current_plan_item is not None:
				model_output_dump['current_plan_item'] = self.model_output.current_plan_item
			if self.model_output.plan_update is not None:
				model_output_dump['plan_update'] = self.model_output.plan_update

		# Handle result serialization - don't filter ActionResult data
		# as it should contain meaningful information for the agent
		result_dump = [r.model_dump(exclude_none=True, mode='json') for r in self.result]

		return {
			'model_output': model_output_dump,
			'result': result_dump,
			'state': self.state.to_dict(),
			'metadata': self.metadata.model_dump() if self.metadata else None,
			'state_message': self.state_message,
		}


AgentStructuredOutput = TypeVar('AgentStructuredOutput', bound=BaseModel)


class AgentHistoryList(BaseModel, Generic[AgentStructuredOutput]):
	"""List of AgentHistory messages, i.e. the history of the agent's actions and thoughts."""

	history: list[AgentHistory]
	usage: UsageSummary | None = None

	_output_model_schema: type[AgentStructuredOutput] | None = None

	def total_duration_seconds(self) -> float:
		"""Get total duration of all steps in seconds"""
		total = 0.0
		for h in self.history:
			if h.metadata:
				total += h.metadata.duration_seconds
		return total

	def __len__(self) -> int:
		"""Return the number of history items"""
		return len(self.history)

	def __str__(self) -> str:
		"""Representation of the AgentHistoryList object"""
		return f'AgentHistoryList(all_results={self.action_results()}, all_model_outputs={self.model_actions()})'

	def add_item(self, history_item: AgentHistory) -> None:
		"""Add a history item to the list"""
		self.history.append(history_item)

	def __repr__(self) -> str:
		"""Representation of the AgentHistoryList object"""
		return self.__str__()

	def save_to_file(self, filepath: str | Path, sensitive_data: dict[str, str | dict[str, str]] | None = None) -> None:
		"""Save history to JSON file with proper serialization and optional sensitive data filtering"""
		try:
			Path(filepath).parent.mkdir(parents=True, exist_ok=True)
			data = self.model_dump(sensitive_data=sensitive_data)
			with open(filepath, 'w', encoding='utf-8') as f:
				json.dump(data, f, indent=2, ensure_ascii=False)
		except Exception as e:
			raise e

	# def save_as_playwright_script(
	# 	self,
	# 	output_path: str | Path,
	# 	sensitive_data_keys: list[str] | None = None,
	# 	browser_config: BrowserConfig | None = None,
	# 	context_config: BrowserContextConfig | None = None,
	# ) -> None:
	# 	"""
	# 	Generates a Playwright script based on the agent's history and saves it to a file.
	# 	Args:
	# 		output_path: The path where the generated Python script will be saved.
	# 		sensitive_data_keys: A list of keys used as placeholders for sensitive data
	# 							 (e.g., ['username_placeholder', 'password_placeholder']).
	# 							 These will be loaded from environment variables in the
	# 							 generated script.
	# 		browser_config: Configuration of the original Browser instance.
	# 		context_config: Configuration of the original BrowserContext instance.
	# 	"""
	# 	from browser_use.agent.playwright_script_generator import PlaywrightScriptGenerator

	# 	try:
	# 		serialized_history = self.model_dump()['history']
	# 		generator = PlaywrightScriptGenerator(serialized_history, sensitive_data_keys, browser_config, context_config)

	# 		script_content = generator.generate_script_content()
	# 		path_obj = Path(output_path)
	# 		path_obj.parent.mkdir(parents=True, exist_ok=True)
	# 		with open(path_obj, 'w', encoding='utf-8') as f:
	# 			f.write(script_content)
	# 	except Exception as e:
	# 		raise e

	def model_dump(self, **kwargs) -> dict[str, Any]:
		"""Custom serialization that properly uses AgentHistory's model_dump"""
		return {
			'history': [h.model_dump(**kwargs) for h in self.history],
		}

	@classmethod
	def load_from_dict(cls, data: dict[str, Any], output_model: type[AgentOutput]) -> AgentHistoryList:
		# loop through history and validate output_model actions to enrich with custom actions
		for h in data.get('history', []):
			# Use .get() to avoid KeyError on incomplete or legacy history entries
			model_output = h.get('model_output')
			if model_output:
				if isinstance(model_output, dict):
					h['model_output'] = output_model.model_validate(model_output)
				else:
					h['model_output'] = None
			state = h.get('state') or {}
			if 'interacted_element' not in state:
				state['interacted_element'] = None
				h['state'] = state

		history = cls.model_validate(data)
		return history

	@classmethod
	def load_from_file(cls, filepath: str | Path, output_model: type[AgentOutput]) -> AgentHistoryList:
		"""Load history from JSON file"""
		with open(filepath, encoding='utf-8') as f:
			data = json.load(f)
		return cls.load_from_dict(data, output_model)

	def last_action(self) -> None | dict:
		"""Last action in history"""
		if self.history and self.history[-1].model_output:
			return self.history[-1].model_output.action[-1].model_dump(exclude_none=True, mode='json')
		return None

	def errors(self) -> list[str | None]:
		"""Get all errors from history, with None for steps without errors"""
		errors = []
		for h in self.history:
			step_errors = [r.error for r in h.result if r.error]

			# each step can have only one error
			errors.append(step_errors[0] if step_errors else None)
		return errors

	def final_result(self) -> None | str:
		"""Final result from history"""
		if self.history and len(self.history[-1].result) > 0:
			last_result = self.history[-1].result[-1]
			if last_result.extracted_content:
				return last_result.extracted_content
		return None

	def is_done(self) -> bool:
		"""Check if the agent is done"""
		if self.history and len(self.history[-1].result) > 0:
			last_result = self.history[-1].result[-1]
			return last_result.is_done is True
		return False

	def is_successful(self) -> bool | None:
		"""Check if the agent completed successfully - the agent decides in the last step if it was successful or not. None if not done yet."""
		if self.history and len(self.history[-1].result) > 0:
			last_result = self.history[-1].result[-1]
			if last_result.is_done is True:
				return last_result.success
		return None

	def has_errors(self) -> bool:
		"""Check if the agent has any non-None errors"""
		return any(error is not None for error in self.errors())

	def judgement(self) -> dict | None:
		"""Get the judgement result as a dictionary if it exists"""
		if self.history and len(self.history[-1].result) > 0:
			last_result = self.history[-1].result[-1]
			if last_result.judgement:
				return last_result.judgement.model_dump()
		return None

	def is_judged(self) -> bool:
		"""Check if the agent trace has been judged"""
		if self.history and len(self.history[-1].result) > 0:
			last_result = self.history[-1].result[-1]
			return last_result.judgement is not None
		return False

	def is_validated(self) -> bool | None:
		"""Check if the judge validated the agent execution (verdict is True). Returns None if not judged yet."""
		if self.history and len(self.history[-1].result) > 0:
			last_result = self.history[-1].result[-1]
			if last_result.judgement:
				return last_result.judgement.verdict
		return None

	def urls(self) -> list[str | None]:
		"""Get all unique URLs from history"""
		return [h.state.url if h.state.url is not None else None for h in self.history]

	def screenshot_paths(self, n_last: int | None = None, return_none_if_not_screenshot: bool = True) -> list[str | None]:
		"""Get all screenshot paths from history"""
		if n_last == 0:
			return []
		if n_last is None:
			if return_none_if_not_screenshot:
				return [h.state.screenshot_path if h.state.screenshot_path is not None else None for h in self.history]
			else:
				return [h.state.screenshot_path for h in self.history if h.state.screenshot_path is not None]
		else:
			if return_none_if_not_screenshot:
				return [h.state.screenshot_path if h.state.screenshot_path is not None else None for h in self.history[-n_last:]]
			else:
				return [h.state.screenshot_path for h in self.history[-n_last:] if h.state.screenshot_path is not None]

	def screenshots(self, n_last: int | None = None, return_none_if_not_screenshot: bool = True) -> list[str | None]:
		"""Get all screenshots from history as base64 strings"""
		if n_last == 0:
			return []

		history_items = self.history if n_last is None else self.history[-n_last:]
		screenshots = []

		for item in history_items:
			screenshot_b64 = item.state.get_screenshot()
			if screenshot_b64:
				screenshots.append(screenshot_b64)
			else:
				if return_none_if_not_screenshot:
					screenshots.append(None)
				# If return_none_if_not_screenshot is False, we skip None values

		return screenshots

	def action_names(self) -> list[str]:
		"""Get all action names from history"""
		action_names = []
		for action in self.model_actions():
			actions = list(action.keys())
			if actions:
				action_names.append(actions[0])
		return action_names

	def model_thoughts(self) -> list[AgentBrain]:
		"""Get all thoughts from history"""
		return [h.model_output.current_state for h in self.history if h.model_output]

	def model_outputs(self) -> list[AgentOutput]:
		"""Get all model outputs from history"""
		return [h.model_output for h in self.history if h.model_output]

	# get all actions with params
	def model_actions(self) -> list[dict]:
		"""Get all actions from history"""
		outputs = []

		for h in self.history:
			if h.model_output:
				# Guard against None interacted_element before zipping
				interacted_elements = h.state.interacted_element or [None] * len(h.model_output.action)
				for action, interacted_element in zip(h.model_output.action, interacted_elements):
					output = action.model_dump(exclude_none=True, mode='json')
					output['interacted_element'] = interacted_element
					outputs.append(output)
		return outputs

	def action_history(self) -> list[list[dict]]:
		"""Get truncated action history with only essential fields"""
		step_outputs = []

		for h in self.history:
			step_actions = []
			if h.model_output:
				# Guard against None interacted_element before zipping
				interacted_elements = h.state.interacted_element or [None] * len(h.model_output.action)
				# Zip actions with interacted elements and results
				for action, interacted_element, result in zip(h.model_output.action, interacted_elements, h.result):
					action_output = action.model_dump(exclude_none=True, mode='json')
					action_output['interacted_element'] = interacted_element
					# Only keep long_term_memory from result
					action_output['result'] = result.long_term_memory if result and result.long_term_memory else None
					step_actions.append(action_output)
			step_outputs.append(step_actions)

		return step_outputs

	def action_results(self) -> list[ActionResult]:
		"""Get all results from history"""
		results = []
		for h in self.history:
			results.extend([r for r in h.result if r])
		return results

	def extracted_content(self) -> list[str]:
		"""Get all extracted content from history"""
		content = []
		for h in self.history:
			content.extend([r.extracted_content for r in h.result if r.extracted_content])
		return content

	def model_actions_filtered(self, include: list[str] | None = None) -> list[dict]:
		"""Get all model actions from history as JSON"""
		if include is None:
			include = []
		outputs = self.model_actions()
		result = []
		for o in outputs:
			for i in include:
				if i == list(o.keys())[0]:
					result.append(o)
		return result

	def number_of_steps(self) -> int:
		"""Get the number of steps in the history"""
		return len(self.history)

	def agent_steps(self) -> list[str]:
		"""Format agent history as readable step descriptions for judge evaluation."""
		steps = []

		# Iterate through history items (each is an AgentHistory)
		for i, h in enumerate(self.history):
			step_text = f'Step {i + 1}:\n'

			# Get actions from model_output
			if h.model_output and h.model_output.action:
				# Use model_dump with mode='json' to serialize enums properly
				actions_list = [action.model_dump(exclude_none=True, mode='json') for action in h.model_output.action]
				action_json = json.dumps(actions_list, indent=1)
				step_text += f'Actions: {action_json}\n'

			# Get results (already a list[ActionResult] in h.result)
			if h.result:
				for j, result in enumerate(h.result):
					if result.extracted_content:
						content = str(result.extracted_content)
						step_text += f'Result {j + 1}: {content}\n'

					if result.error:
						error = str(result.error)
						step_text += f'Error {j + 1}: {error}\n'

			steps.append(step_text)

		return steps

	@property
	def structured_output(self) -> AgentStructuredOutput | None:
		"""Get the structured output from the history

		Returns:
			The structured output if both final_result and _output_model_schema are available,
			otherwise None
		"""
		final_result = self.final_result()
		if final_result is not None and self._output_model_schema is not None:
			return self._output_model_schema.model_validate_json(final_result)

		return None

	def get_structured_output(self, output_model: type[AgentStructuredOutput]) -> AgentStructuredOutput | None:
		"""Get the structured output from history, parsing with the provided schema.

		Use this method when accessing structured output from sandbox execution,
		since the _output_model_schema private attribute is not preserved during serialization.

		Args:
			output_model: The Pydantic model class to parse the output with

		Returns:
			The parsed structured output, or None if no final result exists
		"""
		final_result = self.final_result()
		if final_result is not None:
			return output_model.model_validate_json(final_result)
		return None


class AgentError:
	"""Container for agent error handling"""

	VALIDATION_ERROR = 'Invalid model output format. Please follow the correct schema.'
	RATE_LIMIT_ERROR = 'Rate limit reached. Waiting before retry.'
	NO_VALID_ACTION = 'No valid action found'

	@staticmethod
	def format_error(error: Exception, include_trace: bool = False) -> str:
		"""Format error message based on error type and optionally include trace"""
		message = ''
		if isinstance(error, ValidationError):
			return f'{AgentError.VALIDATION_ERROR}\nDetails: {str(error)}'
		# Lazy import to avoid loading openai SDK (~800ms) at module level
		from openai import RateLimitError

		if isinstance(error, RateLimitError):
			return AgentError.RATE_LIMIT_ERROR

		# Handle LLM response validation errors from llm_use
		error_str = str(error)
		if 'LLM response missing required fields' in error_str or 'Expected format: AgentOutput' in error_str:
			# Extract the main error message without the huge stacktrace
			lines = error_str.split('\n')
			main_error = lines[0] if lines else error_str

			# Provide a clearer error message
			helpful_msg = f'{main_error}\n\nThe previous response had an invalid output structure. Please stick to the required output format. \n\n'

			if include_trace:
				helpful_msg += f'\n\nFull stacktrace:\n{traceback.format_exc()}'

			return helpful_msg

		if include_trace:
			return f'{str(error)}\nStacktrace:\n{traceback.format_exc()}'
		return f'{str(error)}'


class DetectedVariable(BaseModel):
	"""A detected variable in agent history"""

	name: str
	original_value: str
	type: str = 'string'
	format: str | None = None


class VariableMetadata(BaseModel):
	"""Metadata about detected variables in history"""

	detected_variables: dict[str, DetectedVariable] = Field(default_factory=dict)