"""Data models for code-use mode.""" from __future__ import annotations import json from enum import Enum from pathlib import Path from typing import Any from pydantic import BaseModel, ConfigDict, Field, PrivateAttr from uuid_extensions import uuid7str from browser_use.tokens.views import UsageSummary class CellType(str, Enum): """Type of notebook cell.""" CODE = 'code' MARKDOWN = 'markdown' class ExecutionStatus(str, Enum): """Execution status of a cell.""" PENDING = 'pending' RUNNING = 'running' SUCCESS = 'success' ERROR = 'error' class CodeCell(BaseModel): """Represents a code cell in the notebook-like execution.""" model_config = ConfigDict(extra='forbid') id: str = Field(default_factory=uuid7str) cell_type: CellType = CellType.CODE source: str = Field(description='The code to execute') output: str | None = Field(default=None, description='The output of the code execution') execution_count: int | None = Field(default=None, description='The execution count') status: ExecutionStatus = Field(default=ExecutionStatus.PENDING) error: str | None = Field(default=None, description='Error message if execution failed') browser_state: str | None = Field(default=None, description='Browser state after execution') class NotebookSession(BaseModel): """Represents a notebook-like session.""" model_config = ConfigDict(extra='forbid') id: str = Field(default_factory=uuid7str) cells: list[CodeCell] = Field(default_factory=list) current_execution_count: int = Field(default=0) namespace: dict[str, Any] = Field(default_factory=dict, description='Current namespace state') _complete_history: list[CodeAgentHistory] = PrivateAttr(default_factory=list) _usage_summary: UsageSummary | None = PrivateAttr(default=None) def add_cell(self, source: str) -> CodeCell: """Add a new code cell to the session.""" cell = CodeCell(source=source) self.cells.append(cell) return cell def get_cell(self, cell_id: str) -> CodeCell | None: """Get a cell by ID.""" for cell in self.cells: if cell.id == cell_id: return cell return None def get_latest_cell(self) -> CodeCell | None: """Get the most recently added cell.""" if self.cells: return self.cells[-1] return None def increment_execution_count(self) -> int: """Increment and return the execution count.""" self.current_execution_count += 1 return self.current_execution_count @property def history(self) -> CodeAgentHistoryList: """Get the history as an AgentHistoryList-compatible object.""" return CodeAgentHistoryList(self._complete_history, self._usage_summary) class NotebookExport(BaseModel): """Export format for Jupyter notebook.""" model_config = ConfigDict(extra='forbid') nbformat: int = Field(default=4) nbformat_minor: int = Field(default=5) metadata: dict[str, Any] = Field(default_factory=dict) cells: list[dict[str, Any]] = Field(default_factory=list) class CodeAgentModelOutput(BaseModel): """Model output for CodeAgent - contains the code and full LLM response.""" model_config = ConfigDict(extra='forbid') model_output: str = Field(description='The extracted code from the LLM response') full_response: str = Field(description='The complete LLM response including any text/reasoning') class CodeAgentResult(BaseModel): """Result of executing a code cell in CodeAgent.""" model_config = ConfigDict(extra='forbid') extracted_content: str | None = Field(default=None, description='Output from code execution') error: str | None = Field(default=None, description='Error message if execution failed') is_done: bool = Field(default=False, description='Whether task is marked as done') success: bool | None = Field(default=None, description='Self-reported success from done() call') class CodeAgentState(BaseModel): """State information for a CodeAgent step.""" model_config = ConfigDict(extra='forbid', arbitrary_types_allowed=True) url: str | None = Field(default=None, description='Current page URL') title: str | None = Field(default=None, description='Current page title') screenshot_path: str | None = Field(default=None, description='Path to screenshot file') def get_screenshot(self) -> str | None: """Load screenshot from disk and return as base64 string.""" if not self.screenshot_path: return None import base64 from pathlib import Path path_obj = Path(self.screenshot_path) if not path_obj.exists(): return None try: with open(path_obj, 'rb') as f: screenshot_data = f.read() return base64.b64encode(screenshot_data).decode('utf-8') except Exception: return None class CodeAgentStepMetadata(BaseModel): """Metadata for a single CodeAgent step including timing and token information.""" model_config = ConfigDict(extra='forbid') input_tokens: int | None = Field(default=None, description='Number of input tokens used') output_tokens: int | None = Field(default=None, description='Number of output tokens used') step_start_time: float = Field(description='Step start timestamp (Unix time)') step_end_time: float = Field(description='Step end timestamp (Unix time)') @property def duration_seconds(self) -> float: """Calculate step duration in seconds.""" return self.step_end_time - self.step_start_time class CodeAgentHistory(BaseModel): """History item for CodeAgent actions.""" model_config = ConfigDict(extra='forbid', arbitrary_types_allowed=True) model_output: CodeAgentModelOutput | None = Field(default=None, description='LLM output for this step') result: list[CodeAgentResult] = Field(default_factory=list, description='Results from code execution') state: CodeAgentState = Field(description='Browser state at this step') metadata: CodeAgentStepMetadata | None = Field(default=None, description='Step timing and token metadata') screenshot_path: str | None = Field(default=None, description='Legacy field for screenshot path') def model_dump(self, **kwargs) -> dict[str, Any]: """Custom serialization for CodeAgentHistory.""" return { 'model_output': self.model_output.model_dump() if self.model_output else None, 'result': [r.model_dump() for r in self.result], 'state': self.state.model_dump(), 'metadata': self.metadata.model_dump() if self.metadata else None, 'screenshot_path': self.screenshot_path, } class CodeAgentHistoryList: """Compatibility wrapper for CodeAgentHistory that provides AgentHistoryList-like API.""" def __init__(self, complete_history: list[CodeAgentHistory], usage_summary: UsageSummary | None) -> None: """Initialize with CodeAgent history data.""" self._complete_history = complete_history self._usage_summary = usage_summary @property def history(self) -> list[CodeAgentHistory]: """Get the raw history list.""" return self._complete_history @property def usage(self) -> UsageSummary | None: """Get the usage summary.""" return self._usage_summary def __len__(self) -> int: """Return the number of history items.""" return len(self._complete_history) def __str__(self) -> str: """Representation of the CodeAgentHistoryList object.""" return f'CodeAgentHistoryList(steps={len(self._complete_history)}, action_results={len(self.action_results())})' def __repr__(self) -> str: """Representation of the CodeAgentHistoryList object.""" return self.__str__() def final_result(self) -> None | str: """Final result from history.""" if self._complete_history and self._complete_history[-1].result: return self._complete_history[-1].result[-1].extracted_content return None def is_done(self) -> bool: """Check if the agent is done.""" if self._complete_history and len(self._complete_history[-1].result) > 0: last_result = self._complete_history[-1].result[-1] return last_result.is_done is True return False def is_successful(self) -> bool | None: """Check if the agent completed successfully.""" if self._complete_history and len(self._complete_history[-1].result) > 0: last_result = self._complete_history[-1].result[-1] if last_result.is_done is True: return last_result.success return None def errors(self) -> list[str | None]: """Get all errors from history, with None for steps without errors.""" errors = [] for h in self._complete_history: step_errors = [r.error for r in h.result if r.error] # each step can have only one error errors.append(step_errors[0] if step_errors else None) return errors def has_errors(self) -> bool: """Check if the agent has any non-None errors.""" return any(error is not None for error in self.errors()) def urls(self) -> list[str | None]: """Get all URLs from history.""" return [h.state.url if h.state.url is not None else None for h in self._complete_history] def screenshot_paths(self, n_last: int | None = None, return_none_if_not_screenshot: bool = True) -> list[str | None]: """Get all screenshot paths from history.""" if n_last == 0: return [] if n_last is None: if return_none_if_not_screenshot: return [h.state.screenshot_path if h.state.screenshot_path is not None else None for h in self._complete_history] else: return [h.state.screenshot_path for h in self._complete_history if h.state.screenshot_path is not None] else: if return_none_if_not_screenshot: return [ h.state.screenshot_path if h.state.screenshot_path is not None else None for h in self._complete_history[-n_last:] ] else: return [h.state.screenshot_path for h in self._complete_history[-n_last:] if h.state.screenshot_path is not None] def screenshots(self, n_last: int | None = None, return_none_if_not_screenshot: bool = True) -> list[str | None]: """Get all screenshots from history as base64 strings.""" if n_last == 0: return [] history_items = self._complete_history if n_last is None else self._complete_history[-n_last:] screenshots = [] for item in history_items: screenshot_b64 = item.state.get_screenshot() if screenshot_b64: screenshots.append(screenshot_b64) else: if return_none_if_not_screenshot: screenshots.append(None) return screenshots def action_results(self) -> list[CodeAgentResult]: """Get all results from history.""" results = [] for h in self._complete_history: results.extend([r for r in h.result if r]) return results def extracted_content(self) -> list[str]: """Get all extracted content from history.""" content = [] for h in self._complete_history: content.extend([r.extracted_content for r in h.result if r.extracted_content]) return content def number_of_steps(self) -> int: """Get the number of steps in the history.""" return len(self._complete_history) def total_duration_seconds(self) -> float: """Get total duration of all steps in seconds.""" total = 0.0 for h in self._complete_history: if h.metadata: total += h.metadata.duration_seconds return total def last_action(self) -> None | dict: """Last action in history - returns the last code execution.""" if self._complete_history and self._complete_history[-1].model_output: return { 'execute_code': { 'code': self._complete_history[-1].model_output.model_output, 'full_response': self._complete_history[-1].model_output.full_response, } } return None def action_names(self) -> list[str]: """Get all action names from history - returns 'execute_code' for each code execution.""" action_names = [] for action in self.model_actions(): actions = list(action.keys()) if actions: action_names.append(actions[0]) return action_names def model_thoughts(self) -> list[Any]: """Get all thoughts from history - returns model_output for CodeAgent.""" return [h.model_output for h in self._complete_history if h.model_output] def model_outputs(self) -> list[CodeAgentModelOutput]: """Get all model outputs from history.""" return [h.model_output for h in self._complete_history if h.model_output] def model_actions(self) -> list[dict]: """Get all actions from history - returns code execution actions with their code.""" actions = [] for h in self._complete_history: if h.model_output: # Create one action dict per result (code execution) for _ in h.result: action_dict = { 'execute_code': { 'code': h.model_output.model_output, 'full_response': h.model_output.full_response, } } actions.append(action_dict) return actions def action_history(self) -> list[list[dict]]: """Get truncated action history grouped by step.""" step_outputs = [] for h in self._complete_history: step_actions = [] if h.model_output: for result in h.result: action_dict = { 'execute_code': { 'code': h.model_output.model_output, }, 'result': { 'extracted_content': result.extracted_content, 'is_done': result.is_done, 'success': result.success, 'error': result.error, }, } step_actions.append(action_dict) step_outputs.append(step_actions) return step_outputs def model_actions_filtered(self, include: list[str] | None = None) -> list[dict]: """Get all model actions from history filtered - returns empty for CodeAgent.""" return [] def add_item(self, history_item: CodeAgentHistory) -> None: """Add a history item to the list.""" self._complete_history.append(history_item) def model_dump(self, **kwargs) -> dict[str, Any]: """Custom serialization for CodeAgentHistoryList.""" return { 'history': [h.model_dump(**kwargs) for h in self._complete_history], 'usage': self._usage_summary.model_dump() if self._usage_summary else None, } def save_to_file(self, filepath: str | Path, sensitive_data: dict[str, str | dict[str, str]] | None = None) -> None: """Save history to JSON file.""" try: Path(filepath).parent.mkdir(parents=True, exist_ok=True) data = self.model_dump() with open(filepath, 'w', encoding='utf-8') as f: json.dump(data, f, indent=2) except Exception as e: raise e