mirror of
https://github.com/browser-use/browser-use
synced 2026-05-13 17:56:35 +02:00
136 lines
3.6 KiB
Python
136 lines
3.6 KiB
Python
from dataclasses import dataclass, field
|
|
from typing import Any
|
|
|
|
from bubus import BaseEvent
|
|
from pydantic import BaseModel
|
|
|
|
from browser_use.dom.views import DOMInteractedElement, SerializedDOMState
|
|
|
|
# Base64-encoded PNG used as the known placeholder screenshot for about:blank
# pages (a 4x4 white image).
PLACEHOLDER_4PX_SCREENSHOT = (
    'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII='
)
|
|
|
|
|
|
# Pydantic
|
|
class TabInfo(BaseModel):
    """Describes a single browser tab."""

    # Core identification of the tab's page
    page_id: int
    url: str
    title: str
    # Set for popups / cross-origin iframes: the page that contains this one
    parent_page_id: int | None = None

    # Optional fields kept for compatibility with the dictionary format
    id: str | None = None  # tab ID string like "tab_0"
    index: int | None = None  # tab index
|
|
|
|
|
|
class PageInfo(BaseModel):
    """Page size and scroll information for the current page."""

    # Viewport dimensions
    viewport_width: int
    viewport_height: int

    # Dimensions of the whole page
    page_width: int
    page_height: int

    # Scroll offsets
    scroll_x: int
    scroll_y: int

    # Calculated scroll information (pixels in each direction)
    pixels_above: int
    pixels_below: int
    pixels_left: int
    pixels_right: int

    # Page statistics are now computed dynamically instead of stored
|
|
|
|
|
|
@dataclass
class BrowserStateSummary:
    """Snapshot of the browser's current state, shaped for consumption by an LLM."""

    # Serialized DOM (provided by SerializedDOMState)
    dom_state: SerializedDOMState

    # Basic page identification and the list of tabs
    url: str
    title: str
    tabs: list[TabInfo]

    # Screenshot string; excluded from the generated repr
    screenshot: str | None = field(default=None, repr=False)
    # Enhanced page information
    page_info: PageInfo | None = None

    # Legacy fields retained for backward compatibility
    pixels_above: int = 0
    pixels_below: int = 0
    browser_errors: list[str] = field(default_factory=list)
    # Whether the current page is a PDF viewer
    is_pdf_viewer: bool = False
    # Text summary of recent browser events
    recent_events: str | None = None
|
|
|
|
|
|
@dataclass
class BrowserStateHistory:
    """The summary of the browser's state at a past point in time to use in LLM message history"""

    url: str
    title: str
    tabs: list[TabInfo]
    interacted_element: list[DOMInteractedElement | None] | list[None]
    screenshot_path: str | None = None

    def get_screenshot(self) -> str | None:
        """Load the screenshot from disk and return it as a base64 string.

        Returns:
            The base64-encoded file contents, or ``None`` when no
            ``screenshot_path`` is set or the file cannot be read.
        """
        if not self.screenshot_path:
            return None

        import base64
        from pathlib import Path

        # Read directly instead of checking exists() first: this avoids the
        # check-then-open race and keeps every filesystem failure (missing
        # file, directory, permission problem) on the same "return None" path.
        # ValueError covers paths the OS layer rejects, e.g. embedded NUL bytes.
        try:
            screenshot_data = Path(self.screenshot_path).read_bytes()
        except (OSError, ValueError):
            return None

        return base64.b64encode(screenshot_data).decode('utf-8')

    def to_dict(self) -> dict[str, Any]:
        """Return a dict with the tabs, screenshot path, interacted elements, URL and title.

        Tabs are dumped with ``model_dump()``; each interacted element is
        converted with its ``to_dict()`` and falsy entries become ``None``.
        """
        return {
            'tabs': [tab.model_dump() for tab in self.tabs],
            'screenshot_path': self.screenshot_path,
            'interacted_element': [el.to_dict() if el else None for el in self.interacted_element],
            'url': self.url,
            'title': self.title,
        }
|
|
|
|
|
|
class BrowserError(Exception):
    """Root exception type for browser errors.

    Attributes:
        message: The error message, also passed to ``Exception.__init__``.
        details: Optional dictionary of extra details about the error.
        while_handling_event: Optional event supplied via the ``event`` argument.
    """

    message: str
    details: dict[str, Any] | None = None
    while_handling_event: BaseEvent[Any] | None = None

    def __init__(self, message: str, details: dict[str, Any] | None = None, event: BaseEvent[Any] | None = None):
        super().__init__(message)
        self.message = message
        self.details = details
        self.while_handling_event = event

    def __str__(self) -> str:
        """Format the message together with the details (when truthy) and the event."""
        event = self.while_handling_event
        if not self.details:
            return f'{self.message} (while handling event: {event})'
        return f'{self.message} ({self.details}) during: {event}'
|
|
|
|
|
|
class URLNotAllowedError(BrowserError):
    """Browser error signalling that a URL is not allowed."""
|