mirror of
https://github.com/browser-use/browser-use
synced 2026-05-06 17:52:15 +02:00
2545 lines
95 KiB
Python
2545 lines
95 KiB
Python
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import base64
|
||
import json
|
||
import logging
|
||
import os
|
||
import re
|
||
import time
|
||
from dataclasses import dataclass
|
||
from functools import wraps
|
||
from pathlib import Path
|
||
from typing import Any, Self
|
||
from urllib.parse import urlparse
|
||
|
||
os.environ['PW_TEST_SCREENSHOT_NO_FONTS_READY'] = '1' # https://github.com/microsoft/playwright/issues/35972
|
||
|
||
import psutil
|
||
from patchright.async_api import Playwright as PatchrightPlaywright
|
||
from playwright.async_api import Browser as PlaywrightBrowser
|
||
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
|
||
from playwright.async_api import ElementHandle, FrameLocator, Page, Playwright, async_playwright
|
||
from pydantic import AliasChoices, BaseModel, ConfigDict, Field, InstanceOf, PrivateAttr, model_validator
|
||
|
||
from browser_use.browser.profile import BrowserProfile
|
||
from browser_use.browser.views import (
|
||
BrowserError,
|
||
BrowserStateSummary,
|
||
TabInfo,
|
||
URLNotAllowedError,
|
||
)
|
||
from browser_use.dom.clickable_element_processor.service import ClickableElementProcessor
|
||
from browser_use.dom.service import DomService
|
||
from browser_use.dom.views import DOMElementNode, SelectorMap
|
||
from browser_use.utils import match_url_with_domain_pattern, merge_dicts, time_execution_async, time_execution_sync
|
||
|
||
# Check if running in Docker.
# Only the first character of the lower-cased value is inspected, so 'true', 'yes', 'y', 't' and '1' all enable it.
# [:1] (rather than [0]) keeps an empty IN_DOCKER='' from raising IndexError at import time, and the tuple
# (rather than the string 'ty1') keeps that empty slice from matching, because '' is a substring of every string.
IN_DOCKER = os.environ.get('IN_DOCKER', 'false').lower()[:1] in ('t', 'y', '1')

logger = logging.getLogger('browser_use.browser.session')


# flipped to True by _log_glob_warning() so the glob-pattern warning is only logged once per process
_GLOB_WARNING_SHOWN = False
|
||
|
||
|
||
def _log_glob_warning(domain: str, glob: str):
    """
    Log a one-time warning that `domain` was allowed because it matched the glob pattern `glob`.

    The module-level _GLOB_WARNING_SHOWN flag is set after the first call so later calls are silent.
    """
    global _GLOB_WARNING_SHOWN
    if _GLOB_WARNING_SHOWN:
        return  # already warned once, don't spam the logs

    # glob patterns are very easy to mess up and match too many domains by accident
    # e.g. if you only need to access gmail, don't use *.google.com because an attacker could convince the agent
    # to visit a malicious doc on docs.google.com/s/some/evil/doc to set up a prompt injection attack
    message = f"⚠️ Allowing agent to visit {domain} based on allowed_domains=['{glob}', ...]. Set allowed_domains=['{domain}', ...] explicitly to avoid matching too many domains!"
    logger.warning(message)
    _GLOB_WARNING_SHOWN = True
|
||
|
||
|
||
def _log_pretty_url(s: str, max_len: int | None = 22) -> str:
|
||
"""Truncate/pretty-print a URL with a maximum length, removing the protocol and www. prefix"""
|
||
s = s.replace('https://', '').replace('http://', '').replace('www.', '')
|
||
if max_len is not None and len(s) > max_len:
|
||
return s[:max_len] + '…'
|
||
return s
|
||
|
||
|
||
def _log_pretty_path(path: Path) -> str:
|
||
"""Pretty-print a path, shorten home dir to ~ and cwd to ."""
|
||
return str(path or '').replace(str(Path.home()), '~').replace(str(Path.cwd().resolve()), '.')
|
||
|
||
|
||
def require_initialization(func):
    """
    Decorator for async BrowserSession methods that need a live session and a usable agent page.

    Before calling the wrapped method it:
      - awaits self.start() if the session is not initialized yet,
      - points self.agent_current_page at the first page of the context (or opens a new tab) when the
        current page is missing or closed.
    If anything raises an error that looks like a closed browser/context, the session's connection
    state is reset before the error is re-raised unchanged.
    """

    assert asyncio.iscoroutinefunction(func), '@require_initialization only supports decorating async methods on BrowserSession'

    def _lacks_live_page(session) -> bool:
        """True when the session has no agent page or that page has been closed"""
        return not session.agent_current_page or session.agent_current_page.is_closed()

    @wraps(func)
    async def wrapper(self, *args, **kwargs):
        try:
            if not self.initialized:
                # just start it automatically instead of raising if not already started
                await self.start()

            if _lacks_live_page(self):
                # fall back to the first open tab in the context, if there is one
                context = self.browser_context
                self.agent_current_page = context.pages[0] if (context and context.pages) else None

            if _lacks_live_page(self):
                await self.create_new_tab()

            assert self.agent_current_page and not self.agent_current_page.is_closed()

            if not hasattr(self, '_cached_browser_state_summary'):
                raise RuntimeError('BrowserSession(...).start() must be called first to initialize the browser session')

            return await func(self, *args, **kwargs)

        except Exception as e:
            # TargetClosedError or a similar connection error means our cached connection objects are stale
            connection_lost = 'TargetClosedError' in str(type(e)) or 'context or browser has been closed' in str(e)
            if connection_lost:
                logger.debug(f'Detected closed browser connection in {func.__name__}, resetting connection state')
                self._reset_connection_state()
            # always re-raise unchanged so the caller can handle it appropriately
            raise

    return wrapper
|
||
|
||
|
||
# Module-level BrowserProfile instance used as the default value of the BrowserSession.browser_profile field
DEFAULT_BROWSER_PROFILE = BrowserProfile()
|
||
|
||
|
||
@dataclass
class CachedClickableElementHashes:
    """
    Clickable-element hashes recorded for the last browser state.
    """

    # URL stored alongside the hashes (presumably the page they were computed on; confirm against callers)
    url: str
    # set of hash strings for the clickable elements
    hashes: set[str]
|
||
|
||
|
||
class BrowserSession(BaseModel):
    """
    Represents an active browser session with a running browser process somewhere.

    Chromium flags should be passed via extra_launch_args.
    Extra Playwright launch options (e.g., handle_sigterm, handle_sigint) can be passed as kwargs to BrowserSession and will be forwarded to the launch() call.
    """

    model_config = ConfigDict(
        extra='allow',
        validate_assignment=False,
        revalidate_instances='always',
        frozen=False,
        arbitrary_types_allowed=True,
        populate_by_name=True,
    )
    # this class accepts arbitrary extra **kwargs in init because of the extra='allow' pydantic option
    # they are saved on the model, then applied to self.browser_profile via .apply_session_overrides_to_profile()

    # template profile for the BrowserSession; when extra kwarg overrides are passed, it is replaced during
    # validation by a copy with the overrides applied (without overrides the passed/default instance is used as-is)
    browser_profile: InstanceOf[BrowserProfile] = Field(
        default=DEFAULT_BROWSER_PROFILE,
        description='BrowserProfile() instance containing config for the BrowserSession',
        validation_alias=AliasChoices(
            'profile', 'config', 'new_context_config'
        ),  # abbreviations = 'profile', old deprecated names = 'config', 'new_context_config'
    )

    # runtime props/state: these can be passed in as props at init, or get auto-setup by BrowserSession.start()
    wss_url: str | None = Field(
        default=None,
        description='WSS URL of the node.js playwright browser server to connect to, outputted by (await chromium.launchServer()).wsEndpoint()',
    )
    cdp_url: str | None = Field(
        default=None,
        description='CDP URL of the browser to connect to, e.g. http://localhost:9222 or ws://127.0.0.1:9222/devtools/browser/387adf4c-243f-4051-a181-46798f4a46f4',
    )
    browser_pid: int | None = Field(
        default=None,
        description='pid of a running chromium-based browser process to connect to on localhost',
        validation_alias=AliasChoices('chrome_pid'),  # old deprecated name = chrome_pid
    )
    # the union previously listed Playwright twice; the duplicate member was redundant and has been dropped
    playwright: Playwright | PatchrightPlaywright | None = Field(
        default=None,
        description='Playwright library object returned by: await (playwright or patchright).async_playwright().start()',
        exclude=True,
    )
    browser: InstanceOf[PlaywrightBrowser] | None = Field(
        default=None,
        description='playwright Browser object to use (optional)',
        validation_alias=AliasChoices('playwright_browser'),
        exclude=True,
    )
    browser_context: InstanceOf[PlaywrightBrowserContext] | None = Field(
        default=None,
        description='playwright BrowserContext object to use (optional)',
        validation_alias=AliasChoices('playwright_browser_context', 'context'),
        exclude=True,
    )

    # runtime state: state that changes during the lifecycle of a BrowserSession(), updated by the methods below
    initialized: bool = Field(
        default=False,
        description='Mark BrowserSession launch/connection as already ready and skip setup (not recommended)',
        validation_alias=AliasChoices('is_initialized'),
    )
    agent_current_page: InstanceOf[Page] | None = Field(  # mutated by self.create_new_tab(url)
        default=None,
        description='Foreground Page that the agent is focused on',
        validation_alias=AliasChoices('current_page', 'page'),  # alias page= allows passing in a playwright Page object easily
        exclude=True,
    )
    human_current_page: InstanceOf[Page] | None = Field(  # mutated by self._setup_current_page_change_listeners()
        default=None,
        description='Foreground Page that the human is focused on',
        exclude=True,
    )

    # private (non-field) state
    _cached_browser_state_summary: BrowserStateSummary | None = PrivateAttr(default=None)
    _cached_clickable_element_hashes: CachedClickableElementHashes | None = PrivateAttr(default=None)
    _start_lock: asyncio.Lock = PrivateAttr(default_factory=asyncio.Lock)  # serializes concurrent start() calls
|
||
|
||
@model_validator(mode='after')
def apply_session_overrides_to_profile(self) -> Self:
    """
    Treat any extra **kwargs passed to BrowserSession(...) as config overrides for browser_profile.

    Everything in the model dump that is not one of BrowserSession's own declared fields is considered an
    override. When there are overrides, browser_profile is replaced by a copy with those values applied.
    """
    own_field_names = set(type(self).model_fields.keys())

    # whatever is left after excluding the session's own fields must be an extra kwarg meant for the profile
    overrides = self.model_dump(exclude=own_field_names)

    # NOTE: never store a back-reference to the session on the profile (e.g. profile._in_use_by_session = self),
    # circular references are only acceptable for REPL debugging

    if not overrides:
        return self  # nothing to apply, keep using the profile object as-is without copying it

    # swap in a patched copy instead of mutating the profile that was passed in
    self.browser_profile = self.browser_profile.model_copy(update=overrides)
    return self
|
||
|
||
# def __getattr__(self, key: str) -> Any:
|
||
# """
|
||
# fall back to getting any attrs from the underlying self.browser_profile when not defined on self.
|
||
# (extra kwargs passed e.g. BrowserSession(extra_kwarg=124) on init get saved into self.browser_profile on validation,
|
||
# so this also allows you to read those: browser_session.extra_kwarg => browser_session.browser_profile.extra_kwarg)
|
||
# """
|
||
# return getattr(self.browser_profile, key)
|
||
|
||
async def start(self) -> Self:
    """
    Start the browser session by connecting to an existing browser or launching a new one.

    Precedence order for launching/connecting:
        1. page=Page playwright object, will use its page.context as browser_context
        2. browser_context=PlaywrightBrowserContext object, will use its browser
        3. browser=PlaywrightBrowser object, will use its first available context
        4. browser_pid=int, will connect to a local chromium-based browser via pid
        5. wss_url=str, will connect to a remote playwright browser server via WSS
        6. cdp_url=str, will connect to a remote chromium-based browser via CDP
        7. playwright=Playwright object, will use its chromium instance to launch a new browser

    Returns self. If any setup step raises, `initialized` is set back to False and the error propagates.
    """

    async with self._start_lock:
        # already initialized with a still-valid connection: reuse the existing session state
        already_running = self.initialized and self.is_connected()
        if already_running:
            return self

        # otherwise drop any stale connection state and start from scratch
        self._reset_connection_state()

        # set this first to ensure two parallel calls to start() don't clash with each other
        self.initialized = True
        try:
            # apply last-minute runtime-computed options to the browser_profile, validate it, set up folders on disk
            assert isinstance(self.browser_profile, BrowserProfile)
            self.browser_profile.prepare_user_data_dir()  # create/unlock the <user_data_dir>/SingletonLock
            self.browser_profile.detect_display_configuration()  # adjusts config values, must come before launch/connect

            # launch/connect to the browser:
            # set up the playwright library client, Browser, and BrowserContext objects
            await self.setup_playwright()
            await self.setup_browser_via_passed_objects()
            await self.setup_browser_via_browser_pid()
            await self.setup_browser_via_wss_url()
            await self.setup_browser_via_cdp_url()
            # creates a new context in the existing browser or launches a new persistent context
            await self.setup_new_browser_context()
            assert self.browser_context, f'Failed to connect to or create a new BrowserContext for browser={self.browser}'

            # resize the existing pages and set up foreground tab detection
            await self._setup_viewports()
            await self._setup_current_page_change_listeners()
        except Exception:
            # roll back the flag so a later start() call retries the whole setup
            self.initialized = False
            raise

    return self
|
||
|
||
async def stop(self) -> None:
    """
    Shut down the BrowserSession, closing the browser and killing its process unless keep_alive is set.

    Always marks the session as not initialized. Errors while closing or terminating are logged at
    debug level and not raised.
    """

    self.initialized = False

    if self.browser_profile.keep_alive:
        return  # nothing to do if keep_alive=True, leave the browser running

    # prefer closing the context, fall back to the browser object when there is no context
    closable = self.browser_context or self.browser
    if closable:
        try:
            await closable.close()
            logger.info(
                f'🛑 Stopped the {self.browser_profile.channel.name.lower()} browser '
                f'keep_alive=False user_data_dir={_log_pretty_path(self.browser_profile.user_data_dir) or "<incognito>"} cdp_url={self.cdp_url or self.wss_url} pid={self.browser_pid}'
            )
            self.browser_context = None
        except Exception as e:
            logger.debug(f'❌ Error closing playwright BrowserContext {self.browser_context}: {type(e).__name__}: {e}')

    if not self.browser_pid:
        return

    # kill the chrome subprocess if we were the ones that started it
    try:
        psutil.Process(pid=self.browser_pid).terminate()
        logger.info(f' ↳ Killed browser subprocess with browser_pid={self.browser_pid} keep_alive=False')
        self.browser_pid = None
    except Exception as e:
        # a process that is already gone is the expected case, only log other failures
        if 'NoSuchProcess' not in type(e).__name__:
            logger.debug(f'❌ Error terminating subprocess with browser_pid={self.browser_pid}: {type(e).__name__}: {e}')
|
||
|
||
async def close(self) -> None:
    """Deprecated alias of stop(), kept for backwards-compatibility with the old Browser().close() method"""
    await self.stop()
|
||
|
||
async def new_context(self, **kwargs):
    """
    Deprecated: kept for backwards-compatibility with the old Browser().new_context() method.

    Any keyword arguments are accepted and ignored; the session itself is returned.
    """
    return self
|
||
|
||
async def __aenter__(self) -> BrowserSession:
    """Async context manager entry: start the session and hand it to the `async with` body"""
    await self.start()
    return self
|
||
|
||
async def __aexit__(self, exc_type, exc_val, exc_tb):
    """Async context manager exit: stop the session; exceptions from the body are not suppressed"""
    await self.stop()
|
||
|
||
async def setup_playwright(self) -> None:
    """
    Make sure self.playwright holds a playwright library client object.

    A client passed in by the caller is kept; otherwise one is created with (await async_playwright().start()).
    Override to customize the set up of the playwright or patchright library object.
    """
    if not self.playwright:
        self.playwright = await async_playwright().start()
|
||
# self.playwright = self.playwright or (await async_patchright().start())
|
||
|
||
# if isinstance(self.playwright, PatchrightPlaywright):
|
||
# # patchright handles all its own default args, dont mess with them
|
||
# self.browser_profile.ignore_default_args = True
|
||
|
||
# return self.playwright
|
||
|
||
async def setup_browser_via_passed_objects(self) -> None:
    """
    Adopt any Page / BrowserContext / Browser objects that were passed in, discarding ones that are no longer usable.

    Override to customize the set up of the connection to an existing browser.
    """

    # 1. a passed Page object always takes priority: use its context as our browser_context
    page = self.agent_current_page
    self.browser_context = (page and page.context) or self.browser_context or None

    # 2. verify the context is still usable, clear it if not
    context = self.browser_context
    if context:
        try:
            # accessing .pages fails if the context has been closed
            _ = context.pages
            # additionally make sure the browser behind the context is still connected
            if context.browser and not context.browser.is_connected():
                self.browser_context = None
        except Exception:
            # context is closed, clear it
            self.browser_context = None

    # 3. a disconnected browser object is useless, clear it along with any context that belongs to it
    if self.browser and not self.browser.is_connected():
        if self.browser_context and (self.browser_context.browser is self.browser):
            self.browser_context = None
        self.browser = None

    # 4. a surviving context takes precedence: use its browser, otherwise keep the passed browser
    context_browser = self.browser_context and self.browser_context.browser
    if context_browser and context_browser.is_connected():
        self.browser = context_browser

    if self.browser or self.browser_context:
        logger.info(f'🌎 Connected to existing user-provided browser_context: {self.browser_context}')
        self._set_browser_keep_alive(True)  # we connected to an existing browser, dont kill it at the end
|
||
|
||
async def setup_browser_via_browser_pid(self) -> None:
    """
    If browser_pid is provided, calculate its CDP URL by looking for --remote-debugging-port=... in the
    process's CLI args, then connect to it over CDP.

    Does nothing when a browser/context is already set or no browser_pid was given.
    """

    already_connected = self.browser or self.browser_context
    if already_connected or not self.browser_pid:
        return  # already connected to a browser, or no browser_pid provided

    chrome_process = psutil.Process(pid=self.browser_pid)
    assert chrome_process.is_running(), 'Chrome process is not running'

    # pull the port number out of the first --remote-debugging-port=<port> launch arg, '' if there is none
    launch_args = chrome_process.cmdline()
    port_flag = next((arg for arg in launch_args if arg.startswith('--remote-debugging-port=')), '')
    debug_port = port_flag.split('=')[-1].strip()
    assert debug_port, (
        f'Could not find --remote-debugging-port=... to connect to in browser launch args: browser_pid={self.browser_pid} {launch_args}'
    )

    # we could automatically relaunch the browser process with that arg added here, but they may have tabs open they dont want to lose
    self.cdp_url = self.cdp_url or f'http://localhost:{debug_port}/'
    logger.info(f'🌎 Connecting to existing local browser process: browser_pid={self.browser_pid} on {self.cdp_url}')
    connect_kwargs = self.browser_profile.kwargs_for_connect().model_dump()
    self.browser = self.browser or await self.playwright.chromium.connect_over_cdp(self.cdp_url, **connect_kwargs)
    self._set_browser_keep_alive(True)  # we connected to an existing browser, dont kill it at the end
|
||
|
||
async def setup_browser_via_wss_url(self) -> None:
    """
    If a wss_url was passed, connect to a remote playwright browser server over WSS.

    Does nothing when a browser/context is already set or no wss_url was given.
    """

    already_connected = self.browser or self.browser_context
    if already_connected or not self.wss_url:
        return  # already connected to a browser, or no wss_url provided

    logger.info(f'🌎 Connecting to existing remote chromium playwright node.js server over WSS: {self.wss_url}')
    connect_kwargs = self.browser_profile.kwargs_for_connect().model_dump()
    self.browser = self.browser or await self.playwright.chromium.connect(self.wss_url, **connect_kwargs)
    self._set_browser_keep_alive(True)  # we connected to an existing browser, dont kill it at the end
|
||
|
||
async def setup_browser_via_cdp_url(self) -> None:
    """
    If a cdp_url was passed, connect to a remote chromium-based browser over CDP.

    Does nothing when a browser/context is already set or no cdp_url was given.
    """

    already_connected = self.browser or self.browser_context
    if already_connected or not self.cdp_url:
        return  # already connected to a browser, or no cdp_url provided

    logger.info(f'🌎 Connecting to existing remote chromium-based browser over CDP: {self.cdp_url}')
    connect_kwargs = self.browser_profile.kwargs_for_connect().model_dump()
    self.browser = self.browser or await self.playwright.chromium.connect_over_cdp(self.cdp_url, **connect_kwargs)
    self._set_browser_keep_alive(True)  # we connected to an existing browser, dont kill it at the end
|
||
|
||
async def setup_new_browser_context(self) -> None:
    """
    Make sure self.browser_context is set, launching a new local browser if nothing was connected earlier.

    Steps:
      1. with a browser but no context: reuse the browser's first context or create a new one
      2. with no context at all: launch a browser (incognito context when the profile has no user_data_dir,
         persistent context otherwise)
      3. record the pid of any newly spawned browser child process so stop() can kill it
      4. install the init script on the context and load cookies from file
    """
    # snapshot our child processes so browser processes spawned below can be told apart afterwards
    this_process = psutil.Process(os.getpid())
    pids_before_launch = {child.pid for child in this_process.children(recursive=True)}

    # if we have a browser object but no browser_context, use the first context discovered or make a new one
    if self.browser and not self.browser_context:
        if self.browser.contexts:
            self.browser_context = self.browser.contexts[0]
            logger.info(f'🌎 Using first browser_context available in existing browser: {self.browser_context}')
        else:
            new_context_kwargs = self.browser_profile.kwargs_for_new_context().model_dump()
            self.browser_context = await self.browser.new_context(**new_context_kwargs)
            storage_info = (
                f' + loaded storage_state={len(self.browser_profile.storage_state.cookies) if self.browser_profile.storage_state else 0} cookies'
                if self.browser_profile.storage_state
                else ''
            )
            logger.info(f'🌎 Created new empty browser_context in existing browser{storage_info}: {self.browser_context}')

    # if we still have no browser_context by now, launch a new local one
    if not self.browser_context:
        logger.info(
            f'🌎 Launching local browser '
            f'driver={str(type(self.playwright).__module__).split(".")[0]} channel={self.browser_profile.channel.name.lower()} '
            f'user_data_dir={_log_pretty_path(self.browser_profile.user_data_dir) if self.browser_profile.user_data_dir else "<incognito>"}'
        )
        if self.browser_profile.user_data_dir:
            # user data dir was provided, prepare it for use
            self.browser_profile.prepare_user_data_dir()

            # search for potentially conflicting local processes running on the same user_data_dir
            user_data_dir_flag = f'--user-data-dir={self.browser_profile.user_data_dir}'
            for proc in psutil.process_iter(['pid', 'cmdline']):
                if user_data_dir_flag in (proc.info['cmdline'] or []):
                    logger.warning(
                        f'🚨 Found potentially conflicting browser process browser_pid={proc.info["pid"]} '
                        f'already running with the same user_data_dir={_log_pretty_path(self.browser_profile.user_data_dir)}'
                    )
                    break  # one warning is enough

            # launch a persistent context with that user_data_dir
            persistent_kwargs = self.browser_profile.kwargs_for_launch_persistent_context().model_dump()
            self.browser_context = await self.playwright.chromium.launch_persistent_context(**persistent_kwargs)
        else:
            # if no user_data_dir is provided, launch an incognito context with no persistent user_data_dir
            self.browser = self.browser or await self.playwright.chromium.launch(
                **self.browser_profile.kwargs_for_launch().model_dump()
            )
            self.browser_context = await self.browser.new_context(
                **self.browser_profile.kwargs_for_new_context().model_dump()
            )

    # Only restore browser from context if it's connected, otherwise keep whatever self.browser already is
    context_browser = self.browser_context and self.browser_context.browser
    if context_browser and context_browser.is_connected():
        self.browser = context_browser
    # ^ self.browser can unfortunately still be None at the end ^
    # playwright does not give us a browser object at all when we use launch_persistent_context()!

    # Detect any new child chrome processes that we might have launched above
    try:
        pids_after_launch = {child.pid for child in this_process.children(recursive=True)}
        spawned_procs = [psutil.Process(pid) for pid in pids_after_launch - pids_before_launch]
        new_chrome_procs = [proc for proc in spawned_procs if 'Helper' not in proc.name() and proc.status() == 'running']
    except Exception as e:
        logger.debug(f'❌ Error trying to find child chrome processes after launching new browser: {type(e).__name__}: {e}')
        new_chrome_procs = []

    if new_chrome_procs and not self.browser_pid:
        launched_proc = new_chrome_procs[0]
        self.browser_pid = launched_proc.pid
        logger.debug(
            f' ↳ Spawned browser subprocess: browser_pid={self.browser_pid} {" ".join(launched_proc.cmdline())}'
        )
        self._set_browser_keep_alive(False)  # close the browser at the end because we launched it

    if self.browser:
        connection_method = 'WSS' if self.wss_url else 'CDP' if (self.cdp_url and not self.browser_pid) else 'Local'
        assert self.browser.is_connected(), (
            f'Browser is not connected, did the browser process crash or get killed? (connection method: {connection_method})'
        )
        logger.debug(
            f'🌎 {connection_method} browser connected: v{self.browser.version} {self.cdp_url or self.wss_url or self.browser_profile.executable_path or "(playwright)"}'
        )

    assert self.browser_context, (
        f'Failed to create a playwright BrowserContext {self.browser_context} for browser={self.browser}'
    )

    # JS injected into every page of the context: patches navigator.permissions.query for 'notifications'
    # and records addEventListener() calls so they can be read back via window.getEventListenersForNode(node)
    init_script = """
        // check to make sure we're not inside the PDF viewer
        window.isPdfViewer = !!document?.body?.querySelector('body > embed[type="application/pdf"][width="100%"]')
        if (!window.isPdfViewer) {

            // Permissions
            const originalQuery = window.navigator.permissions.query;
            window.navigator.permissions.query = (parameters) => (
                parameters.name === 'notifications' ?
                    Promise.resolve({ state: Notification.permission }) :
                    originalQuery(parameters)
            );
            (() => {
                if (window._eventListenerTrackerInitialized) return;
                window._eventListenerTrackerInitialized = true;

                const originalAddEventListener = EventTarget.prototype.addEventListener;
                const eventListenersMap = new WeakMap();

                EventTarget.prototype.addEventListener = function(type, listener, options) {
                    if (typeof listener === "function") {
                        let listeners = eventListenersMap.get(this);
                        if (!listeners) {
                            listeners = [];
                            eventListenersMap.set(this, listeners);
                        }

                        listeners.push({
                            type,
                            listener,
                            listenerPreview: listener.toString().slice(0, 100),
                            options
                        });
                    }

                    return originalAddEventListener.call(this, type, listener, options);
                };

                window.getEventListenersForNode = (node) => {
                    const listeners = eventListenersMap.get(node) || [];
                    return listeners.map(({ type, listenerPreview, options }) => ({
                        type,
                        listenerPreview,
                        options
                    }));
                };
            })();
        }
    """

    # Expose anti-detection scripts
    await self.browser_context.add_init_script(init_script)

    # Load cookies from file if specified
    await self.load_cookies_from_file()
|
||
|
||
# async def _fork_locked_user_data_dir(self) -> None:
|
||
# """Fork an in-use user_data_dir by cloning it to a new location to allow a second browser to use it"""
|
||
# # TODO: implement copy-on-write using overlayfs or zfs or something
|
||
# suffix_num = str(self.browser_profile.user_data_dir).rsplit('.', 1)[-1] or '1'
|
||
# suffix_num = int(suffix_num) if suffix_num.isdigit() else 1
|
||
# dir_name = self.browser_profile.user_data_dir.name
|
||
# incremented_name = dir_name.replace(f'.{suffix_num}', f'.{suffix_num + 1}')
|
||
# fork_path = self.browser_profile.user_data_dir.parent / incremented_name
|
||
|
||
# # keep incrementing the suffix_num until we find a path that doesn't exist
|
||
# while fork_path.exists():
|
||
# suffix_num += 1
|
||
# fork_path = self.browser_profile.user_data_dir.parent / (dir_name.rsplit('.', 1)[0] + f'.{suffix_num}')
|
||
|
||
# # use shutil to recursively copy the user_data_dir to a new location
|
||
# shutil.copytree(
|
||
# str(self.browser_profile.user_data_dir),
|
||
# str(fork_path),
|
||
# symlinks=True,
|
||
# ignore_dangling_symlinks=True,
|
||
# dirs_exist_ok=False,
|
||
# )
|
||
# self.browser_profile.user_data_dir = fork_path
|
||
# self.browser_profile.prepare_user_data_dir()
|
||
|
||
async def _setup_current_page_change_listeners(self) -> None:
    """
    Pick the initial foreground tab and install hooks that track which tab the human is focused on.

    - Uses the first existing page of the context (or opens a new one) as the default for
      agent_current_page and human_current_page when they are not already set.
    - Exposes a `_BrowserUseonTabVisibilityChange` binding on the context and injects a script into all
      current and future pages that calls it on visibilitychange/focus events.

    Raises:
        AssertionError: if self.browser_context is not set.
        Exception: any expose_binding() failure other than "already registered" is re-raised.
    """
    # Uses a combination of:
    # - visibilitychange events
    # - window focus/blur events
    # - pointermove events (currently disabled in the injected script)

    # This annoying multi-method approach is needed for more reliable detection across browsers because playwright provides no API for this.

    # TODO: pester the playwright team to add a new event that fires when a headful tab is focused.
    # OR implement a browser-use chrome extension that acts as a bridge to the chrome.tabs API.

    #         - https://github.com/microsoft/playwright/issues/1290
    #         - https://github.com/microsoft/playwright/issues/2286
    #         - https://github.com/microsoft/playwright/issues/3570
    #         - https://github.com/microsoft/playwright/issues/13989

    # set up / detect foreground page
    assert self.browser_context is not None, 'BrowserContext object is not set'
    pages = self.browser_context.pages
    foreground_page = None
    if pages:
        foreground_page = pages[0]
        logger.debug(
            f'📜 Found {len(pages)} existing tabs in browser, agent will start focused on Tab [{pages.index(foreground_page)}]: {foreground_page.url}'
        )
    else:
        foreground_page = await self.browser_context.new_page()
        pages = [foreground_page]
        logger.debug('➕ Opened new tab in empty browser context...')

    self.agent_current_page = self.agent_current_page or foreground_page
    self.human_current_page = self.human_current_page or foreground_page

    def _tab_index(page: Any) -> int | str:
        """Index of `page` among the context's open tabs, or '?' when it is not (or no longer) one of them"""
        assert self.browser_context is not None, 'BrowserContext object is not set'
        try:
            return self.browser_context.pages.index(page)
        except ValueError:
            # a closed or missing page is not in .pages anymore; this is only used for logging, so don't crash
            return '?'

    def _BrowserUseonTabVisibilityChange(source: dict[str, Any], *_event_args: Any):
        """
        hook callback fired when init script injected into a page detects a focus event

        playwright invokes exposed bindings as callback(source, *args): `source` describes the caller
        (we only read source['page']) and the remaining args are whatever the page-side JS passed in.
        The injected script passes one event-info object, so extra positional args must be accepted
        here, otherwise every call fails with a TypeError and human_current_page is never updated.
        """
        new_page = source['page']

        # Update human foreground tab state
        old_foreground = self.human_current_page
        old_tab_idx = _tab_index(old_foreground)
        self.human_current_page = new_page
        new_tab_idx = _tab_index(new_page)

        # Log before and after for debugging
        old_url = old_foreground and old_foreground.url or 'about:blank'
        new_url = new_page and new_page.url or 'about:blank'
        agent_url = self.agent_current_page and self.agent_current_page.url or 'about:blank'
        agent_tab_idx = _tab_index(self.agent_current_page)
        if old_url != new_url:
            logger.info(
                f'👁️ Foregound tab changed by human from [{old_tab_idx}]{_log_pretty_url(old_url)} '
                f'➡️ [{new_tab_idx}]{_log_pretty_url(new_url)} '
                f'(agent will stay on [{agent_tab_idx}]{_log_pretty_url(agent_url)})'
            )

    try:
        await self.browser_context.expose_binding('_BrowserUseonTabVisibilityChange', _BrowserUseonTabVisibilityChange)
    except Exception as e:
        if 'Function "_BrowserUseonTabVisibilityChange" has been already registered' in str(e):
            logger.debug(
                '⚠️ Function "_BrowserUseonTabVisibilityChange" has been already registered, '
                'this is likely because the browser was already started with an existing BrowserSession()'
            )
        else:
            raise

    update_tab_focus_script = """
        // --- Method 1: visibilitychange event (unfortunately *all* tabs are always marked visible by playwright, usually does not fire) ---
        document.addEventListener('visibilitychange', async () => {
            if (document.visibilityState === 'visible') {
                await window._BrowserUseonTabVisibilityChange({ source: 'visibilitychange', url: document.location.href });
                console.log('BrowserUse Foreground tab change event fired', document.location.href);
            }
        });

        // --- Method 2: focus/blur events, most reliable method for headful browsers ---
        window.addEventListener('focus', async () => {
            await window._BrowserUseonTabVisibilityChange({ source: 'focus', url: document.location.href });
            console.log('BrowserUse Foreground tab change event fired', document.location.href);
        });

        // --- Method 3: pointermove events (may be fired by agent if we implement AI hover movements, also very noisy) ---
        // Use a throttled handler to avoid excessive calls
        // let lastMove = 0;
        // window.addEventListener('pointermove', async () => {
        //     const now = Date.now();
        //     if (now - lastMove > 1000) {  // Throttle to once per second
        //         lastMove = now;
        //         await window._BrowserUseonTabVisibilityChange({ source: 'pointermove', url: document.location.href });
        //         console.log('BrowserUse Foreground tab change event fired', document.location.href);
        //     }
        // });
    """
    # the init script only runs on future navigations/pages...
    await self.browser_context.add_init_script(update_tab_focus_script)

    # ...so also set up visibility listeners for all tabs that already exist
    for page in self.browser_context.pages:
        try:
            # logger.debug(f'👁️ Added visibility listener to existing tab: {page.url}')
            await page.evaluate(update_tab_focus_script)
        except Exception as e:
            page_idx = _tab_index(page)
            logger.debug(
                f'⚠️ Failed to add visibility listener to existing tab, is it crashed or ignoring CDP commands?: [{page_idx}]{page.url}: {type(e).__name__}: {e}'
            )
|
||
|
||
async def _setup_viewports(self) -> None:
    """Apply profile-wide context settings, then size existing pages and the browser window.

    Steps, each best-effort (failures are logged as warnings, not raised):
    grant permissions, set default timeouts, set extra HTTP headers, set geolocation.
    Then the storage state is loaded when ``self.storage_state`` is set, every open page
    gets the configured viewport, and ``about:blank`` pages get the loading animation.
    If no viewport is configured but a window size is (and the browser is headful),
    the OS window is resized via CDP, falling back to ``window.resizeTo``.
    """
    profile = self.browser_profile
    viewport = profile.viewport

    # log the viewport settings to terminal
    logger.debug(
        '📐 Setting up viewport: '
        + f'headless={profile.headless} '
        + (
            f'window={profile.window_size["width"]}x{profile.window_size["height"]}px '
            if profile.window_size
            else '(no window) '
        )
        + (f'screen={profile.screen["width"]}x{profile.screen["height"]}px ' if profile.screen else '')
        + (f'viewport={viewport["width"]}x{viewport["height"]}px ' if viewport else '(no viewport) ')
        + f'device_scale_factor={profile.device_scale_factor or 1.0} '
        + f'is_mobile={profile.is_mobile} '
        + (f'color_scheme={profile.color_scheme.value} ' if profile.color_scheme else '')
        + (f'locale={profile.locale} ' if profile.locale else '')
        + (f'timezone_id={profile.timezone_id} ' if profile.timezone_id else '')
        + (f'geolocation={profile.geolocation} ' if profile.geolocation else '')
        + (f'permissions={",".join(profile.permissions or ["<none>"])} ')
    )

    # apply any settings present in the profile to the entire browser_context as defaults
    if profile.permissions:
        try:
            await self.browser_context.grant_permissions(profile.permissions)
        except Exception as e:
            logger.warning(f'⚠️ Failed to grant browser permissions {profile.permissions}: {type(e).__name__}: {e}')

    try:
        # set_default_timeout() / set_default_navigation_timeout() are plain synchronous
        # methods in Playwright's Python API (they return None), so they must NOT be awaited:
        # awaiting None raises TypeError, which used to skip the navigation timeout entirely.
        if profile.default_timeout:
            self.browser_context.set_default_timeout(profile.default_timeout)
        if profile.default_navigation_timeout:
            self.browser_context.set_default_navigation_timeout(profile.default_navigation_timeout)
    except Exception as e:
        logger.warning(
            f'⚠️ Failed to set playwright timeout settings '
            f'cdp_api={profile.default_timeout} '
            f'navigation={profile.default_navigation_timeout}: {type(e).__name__}: {e}'
        )

    try:
        if profile.extra_http_headers:
            await self.browser_context.set_extra_http_headers(profile.extra_http_headers)
    except Exception as e:
        logger.warning(
            f'⚠️ Failed to setup playwright extra_http_headers: {type(e).__name__}: {e}'
        )  # dont print the secret header contents in the logs!

    try:
        if profile.geolocation:
            await self.browser_context.set_geolocation(profile.geolocation)
    except Exception as e:
        logger.warning(f'⚠️ Failed to update browser geolocation {profile.geolocation}: {type(e).__name__}: {e}')

    if self.storage_state:
        await self.load_storage_state()

    page = None
    for page in self.browser_context.pages:
        # apply viewport size settings to any existing pages
        if viewport:
            await page.set_viewport_size(viewport)

        # show browser-use dvd screensaver-style bouncing loading animation on any about:blank pages
        if page.url == 'about:blank':
            await self._show_dvd_screensaver_loading_animation(page)

    # after the loop `page` is the last open page; create one if the context had none
    page = page or (await self.browser_context.new_page())

    # only resize the real OS window when no viewport emulation is used and a window exists
    if viewport or profile.window_size is None or profile.headless:
        return

    # cdp api: https://chromedevtools.github.io/devtools-protocol/tot/Browser/#method-setWindowBounds
    try:
        cdp_session = await page.context.new_cdp_session(page)
        window_id_result = await cdp_session.send('Browser.getWindowForTarget')
        await cdp_session.send(
            'Browser.setWindowBounds',
            {
                'windowId': window_id_result['windowId'],
                'bounds': {
                    **profile.window_size,
                    'windowState': 'normal',  # Ensure window is not minimized/maximized
                },
            },
        )
        await cdp_session.detach()
    except Exception as cdp_error:
        try:
            # fallback to javascript resize if cdp setWindowBounds fails.
            # page.evaluate() takes ONE positional argument, so the size is passed as a
            # single object and destructured on the JS side.
            await page.evaluate(
                """({width, height}) => {window.resizeTo(width, height)}""",
                dict(profile.window_size),
            )
            return
        except Exception as js_error:
            # a distinct name is used so the outer exception stays bound for the warning below
            logger.debug(f'JS window.resizeTo fallback also failed: {type(js_error).__name__}: {js_error}')

        window_size = profile.window_size
        logger.warning(
            f'⚠️ Failed to resize browser window to {window_size["width"]}x{window_size["height"]}px using CDP setWindowBounds: {type(cdp_error).__name__}: {cdp_error}'
        )
|
||
|
||
def _set_browser_keep_alive(self, keep_alive: bool | None) -> None:
|
||
"""set the keep_alive flag on the browser_profile, defaulting to True if keep_alive is None"""
|
||
if self.browser_profile.keep_alive is None:
|
||
self.browser_profile.keep_alive = keep_alive
|
||
|
||
def is_connected(self) -> bool:
    """
    Report whether this session holds a usable, connected browser and context.

    The answer is False when:
    - there is no browser_context
    - self.browser exists but reports itself disconnected
    - the browser_context's own browser exists but reports itself disconnected
    - touching the browser_context raises (closed / unusable context)
    """
    context = self.browser_context
    if not context:
        return False

    own_browser = self.browser
    if own_browser and not own_browser.is_connected():
        return False

    context_browser = context.browser
    if context_browser and not context_browser.is_connected():
        return False

    try:
        # reading .pages is expected to fail when the context has been closed
        _ = context.pages
        # re-check the owning browser, since reading it may also fail on a closed context
        owner = context.browser
        return not (owner and not owner.is_connected())
    except Exception:
        return False
|
||
|
||
def _reset_connection_state(self) -> None:
|
||
"""Reset the browser connection state when disconnection is detected"""
|
||
self.initialized = False
|
||
self.browser = None
|
||
self.browser_context = None
|
||
# Also clear browser_pid since the process may no longer exist
|
||
self.browser_pid = None
|
||
|
||
# --- Tab management ---
|
||
async def get_current_page(self) -> Page:
    """Return the agent's current page, repairing stale/closed page references first.

    Starts the session if needed, drops closed pages from both focus slots, lets each
    slot fall back to the other, then to the first open tab, and finally to a new tab.
    """
    if not self.initialized:
        await self.start()

    # get-or-create the browser_context if it's not already set up
    if not self.browser_context:
        await self.start()
    assert self.browser_context, 'BrowserContext is not set up'

    # drop any focused page that is missing or closed so a dead object is never used
    for slot in ('human_current_page', 'agent_current_page'):
        focused = getattr(self, slot)
        if (not focused) or focused.is_closed():
            setattr(self, slot, None)

    # if only one slot survived, it is used for both
    self.agent_current_page = self.agent_current_page or self.human_current_page or None
    self.human_current_page = self.human_current_page or self.agent_current_page or None

    # both slots empty: take the first open tab, or open a fresh one when none exist
    if self.agent_current_page is None:
        if self.browser_context.pages:
            replacement = self.browser_context.pages[0]
        else:
            replacement = await self.create_new_tab()
        self.agent_current_page = replacement
        self.human_current_page = replacement

    assert self.agent_current_page is not None, 'Failed to find or create a new page for the agent'
    assert self.human_current_page is not None, 'Failed to find or create a new page for the human'

    return self.agent_current_page
|
||
|
||
@property
|
||
def tabs(self) -> list[Page]:
    """Return a new list of the browser_context's pages, or an empty list when there is no context."""
    context = self.browser_context
    return list(context.pages) if context else []
|
||
|
||
@require_initialization
|
||
async def new_tab(self, url: str | None = None) -> Page:
    """Open a new tab (optionally at ``url``) by delegating to ``create_new_tab``."""
    opened_page = await self.create_new_tab(url=url)
    return opened_page
|
||
|
||
@require_initialization
|
||
async def switch_tab(self, tab_index: int) -> Page:
    """Make the tab at ``tab_index`` the agent's current page and return it.

    Raises:
        IndexError: when there are no tabs or ``tab_index`` is past the last tab.
    """
    open_pages = self.browser_context.pages
    if open_pages and tab_index < len(open_pages):
        target = open_pages[tab_index]
        self.agent_current_page = target
        return target
    raise IndexError('Tab index out of range')
|
||
|
||
@require_initialization
|
||
async def wait_for_element(self, selector: str, timeout: int = 10000) -> None:
    """Wait until ``selector`` is visible on the current page, passing ``timeout`` through to Playwright."""
    current = await self.get_current_page()
    await current.wait_for_selector(selector, timeout=timeout, state='visible')
|
||
|
||
@require_initialization
|
||
@time_execution_async('--remove_highlights')
|
||
async def remove_highlights(self):
    """
    Strip the highlight overlay container and highlight attributes from the current page.

    Failures (e.g. a closed or inaccessible page) are logged at debug level and never raised,
    because this is not critical functionality.
    """
    cleanup_script = """
        try {
            // Remove the highlight container and all its contents
            const container = document.getElementById('playwright-highlight-container');
            if (container) {
                container.remove();
            }

            // Remove highlight attributes from elements
            const highlightedElements = document.querySelectorAll('[browser-user-highlight-id^="playwright-highlight-"]');
            highlightedElements.forEach(el => {
                el.removeAttribute('browser-user-highlight-id');
            });
        } catch (e) {
            console.error('Failed to remove highlights:', e);
        }
        """
    current = await self.get_current_page()
    try:
        await current.evaluate(cleanup_script)
    except Exception as e:
        logger.debug(f'⚠ Failed to remove highlights (this is usually ok): {type(e).__name__}: {e}')
|
||
|
||
@require_initialization
|
||
async def get_dom_element_by_index(self, index: int) -> Any | None:
    """Look up ``index`` in the current selector map; the result is None when the index is absent."""
    return (await self.get_selector_map()).get(index)
|
||
|
||
@require_initialization
|
||
@time_execution_async('--click_element_node')
|
||
async def _click_element_node(self, element_node: DOMElementNode) -> str | None:
|
||
"""
|
||
Optimized method to click an element using xpath.
|
||
"""
|
||
page = await self.get_current_page()
|
||
try:
|
||
# Highlight before clicking
|
||
# if element_node.highlight_index is not None:
|
||
# await self._update_state(focus_element=element_node.highlight_index)
|
||
|
||
element_handle = await self.get_locate_element(element_node)
|
||
|
||
if element_handle is None:
|
||
raise Exception(f'Element: {repr(element_node)} not found')
|
||
|
||
async def perform_click(click_func):
|
||
"""Performs the actual click, handling both download
|
||
and navigation scenarios."""
|
||
if self.browser_profile.downloads_dir:
|
||
try:
|
||
# Try short-timeout expect_download to detect a file download has been been triggered
|
||
async with page.expect_download(timeout=5000) as download_info:
|
||
await click_func()
|
||
download = await download_info.value
|
||
# Determine file path
|
||
suggested_filename = download.suggested_filename
|
||
unique_filename = await self._get_unique_filename(self.browser_profile.downloads_dir, suggested_filename)
|
||
download_path = os.path.join(self.browser_profile.downloads_dir, unique_filename)
|
||
await download.save_as(download_path)
|
||
logger.debug(f'⬇️ Download triggered. Saved file to: {download_path}')
|
||
return download_path
|
||
except TimeoutError:
|
||
# If no download is triggered, treat as normal click
|
||
logger.debug('No download triggered within timeout. Checking navigation...')
|
||
await page.wait_for_load_state()
|
||
await self._check_and_handle_navigation(page)
|
||
else:
|
||
# Standard click logic if no download is expected
|
||
await click_func()
|
||
await page.wait_for_load_state()
|
||
await self._check_and_handle_navigation(page)
|
||
|
||
try:
|
||
return await perform_click(lambda: element_handle.click(timeout=1500))
|
||
except URLNotAllowedError as e:
|
||
raise e
|
||
except Exception:
|
||
try:
|
||
return await perform_click(lambda: page.evaluate('(el) => el.click()', element_handle))
|
||
except URLNotAllowedError as e:
|
||
raise e
|
||
except Exception as e:
|
||
raise Exception(f'Failed to click element: {str(e)}')
|
||
|
||
except URLNotAllowedError as e:
|
||
raise e
|
||
except Exception as e:
|
||
raise Exception(f'Failed to click element: {repr(element_node)}. Error: {str(e)}')
|
||
|
||
@require_initialization
|
||
@time_execution_async('--get_tabs_info')
|
||
async def get_tabs_info(self) -> list[TabInfo]:
    """Build a TabInfo (index, url, title) for every page in the browser_context.

    A tab whose title cannot be fetched within 1 second is reported as an
    ``about:blank`` placeholder titled so that it will not be used.
    """
    collected = []
    for page_id, page in enumerate(self.browser_context.pages):
        try:
            collected.append(
                TabInfo(page_id=page_id, url=page.url, title=await asyncio.wait_for(page.title(), timeout=1))
            )
        except TimeoutError:
            # page.title() can hang forever on tabs that are crashed/disappeared/about:blank
            # we dont want to try automating those tabs because they will hang the whole script
            logger.debug('⚠ Failed to get tab info for tab #%s: %s (ignoring)', page_id, page.url)
            collected.append(TabInfo(page_id=page_id, url='about:blank', title='ignore this tab and do not use it'))

    return collected
|
||
|
||
@require_initialization
|
||
async def close_tab(self, tab_index: int | None = None) -> None:
|
||
pages = self.browser_context.pages
|
||
if not pages:
|
||
return
|
||
|
||
if tab_index is None:
|
||
# to tab_index passed, just close the current agent page
|
||
page = await self.get_current_page()
|
||
else:
|
||
# otherwise close the tab at the given index
|
||
page = pages[tab_index]
|
||
|
||
await page.close()
|
||
|
||
# reset the self.agent_current_page and self.human_current_page references to first available tab
|
||
await self.get_current_page()
|
||
|
||
# --- Page navigation ---
|
||
@require_initialization
|
||
async def navigate(self, url: str) -> None:
    """Navigate the agent's current page to ``url``, opening a new tab if there is no usable page.

    The URL is checked against the allowed-domains configuration first, matching
    ``navigate_to``. A page that has already been closed is treated like a missing page
    (as ``refresh`` does), so a new tab is opened instead of calling ``goto`` on a dead page.

    Raises:
        BrowserError: when ``url`` is rejected by ``_is_url_allowed``.
    """
    if not self._is_url_allowed(url):
        raise BrowserError(f'Navigation to non-allowed URL: {url}')

    page = self.agent_current_page
    if page and not page.is_closed():
        await page.goto(url)
    else:
        await self.create_new_tab(url)
|
||
|
||
@require_initialization
|
||
async def refresh(self) -> None:
    """Reload the agent's current page; when it is missing or closed, open a new tab instead."""
    page = self.agent_current_page
    if not page or page.is_closed():
        await self.create_new_tab()
        return
    await page.reload()
|
||
|
||
@require_initialization
|
||
async def execute_javascript(self, script: str) -> Any:
    """Evaluate ``script`` on the current page and return whatever ``page.evaluate`` returns."""
    current = await self.get_current_page()
    result = await current.evaluate(script)
    return result
|
||
|
||
async def get_cookies(self) -> list[dict[str, Any]]:
    """Return the browser_context's cookies, or an empty list when no context exists."""
    context = self.browser_context
    if not context:
        return []
    return await context.cookies()
|
||
|
||
async def save_cookies(self, *args, **kwargs) -> None:
    """
    Legacy alias: forwards every argument to save_storage_state().
    """
    forwarded = self.save_storage_state(*args, **kwargs)
    await forwarded
|
||
|
||
@require_initialization
|
||
async def save_storage_state(self, path: Path | None = None) -> None:
    """
    Save cookies to the specified path or the configured cookies_file and/or storage_state.

    - A legacy cookies JSON file is written when ``path`` is given (and is not itself a
      ``storage_state.json``) or when ``browser_profile.cookies_file`` is configured.
    - A ``storage_state.json`` is written next to ``path`` when ``path`` is given, otherwise to
      the configured ``browser_profile.storage_state`` if that is a file path.
    - An existing ``storage_state.json`` is merged with the current state via ``merge_dicts``
      and the MERGED result is what gets written; if merging fails the file is left as it was.

    All file errors are logged as warnings rather than raised.
    """
    storage_state = await self.browser_context.storage_state()
    cookies = storage_state['cookies']
    if cookies and self.browser_profile.cookies_file:
        # only show warning if they configured cookies_file (not if they passed in a path to this function as an arg)
        logger.warning(
            '⚠️ cookies_file is deprecated and will be removed in a future version. '
            'Please use storage_state instead for loading cookies and other browser state. '
            'See: https://playwright.dev/python/docs/api/class-browsercontext#browser-context-storage-state'
        )

    # save cookies_file if passed a cookies file path or if profile cookies_file is configured
    path_is_storage_state = path and str(path).endswith('storage_state.json')
    if (path and not path_is_storage_state) or self.browser_profile.cookies_file:
        # bound before the try so the error message below can always reference it
        cookies_file_path = path or self.browser_profile.cookies_file
        try:
            cookies_file_path = Path(cookies_file_path).expanduser().resolve()
            cookies_file_path.parent.mkdir(parents=True, exist_ok=True)
            cookies_file_path.write_text(json.dumps(cookies, indent=4))  # TODO: convert to async
            logger.info(f'🍪 Saved {len(cookies)} cookies to cookies_file={_log_pretty_path(cookies_file_path)}')
        except Exception as e:
            logger.warning(
                f'❌ Failed to save cookies to cookies_file={_log_pretty_path(cookies_file_path)}: {type(e).__name__}: {e}'
            )

    if path:
        # if they passed in a path to the old save_cookies function,
        # also save a new storage_state.json next to it to encourage adoption of the new format
        storage_state_path = Path(path).expanduser().resolve().parent / 'storage_state.json'
    else:
        # otherwise use configured storage_state path
        storage_state_path = self.browser_profile.storage_state

    if storage_state_path is None:
        return
    if not isinstance(storage_state_path, (str, Path)):
        logger.warning('⚠️ storage_state must be a json file path to be able to update it, skipping...')
        return

    try:
        storage_state_path = Path(storage_state_path).expanduser().resolve()
        storage_state_path.parent.mkdir(parents=True, exist_ok=True)

        # always merge storage states, never overwrite (so two browsers can share the same storage_state.json)
        if storage_state_path.exists():
            try:
                existing_storage_state = json.loads(storage_state_path.read_text())  # TODO: convert to async
                storage_state = merge_dicts(existing_storage_state, storage_state)
            except Exception as e:
                logger.warning(
                    f'❌ Failed to merge storage state with existing storage_state={_log_pretty_path(storage_state_path)}: {type(e).__name__}: {e}'
                )
                return

        # single write of the (possibly merged) state; a second unmerged write would undo the merge.
        # in case another process races us and updates the file here, we will overwrite their changes
        # if we really want to support real concurrency we need a sqlite database or something
        storage_state_path.write_text(json.dumps(storage_state, indent=4))  # TODO: convert to async
        logger.info(
            f'🍪 Saved {len(storage_state["cookies"])} cookies to storage_state={_log_pretty_path(storage_state_path)}'
        )
    except Exception as e:
        logger.warning(
            f'❌ Failed to save storage state to storage_state={_log_pretty_path(storage_state_path)}: {type(e).__name__}: {e}'
        )
|
||
|
||
@require_initialization
|
||
async def load_storage_state(self) -> None:
    """
    Apply cookies from the configured cookies_file and/or storage_state to the browser context.

    A relative cookies_file is resolved against ``browser_profile.downloads_dir`` (or '.').
    storage_state may be a JSON file path or an already-loaded dict. Failures are logged
    as warnings and never raised.
    """
    profile = self.browser_profile

    if profile.cookies_file:
        # Show deprecation warning
        logger.warning(
            '⚠️ cookies_file is deprecated and will be removed in a future version. '
            'Please use storage_state instead for loading cookies and other browser state. '
            'See: https://playwright.dev/python/docs/api/class-browsercontext#browser-context-storage-state'
        )

        cookies_path = Path(profile.cookies_file)
        if not cookies_path.is_absolute():
            cookies_path = Path(profile.downloads_dir or '.') / cookies_path

        try:
            cookies_data = json.loads(cookies_path.read_text())
            if cookies_data:
                await self.browser_context.add_cookies(cookies_data)
                logger.info(f'🍪 Loaded {len(cookies_data)} cookies from cookies_file={_log_pretty_path(cookies_path)}')
        except Exception as e:
            logger.warning(
                f'❌ Failed to load cookies from cookies_file={_log_pretty_path(cookies_path)}: {type(e).__name__}: {e}'
            )

    if not profile.storage_state:
        return

    storage_state = profile.storage_state
    if isinstance(storage_state, (str, Path)):
        # a path was configured: read the JSON file it points at
        try:
            storage_state = json.loads(Path(storage_state).read_text())
        except Exception as e:
            logger.warning(
                f'❌ Failed to load cookies from storage_state={_log_pretty_path(self.browser_profile.storage_state)}: {type(e).__name__}: {e}'
            )
            return

    try:
        assert isinstance(storage_state, dict), f'Got unexpected type for storage_state: {type(storage_state)}'
        await self.browser_context.add_cookies(storage_state['cookies'])
        # TODO: also handle localStorage, IndexedDB, SessionStorage
        # playwright doesn't provide an API for setting these before launch
        # https://playwright.dev/python/docs/auth#session-storage
        # await self.browser_context.add_local_storage(storage_state['localStorage'])
        logger.info(
            f'🍪 Loaded {len(storage_state["cookies"])} cookies from storage_state={_log_pretty_path(self.browser_profile.storage_state)}'
        )
    except Exception as e:
        logger.warning(
            f'❌ Failed to load cookies from storage_state={_log_pretty_path(self.browser_profile.storage_state)}: {type(e).__name__}: {e}'
        )
        return
|
||
|
||
async def load_cookies_from_file(self, *args, **kwargs) -> None:
    """
    Legacy alias: forwards every argument to load_storage_state().
    """
    forwarded = self.load_storage_state(*args, **kwargs)
    await forwarded
|
||
|
||
# @property
|
||
# def browser_extension_pages(self) -> list[Page]:
|
||
# if not self.browser_context:
|
||
# return []
|
||
# return [p for p in self.browser_context.pages if p.url.startswith('chrome-extension://')]
|
||
|
||
# @property
|
||
# def saved_downloads(self) -> list[Path]:
|
||
# """
|
||
# Return a list of files in the downloads_dir.
|
||
# """
|
||
# return list(Path(self.browser_profile.downloads_dir).glob('*'))
|
||
|
||
async def _wait_for_stable_network(self):
|
||
pending_requests = set()
|
||
last_activity = asyncio.get_event_loop().time()
|
||
|
||
page = await self.get_current_page()
|
||
|
||
# Define relevant resource types and content types
|
||
RELEVANT_RESOURCE_TYPES = {
|
||
'document',
|
||
'stylesheet',
|
||
'image',
|
||
'font',
|
||
'script',
|
||
'iframe',
|
||
}
|
||
|
||
RELEVANT_CONTENT_TYPES = {
|
||
'text/html',
|
||
'text/css',
|
||
'application/javascript',
|
||
'image/',
|
||
'font/',
|
||
'application/json',
|
||
}
|
||
|
||
# Additional patterns to filter out
|
||
IGNORED_URL_PATTERNS = {
|
||
# Analytics and tracking
|
||
'analytics',
|
||
'tracking',
|
||
'telemetry',
|
||
'beacon',
|
||
'metrics',
|
||
# Ad-related
|
||
'doubleclick',
|
||
'adsystem',
|
||
'adserver',
|
||
'advertising',
|
||
# Social media widgets
|
||
'facebook.com/plugins',
|
||
'platform.twitter',
|
||
'linkedin.com/embed',
|
||
# Live chat and support
|
||
'livechat',
|
||
'zendesk',
|
||
'intercom',
|
||
'crisp.chat',
|
||
'hotjar',
|
||
# Push notifications
|
||
'push-notifications',
|
||
'onesignal',
|
||
'pushwoosh',
|
||
# Background sync/heartbeat
|
||
'heartbeat',
|
||
'ping',
|
||
'alive',
|
||
# WebRTC and streaming
|
||
'webrtc',
|
||
'rtmp://',
|
||
'wss://',
|
||
# Common CDNs for dynamic content
|
||
'cloudfront.net',
|
||
'fastly.net',
|
||
}
|
||
|
||
async def on_request(request):
|
||
# Filter by resource type
|
||
if request.resource_type not in RELEVANT_RESOURCE_TYPES:
|
||
return
|
||
|
||
# Filter out streaming, websocket, and other real-time requests
|
||
if request.resource_type in {
|
||
'websocket',
|
||
'media',
|
||
'eventsource',
|
||
'manifest',
|
||
'other',
|
||
}:
|
||
return
|
||
|
||
# Filter out by URL patterns
|
||
url = request.url.lower()
|
||
if any(pattern in url for pattern in IGNORED_URL_PATTERNS):
|
||
return
|
||
|
||
# Filter out data URLs and blob URLs
|
||
if url.startswith(('data:', 'blob:')):
|
||
return
|
||
|
||
# Filter out requests with certain headers
|
||
headers = request.headers
|
||
if headers.get('purpose') == 'prefetch' or headers.get('sec-fetch-dest') in [
|
||
'video',
|
||
'audio',
|
||
]:
|
||
return
|
||
|
||
nonlocal last_activity
|
||
pending_requests.add(request)
|
||
last_activity = asyncio.get_event_loop().time()
|
||
# logger.debug(f'Request started: {request.url} ({request.resource_type})')
|
||
|
||
async def on_response(response):
|
||
request = response.request
|
||
if request not in pending_requests:
|
||
return
|
||
|
||
# Filter by content type if available
|
||
content_type = response.headers.get('content-type', '').lower()
|
||
|
||
# Skip if content type indicates streaming or real-time data
|
||
if any(
|
||
t in content_type
|
||
for t in [
|
||
'streaming',
|
||
'video',
|
||
'audio',
|
||
'webm',
|
||
'mp4',
|
||
'event-stream',
|
||
'websocket',
|
||
'protobuf',
|
||
]
|
||
):
|
||
pending_requests.remove(request)
|
||
return
|
||
|
||
# Only process relevant content types
|
||
if not any(ct in content_type for ct in RELEVANT_CONTENT_TYPES):
|
||
pending_requests.remove(request)
|
||
return
|
||
|
||
# Skip if response is too large (likely not essential for page load)
|
||
content_length = response.headers.get('content-length')
|
||
if content_length and int(content_length) > 5 * 1024 * 1024: # 5MB
|
||
pending_requests.remove(request)
|
||
return
|
||
|
||
nonlocal last_activity
|
||
pending_requests.remove(request)
|
||
last_activity = asyncio.get_event_loop().time()
|
||
# logger.debug(f'Request resolved: {request.url} ({content_type})')
|
||
|
||
# Attach event listeners
|
||
page.on('request', on_request)
|
||
page.on('response', on_response)
|
||
|
||
now = asyncio.get_event_loop().time()
|
||
try:
|
||
# Wait for idle time
|
||
start_time = asyncio.get_event_loop().time()
|
||
while True:
|
||
await asyncio.sleep(0.1)
|
||
now = asyncio.get_event_loop().time()
|
||
if (
|
||
len(pending_requests) == 0
|
||
and (now - last_activity) >= self.browser_profile.wait_for_network_idle_page_load_time
|
||
):
|
||
break
|
||
if now - start_time > self.browser_profile.maximum_wait_page_load_time:
|
||
logger.debug(
|
||
f'Network timeout after {self.browser_profile.maximum_wait_page_load_time}s with {len(pending_requests)} '
|
||
f'pending requests: {[r.url for r in pending_requests]}'
|
||
)
|
||
break
|
||
|
||
finally:
|
||
# Clean up event listeners
|
||
page.remove_listener('request', on_request)
|
||
page.remove_listener('response', on_response)
|
||
|
||
elapsed = now - start_time
|
||
if elapsed > 1:
|
||
logger.debug(f'💤 Page network traffic calmed down after {now - start_time:.2f} seconds')
|
||
|
||
async def _wait_for_page_and_frames_load(self, timeout_overwrite: float | None = None):
    """
    Ensures page is fully loaded before continuing.

    Waits for the network to settle, checks the loaded URL against the allowed-domains
    configuration, then sleeps for whatever remains of the minimum wait
    (``timeout_overwrite`` if truthy, else ``browser_profile.minimum_wait_page_load_time``).

    Raises:
        URLNotAllowedError: when the loaded URL is not allowed. Any other error during
            the wait is logged as a warning and ignored.
    """
    # Start timing
    start_time = time.time()

    # Wait for page load
    page = await self.get_current_page()
    try:
        await self._wait_for_stable_network()

        # Check if the loaded URL is allowed
        await self._check_and_handle_navigation(page)
    except URLNotAllowedError:
        raise
    except Exception:
        logger.warning('⚠️ Page load failed, continuing...')

    # Calculate remaining time to meet minimum WAIT_TIME
    elapsed = time.time() - start_time
    remaining = max((timeout_overwrite or self.browser_profile.minimum_wait_page_load_time) - elapsed, 0)

    # just for logging, calculate how much data was downloaded
    try:
        bytes_used = await page.evaluate("""
            () => {
                let total = 0;
                for (const entry of performance.getEntriesByType('resource')) {
                    total += entry.transferSize || 0;
                }
                for (const nav of performance.getEntriesByType('navigation')) {
                    total += nav.transferSize || 0;
                }
                return total;
            }
        """)
    except Exception:
        bytes_used = None

    # The tab index is only used in the log line below, so a page that is no longer in the
    # tab list (e.g. it closed while we were waiting) must not abort the wait with ValueError
    try:
        tab_idx = self.tabs.index(page)
    except ValueError:
        tab_idx = '?'

    if bytes_used is not None:
        logger.debug(
            f'➡️ Page navigation [{tab_idx}]{_log_pretty_url(page.url, 40)} used {bytes_used / 1024:.1f} KB in {elapsed:.2f}s, waiting +{remaining:.2f}s for all frames to finish'
        )
    else:
        logger.debug(
            f'➡️ Page navigation [{tab_idx}]{_log_pretty_url(page.url, 40)} took {elapsed:.2f}s, waiting +{remaining:.2f}s for all frames to finish'
        )

    # Sleep remaining time if needed
    if remaining > 0:
        await asyncio.sleep(remaining)
|
||
|
||
def _is_url_allowed(self, url: str) -> bool:
|
||
"""
|
||
Check if a URL is allowed based on the whitelist configuration. SECURITY CRITICAL.
|
||
|
||
Supports optional glob patterns and schemes in allowed_domains:
|
||
- *.example.com will match sub.example.com and example.com
|
||
- *google.com will match google.com, agoogle.com, and www.google.com
|
||
- http*://example.com will match http://example.com, https://example.com
|
||
- chrome-extension://* will match chrome-extension://aaaaaaaaaaaa and chrome-extension://bbbbbbbbbbbbb
|
||
"""
|
||
|
||
if not self.browser_profile.allowed_domains:
|
||
return True # allowed_domains are not configured, allow everything by default
|
||
|
||
# Special case: Always allow 'about:blank' new tab page
|
||
if url == 'about:blank':
|
||
return True
|
||
|
||
for allowed_domain in self.browser_profile.allowed_domains:
|
||
try:
|
||
if match_url_with_domain_pattern(url, allowed_domain, log_warnings=True):
|
||
# If it's a pattern with wildcards, show a warning
|
||
if '*' in allowed_domain:
|
||
parsed_url = urlparse(url)
|
||
domain = parsed_url.hostname.lower() if parsed_url.hostname else ''
|
||
_log_glob_warning(domain, allowed_domain)
|
||
return True
|
||
except AssertionError:
|
||
# This would only happen if about:blank is passed to match_url_with_domain_pattern,
|
||
# which shouldn't occur since we check for it above
|
||
continue
|
||
|
||
return False
|
||
|
||
async def _check_and_handle_navigation(self, page: Page) -> None:
    """Verify that `page` is on an allowed URL; if it is not, try to go back and raise.

    Args:
        page: The page whose current URL is checked with `_is_url_allowed`.

    Raises:
        URLNotAllowedError: If the page's URL is not allowed. The message names the URL
            that was rejected.
    """
    # Snapshot the URL once: `page.url` is read live, so after a successful go_back() it
    # would name the page we returned to instead of the URL that was rejected.
    blocked_url = page.url
    if self._is_url_allowed(blocked_url):
        return

    logger.warning(f'⛔️ Navigation to non-allowed URL detected: {blocked_url}')
    try:
        await self.go_back()
    except Exception as e:
        # Failing to leave the page must not mask the policy violation raised below
        logger.error(f'⛔️ Failed to go back after detecting non-allowed URL: {str(e)}')
    raise URLNotAllowedError(f'Navigation to non-allowed URL: {blocked_url}')
|
||
|
||
async def navigate_to(self, url: str):
    """Load `url` in the agent's current tab and wait for the page's load state.

    Raises:
        BrowserError: If `_is_url_allowed` rejects `url`; nothing is loaded in that case.
    """
    url_permitted = self._is_url_allowed(url)
    if not url_permitted:
        raise BrowserError(f'Navigation to non-allowed URL: {url}')

    current_tab = await self.get_current_page()
    await current_tab.goto(url)
    await current_tab.wait_for_load_state()
|
||
|
||
async def refresh_page(self):
    """Reload the agent's current page, then wait for its load state."""
    current_tab = await self.get_current_page()
    await current_tab.reload()
    await current_tab.wait_for_load_state()
|
||
|
||
async def go_back(self):
    """Step the agent's tab one entry back in the browser history (best effort).

    Any exception raised while fetching the current page or navigating is logged at
    debug level and suppressed, so this method does not propagate failures.
    """
    try:
        current_tab = await self.get_current_page()
        # 10 ms timeout
        await current_tab.go_back(wait_until='domcontentloaded', timeout=10)

        # await self._wait_for_page_and_frames_load(timeout_overwrite=1.0)
    except Exception as err:
        # Continue even if its not fully loaded, because we wait later for the page to load
        logger.debug(f'⏮️ Error during go_back: {err}')
|
||
|
||
async def go_forward(self):
    """Step the agent's tab one entry forward in the browser history (best effort).

    Any exception raised while fetching the current page or navigating is logged at
    debug level and suppressed, so this method does not propagate failures.
    """
    try:
        current_tab = await self.get_current_page()
        await current_tab.go_forward(wait_until='domcontentloaded', timeout=10)
    except Exception as err:
        # Continue even if its not fully loaded, because we wait later for the page to load
        logger.debug(f'⏭️ Error during go_forward: {err}')
|
||
|
||
async def close_current_tab(self):
    """Close the tab the agent is currently working in.

    The tab closed is `agent_current_page`, which is not necessarily the tab visible to the
    user (`human_current_page`). When both point at the same tab, both references are cleared.
    Afterwards, if the context still has pages, the session switches to tab 0.
    """
    assert self.browser_context is not None, 'Browser context is not set'
    assert self.agent_current_page is not None, 'Agent current page is not set'

    closing_tab = self.agent_current_page
    # Remember whether the tab being closed is also the foreground (human) tab
    also_foreground = closing_tab == self.human_current_page

    try:
        await closing_tab.close()
    except Exception as err:
        logger.debug(f'⛔️ Error during close_current_tab: {err}')

    # Drop the references to the tab that was just closed
    self.agent_current_page = None
    if also_foreground:
        self.human_current_page = None

    if not self.browser_context.pages:
        # Otherwise, the browser will be closed
        return

    # switch_to_tab already updates both tab references
    await self.switch_to_tab(0)
|
||
|
||
async def get_page_html(self) -> str:
    """Return the HTML content of the agent's current page."""
    current_tab = await self.get_current_page()
    html = await current_tab.content()
    return html
|
||
|
||
async def get_page_structure(self) -> str:
    """Return a textual debug outline of the current page's element tree, including iframes.

    The outline is produced by a JavaScript snippet evaluated in the page. It walks the DOM
    up to `maxDepth` levels, skips script/style/link/meta/noscript elements, prints one line
    per element (tag, id, classes and a few attributes), and descends into iframes whose
    document is accessible.
    """
    structure_script = """(() => {
    function getPageStructure(element = document, depth = 0, maxDepth = 10) {
        if (depth >= maxDepth) return '';

        const indent = ' '.repeat(depth);
        let structure = '';

        // Skip certain elements that clutter the output
        const skipTags = new Set(['script', 'style', 'link', 'meta', 'noscript']);

        // Add current element info if it's not the document
        if (element !== document) {
            const tagName = element.tagName.toLowerCase();

            // Skip uninteresting elements
            if (skipTags.has(tagName)) return '';

            const id = element.id ? `#${element.id}` : '';
            const classes = element.className && typeof element.className === 'string' ?
                `.${element.className.split(' ').filter(c => c).join('.')}` : '';

            // Get additional useful attributes
            const attrs = [];
            if (element.getAttribute('role')) attrs.push(`role="${element.getAttribute('role')}"`);
            if (element.getAttribute('aria-label')) attrs.push(`aria-label="${element.getAttribute('aria-label')}"`);
            if (element.getAttribute('type')) attrs.push(`type="${element.getAttribute('type')}"`);
            if (element.getAttribute('name')) attrs.push(`name="${element.getAttribute('name')}"`);
            if (element.getAttribute('src')) {
                const src = element.getAttribute('src');
                attrs.push(`src="${src.substring(0, 50)}${src.length > 50 ? '...' : ''}"`);
            }

            // Add element info
            structure += `${indent}${tagName}${id}${classes}${attrs.length ? ' [' + attrs.join(', ') + ']' : ''}\\n`;

            // Handle iframes specially
            if (tagName === 'iframe') {
                try {
                    const iframeDoc = element.contentDocument || element.contentWindow?.document;
                    if (iframeDoc) {
                        structure += `${indent} [IFRAME CONTENT]:\\n`;
                        structure += getPageStructure(iframeDoc, depth + 2, maxDepth);
                    } else {
                        structure += `${indent} [IFRAME: No access - likely cross-origin]\\n`;
                    }
                } catch (e) {
                    structure += `${indent} [IFRAME: Access denied - ${e.message}]\\n`;
                }
            }
        }

        // Get all child elements
        const children = element.children || element.childNodes;
        for (const child of children) {
            if (child.nodeType === 1) { // Element nodes only
                structure += getPageStructure(child, depth + 1, maxDepth);
            }
        }

        return structure;
    }

    return getPageStructure();
})()"""

    current_tab = await self.get_current_page()
    return await current_tab.evaluate(structure_script)
|
||
|
||
@time_execution_async('--get_state_summary')  # async variant so the awaited work is timed, not just coroutine creation
async def get_state_summary(self, cache_clickable_elements_hashes: bool) -> BrowserStateSummary:
    """Get a summary of the current browser state

    This method builds a BrowserStateSummary object that captures the current state
    of the browser, including url, title, tabs, screenshot, and DOM tree.

    Parameters:
    -----------
    cache_clickable_elements_hashes: bool
        If True, cache the clickable elements hashes for the current state.
        This is used to calculate which elements are new to the LLM since the last message,
        which helps reduce token usage.

    Returns:
    --------
    The freshly built summary, which is also stored as `_cached_browser_state_summary`.
    """
    await self._wait_for_page_and_frames_load()
    updated_state = await self._get_updated_state()

    # Find out which elements are new
    # Do this only if url has not changed
    if cache_clickable_elements_hashes:
        # if we are on the same url as the last state, we can use the cached hashes
        if self._cached_clickable_element_hashes and self._cached_clickable_element_hashes.url == updated_state.url:
            # Pointers, feel free to edit in place
            updated_state_clickable_elements = ClickableElementProcessor.get_clickable_elements(updated_state.element_tree)

            for dom_element in updated_state_clickable_elements:
                dom_element.is_new = (
                    ClickableElementProcessor.hash_dom_element(dom_element)
                    not in self._cached_clickable_element_hashes.hashes  # see which elements are new from the last state where we cached the hashes
                )
        # in any case, we need to cache the new hashes
        self._cached_clickable_element_hashes = CachedClickableElementHashes(
            url=updated_state.url,
            hashes=ClickableElementProcessor.get_clickable_elements_hashes(updated_state.element_tree),
        )

    assert updated_state
    self._cached_browser_state_summary = updated_state

    # Save cookies if a file is specified
    if self.browser_profile.cookies_file:
        # Fire-and-forget: the save runs in the background and is not awaited here.
        # NOTE(review): the task reference is not retained; asyncio's docs advise keeping one
        # so the task cannot be garbage-collected before it finishes.
        asyncio.create_task(self.save_cookies())

    return self._cached_browser_state_summary
|
||
|
||
async def _get_updated_state(self, focus_element: int = -1) -> BrowserStateSummary:
    """Rebuild the browser state summary for the current page and return it.

    Args:
        focus_element: Passed through to `DomService.get_clickable_elements`.

    Returns:
        The new summary, also stored on `self.browser_state_summary`. If building it fails
        and a previous summary exists on `self`, that previous summary is returned instead.

    Raises:
        BrowserError: If the current page cannot evaluate a trivial script.
        Exception: Re-raised from the build step when no previous summary is available.
    """
    page = await self.get_current_page()

    # Probe the page with a trivial script to find out whether it is still usable
    try:
        await page.evaluate('1')
    except Exception as e:
        logger.debug(f'👋 Current page is no longer accessible: {type(e).__name__}: {e}')
        raise BrowserError('Browser closed: no valid pages available')

    try:
        await self.remove_highlights()
        clickable = await DomService(page).get_clickable_elements(
            focus_element=focus_element,
            viewport_expansion=self.browser_profile.viewport_expansion,
            highlight_elements=self.browser_profile.highlight_elements,
        )

        open_tabs = await self.get_tabs_info()

        # Disabled feature: open every cross-origin iframe of the page in its own tab
        # (via dom_service.get_cross_origin_iframes() + self.create_new_tab(url)) and append a
        # TabInfo whose title tells the LLM to treat the tab as embedded in the outer page.
        # unfortunately too buggy for now, too many sites use invisible cross-origin iframes for
        # ads, tracking, youtube videos, social media, etc.
        # and it distracts the bot by opening a lot of new tabs

        encoded_screenshot = await self.take_screenshot()
        scrolled_above, scrollable_below = await self.get_scroll_info(page)

        self.browser_state_summary = BrowserStateSummary(
            element_tree=clickable.element_tree,
            selector_map=clickable.selector_map,
            url=page.url,
            title=await page.title(),
            tabs=open_tabs,
            screenshot=encoded_screenshot,
            pixels_above=scrolled_above,
            pixels_below=scrollable_below,
        )
        return self.browser_state_summary
    except Exception as e:
        logger.error(f'❌ Failed to update state: {e}')
        if not hasattr(self, 'browser_state_summary'):
            raise
        # Fall back to the last known good state
        return self.browser_state_summary
|
||
|
||
# region - Browser Actions
|
||
@require_initialization
@time_execution_async('--take_screenshot')
async def take_screenshot(self, full_page: bool = False) -> str:
    """
    Capture the current page and return the image as a base64-encoded string.

    The first attempt passes `full_page` straight to Playwright. If that attempt fails, the
    fallback takes a clipped viewport screenshot whose height is increased by
    `browser_profile.viewport_expansion`, temporarily resizing the viewport when one is set.
    """
    assert self.agent_current_page is not None, 'Agent current page is not set'

    page = await self.get_current_page()
    # page has already loaded by this point, this is extra for previous action animations/frame loads to settle
    await page.wait_for_load_state(timeout=5000)

    # 0. First attempt (sometimes times out for huge pages)
    try:
        image_bytes = await page.screenshot(
            full_page=full_page,
            scale='css',
            timeout=15000,
            animations='disabled',
            caret='initial',
        )
        return base64.b64encode(image_bytes).decode('utf-8')
    except Exception as e:
        logger.error(f'❌ Failed to take full-page screenshot: {e} falling back to viewport-only screenshot')

    # Fallback method: manually expand the viewport and take a screenshot of the entire viewport

    # 1. Ask the page for its current window dimensions
    window_metrics = await page.evaluate("""() => {
        return {
            width: window.innerWidth,
            height: window.innerHeight,
            devicePixelRatio: window.devicePixelRatio || 1
        };
    }""")

    # 2. Remember the viewport so it can be restored, and compute the enlarged clip area
    saved_viewport = page.viewport_size
    extra_height = self.browser_profile.viewport_expansion or 0
    clip_width = window_metrics['width']  # width stays unchanged
    clip_height = window_metrics['height'] + extra_height

    # 3. Only resize when the page actually uses a viewport
    if saved_viewport:
        await page.set_viewport_size({'width': clip_width, 'height': clip_height})

    try:
        # 4. Take the clipped viewport screenshot
        image_bytes = await page.screenshot(
            full_page=False,
            scale='css',
            timeout=30000,
            clip={'x': 0, 'y': 0, 'width': clip_width, 'height': clip_height},
            # animations='disabled',  # these can cause CSP errors on some pages, leading to a red herring "waiting for fonts to load" error
            # caret='initial',
        )
        # TODO: manually take multiple clipped screenshots to capture the full height and stitch them together?
        return base64.b64encode(image_bytes).decode('utf-8')
    finally:
        # 5. Put the original viewport back if we changed it. When no viewport was set there is
        # nothing to restore (playwright does not support set_viewport_size(None)).
        if saved_viewport:
            await page.set_viewport_size(saved_viewport)
|
||
|
||
# region - User Actions
|
||
|
||
@staticmethod
|
||
async def _get_unique_filename(directory: str, filename: str) -> str:
|
||
"""Generate a unique filename for downloads by appending (1), (2), etc., if a file already exists."""
|
||
base, ext = os.path.splitext(filename)
|
||
counter = 1
|
||
new_filename = filename
|
||
while os.path.exists(os.path.join(directory, new_filename)):
|
||
new_filename = f'{base} ({counter}){ext}'
|
||
counter += 1
|
||
return new_filename
|
||
|
||
@staticmethod
|
||
def _convert_simple_xpath_to_css_selector(xpath: str) -> str:
|
||
"""Converts simple XPath expressions to CSS selectors."""
|
||
if not xpath:
|
||
return ''
|
||
|
||
# Remove leading slash if present
|
||
xpath = xpath.lstrip('/')
|
||
|
||
# Split into parts
|
||
parts = xpath.split('/')
|
||
css_parts = []
|
||
|
||
for part in parts:
|
||
if not part:
|
||
continue
|
||
|
||
# Handle custom elements with colons by escaping them
|
||
if ':' in part and '[' not in part:
|
||
base_part = part.replace(':', r'\:')
|
||
css_parts.append(base_part)
|
||
continue
|
||
|
||
# Handle index notation [n]
|
||
if '[' in part:
|
||
base_part = part[: part.find('[')]
|
||
# Handle custom elements with colons in the base part
|
||
if ':' in base_part:
|
||
base_part = base_part.replace(':', r'\:')
|
||
index_part = part[part.find('[') :]
|
||
|
||
# Handle multiple indices
|
||
indices = [i.strip('[]') for i in index_part.split(']')[:-1]]
|
||
|
||
for idx in indices:
|
||
try:
|
||
# Handle numeric indices
|
||
if idx.isdigit():
|
||
index = int(idx) - 1
|
||
base_part += f':nth-of-type({index + 1})'
|
||
# Handle last() function
|
||
elif idx == 'last()':
|
||
base_part += ':last-of-type'
|
||
# Handle position() functions
|
||
elif 'position()' in idx:
|
||
if '>1' in idx:
|
||
base_part += ':nth-of-type(n+2)'
|
||
except ValueError:
|
||
continue
|
||
|
||
css_parts.append(base_part)
|
||
else:
|
||
css_parts.append(part)
|
||
|
||
base_selector = ' > '.join(css_parts)
|
||
return base_selector
|
||
|
||
@classmethod
@time_execution_sync('--enhanced_css_selector_for_element')
def _enhanced_css_selector_for_element(cls, element: DOMElementNode, include_dynamic_attributes: bool = True) -> str:
    """
    Build a CSS selector for a DOM element from its XPath, classes and selected attributes.

    Args:
        element: The DOM element to create a selector for
        include_dynamic_attributes: When True, class names and the data-id / data-qa /
            data-cy / data-testid attributes are added to the selector as well.

    Returns:
        A CSS selector string. If anything raises while building it, a basic
        `tag[highlight_index='...']` selector is returned instead.
    """
    try:
        # Start from the structural path given by the element's XPath
        selector = cls._convert_simple_xpath_to_css_selector(element.xpath)

        if 'class' in element.attributes and element.attributes['class'] and include_dynamic_attributes:
            # Only class names matching this pattern are appended; all others are skipped
            css_identifier = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_-]*$')
            selector += ''.join(
                f'.{token}'
                for token in element.attributes['class'].split()
                if token.strip() and css_identifier.match(token)
            )

        # Attributes that may be added to the selector
        allowed_attributes = {
            'id',
            # Standard HTML attributes
            'name',
            'type',
            'placeholder',
            # Accessibility attributes
            'aria-label',
            'aria-labelledby',
            'aria-describedby',
            'role',
            # Common form attributes
            'for',
            'autocomplete',
            'required',
            'readonly',
            # Media attributes
            'alt',
            'title',
            'src',
            # Link attributes
            'href',
            'target',
        }
        if include_dynamic_attributes:
            allowed_attributes |= {'data-id', 'data-qa', 'data-cy', 'data-testid'}

        for attr_name, attr_value in element.attributes.items():
            # Classes were handled above; blank or non-allowlisted names are ignored
            if attr_name == 'class' or not attr_name.strip() or attr_name not in allowed_attributes:
                continue

            escaped_name = attr_name.replace(':', r'\:')

            if attr_value == '':
                # Presence-only match for empty values
                selector += f'[{escaped_name}]'
                continue

            if not any(char in attr_value for char in '"\'<>`\n\r\t'):
                selector += f'[{escaped_name}="{attr_value}"]'
                continue

            # Values with special characters use a substring ("contains") match.
            # For newline-containing text, only the part before the first newline is used.
            if '\n' in attr_value:
                attr_value = attr_value.split('\n')[0]
            # Collapse every run of whitespace to one space, then trim
            squeezed_value = re.sub(r'\s+', ' ', attr_value).strip()
            # Escape embedded double-quotes
            quoted_value = squeezed_value.replace('"', '\\"')
            selector += f'[{escaped_name}*="{quoted_value}"]'

        return selector

    except Exception:
        # Fallback to a more basic selector if something goes wrong
        fallback_tag = element.tag_name or '*'
        return f"{fallback_tag}[highlight_index='{element.highlight_index}']"
|
||
|
||
@require_initialization
@time_execution_async('--is_visible')
async def _is_visible(self, element: ElementHandle) -> bool:
    """
    Report whether an element counts as visible on the page.

    Playwright's own hidden check is combined with a bounding-box check because of edge cases
    with CSS frameworks like Tailwind: with Tailwind's 'hidden' class the computed display may
    be '' (empty string) instead of 'none', so Playwright can consider a hidden element
    visible. Requiring a bounding box with non-zero width and height catches elements that
    have no size regardless of how they were hidden.
    """
    hidden = await element.is_hidden()
    box = await element.bounding_box()

    if hidden or box is None:
        return False
    return box['width'] > 0 and box['height'] > 0
|
||
|
||
@require_initialization
@time_execution_async('--get_locate_element')
async def get_locate_element(self, element: DOMElementNode) -> ElementHandle | None:
    """Resolve a DOM tree node to a live element handle on the current page.

    Every iframe ancestor of `element` is entered in order (outermost first) through a frame
    locator before the element's own selector is looked up.

    Returns:
        The element handle, or None when nothing matches or the lookup raises.
    """
    page = await self.get_current_page()
    search_scope = page

    # Walk up from the target and collect every ancestor (nearest first)
    ancestors: list[DOMElementNode] = []
    node = element
    while node.parent is not None:
        node = node.parent
        ancestors.append(node)

    # Enter iframe ancestors from the top of the tree down
    iframe_chain = [ancestor for ancestor in reversed(ancestors) if ancestor.tag_name == 'iframe']
    for iframe_node in iframe_chain:
        iframe_selector = self._enhanced_css_selector_for_element(
            iframe_node,
            include_dynamic_attributes=self.browser_profile.include_dynamic_attributes,
        )
        search_scope = search_scope.frame_locator(iframe_selector)

    target_selector = self._enhanced_css_selector_for_element(
        element, include_dynamic_attributes=self.browser_profile.include_dynamic_attributes
    )

    try:
        if isinstance(search_scope, FrameLocator):
            return await search_scope.locator(target_selector).element_handle()

        handle = await search_scope.query_selector(target_selector)
        if not handle:
            return None
        # Scroll the element into view only when it is visible
        if await self._is_visible(handle):
            await handle.scroll_into_view_if_needed()
        return handle
    except Exception as e:
        logger.error(f'❌ Failed to locate element: {str(e)}')
        return None
|
||
|
||
@require_initialization
@time_execution_async('--get_locate_element_by_xpath')
async def get_locate_element_by_xpath(self, xpath: str) -> ElementHandle | None:
    """
    Find the first element on the current page matching `xpath`.

    A visible match is scrolled into view before being returned. Returns None when nothing
    matches or when the lookup raises (the error is logged).
    """
    page = await self.get_current_page()

    try:
        handle = await page.query_selector(f'xpath={xpath}')
        if not handle:
            return None
        if await self._is_visible(handle):
            await handle.scroll_into_view_if_needed()
        return handle
    except Exception as e:
        logger.error(f'❌ Failed to locate element by XPath {xpath}: {str(e)}')
        return None
|
||
|
||
@require_initialization
@time_execution_async('--get_locate_element_by_css_selector')
async def get_locate_element_by_css_selector(self, css_selector: str) -> ElementHandle | None:
    """
    Find the first element on the current page matching `css_selector`.

    A visible match is scrolled into view before being returned. Returns None when nothing
    matches or when the lookup raises (the error is logged).
    """
    page = await self.get_current_page()

    try:
        handle = await page.query_selector(css_selector)
        if not handle:
            return None
        if await self._is_visible(handle):
            await handle.scroll_into_view_if_needed()
        return handle
    except Exception as e:
        logger.error(f'❌ Failed to locate element by CSS selector {css_selector}: {str(e)}')
        return None
|
||
|
||
@require_initialization
@time_execution_async('--get_locate_element_by_text')
async def get_locate_element_by_text(
    self, text: str, nth: int | None = 0, element_type: str | None = None
) -> ElementHandle | None:
    """
    Find a visible element on the current page by its text.

    Args:
        text: Text to look for; it is placed inside a `:text("...")` selector.
        nth: 0-based index into the visible matches. None selects the first match.
        element_type: Optional tag name to restrict the search to (e.g., 'button', 'span');
            any tag is accepted when omitted.

    Returns:
        The chosen element handle, or None when there is no visible match, `nth` is out of
        range, or the lookup raises.
    """
    page = await self.get_current_page()
    try:
        # Restrict to the requested tag, or match any tag
        text_selector = f'{element_type or "*"}:text("{text}")'
        matches = await page.query_selector_all(text_selector)
        # Only visible elements are candidates
        visible_matches = [match for match in matches if await self._is_visible(match)]

        if not visible_matches:
            logger.error(f"No visible element with text '{text}' found.")
            return None

        if nth is None:
            chosen = visible_matches[0]
        elif 0 <= nth < len(visible_matches):
            chosen = visible_matches[nth]
        else:
            logger.error(f"Visible element with text '{text}' not found at index {nth}.")
            return None

        if await self._is_visible(chosen):
            await chosen.scroll_into_view_if_needed()
        return chosen
    except Exception as e:
        logger.error(f"❌ Failed to locate element by text '{text}': {str(e)}")
        return None
|
||
|
||
@require_initialization
@time_execution_async('--input_text_element_node')
async def _input_text_element_node(self, element_node: DOMElementNode, text: str):
    """
    Type `text` into the element described by `element_node`.

    The element is located, given a short chance to stabilise, clicked to focus it, and then
    filled by the method suited to its kind. If that fails, the text is typed through the
    page keyboard as a last resort.

    Raises:
        BrowserError: If the element cannot be found or any step outside the fallbacks fails.
    """
    try:
        # Highlight before typing
        # if element_node.highlight_index is not None:
        #     await self._update_state(focus_element=element_node.highlight_index)

        handle = await self.get_locate_element(element_node)
        if handle is None:
            raise BrowserError(f'Element: {repr(element_node)} not found')

        # Best effort: wait for the element to settle and bring it into view
        try:
            await handle.wait_for_element_state('stable', timeout=1000)
            if await self._is_visible(handle):
                await handle.scroll_into_view_if_needed(timeout=1000)
        except Exception:
            pass

        # Read the properties that decide how the text gets entered
        tag_property = await handle.get_property('tagName')
        tag_name = (await tag_property.json_value()).lower()
        editable_property = await handle.get_property('isContentEditable')
        readonly_property = await handle.get_property('readOnly')
        disabled_property = await handle.get_property('disabled')

        readonly = await readonly_property.json_value() if readonly_property else False
        disabled = await disabled_property.json_value() if disabled_property else False

        # always click the element first to make sure it's in the focus
        await handle.click()
        await asyncio.sleep(0.1)

        try:
            accepts_typing = (await editable_property.json_value() or tag_name == 'input') and not (
                readonly or disabled
            )
            if accepts_typing:
                # Clear existing content, then type key by key
                await handle.evaluate('el => {el.textContent = ""; el.value = "";}')
                await handle.type(text, delay=5)
            else:
                await handle.fill(text)
        except Exception:
            # last resort fallback, assume it's already focused after we clicked on it,
            # just simulate keypresses on the entire page
            page = await self.get_current_page()
            await page.keyboard.type(text)

    except Exception as e:
        logger.debug(f'❌ Failed to input text into element: {repr(element_node)}. Error: {str(e)}')
        raise BrowserError(f'Failed to input text into index {element_node.highlight_index}')
|
||
|
||
@require_initialization
@time_execution_async('--switch_to_tab')
async def switch_to_tab(self, page_id: int) -> Page:
    """Switch to a specific tab by its page_id (aka tab index exposed to LLM)

    Args:
        page_id: Index into the browser context's page list. Negative values index from the
            end, as with any Python list (-1 is the last tab).

    Returns:
        The page that is now both the agent's and the human's current tab.

    Raises:
        BrowserError: If `page_id` does not refer to an open tab, or if the tab's URL is not
            allowed.
    """
    assert self.browser_context is not None, 'Browser context is not set'
    pages = self.browser_context.pages

    # Validate both bounds so an out-of-range negative index reports the same error as an
    # out-of-range positive one instead of surfacing a raw IndexError.
    if not -len(pages) <= page_id < len(pages):
        raise BrowserError(f'No tab found with page_id: {page_id}')

    page = pages[page_id]

    # Check if the tab's URL is allowed before switching
    if not self._is_url_allowed(page.url):
        raise BrowserError(f'Cannot switch to tab with non-allowed URL: {page.url}')

    # Update both tab references - agent wants this tab, and it's now in the foreground
    self.agent_current_page = page
    self.human_current_page = page

    # Bring tab to front and wait for it to load
    await page.bring_to_front()
    await page.wait_for_load_state()

    # Set the viewport size for the tab
    if self.browser_profile.viewport:
        await page.set_viewport_size(self.browser_profile.viewport)

    return page
|
||
|
||
@time_execution_async('--create_new_tab')
async def create_new_tab(self, url: str | None = None) -> Page:
    """Open a new tab, make it the agent's current tab, and optionally load `url` in it.

    Leftover about:blank tabs other than the agent's tab are closed afterwards.

    Raises:
        BrowserError: If `url` is given and `_is_url_allowed` rejects it.
    """
    if url and not self._is_url_allowed(url):
        raise BrowserError(f'Cannot create new tab with non-allowed URL: {url}')

    new_page = await self.browser_context.new_page()

    # The agent always moves to the new tab
    self.agent_current_page = new_page

    # The human reference only moves when it is missing or points at a closed tab
    human_tab = self.human_current_page
    if (not human_tab) or human_tab.is_closed():
        self.human_current_page = new_page

    await new_page.wait_for_load_state()

    # Apply the configured viewport size to the new tab
    if self.browser_profile.viewport:
        await new_page.set_viewport_size(self.browser_profile.viewport)

    if url:
        await new_page.goto(url, wait_until='domcontentloaded', timeout=10000)
        await self._wait_for_page_and_frames_load(timeout_overwrite=1)

    assert self.human_current_page is not None
    assert self.agent_current_page is not None
    # The agent's URL is deliberately not asserted here:
    # sometimes JS or HTTP redirects the page really fast, so it may already differ from `url`

    # if there are any unused about:blank tabs after we open a new tab, close them to clean up unused tabs
    for open_page in self.browser_context.pages:
        if open_page.url == 'about:blank' and open_page != self.agent_current_page:
            await open_page.close()
            # in case we just closed the human's tab, fix the refs
            previous_human_tab = self.human_current_page
            self.human_current_page = (
                self.agent_current_page if previous_human_tab.is_closed() else previous_human_tab
            )

    return new_page
|
||
|
||
# region - Helper methods for easier access to the DOM
|
||
|
||
@require_initialization
async def get_selector_map(self) -> SelectorMap:
    """Return the selector map of the cached browser state summary, or {} when none is cached."""
    cached_summary = self._cached_browser_state_summary
    return {} if cached_summary is None else cached_summary.selector_map
|
||
|
||
@require_initialization
async def get_element_by_index(self, index: int) -> ElementHandle | None:
    """Locate the live element for the selector-map entry at `index`.

    Looks `index` up in the cached selector map (a missing index raises from the lookup) and
    returns whatever `get_locate_element` yields for that node.
    """
    dom_node = (await self.get_selector_map())[index]
    return await self.get_locate_element(dom_node)
|
||
|
||
@require_initialization
async def find_file_upload_element_by_index(self, index: int) -> DOMElementNode | None:
    """
    Find a file input related to the selector-map element at `index`.

    Checks, in this order:
    - the element itself
    - if it is a label with a `for` attribute, the element carrying that id
    - the element's descendants, up to 3 levels deep
    - the element's direct siblings

    Args:
        index: The index of the candidate element (could be a file input, label, or parent element)

    Returns:
        The DOM element for the file input if found, None otherwise (also when `index` is
        not in the selector map or any step raises).
    """
    try:
        selector_map = await self.get_selector_map()
        if index not in selector_map:
            return None

        candidate = selector_map[index]

        def _is_file_input(node: DOMElementNode) -> bool:
            # True only for <input type="file"> element nodes
            return isinstance(node, DOMElementNode) and node.tag_name == 'input' and node.attributes.get('type') == 'file'

        def _find_by_id(node: DOMElementNode, wanted_id: str) -> DOMElementNode | None:
            # Depth-first search for the first element whose id attribute equals wanted_id
            if not isinstance(node, DOMElementNode):
                return None
            if node.attributes.get('id') == wanted_id:
                return node
            for child in node.children:
                hit = _find_by_id(child, wanted_id)
                if hit:
                    return hit
            return None

        def _search_subtree(node: DOMElementNode, max_depth: int = 3, depth: int = 0) -> DOMElementNode | None:
            # Depth-limited search for a file input in node and its descendants
            if depth > max_depth or not isinstance(node, DOMElementNode):
                return None
            if _is_file_input(node):
                return node
            if node.children and depth < max_depth:
                for child in node.children:
                    if isinstance(child, DOMElementNode):
                        found = _search_subtree(child, max_depth, depth + 1)
                        if found:
                            return found
            return None

        # 1. The candidate itself
        if _is_file_input(candidate):
            return candidate

        # 2. A label pointing at a file input through its `for` attribute
        if candidate.tag_name == 'label' and candidate.attributes.get('for'):
            linked_id = candidate.attributes.get('for')
            # Climb to the top of the tree so the whole document is searched
            tree_root = candidate
            while tree_root.parent:
                tree_root = tree_root.parent

            linked_input = _find_by_id(tree_root, linked_id)
            if linked_input and _is_file_input(linked_input):
                return linked_input

        # 3. Descendants of the candidate
        nested_input = _search_subtree(candidate)
        if nested_input:
            return nested_input

        # 4. Direct siblings of the candidate
        if candidate.parent:
            for sibling in candidate.parent.children:
                if sibling is not candidate and isinstance(sibling, DOMElementNode) and _is_file_input(sibling):
                    return sibling
        return None

    except Exception as e:
        logger.debug(f'Error in find_file_upload_element_by_index: {e}')
        return None
|
||
|
||
@require_initialization
async def get_scroll_info(self, page: Page) -> tuple[int, int]:
    """Get scroll position information for the current page.

    Args:
        page: Page whose ``window.scrollY``, ``window.innerHeight`` and
            ``document.documentElement.scrollHeight`` are read.

    Returns:
        ``(pixels_above, pixels_below)``: whole pixels of document content above
        the top of the viewport and below its bottom. Both values are clamped
        so they are never negative.
    """
    scroll_y = await page.evaluate('window.scrollY')
    viewport_height = await page.evaluate('window.innerHeight')
    total_height = await page.evaluate('document.documentElement.scrollHeight')

    # window.scrollY is sub-pixel precise (may be a float); truncate so the
    # result matches the annotated tuple[int, int].
    pixels_above = max(0, int(scroll_y))
    # Fractional offsets or overscroll can push scroll_y + viewport_height past
    # the document height; report 0 instead of a negative remainder.
    pixels_below = max(0, int(total_height - (scroll_y + viewport_height)))
    return pixels_above, pixels_below
|
||
|
||
@require_initialization
async def _scroll_container(self, pixels: int) -> None:
    """Scroll the element that owns vertical scrolling on the current page.

    The injected script starts at ``document.activeElement`` and walks up via
    ``parentElement`` until it reaches a scrollable element or ``document.body``.
    If that yields nothing scrollable, it falls back to the first scrollable
    element matched by ``querySelectorAll('*')``, then ``document.scrollingElement``,
    then ``document.documentElement``. Root-level targets are scrolled with
    ``window.scrollBy``; any other element with its own ``scrollBy``.

    Args:
        pixels: Vertical scroll delta handed to the script as ``dy``.
    """
    # "Scrollable" in the script means: computed overflow-y is auto|scroll|overlay,
    # scrollHeight exceeds clientHeight, and clientHeight is at least half of
    # window.innerHeight.
    scroll_script = """(dy) => {
        const bigEnough = el => el.clientHeight >= window.innerHeight * 0.5;
        const canScroll = el =>
            el &&
            /(auto|scroll|overlay)/.test(getComputedStyle(el).overflowY) &&
            el.scrollHeight > el.clientHeight &&
            bigEnough(el);

        let el = document.activeElement;
        while (el && !canScroll(el) && el !== document.body) el = el.parentElement;

        el = canScroll(el)
            ? el
            : [...document.querySelectorAll('*')].find(canScroll)
              || document.scrollingElement
              || document.documentElement;

        if (el === document.scrollingElement ||
            el === document.documentElement ||
            el === document.body) {
            window.scrollBy(0, dy);
        } else {
            el.scrollBy({ top: dy, behavior: 'auto' });
        }
    }"""
    current_page = await self.get_current_page()
    await current_page.evaluate(scroll_script, pixels)
|
||
|
||
# --- DVD Screensaver Loading Animation Helper ---
|
||
async def _show_dvd_screensaver_loading_animation(self, page: Page) -> None:
    """
    Injects a DVD screensaver-style bouncing logo loading animation overlay into the given Playwright Page.
    This is used to visually indicate that the browser is setting up or waiting.

    The injected script sets the document title to 'Setting up...', appends a full-viewport
    black overlay (id ``pretty-loading-animation``) containing the logo image, and moves the
    image every animation frame, reversing direction at the window edges.

    Calling this again on a document that already contains the overlay is a no-op, so repeated
    calls do not stack overlays, animation loops, resize listeners or style tags.

    Args:
        page: The page to inject the overlay into.
    """
    await page.evaluate("""() => {
        // Already injected into this document: don't add a second overlay / animation loop
        if (document.getElementById('pretty-loading-animation')) return;

        document.title = 'Setting up...';

        // Create the main overlay
        const loadingOverlay = document.createElement('div');
        loadingOverlay.id = 'pretty-loading-animation';
        loadingOverlay.style.position = 'fixed';
        loadingOverlay.style.top = '0';
        loadingOverlay.style.left = '0';
        loadingOverlay.style.width = '100vw';
        loadingOverlay.style.height = '100vh';
        loadingOverlay.style.background = '#000';
        loadingOverlay.style.zIndex = '99999';
        loadingOverlay.style.overflow = 'hidden';

        // Create the image element
        const img = document.createElement('img');
        img.src = 'https://github.com/browser-use.png';
        img.alt = 'Browser-Use';
        img.style.width = '200px';
        img.style.height = 'auto';
        img.style.position = 'absolute';
        img.style.left = '0px';
        img.style.top = '0px';
        img.style.zIndex = '2';
        img.style.opacity = '0.8';

        loadingOverlay.appendChild(img);
        document.body.appendChild(loadingOverlay);

        // DVD screensaver bounce logic
        let x = Math.random() * (window.innerWidth - 300);
        let y = Math.random() * (window.innerHeight - 300);
        let dx = 1.2 + Math.random() * 0.4; // px per frame
        let dy = 1.2 + Math.random() * 0.4;
        // Randomize direction
        if (Math.random() > 0.5) dx = -dx;
        if (Math.random() > 0.5) dy = -dy;

        function animate() {
            const imgWidth = img.offsetWidth || 300;
            const imgHeight = img.offsetHeight || 300;
            x += dx;
            y += dy;

            if (x <= 0) {
                x = 0;
                dx = Math.abs(dx);
            } else if (x + imgWidth >= window.innerWidth) {
                x = window.innerWidth - imgWidth;
                dx = -Math.abs(dx);
            }
            if (y <= 0) {
                y = 0;
                dy = Math.abs(dy);
            } else if (y + imgHeight >= window.innerHeight) {
                y = window.innerHeight - imgHeight;
                dy = -Math.abs(dy);
            }

            img.style.left = `${x}px`;
            img.style.top = `${y}px`;

            requestAnimationFrame(animate);
        }
        animate();

        // Responsive: update bounds on resize
        window.addEventListener('resize', () => {
            x = Math.min(x, window.innerWidth - img.offsetWidth);
            y = Math.min(y, window.innerHeight - img.offsetHeight);
        });

        // Add a little CSS for smoothness
        const style = document.createElement('style');
        style.innerHTML = `
            #pretty-loading-animation {
                /*backdrop-filter: blur(2px) brightness(0.9);*/
            }
            #pretty-loading-animation img {
                user-select: none;
                pointer-events: none;
            }
        `;
        document.head.appendChild(style);
    }""")
|