mirror of
https://github.com/browser-use/browser-use
synced 2026-05-13 17:56:35 +02:00
Merge branch 'main' into feat/custom-screenshot-quality
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from browser_use.logging_config import setup_logging
|
||||
|
||||
@@ -13,21 +14,6 @@ else:
|
||||
# Monkeypatch BaseSubprocessTransport.__del__ to handle closed event loops gracefully
|
||||
from asyncio import base_subprocess
|
||||
|
||||
from browser_use.agent.prompts import SystemPrompt
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.agent.views import ActionModel, ActionResult, AgentHistoryList
|
||||
from browser_use.browser import Browser, BrowserConfig, BrowserContext, BrowserContextConfig, BrowserProfile, BrowserSession
|
||||
from browser_use.controller.service import Controller
|
||||
from browser_use.dom.service import DomService
|
||||
from browser_use.llm import (
|
||||
ChatAnthropic,
|
||||
ChatAzureOpenAI,
|
||||
ChatGoogle,
|
||||
ChatGroq,
|
||||
ChatOllama,
|
||||
ChatOpenAI,
|
||||
)
|
||||
|
||||
_original_del = base_subprocess.BaseSubprocessTransport.__del__
|
||||
|
||||
|
||||
@@ -50,6 +36,71 @@ def _patched_del(self):
|
||||
base_subprocess.BaseSubprocessTransport.__del__ = _patched_del
|
||||
|
||||
|
||||
# Type stubs for lazy imports - fixes linter warnings
|
||||
if TYPE_CHECKING:
|
||||
from browser_use.agent.prompts import SystemPrompt
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.agent.views import ActionModel, ActionResult, AgentHistoryList
|
||||
from browser_use.browser import Browser, BrowserConfig, BrowserContext, BrowserContextConfig, BrowserProfile, BrowserSession
|
||||
from browser_use.controller.service import Controller
|
||||
from browser_use.dom.service import DomService
|
||||
from browser_use.llm.anthropic.chat import ChatAnthropic
|
||||
from browser_use.llm.azure.chat import ChatAzureOpenAI
|
||||
from browser_use.llm.google.chat import ChatGoogle
|
||||
from browser_use.llm.groq.chat import ChatGroq
|
||||
from browser_use.llm.ollama.chat import ChatOllama
|
||||
from browser_use.llm.openai.chat import ChatOpenAI
|
||||
|
||||
|
||||
# Lazy imports mapping - only import when actually accessed
|
||||
# Public name -> (module path, attribute name). Declared as (module, names) groups
# because every exported name maps to a same-named attribute of its module.
_LAZY_IMPORTS = {
    exported: (module_path, exported)
    for module_path, exported_names in (
        # Agent service (heavy due to dependencies)
        ('browser_use.agent.service', ('Agent',)),
        # System prompt (moderate weight due to agent.views imports)
        ('browser_use.agent.prompts', ('SystemPrompt',)),
        # Agent views (very heavy - over 1 second!)
        ('browser_use.agent.views', ('ActionModel', 'ActionResult', 'AgentHistoryList')),
        # Browser components (heavy due to playwright/patchright)
        (
            'browser_use.browser',
            (
                'Browser',
                'BrowserConfig',
                'BrowserSession',
                'BrowserProfile',
                'BrowserContext',
                'BrowserContextConfig',
            ),
        ),
        # Controller (moderate weight)
        ('browser_use.controller.service', ('Controller',)),
        # DOM service (moderate weight)
        ('browser_use.dom.service', ('DomService',)),
        # Chat models (very heavy imports)
        ('browser_use.llm.openai.chat', ('ChatOpenAI',)),
        ('browser_use.llm.google.chat', ('ChatGoogle',)),
        ('browser_use.llm.anthropic.chat', ('ChatAnthropic',)),
        ('browser_use.llm.groq.chat', ('ChatGroq',)),
        ('browser_use.llm.azure.chat', ('ChatAzureOpenAI',)),
        ('browser_use.llm.ollama.chat', ('ChatOllama',)),
    )
    for exported in exported_names
}
|
||||
|
||||
|
||||
def __getattr__(name: str):
    """Resolve a lazily exported name the first time it is requested.

    Looks ``name`` up in ``_LAZY_IMPORTS``, imports the module it points at,
    fetches the attribute, stores it in this module's globals so later lookups
    bypass this hook, and returns it.

    Raises:
        AttributeError: ``name`` is not listed in ``_LAZY_IMPORTS``.
        ImportError: the target module could not be imported (original error chained).
    """
    target = _LAZY_IMPORTS.get(name)
    if target is None:
        raise AttributeError(f"module '{__name__}' has no attribute '{name}'")

    module_path, attr_name = target
    try:
        from importlib import import_module

        resolved = getattr(import_module(module_path), attr_name)
    except ImportError as e:
        raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e

    # Cache the imported attribute in the module's globals
    globals()[name] = resolved
    return resolved
|
||||
|
||||
|
||||
__all__ = [
|
||||
'Agent',
|
||||
'Browser',
|
||||
|
||||
@@ -34,7 +34,8 @@ from bubus import EventBus
|
||||
from pydantic import ValidationError
|
||||
from uuid_extensions import uuid7str
|
||||
|
||||
from browser_use.agent.gif import create_history_gif
|
||||
# Lazy import for gif to avoid heavy agent.views import at startup
|
||||
# from browser_use.agent.gif import create_history_gif
|
||||
from browser_use.agent.message_manager.service import (
|
||||
MessageManager,
|
||||
)
|
||||
@@ -184,6 +185,8 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
display_files_in_done_text: bool = True,
|
||||
include_tool_call_examples: bool = False,
|
||||
vision_detail_level: Literal['auto', 'low', 'high'] = 'auto',
|
||||
llm_timeout: int = 60,
|
||||
step_timeout: int = 180,
|
||||
**kwargs,
|
||||
):
|
||||
# Check for deprecated planner parameters
|
||||
@@ -261,6 +264,8 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
extend_planner_system_message=None, # Always None now (deprecated)
|
||||
calculate_cost=calculate_cost,
|
||||
include_tool_call_examples=include_tool_call_examples,
|
||||
llm_timeout=llm_timeout,
|
||||
step_timeout=step_timeout,
|
||||
)
|
||||
|
||||
# Token cost service
|
||||
@@ -280,7 +285,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
self._set_browser_use_version_and_source(source)
|
||||
self.initial_actions = self._convert_initial_actions(initial_actions) if initial_actions else None
|
||||
|
||||
# Verify we can connect to the LLM and setup the tool calling method
|
||||
# Verify we can connect to the model
|
||||
self._verify_and_setup_llm()
|
||||
|
||||
# TODO: move this logic to the LLMs
|
||||
@@ -644,6 +649,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
self.task = new_task
|
||||
self._message_manager.add_new_task(new_task)
|
||||
|
||||
@observe_debug(ignore_input=True, ignore_output=True, name='_raise_if_stopped_or_paused')
|
||||
async def _raise_if_stopped_or_paused(self) -> None:
|
||||
"""Utility function that raises an InterruptedError if the agent is stopped or paused."""
|
||||
|
||||
@@ -655,24 +661,6 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
# self.logger.debug('Agent paused after getting state')
|
||||
raise InterruptedError
|
||||
|
||||
@observe_debug(ignore_input=True, ignore_output=True, name='get_browser_state_with_recovery')
|
||||
async def _get_browser_state_with_recovery(self, cache_clickable_elements_hashes: bool = True) -> BrowserStateSummary:
|
||||
"""Get browser state with multiple fallback strategies for error recovery"""
|
||||
|
||||
assert self.browser_session is not None, 'BrowserSession is not set up'
|
||||
|
||||
# Try 1: Full state summary (current implementation) - like main branch
|
||||
try:
|
||||
return await self.browser_session.get_state_summary(cache_clickable_elements_hashes)
|
||||
except Exception as e:
|
||||
if self.state.last_result is None:
|
||||
self.state.last_result = []
|
||||
self.state.last_result.append(ActionResult(error=str(e)))
|
||||
self.logger.warning(f'Full state retrieval failed: {type(e).__name__}: {e}')
|
||||
|
||||
self.logger.warning('🔄 Falling back to minimal state summary')
|
||||
return await self.browser_session.get_minimal_state_summary()
|
||||
|
||||
@observe(name='agent.step', ignore_output=True, ignore_input=True)
|
||||
@time_execution_async('--step')
|
||||
async def step(self, step_info: AgentStepInfo | None = None) -> None:
|
||||
@@ -707,7 +695,9 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
assert self.browser_session is not None, 'BrowserSession is not set up'
|
||||
|
||||
self.logger.debug(f'🌐 Step {self.state.n_steps + 1}: Getting browser state...')
|
||||
browser_state_summary = await self._get_browser_state_with_recovery(cache_clickable_elements_hashes=True)
|
||||
browser_state_summary = await self.browser_session.get_browser_state_with_recovery(
|
||||
cache_clickable_elements_hashes=True, include_screenshot=self.settings.use_vision
|
||||
)
|
||||
current_page = await self.browser_session.get_current_page()
|
||||
|
||||
# Check for new downloads after getting browser state (catches PDF auto-downloads and previous step downloads)
|
||||
@@ -744,6 +734,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
await self._handle_final_step(step_info)
|
||||
return browser_state_summary
|
||||
|
||||
@observe_debug(ignore_input=True, name='get_next_action')
|
||||
async def _get_next_action(self, browser_state_summary: BrowserStateSummary) -> None:
|
||||
"""Execute LLM interaction with retry logic and handle callbacks"""
|
||||
input_messages = self._message_manager.get_messages()
|
||||
@@ -751,7 +742,15 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
f'🤖 Step {self.state.n_steps + 1}: Calling LLM with {len(input_messages)} messages (model: {self.llm.model})...'
|
||||
)
|
||||
|
||||
model_output = await self._get_model_output_with_retry(input_messages)
|
||||
try:
|
||||
model_output = await asyncio.wait_for(
|
||||
self._get_model_output_with_retry(input_messages), timeout=self.settings.llm_timeout
|
||||
)
|
||||
except TimeoutError:
|
||||
raise TimeoutError(
|
||||
f'LLM call timed out after {self.settings.llm_timeout} seconds. Keep your thinking and output short.'
|
||||
)
|
||||
|
||||
self.state.last_model_output = model_output
|
||||
|
||||
# Check again for paused/stopped state after getting model output
|
||||
@@ -988,6 +987,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
return text.strip()
|
||||
|
||||
@time_execution_async('--get_next_action')
|
||||
@observe_debug(ignore_input=True, ignore_output=True, name='get_model_output')
|
||||
async def get_model_output(self, input_messages: list[BaseMessage]) -> AgentOutput:
|
||||
"""Get next action from LLM based on current state"""
|
||||
|
||||
@@ -1249,15 +1249,15 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
try:
|
||||
await asyncio.wait_for(
|
||||
self.step(step_info),
|
||||
timeout=300, # 5 minute step timeout - more generous for slow LLM calls
|
||||
timeout=self.settings.step_timeout,
|
||||
)
|
||||
self.logger.debug(f'✅ Completed step {step + 1}/{max_steps}')
|
||||
except TimeoutError:
|
||||
# Handle step timeout gracefully
|
||||
error_msg = f'Step {step + 1} timed out after 300 seconds'
|
||||
error_msg = f'Step {step + 1} timed out after {self.settings.step_timeout} seconds'
|
||||
self.logger.error(f'⏰ {error_msg}')
|
||||
self.state.consecutive_failures += 1
|
||||
self.state.last_result = [ActionResult(error=error_msg, include_in_memory=True)]
|
||||
self.state.last_result = [ActionResult(error=error_msg)]
|
||||
|
||||
if on_step_end is not None:
|
||||
await on_step_end(self)
|
||||
@@ -1347,6 +1347,9 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
if isinstance(self.settings.generate_gif, str):
|
||||
output_path = self.settings.generate_gif
|
||||
|
||||
# Lazy import gif module to avoid heavy startup cost
|
||||
from browser_use.agent.gif import create_history_gif
|
||||
|
||||
create_history_gif(task=self.task, history=self.state.history, output_path=output_path)
|
||||
|
||||
# Emit output file generated event for GIF
|
||||
@@ -1381,56 +1384,63 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
results: list[ActionResult] = []
|
||||
|
||||
assert self.browser_session is not None, 'BrowserSession is not set up'
|
||||
cached_selector_map = await self.browser_session.get_selector_map()
|
||||
cached_path_hashes = {e.hash.branch_path_hash for e in cached_selector_map.values()}
|
||||
|
||||
try:
|
||||
await self.browser_session.remove_highlights()
|
||||
except TimeoutError:
|
||||
# we don't care if this times out
|
||||
self.logger.debug('Timeout to remove highlights')
|
||||
|
||||
for i, action in enumerate(actions):
|
||||
# DO NOT ALLOW TO CALL `done` AS A SINGLE ACTION
|
||||
if i > 0 and action.model_dump(exclude_unset=True).get('done') is not None:
|
||||
msg = f'Done action is allowed only as a single action - stopped after action {i} / {len(actions)}.'
|
||||
logger.info(msg)
|
||||
cached_selector_map = {}
|
||||
cached_path_hashes = set()
|
||||
# check all actions if any has index, if so, get the selector map
|
||||
for action in actions:
|
||||
if action.get_index() is not None:
|
||||
cached_selector_map = await self.browser_session.get_selector_map()
|
||||
cached_path_hashes = {e.hash.branch_path_hash for e in cached_selector_map.values()}
|
||||
break
|
||||
|
||||
if action.get_index() is not None and i != 0:
|
||||
new_browser_state_summary = await self.browser_session.get_state_summary(cache_clickable_elements_hashes=False)
|
||||
new_selector_map = new_browser_state_summary.selector_map
|
||||
|
||||
# Detect index change after previous action
|
||||
orig_target = cached_selector_map.get(action.get_index()) # type: ignore
|
||||
orig_target_hash = orig_target.hash.branch_path_hash if orig_target else None
|
||||
new_target = new_selector_map.get(action.get_index()) # type: ignore
|
||||
new_target_hash = new_target.hash.branch_path_hash if new_target else None
|
||||
if orig_target_hash != new_target_hash:
|
||||
msg = f'Element index changed after action {i} / {len(actions)}, because page changed.'
|
||||
# loop over actions and execute them
|
||||
for i, action in enumerate(actions):
|
||||
if i > 0:
|
||||
# ONLY ALLOW TO CALL `done` IF IT IS A SINGLE ACTION
|
||||
if action.model_dump(exclude_unset=True).get('done') is not None:
|
||||
msg = f'Done action is allowed only as a single action - stopped after action {i} / {len(actions)}.'
|
||||
logger.info(msg)
|
||||
results.append(
|
||||
ActionResult(
|
||||
extracted_content=msg,
|
||||
include_in_memory=True,
|
||||
long_term_memory=msg,
|
||||
)
|
||||
)
|
||||
break
|
||||
|
||||
new_path_hashes = {e.hash.branch_path_hash for e in new_selector_map.values()}
|
||||
if check_for_new_elements and not new_path_hashes.issubset(cached_path_hashes):
|
||||
# next action requires index but there are new elements on the page
|
||||
msg = f'Something new appeared after action {i} / {len(actions)}, following actions are NOT executed and should be retried.'
|
||||
logger.info(msg)
|
||||
results.append(
|
||||
ActionResult(
|
||||
extracted_content=msg,
|
||||
include_in_memory=True,
|
||||
long_term_memory=msg,
|
||||
)
|
||||
if action.get_index() is not None:
|
||||
new_browser_state_summary = await self.browser_session.get_browser_state_with_recovery(
|
||||
cache_clickable_elements_hashes=False, include_screenshot=False
|
||||
)
|
||||
break
|
||||
new_selector_map = new_browser_state_summary.selector_map
|
||||
|
||||
# Detect index change after previous action
|
||||
orig_target = cached_selector_map.get(action.get_index()) # type: ignore
|
||||
orig_target_hash = orig_target.hash.branch_path_hash if orig_target else None
|
||||
new_target = new_selector_map.get(action.get_index()) # type: ignore
|
||||
new_target_hash = new_target.hash.branch_path_hash if new_target else None
|
||||
if orig_target_hash != new_target_hash:
|
||||
msg = f'Element index changed after action {i} / {len(actions)}, because page changed.'
|
||||
logger.info(msg)
|
||||
results.append(
|
||||
ActionResult(
|
||||
extracted_content=msg,
|
||||
include_in_memory=True,
|
||||
long_term_memory=msg,
|
||||
)
|
||||
)
|
||||
break
|
||||
|
||||
new_path_hashes = {e.hash.branch_path_hash for e in new_selector_map.values()}
|
||||
if check_for_new_elements and not new_path_hashes.issubset(cached_path_hashes):
|
||||
# next action requires index but there are new elements on the page
|
||||
msg = f'Something new appeared after action {i} / {len(actions)}, following actions are NOT executed and should be retried.'
|
||||
logger.info(msg)
|
||||
results.append(
|
||||
ActionResult(
|
||||
extracted_content=msg,
|
||||
include_in_memory=True,
|
||||
long_term_memory=msg,
|
||||
)
|
||||
)
|
||||
break
|
||||
|
||||
# wait between actions
|
||||
await asyncio.sleep(self.browser_profile.wait_between_actions)
|
||||
|
||||
try:
|
||||
await self._raise_if_stopped_or_paused()
|
||||
@@ -1455,9 +1465,6 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
if results[-1].is_done or results[-1].error or i == len(actions) - 1:
|
||||
break
|
||||
|
||||
await asyncio.sleep(self.browser_profile.wait_between_actions)
|
||||
# hash all elements. if it is a subset of cached_state its fine - else break (new elements on page)
|
||||
|
||||
except Exception as e:
|
||||
# Handle any exceptions during action execution
|
||||
self.logger.error(f'Action {i + 1} failed: {type(e).__name__}: {e}')
|
||||
@@ -1535,7 +1542,9 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
async def _execute_history_step(self, history_item: AgentHistory, delay: float) -> list[ActionResult]:
|
||||
"""Execute a single step from history with element validation"""
|
||||
assert self.browser_session is not None, 'BrowserSession is not set up'
|
||||
state = await self.browser_session.get_state_summary(cache_clickable_elements_hashes=False)
|
||||
state = await self.browser_session.get_browser_state_with_recovery(
|
||||
cache_clickable_elements_hashes=False, include_screenshot=False
|
||||
)
|
||||
if not state or not history_item.model_output:
|
||||
raise ValueError('Invalid state or model output')
|
||||
updated_actions = []
|
||||
|
||||
@@ -65,6 +65,8 @@ class AgentSettings(BaseModel):
|
||||
extend_planner_system_message: str | None = None
|
||||
calculate_cost: bool = False
|
||||
include_tool_call_examples: bool = False
|
||||
llm_timeout: int = 60 # Timeout in seconds for LLM calls
|
||||
step_timeout: int = 180 # Timeout in seconds for each step
|
||||
|
||||
|
||||
class AgentState(BaseModel):
|
||||
|
||||
@@ -1,6 +1,41 @@
|
||||
from .browser import Browser, BrowserConfig
|
||||
from .context import BrowserContext, BrowserContextConfig
|
||||
from .profile import BrowserProfile
|
||||
from .session import BrowserSession
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
# Type stubs for lazy imports
|
||||
if TYPE_CHECKING:
|
||||
from .browser import Browser, BrowserConfig
|
||||
from .context import BrowserContext, BrowserContextConfig
|
||||
from .profile import BrowserProfile
|
||||
from .session import BrowserSession
|
||||
|
||||
# Lazy imports mapping for heavy browser components
|
||||
# Public name -> (package-relative module, attribute name); each exported name maps
# to a same-named attribute of its submodule.
_LAZY_IMPORTS = {
    exported: (submodule, exported)
    for submodule, exported_names in (
        ('.browser', ('Browser', 'BrowserConfig')),
        ('.context', ('BrowserContext', 'BrowserContextConfig')),
        ('.profile', ('BrowserProfile',)),
        ('.session', ('BrowserSession',)),
    )
    for exported in exported_names
}
|
||||
|
||||
|
||||
def __getattr__(name: str):
    """Lazy import mechanism for heavy browser components.

    Looks ``name`` up in ``_LAZY_IMPORTS``, imports the referenced submodule
    relative to this package, caches the attribute in this module's globals and
    returns it.

    Raises:
        AttributeError: ``name`` is not listed in ``_LAZY_IMPORTS``.
        ImportError: the submodule could not be imported (original error chained).
    """
    if name in _LAZY_IMPORTS:
        module_path, attr_name = _LAZY_IMPORTS[name]
        # Absolute name, used only for the error message below
        full_module_path = f'{__name__}{module_path}'
        try:
            from importlib import import_module

            # Genuine relative import anchored at this package, so the lookup keeps
            # working if the package is vendored or imported under another name
            module = import_module(module_path, package=__name__)
            attr = getattr(module, attr_name)
            # Cache the imported attribute in the module's globals
            globals()[name] = attr
            return attr
        except ImportError as e:
            raise ImportError(f'Failed to import {name} from {full_module_path}: {e}') from e

    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
|
||||
|
||||
|
||||
__all__ = ['Browser', 'BrowserConfig', 'BrowserContext', 'BrowserContextConfig', 'BrowserSession', 'BrowserProfile']
|
||||
|
||||
@@ -169,6 +169,10 @@ CHROME_DEFAULT_ARGS = [
|
||||
'--disable-desktop-notifications',
|
||||
'--noerrdialogs',
|
||||
'--silent-debugger-extension-api',
|
||||
# Extension welcome tab suppression for automation
|
||||
'--disable-extensions-http-throttling',
|
||||
'--extensions-on-chrome-urls',
|
||||
'--disable-default-apps',
|
||||
f'--disable-features={",".join(CHROME_DISABLED_COMPONENTS)}',
|
||||
]
|
||||
|
||||
@@ -558,6 +562,10 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
|
||||
description='List of allowed domains for navigation e.g. ["*.google.com", "https://example.com", "chrome-extension://*"]',
|
||||
)
|
||||
keep_alive: bool | None = Field(default=None, description='Keep browser alive after agent run.')
|
||||
enable_default_extensions: bool = Field(
|
||||
default=True,
|
||||
description="Enable automation-optimized extensions: ad blocking (uBlock Origin), cookie handling (I still don't care about cookies), and URL cleaning (ClearURLs). All extensions work automatically without manual intervention. Extensions are automatically downloaded and loaded when enabled.",
|
||||
)
|
||||
window_size: ViewportSize | None = Field(
|
||||
default=None,
|
||||
description='Browser window size to use when headless=False.',
|
||||
@@ -620,6 +628,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
|
||||
window_size['width'] = window_size['width'] or self.window_width or 1280
|
||||
window_size['height'] = window_size['height'] or self.window_height or 1100
|
||||
self.window_size = window_size
|
||||
|
||||
return self
|
||||
|
||||
@model_validator(mode='after')
|
||||
@@ -699,12 +708,162 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
|
||||
if self.window_position
|
||||
else []
|
||||
),
|
||||
*(self._get_extension_args() if self.enable_default_extensions else []),
|
||||
]
|
||||
|
||||
# convert to dict and back to dedupe and merge duplicate args
|
||||
final_args_list = BrowserLaunchArgs.args_as_list(BrowserLaunchArgs.args_as_dict(pre_conversion_args))
|
||||
return final_args_list
|
||||
|
||||
def _get_extension_args(self) -> list[str]:
    """Build the Chrome command-line flags that switch on the default extensions.

    Returns the four fixed extension flags, followed by one
    ``--load-extension=<comma-joined paths>`` flag when
    ``_ensure_default_extensions_downloaded()`` yields at least one path.
    """
    unpacked_dirs = self._ensure_default_extensions_downloaded()
    load_flag = [f'--load-extension={",".join(unpacked_dirs)}'] if unpacked_dirs else []
    return [
        '--enable-extensions',
        '--disable-extensions-file-access-check',
        '--disable-extensions-http-throttling',
        '--enable-extension-activity-logging',
        *load_flag,
    ]
|
||||
|
||||
def _ensure_default_extensions_downloaded(self) -> list[str]:
    """
    Ensure default extensions are downloaded and cached locally.

    Each extension is cached under ``~/.browser-use/extensions`` as ``<id>.crx``
    (the downloaded package) and ``<id>/`` (the unpacked directory). An unpacked
    directory that already contains ``manifest.json`` is reused as-is. A failure
    for one extension is logged and that extension is skipped; its cached package
    is removed so the next call downloads it again instead of retrying a
    possibly truncated or corrupt file forever.

    Returns list of paths to extension directories.
    """
    from pathlib import Path

    # Extension definitions - optimized for automation and content extraction
    extensions = [
        {
            'name': 'uBlock Origin',
            'id': 'cjpalhdlnbpafiamejdnhcphjbkeiagm',
            'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dcjpalhdlnbpafiamejdnhcphjbkeiagm%26uc',
        },
        {
            'name': "I still don't care about cookies",
            'id': 'edibdbjcniadpccecjdfdjjppcpchdlm',
            'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dedibdbjcniadpccecjdfdjjppcpchdlm%26uc',
        },
        {
            'name': 'ClearURLs',
            'id': 'lckanjgmijmafbedllaakclkaicjfmnk',
            'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dlckanjgmijmafbedllaakclkaicjfmnk%26uc',
        },
    ]

    # Create extensions cache directory
    cache_dir = Path.home() / '.browser-use' / 'extensions'
    cache_dir.mkdir(parents=True, exist_ok=True)

    extension_paths = []
    loaded_extension_names = []

    for ext in extensions:
        ext_dir = cache_dir / ext['id']
        crx_file = cache_dir / f'{ext["id"]}.crx'

        # Check if extension is already extracted
        if ext_dir.exists() and (ext_dir / 'manifest.json').exists():
            extension_paths.append(str(ext_dir))
            loaded_extension_names.append(ext['name'])
            continue

        try:
            # Download extension if not cached
            if not crx_file.exists():
                logger.info(f'📦 Downloading {ext["name"]} extension...')
                self._download_extension(ext['url'], crx_file)

            # Extract extension
            if crx_file.exists():
                logger.info(f'📂 Extracting {ext["name"]} extension...')
                self._extract_extension(crx_file, ext_dir)
                extension_paths.append(str(ext_dir))
                loaded_extension_names.append(ext['name'])

        except Exception as e:
            logger.warning(f'⚠️ Failed to setup {ext["name"]} extension: {e}')
            # A cached package that could not be set up would otherwise be reused on
            # every later call (the download is skipped while the file exists)
            try:
                crx_file.unlink(missing_ok=True)
            except OSError:
                pass  # best-effort cleanup; the failure itself is already logged
            continue

    if extension_paths:
        logger.info(f'✅ Extensions ready: {len(extension_paths)} extensions loaded ({", ".join(loaded_extension_names)})')
    else:
        logger.warning('⚠️ No default extensions could be loaded')

    return extension_paths
|
||||
|
||||
def _download_extension(self, url: str, output_path: Path, timeout: float = 30.0) -> None:
    """Download extension .crx file.

    The response is streamed into ``<output_path>.part`` and moved over
    ``output_path`` only once the download has completed, so a failed or
    interrupted transfer never leaves a partial file at ``output_path``.

    Args:
        url: Location of the .crx package.
        output_path: Destination file for the downloaded package.
        timeout: Seconds to wait on blocking network operations before giving up.

    Raises:
        Exception: the download or the final move failed (original error chained).
    """
    import shutil
    import urllib.request
    from pathlib import Path

    output_path = Path(output_path)
    partial_path = output_path.with_name(output_path.name + '.part')

    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            with open(partial_path, 'wb') as f:
                shutil.copyfileobj(response, f)
        # Same-directory rename: the destination appears only when complete
        partial_path.replace(output_path)
    except Exception as e:
        try:
            partial_path.unlink(missing_ok=True)
        except OSError:
            pass  # best-effort cleanup; the download error below is what matters
        raise Exception(f'Failed to download extension: {e}') from e
|
||||
|
||||
def _extract_extension(self, crx_path: Path, extract_dir: Path) -> None:
    """Extract .crx file to directory.

    Any existing ``extract_dir`` is removed first. The file is first opened as a
    plain ZIP archive; if that fails with ``BadZipFile``, the CRX header
    (``Cr24`` magic, version 2 or 3) is skipped and the remaining bytes are
    extracted as ZIP data from memory.

    Args:
        crx_path: Path of the .crx package to unpack.
        extract_dir: Directory to (re)create and unpack into.

    Raises:
        Exception: the file lacks the ``Cr24`` magic, uses a CRX version other
            than 2 or 3, or the unpacked result has no ``manifest.json``.
        zipfile.BadZipFile: the payload after the CRX header is not a valid ZIP.
    """
    import io
    import shutil
    import zipfile

    # Remove existing directory
    if extract_dir.exists():
        shutil.rmtree(extract_dir)

    extract_dir.mkdir(parents=True, exist_ok=True)

    try:
        # CRX files are ZIP files with a header, try to extract as ZIP
        with zipfile.ZipFile(crx_path, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)

    except zipfile.BadZipFile:
        # CRX files have a header before the ZIP data
        # Skip the CRX header and extract the ZIP part
        with open(crx_path, 'rb') as f:
            # Read CRX header to find ZIP start
            magic = f.read(4)
            if magic != b'Cr24':
                raise Exception('Invalid CRX file format')

            version = int.from_bytes(f.read(4), 'little')
            if version == 2:
                pubkey_len = int.from_bytes(f.read(4), 'little')
                sig_len = int.from_bytes(f.read(4), 'little')
                f.seek(16 + pubkey_len + sig_len)  # Skip to ZIP data
            elif version == 3:
                header_len = int.from_bytes(f.read(4), 'little')
                f.seek(12 + header_len)  # Skip to ZIP data
            else:
                # Without a known header layout the ZIP offset cannot be determined
                raise Exception(f'Unsupported CRX version: {version}')

            # Extract ZIP data
            zip_data = f.read()

        # Unpack straight from memory: no temporary file to leak if extraction fails
        with zipfile.ZipFile(io.BytesIO(zip_data), 'r') as zip_ref:
            zip_ref.extractall(extract_dir)

    # Verify manifest exists (applies to both extraction paths)
    if not (extract_dir / 'manifest.json').exists():
        raise Exception('No manifest.json found in extension')
|
||||
|
||||
def kwargs_for_launch_persistent_context(self) -> BrowserLaunchPersistentContextArgs:
|
||||
"""Return the kwargs for BrowserType.launch_persistent_context()."""
|
||||
return BrowserLaunchPersistentContextArgs(**self.model_dump(exclude={'args'}), args=self.get_args())
|
||||
@@ -721,22 +880,6 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
|
||||
"""Return the kwargs for BrowserType.connect_over_cdp()."""
|
||||
return BrowserLaunchArgs(**self.model_dump(exclude={'args'}), args=self.get_args())
|
||||
|
||||
# def preinstall_extensions(self) -> None:
|
||||
# """Preinstall the extensions."""
|
||||
|
||||
# # create the local unpacked extensions dir
|
||||
# extensions_dir = self.user_data_dir / 'Extensions'
|
||||
# extensions_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# # download from the chrome web store using the chrome web store api
|
||||
# for extension_id in self.extension_ids_to_preinstall:
|
||||
# extension_path = extensions_dir / f'{extension_id}.crx'
|
||||
# if extension_path.exists():
|
||||
# logger.warning(f'⚠️ Extension {extension_id} is already installed, skipping preinstall.')
|
||||
# else:
|
||||
# logger.info(f'🔍 Preinstalling extension {extension_id}...')
|
||||
# # TODO: copy this from ArchiveBox implementation
|
||||
|
||||
@observe_debug(ignore_input=True, ignore_output=True, name='detect_display_configuration')
|
||||
def detect_display_configuration(self) -> None:
|
||||
"""
|
||||
|
||||
@@ -51,8 +51,10 @@ from browser_use.browser.views import (
|
||||
TabInfo,
|
||||
URLNotAllowedError,
|
||||
)
|
||||
from browser_use.dom.clickable_element_processor.service import ClickableElementProcessor
|
||||
from browser_use.dom.service import DomService
|
||||
|
||||
# Lazy imports for heavy DOM services to improve startup time
|
||||
# from browser_use.dom.clickable_element_processor.service import ClickableElementProcessor
|
||||
# from browser_use.dom.service import DomService
|
||||
from browser_use.dom.views import DOMElementNode, SelectorMap
|
||||
from browser_use.utils import (
|
||||
is_new_tab_page,
|
||||
@@ -160,12 +162,14 @@ def require_healthy_browser(usable_page=True, reopen_page=True):
|
||||
await self._recover_unresponsive_page(
|
||||
func.__name__, timeout_ms=int(self.browser_profile.default_navigation_timeout or 5000) + 5_000
|
||||
)
|
||||
page_url = self.agent_current_page.url if self.agent_current_page else 'unknown page'
|
||||
self.logger.debug(
|
||||
f'🤕 Crashed page recovery finished, attempting to continue with {func.__name__}() on {_log_pretty_url(self.agent_current_page.url)}...'
|
||||
f'🤕 Crashed page recovery finished, attempting to continue with {func.__name__}() on {_log_pretty_url(page_url)}...'
|
||||
)
|
||||
except Exception as e:
|
||||
page_url = self.agent_current_page.url if self.agent_current_page else 'unknown page'
|
||||
self.logger.warning(
|
||||
f'❌ Crashed page recovery failed, could not run {func.__name__}(), page is stuck unresponsive on {_log_pretty_url(self.agent_current_page.url)}...'
|
||||
f'❌ Crashed page recovery failed, could not run {func.__name__}(), page is stuck unresponsive on {_log_pretty_url(page_url)}...'
|
||||
)
|
||||
raise # Re-raise to let retry decorator / callsite handle it
|
||||
|
||||
@@ -384,10 +388,19 @@ class BrowserSession(BaseModel):
|
||||
# Ensure we have a context
|
||||
assert self.browser_context, f'Failed to create BrowserContext for browser={self.browser}'
|
||||
|
||||
# Configure browser
|
||||
await self._setup_viewports()
|
||||
await self._setup_current_page_change_listeners()
|
||||
await self._start_context_tracing()
|
||||
# Configure browser - run some setup tasks in parallel for speed
|
||||
setup_results = await asyncio.gather(
|
||||
self._setup_viewports(),
|
||||
self._setup_current_page_change_listeners(),
|
||||
self._start_context_tracing(),
|
||||
return_exceptions=True,
|
||||
)
|
||||
|
||||
# Check for exceptions in setup results
|
||||
for i, result in enumerate(setup_results):
|
||||
if isinstance(result, Exception):
|
||||
setup_task_names = ['_setup_viewports', '_setup_current_page_change_listeners', '_start_context_tracing']
|
||||
raise Exception(f'Browser setup failed in {setup_task_names[i]}: {result}') from result
|
||||
|
||||
self.initialized = True
|
||||
return self
|
||||
@@ -837,6 +850,7 @@ class BrowserSession(BaseModel):
|
||||
|
||||
atexit.register(shudown_playwright)
|
||||
|
||||
@observe_debug(ignore_input=True, ignore_output=True, name='setup_browser_via_passed_objects')
|
||||
async def setup_browser_via_passed_objects(self) -> None:
|
||||
"""Override to customize the set up of the connection to an existing browser"""
|
||||
|
||||
@@ -878,6 +892,7 @@ class BrowserSession(BaseModel):
|
||||
self.logger.info(f'🎭 Connected to existing user-provided browser: {self.browser_context}')
|
||||
self._set_browser_keep_alive(True) # we connected to an existing browser, dont kill it at the end
|
||||
|
||||
@observe_debug(ignore_input=True, ignore_output=True, name='setup_browser_via_browser_pid')
|
||||
async def setup_browser_via_browser_pid(self) -> None:
|
||||
"""if browser_pid is provided, calculate its CDP URL by looking for --remote-debugging-port=... in its CLI args, then connect to it"""
|
||||
|
||||
@@ -922,11 +937,10 @@ class BrowserSession(BaseModel):
|
||||
# Wait for CDP port to become available (Chrome might still be starting)
|
||||
import httpx
|
||||
|
||||
# Add initial delay to give Chrome time to start up before first check
|
||||
await asyncio.sleep(2)
|
||||
# No initial sleep needed - the polling loop below handles waiting if Chrome isn't ready yet
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
for i in range(30): # 30 second timeout
|
||||
for i in range(30): # timeout
|
||||
# First check if the Chrome process has exited
|
||||
try:
|
||||
chrome_process = psutil.Process(pid=self.browser_pid)
|
||||
@@ -988,7 +1002,7 @@ class BrowserSession(BaseModel):
|
||||
except (httpx.ConnectError, httpx.TimeoutException):
|
||||
if i == 0:
|
||||
self.logger.debug(f'⏳ Waiting for Chrome CDP port {debug_port} to become available...')
|
||||
await asyncio.sleep(1)
|
||||
await asyncio.sleep(0.5)
|
||||
else:
|
||||
self.logger.error(f'❌ Chrome CDP port {debug_port} did not become available after 30 seconds')
|
||||
self.browser_pid = None
|
||||
@@ -1010,6 +1024,7 @@ class BrowserSession(BaseModel):
|
||||
)
|
||||
self._set_browser_keep_alive(True) # we connected to an existing browser, dont kill it at the end
|
||||
|
||||
@observe_debug(ignore_input=True, ignore_output=True, name='setup_browser_via_wss_url')
|
||||
async def setup_browser_via_wss_url(self) -> None:
|
||||
"""check for a passed wss_url, connect to a remote playwright browser server via WSS"""
|
||||
|
||||
@@ -1044,7 +1059,8 @@ class BrowserSession(BaseModel):
|
||||
)
|
||||
self._set_browser_keep_alive(True) # we connected to an existing browser, dont kill it at the end
|
||||
|
||||
@retry(wait=1, retries=2, timeout=45, semaphore_limit=1, semaphore_scope='self', semaphore_lax=False)
|
||||
@observe_debug(ignore_input=True, ignore_output=True, name='setup_new_browser_context')
|
||||
@retry(wait=0.1, retries=5, timeout=45, semaphore_limit=1, semaphore_scope='self', semaphore_lax=False)
|
||||
async def setup_new_browser_context(self) -> None:
|
||||
"""Launch a new browser and browser_context"""
|
||||
# Double-check after semaphore acquisition to prevent duplicate browser launches
|
||||
@@ -1059,6 +1075,7 @@ class BrowserSession(BaseModel):
|
||||
pass
|
||||
await self._unsafe_setup_new_browser_context()
|
||||
|
||||
@observe_debug(ignore_input=True, ignore_output=True, name='_unsafe_setup_new_browser_context')
|
||||
async def _unsafe_setup_new_browser_context(self) -> None:
|
||||
"""Unsafe browser context setup without retry protection."""
|
||||
|
||||
@@ -2015,7 +2032,6 @@ class BrowserSession(BaseModel):
|
||||
await page.wait_for_selector(selector, state='visible', timeout=timeout)
|
||||
|
||||
@observe_debug(name='remove_highlights', ignore_output=True, ignore_input=True)
|
||||
@require_healthy_browser(usable_page=True, reopen_page=True)
|
||||
@time_execution_async('--remove_highlights')
|
||||
@retry(timeout=2, retries=0)
|
||||
async def remove_highlights(self):
|
||||
@@ -2048,14 +2064,16 @@ class BrowserSession(BaseModel):
|
||||
self.logger.debug(f'⚠️ Failed to remove highlights (this is usually ok): {type(e).__name__}: {e}')
|
||||
# Don't raise the error since this is not critical functionality
|
||||
|
||||
@observe_debug(ignore_output=True, name='get_dom_element_by_index')
|
||||
@require_healthy_browser(usable_page=True, reopen_page=True)
|
||||
async def get_dom_element_by_index(self, index: int) -> DOMElementNode | None:
|
||||
"""Get DOM element by index."""
|
||||
selector_map = await self.get_selector_map()
|
||||
return selector_map.get(index)
|
||||
|
||||
@require_healthy_browser(usable_page=True, reopen_page=True)
|
||||
@time_execution_async('--click_element_node')
|
||||
@observe_debug(ignore_input=True, name='click_element_node')
|
||||
@require_healthy_browser(usable_page=True, reopen_page=True)
|
||||
async def _click_element_node(self, element_node: DOMElementNode) -> str | None:
|
||||
"""
|
||||
Optimized method to click an element using xpath.
|
||||
@@ -2069,7 +2087,8 @@ class BrowserSession(BaseModel):
|
||||
element_handle = await self.get_locate_element(element_node)
|
||||
|
||||
if element_handle is None:
|
||||
raise Exception(f'Element: {repr(element_node)} not found')
|
||||
self.logger.debug(f'Element: {repr(element_node)} not found')
|
||||
raise Exception('Element not found')
|
||||
|
||||
async def perform_click(click_func):
|
||||
"""Performs the actual click, handling both download and navigation scenarios."""
|
||||
@@ -2163,10 +2182,10 @@ class BrowserSession(BaseModel):
|
||||
except URLNotAllowedError as e:
|
||||
raise e
|
||||
except Exception as e:
|
||||
raise Exception(f'Failed to click element: {repr(element_node)}. Error: {str(e)}')
|
||||
raise Exception(f'Failed to click element. Error: {str(e)}')
|
||||
|
||||
@time_execution_async('--get_tabs_info')
|
||||
@retry(timeout=6, retries=1)
|
||||
@retry(timeout=3, retries=1)
|
||||
@require_healthy_browser(usable_page=False, reopen_page=False)
|
||||
async def get_tabs_info(self) -> list[TabInfo]:
|
||||
"""Get information about all tabs"""
|
||||
@@ -2174,7 +2193,7 @@ class BrowserSession(BaseModel):
|
||||
tabs_info = []
|
||||
for page_id, page in enumerate(self.browser_context.pages):
|
||||
try:
|
||||
title = await asyncio.wait_for(page.title(), timeout=3.0)
|
||||
title = await asyncio.wait_for(page.title(), timeout=2.0)
|
||||
tab_info = TabInfo(page_id=page_id, url=page.url, title=title)
|
||||
except Exception:
|
||||
# page.title() can hang forever on tabs that are crashed/disappeared/about:blank
|
||||
@@ -2255,8 +2274,14 @@ class BrowserSession(BaseModel):
|
||||
# Check if URL is allowed
|
||||
if not self._is_url_allowed(normalized_url):
|
||||
raise BrowserError(f'⛔️ Navigation to non-allowed URL: {normalized_url}')
|
||||
|
||||
timeout_ms = min(3000, int(timeout_ms or self.browser_profile.default_navigation_timeout or 12000))
|
||||
# If timeout_ms is not None, use it (even if 0); else try profile.default_navigation_timeout (even if 0); else 12000
|
||||
if timeout_ms is not None:
|
||||
user_timeout_ms = int(timeout_ms)
|
||||
elif self.browser_profile.default_navigation_timeout is not None:
|
||||
user_timeout_ms = int(self.browser_profile.default_navigation_timeout)
|
||||
else:
|
||||
user_timeout_ms = 12000
|
||||
timeout_ms = min(3000, user_timeout_ms)
|
||||
|
||||
# Handle new tab creation
|
||||
if new_tab:
|
||||
@@ -2279,7 +2304,7 @@ class BrowserSession(BaseModel):
|
||||
|
||||
# Navigate to URL
|
||||
try:
|
||||
# Use asyncio.wait to prevent hanging on slow page loads
|
||||
# Use asyncio.wait to prevent hanging on slow page loads
|
||||
# Don't cap the timeout - respect what was requested
|
||||
self.logger.debug(f'🧭 Starting navigation to {_log_pretty_url(normalized_url)} with timeout {timeout_ms}ms')
|
||||
nav_task = asyncio.create_task(page.goto(normalized_url, wait_until='load', timeout=timeout_ms))
|
||||
@@ -2797,15 +2822,27 @@ class BrowserSession(BaseModel):
|
||||
@observe_debug(ignore_input=True, ignore_output=True, name='wait_for_page_and_frames_load')
|
||||
async def _wait_for_page_and_frames_load(self, timeout_overwrite: float | None = None):
|
||||
"""
|
||||
Ensures page is fully loaded before continuing.
|
||||
Waits for either network to be idle or minimum WAIT_TIME, whichever is longer.
|
||||
Ensures page is fully loaded and stable before continuing.
|
||||
Waits for network idle, DOM stability, and minimum WAIT_TIME.
|
||||
Also checks if the loaded URL is allowed.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
timeout_overwrite: float | None
|
||||
Override the minimum wait time
|
||||
"""
|
||||
# Start timing
|
||||
start_time = time.time()
|
||||
|
||||
# Wait for page load
|
||||
page = await self.get_current_page()
|
||||
|
||||
# Skip network waiting for new tab pages (about:blank, chrome://new-tab-page, etc.)
|
||||
# These pages load instantly and don't need network idle time
|
||||
if is_new_tab_page(page.url):
|
||||
self.logger.debug(f'⚡ Skipping page load wait for new tab page: {page.url}')
|
||||
return
|
||||
|
||||
try:
|
||||
await self._wait_for_stable_network()
|
||||
|
||||
@@ -3052,7 +3089,9 @@ class BrowserSession(BaseModel):
|
||||
@observe_debug(ignore_input=True, ignore_output=True)
|
||||
@time_execution_async('--get_state_summary')
|
||||
@require_healthy_browser(usable_page=True, reopen_page=True)
|
||||
async def get_state_summary(self, cache_clickable_elements_hashes: bool) -> BrowserStateSummary:
|
||||
async def get_state_summary(
|
||||
self, cache_clickable_elements_hashes: bool, include_screenshot: bool = True
|
||||
) -> BrowserStateSummary:
|
||||
self.logger.debug('🔄 Starting get_state_summary...')
|
||||
"""Get a summary of the current browser state
|
||||
|
||||
@@ -3065,13 +3104,19 @@ class BrowserSession(BaseModel):
|
||||
If True, cache the clickable elements hashes for the current state.
|
||||
This is used to calculate which elements are new to the LLM since the last message,
|
||||
which helps reduce token usage.
|
||||
include_screenshot: bool
|
||||
If True, include screenshot in the state summary. Set to False to improve performance
|
||||
when screenshots are not needed (e.g., in multi_act element validation).
|
||||
"""
|
||||
await self._wait_for_page_and_frames_load()
|
||||
updated_state = await self._get_updated_state()
|
||||
|
||||
updated_state = await self._get_updated_state(include_screenshot=include_screenshot)
|
||||
|
||||
# Find out which elements are new
|
||||
# Do this only if url has not changed
|
||||
if cache_clickable_elements_hashes:
|
||||
# Lazy import heavy DOM service
|
||||
from browser_use.dom.clickable_element_processor.service import ClickableElementProcessor
|
||||
|
||||
# if we are on the same url as the last state, we can use the cached hashes
|
||||
if self._cached_clickable_element_hashes and self._cached_clickable_element_hashes.url == updated_state.url:
|
||||
# Pointers, feel free to edit in place
|
||||
@@ -3142,20 +3187,12 @@ class BrowserSession(BaseModel):
|
||||
)
|
||||
|
||||
@observe_debug(ignore_input=True, ignore_output=True, name='get_updated_state')
|
||||
async def _get_updated_state(self, focus_element: int = -1) -> BrowserStateSummary:
|
||||
async def _get_updated_state(self, focus_element: int = -1, include_screenshot: bool = True) -> BrowserStateSummary:
|
||||
"""Update and return state."""
|
||||
|
||||
# Check if current page is still valid, if not switch to another available page
|
||||
page = await self.get_current_page()
|
||||
|
||||
try:
|
||||
# Test if page is still accessible
|
||||
# NOTE: This also happens on invalid urls like www.sadfdsafdssdafd.com
|
||||
await asyncio.wait_for(page.evaluate('1'), timeout=2.5)
|
||||
except Exception as e:
|
||||
self.logger.debug(f'👋 Current page is not accessible: {type(e).__name__}: {e}')
|
||||
raise BrowserError('Page is not accessible')
|
||||
|
||||
try:
|
||||
self.logger.debug('🧹 Removing highlights...')
|
||||
try:
|
||||
@@ -3172,6 +3209,8 @@ class BrowserSession(BaseModel):
|
||||
self.logger.debug(f'PDF auto-download check failed: {type(e).__name__}: {e}')
|
||||
|
||||
self.logger.debug('🌳 Starting DOM processing...')
|
||||
from browser_use.dom.service import DomService
|
||||
|
||||
dom_service = DomService(page, logger=self.logger)
|
||||
try:
|
||||
content = await asyncio.wait_for(
|
||||
@@ -3228,13 +3267,16 @@ class BrowserSession(BaseModel):
|
||||
# )
|
||||
# )
|
||||
|
||||
try:
|
||||
self.logger.debug('📸 Capturing screenshot...')
|
||||
# Reasonable timeout for screenshot
|
||||
screenshot_b64 = await self.take_screenshot()
|
||||
# self.logger.debug('✅ Screenshot completed')
|
||||
except Exception as e:
|
||||
self.logger.warning(f'❌ Screenshot failed for {_log_pretty_url(page.url)}: {type(e).__name__} {e}')
|
||||
if include_screenshot:
|
||||
try:
|
||||
self.logger.debug('📸 Capturing screenshot...')
|
||||
# Reasonable timeout for screenshot
|
||||
screenshot_b64 = await self.take_screenshot()
|
||||
# self.logger.debug('✅ Screenshot completed')
|
||||
except Exception as e:
|
||||
self.logger.warning(f'❌ Screenshot failed for {_log_pretty_url(page.url)}: {type(e).__name__} {e}')
|
||||
screenshot_b64 = None
|
||||
else:
|
||||
screenshot_b64 = None
|
||||
|
||||
# Get comprehensive page information
|
||||
@@ -3475,6 +3517,7 @@ class BrowserSession(BaseModel):
|
||||
'Browser is unable to load any new about:blank pages (something is very wrong or browser is extremely overloaded)'
|
||||
)
|
||||
|
||||
@observe_debug(ignore_input=True, name='recover_unresponsive_page')
|
||||
async def _recover_unresponsive_page(self, calling_method: str, timeout_ms: int | None = None) -> None:
|
||||
"""Recover from an unresponsive page by closing and reopening it."""
|
||||
self.logger.warning(f'⚠️ Page JS engine became unresponsive in {calling_method}(), attempting recovery...')
|
||||
@@ -3828,6 +3871,7 @@ class BrowserSession(BaseModel):
|
||||
|
||||
@require_healthy_browser(usable_page=True, reopen_page=True)
|
||||
@time_execution_async('--get_locate_element')
|
||||
@observe_debug(ignore_input=True, name='get_locate_element')
|
||||
async def get_locate_element(self, element: DOMElementNode) -> ElementHandle | None:
|
||||
page = await self.get_current_page()
|
||||
current_frame = page
|
||||
@@ -3881,7 +3925,7 @@ class BrowserSession(BaseModel):
|
||||
if element_handle:
|
||||
is_visible = await self._is_visible(element_handle)
|
||||
if is_visible:
|
||||
await element_handle.scroll_into_view_if_needed()
|
||||
await element_handle.scroll_into_view_if_needed(timeout=1_000)
|
||||
return element_handle
|
||||
return None
|
||||
except Exception as e:
|
||||
@@ -3897,7 +3941,7 @@ class BrowserSession(BaseModel):
|
||||
if element_handle:
|
||||
is_visible = await self._is_visible(element_handle)
|
||||
if is_visible:
|
||||
await element_handle.scroll_into_view_if_needed()
|
||||
await element_handle.scroll_into_view_if_needed(timeout=1_000)
|
||||
return element_handle
|
||||
except Exception as xpath_e:
|
||||
self.logger.error(
|
||||
@@ -3924,7 +3968,7 @@ class BrowserSession(BaseModel):
|
||||
if element_handle:
|
||||
is_visible = await self._is_visible(element_handle)
|
||||
if is_visible:
|
||||
await element_handle.scroll_into_view_if_needed()
|
||||
await element_handle.scroll_into_view_if_needed(timeout=1_000)
|
||||
return element_handle
|
||||
return None
|
||||
except Exception as e:
|
||||
@@ -3945,7 +3989,7 @@ class BrowserSession(BaseModel):
|
||||
if element_handle:
|
||||
is_visible = await self._is_visible(element_handle)
|
||||
if is_visible:
|
||||
await element_handle.scroll_into_view_if_needed()
|
||||
await element_handle.scroll_into_view_if_needed(timeout=1_000)
|
||||
return element_handle
|
||||
return None
|
||||
except Exception as e:
|
||||
@@ -3989,7 +4033,7 @@ class BrowserSession(BaseModel):
|
||||
|
||||
is_visible = await self._is_visible(element_handle)
|
||||
if is_visible:
|
||||
await element_handle.scroll_into_view_if_needed()
|
||||
await element_handle.scroll_into_view_if_needed(timeout=1_000)
|
||||
return element_handle
|
||||
except Exception as e:
|
||||
self.logger.error(
|
||||
@@ -3999,6 +4043,7 @@ class BrowserSession(BaseModel):
|
||||
|
||||
@require_healthy_browser(usable_page=True, reopen_page=True)
|
||||
@time_execution_async('--input_text_element_node')
|
||||
@observe_debug(ignore_input=True, name='input_text_element_node')
|
||||
async def _input_text_element_node(self, element_node: DOMElementNode, text: str):
|
||||
"""
|
||||
Input text into an element with proper error handling and state management.
|
||||
@@ -4022,7 +4067,7 @@ class BrowserSession(BaseModel):
|
||||
# let's first try to click and type
|
||||
try:
|
||||
await element_handle.evaluate('el => {el.textContent = ""; el.value = "";}')
|
||||
await element_handle.click()
|
||||
await element_handle.click(timeout=2_000) # Add 2 second timeout
|
||||
await asyncio.sleep(0.1) # Increased sleep time
|
||||
page = await self.get_current_page()
|
||||
await page.keyboard.type(text)
|
||||
@@ -4044,9 +4089,9 @@ class BrowserSession(BaseModel):
|
||||
try:
|
||||
if (await is_contenteditable.json_value() or tag_name == 'input') and not (readonly or disabled):
|
||||
await element_handle.evaluate('el => {el.textContent = ""; el.value = "";}')
|
||||
await element_handle.type(text, delay=5)
|
||||
await element_handle.type(text, delay=5, timeout=5_000) # Add 5 second timeout
|
||||
else:
|
||||
await element_handle.fill(text)
|
||||
await element_handle.fill(text, timeout=3_000) # Add 3 second timeout
|
||||
except Exception as e:
|
||||
self.logger.error(f'Error during input text into element: {type(e).__name__}: {e}')
|
||||
raise BrowserError(f'Failed to input text into element: {repr(element_node)}')
|
||||
@@ -4471,32 +4516,29 @@ class BrowserSession(BaseModel):
|
||||
except Exception as e:
|
||||
self.logger.debug(f'❌ Failed to show 📀 DVD loading animation: {type(e).__name__}: {e}')
|
||||
|
||||
@observe_debug(ignore_input=True, ignore_output=True, name='get_state_summary_with_fallback')
|
||||
@require_healthy_browser(usable_page=True, reopen_page=True)
|
||||
@time_execution_async('--get_state_summary_with_fallback')
|
||||
async def get_state_summary_with_fallback(self, cache_clickable_elements_hashes: bool = True) -> BrowserStateSummary:
|
||||
"""Get browser state with fallback to minimal state on errors
|
||||
|
||||
This method first tries to get a full state summary. If that fails,
|
||||
it falls back to a minimal state summary to allow basic navigation.
|
||||
@observe_debug(ignore_input=True, ignore_output=True, name='get_browser_state_with_recovery')
|
||||
async def get_browser_state_with_recovery(
|
||||
self, cache_clickable_elements_hashes: bool = True, include_screenshot: bool = True
|
||||
) -> BrowserStateSummary:
|
||||
"""Get browser state with multiple fallback strategies for error recovery
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
cache_clickable_elements_hashes: bool
|
||||
If True, cache the clickable elements hashes for the current state.
|
||||
|
||||
Returns:
|
||||
--------
|
||||
BrowserStateSummary: Either full state or minimal fallback state
|
||||
include_screenshot: bool
|
||||
If True, include screenshot in the state summary. Set to False to improve performance
|
||||
when screenshots are not needed (e.g., in multi_act element validation).
|
||||
"""
|
||||
# Try 1: Full state summary (current implementation)
|
||||
|
||||
# Try 1: Full state summary (current implementation) - like main branch
|
||||
try:
|
||||
return await self.get_state_summary(cache_clickable_elements_hashes)
|
||||
await self._wait_for_page_and_frames_load()
|
||||
return await self.get_state_summary(cache_clickable_elements_hashes, include_screenshot=include_screenshot)
|
||||
except Exception as e:
|
||||
self.logger.warning(f'Full state retrieval failed: {type(e).__name__}: {e}')
|
||||
self.logger.warning('🔄 Falling back to minimal state summary')
|
||||
|
||||
# Try 2: Minimal state summary as fallback
|
||||
self.logger.warning('🔄 Falling back to minimal state summary')
|
||||
return await self.get_minimal_state_summary()
|
||||
|
||||
async def _is_pdf_viewer(self, page: Page) -> bool:
|
||||
|
||||
@@ -130,23 +130,20 @@ class Controller(Generic[Context]):
|
||||
await browser_session.go_back()
|
||||
msg = '🔙 Navigated back'
|
||||
logger.info(msg)
|
||||
return ActionResult(extracted_content=msg, include_in_memory=True, long_term_memory='Navigated back')
|
||||
return ActionResult(extracted_content=msg)
|
||||
|
||||
# wait for x seconds
|
||||
|
||||
@self.registry.action('Wait for x seconds default 3 (max 10 seconds)')
|
||||
@self.registry.action(
|
||||
'Wait for x seconds default 3 (max 10 seconds). This can be used to wait until the page is fully loaded.'
|
||||
)
|
||||
async def wait(seconds: int = 3):
|
||||
# Cap wait time at maximum 10 seconds
|
||||
actual_seconds = min(max(seconds, 0), 10)
|
||||
if actual_seconds != seconds:
|
||||
msg = f'🕒 Waiting for {actual_seconds} seconds (capped from {seconds} seconds, max 10 seconds)'
|
||||
else:
|
||||
msg = f'🕒 Waiting for {actual_seconds} seconds'
|
||||
# Reduce the wait time by 3 seconds to account for the llm call which takes at least 3 seconds
|
||||
# So if the model decides to wait for 5 seconds, the llm call took at least 3 seconds, so we only need to wait for 2 seconds
|
||||
actual_seconds = min(max(seconds - 3, 0), 10)
|
||||
msg = f'🕒 Waiting for {actual_seconds + 3} seconds'
|
||||
logger.info(msg)
|
||||
await asyncio.sleep(actual_seconds)
|
||||
return ActionResult(
|
||||
extracted_content=msg, include_in_memory=True, long_term_memory=f'Waited for {actual_seconds} seconds'
|
||||
)
|
||||
return ActionResult(extracted_content=msg)
|
||||
|
||||
# Element Interaction Actions
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ from browser_use.dom.views import (
|
||||
SelectorMap,
|
||||
ViewportInfo,
|
||||
)
|
||||
from browser_use.observability import observe_debug
|
||||
from browser_use.utils import is_new_tab_page, time_execution_async
|
||||
|
||||
# @dataclass
|
||||
@@ -34,6 +35,7 @@ class DomService:
|
||||
self.js_code = resources.files('browser_use.dom.dom_tree').joinpath('index.js').read_text()
|
||||
|
||||
# region - Clickable elements
|
||||
@observe_debug(ignore_input=True, ignore_output=True, name='get_clickable_elements')
|
||||
@time_execution_async('--get_clickable_elements')
|
||||
async def get_clickable_elements(
|
||||
self,
|
||||
|
||||
@@ -4,14 +4,10 @@ We have switched all of our code from langchain to openai.types.chat.chat_comple
|
||||
For easier transition we have
|
||||
"""
|
||||
|
||||
from browser_use.llm.anthropic.chat import ChatAnthropic
|
||||
from browser_use.llm.aws.chat_anthropic import ChatAnthropicBedrock
|
||||
from browser_use.llm.aws.chat_bedrock import ChatAWSBedrock
|
||||
from browser_use.llm.azure.chat import ChatAzureOpenAI
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
# Lightweight imports that are commonly used
|
||||
from browser_use.llm.base import BaseChatModel
|
||||
from browser_use.llm.deepseek.chat import ChatDeepSeek
|
||||
from browser_use.llm.google.chat import ChatGoogle
|
||||
from browser_use.llm.groq.chat import ChatGroq
|
||||
from browser_use.llm.messages import (
|
||||
AssistantMessage,
|
||||
BaseMessage,
|
||||
@@ -27,11 +23,52 @@ from browser_use.llm.messages import (
|
||||
from browser_use.llm.messages import (
|
||||
ContentPartTextParam as ContentText,
|
||||
)
|
||||
from browser_use.llm.ollama.chat import ChatOllama
|
||||
from browser_use.llm.openai.chat import ChatOpenAI
|
||||
from browser_use.llm.openrouter.chat import ChatOpenRouter
|
||||
|
||||
# Make better names for the message
|
||||
# Type stubs for lazy imports
|
||||
if TYPE_CHECKING:
|
||||
from browser_use.llm.anthropic.chat import ChatAnthropic
|
||||
from browser_use.llm.aws.chat_anthropic import ChatAnthropicBedrock
|
||||
from browser_use.llm.aws.chat_bedrock import ChatAWSBedrock
|
||||
from browser_use.llm.azure.chat import ChatAzureOpenAI
|
||||
from browser_use.llm.deepseek.chat import ChatDeepSeek
|
||||
from browser_use.llm.google.chat import ChatGoogle
|
||||
from browser_use.llm.groq.chat import ChatGroq
|
||||
from browser_use.llm.ollama.chat import ChatOllama
|
||||
from browser_use.llm.openai.chat import ChatOpenAI
|
||||
from browser_use.llm.openrouter.chat import ChatOpenRouter
|
||||
|
||||
# Lazy imports mapping for heavy chat models
|
||||
_LAZY_IMPORTS = {
|
||||
'ChatAnthropic': ('browser_use.llm.anthropic.chat', 'ChatAnthropic'),
|
||||
'ChatAnthropicBedrock': ('browser_use.llm.aws.chat_anthropic', 'ChatAnthropicBedrock'),
|
||||
'ChatAWSBedrock': ('browser_use.llm.aws.chat_bedrock', 'ChatAWSBedrock'),
|
||||
'ChatAzureOpenAI': ('browser_use.llm.azure.chat', 'ChatAzureOpenAI'),
|
||||
'ChatDeepSeek': ('browser_use.llm.deepseek.chat', 'ChatDeepSeek'),
|
||||
'ChatGoogle': ('browser_use.llm.google.chat', 'ChatGoogle'),
|
||||
'ChatGroq': ('browser_use.llm.groq.chat', 'ChatGroq'),
|
||||
'ChatOllama': ('browser_use.llm.ollama.chat', 'ChatOllama'),
|
||||
'ChatOpenAI': ('browser_use.llm.openai.chat', 'ChatOpenAI'),
|
||||
'ChatOpenRouter': ('browser_use.llm.openrouter.chat', 'ChatOpenRouter'),
|
||||
}
|
||||
|
||||
|
||||
def __getattr__(name: str):
|
||||
"""Lazy import mechanism for heavy chat model imports."""
|
||||
if name in _LAZY_IMPORTS:
|
||||
module_path, attr_name = _LAZY_IMPORTS[name]
|
||||
try:
|
||||
from importlib import import_module
|
||||
|
||||
module = import_module(module_path)
|
||||
attr = getattr(module, attr_name)
|
||||
# Cache the imported attribute in the module's globals
|
||||
globals()[name] = attr
|
||||
return attr
|
||||
except ImportError as e:
|
||||
raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e
|
||||
|
||||
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
|
||||
|
||||
|
||||
__all__ = [
|
||||
# Message types -> for easier transition from langchain
|
||||
|
||||
@@ -1,5 +1,34 @@
|
||||
from browser_use.llm.aws.chat_anthropic import ChatAnthropicBedrock
|
||||
from browser_use.llm.aws.chat_bedrock import ChatAWSBedrock
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
# Type stubs for lazy imports
|
||||
if TYPE_CHECKING:
|
||||
from browser_use.llm.aws.chat_anthropic import ChatAnthropicBedrock
|
||||
from browser_use.llm.aws.chat_bedrock import ChatAWSBedrock
|
||||
|
||||
# Lazy imports mapping for AWS chat models
|
||||
_LAZY_IMPORTS = {
|
||||
'ChatAnthropicBedrock': ('browser_use.llm.aws.chat_anthropic', 'ChatAnthropicBedrock'),
|
||||
'ChatAWSBedrock': ('browser_use.llm.aws.chat_bedrock', 'ChatAWSBedrock'),
|
||||
}
|
||||
|
||||
|
||||
def __getattr__(name: str):
|
||||
"""Lazy import mechanism for AWS chat models."""
|
||||
if name in _LAZY_IMPORTS:
|
||||
module_path, attr_name = _LAZY_IMPORTS[name]
|
||||
try:
|
||||
from importlib import import_module
|
||||
|
||||
module = import_module(module_path)
|
||||
attr = getattr(module, attr_name)
|
||||
# Cache the imported attribute in the module's globals
|
||||
globals()[name] = attr
|
||||
return attr
|
||||
except ImportError as e:
|
||||
raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e
|
||||
|
||||
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
|
||||
|
||||
|
||||
__all__ = [
|
||||
'ChatAWSBedrock',
|
||||
|
||||
@@ -35,7 +35,8 @@ class ChatOpenAI(BaseChatModel):
|
||||
model: ChatModel | str
|
||||
|
||||
# Model params
|
||||
temperature: float | None = None
|
||||
temperature: float | None = 0.2
|
||||
frequency_penalty: float | None = 0.05
|
||||
reasoning_effort: ReasoningEffort = 'low'
|
||||
|
||||
# Client initialization parameters
|
||||
@@ -50,6 +51,8 @@ class ChatOpenAI(BaseChatModel):
|
||||
default_query: Mapping[str, object] | None = None
|
||||
http_client: httpx.AsyncClient | None = None
|
||||
_strict_response_validation: bool = False
|
||||
max_completion_tokens: int | None = 8000
|
||||
top_p: float | None = None
|
||||
|
||||
# Static
|
||||
@property
|
||||
@@ -144,12 +147,24 @@ class ChatOpenAI(BaseChatModel):
|
||||
|
||||
try:
|
||||
model_params: dict[str, Any] = {}
|
||||
if self.model in ReasoningModels:
|
||||
model_params['reasoning_effort'] = self.reasoning_effort
|
||||
|
||||
if self.temperature is not None:
|
||||
model_params['temperature'] = self.temperature
|
||||
|
||||
if self.frequency_penalty is not None:
|
||||
model_params['frequency_penalty'] = self.frequency_penalty
|
||||
|
||||
if self.max_completion_tokens is not None:
|
||||
model_params['max_completion_tokens'] = self.max_completion_tokens
|
||||
|
||||
if self.top_p is not None:
|
||||
model_params['top_p'] = self.top_p
|
||||
|
||||
if self.model in ReasoningModels:
|
||||
model_params['reasoning_effort'] = self.reasoning_effort
|
||||
model_params['temperature'] = 1
|
||||
model_params['frequency_penalty'] = 0
|
||||
|
||||
if output_format is None:
|
||||
# Return string response
|
||||
response = await self.get_client().chat.completions.create(
|
||||
|
||||
@@ -659,7 +659,7 @@ class BrowserUseServer:
|
||||
if not self.browser_session:
|
||||
return 'Error: No browser session active'
|
||||
|
||||
state = await self.browser_session.get_state_summary(cache_clickable_elements_hashes=False)
|
||||
state = await self.browser_session.get_browser_state_with_recovery(cache_clickable_elements_hashes=False)
|
||||
|
||||
result = {
|
||||
'url': state.url,
|
||||
|
||||
@@ -2,18 +2,50 @@
|
||||
Telemetry for Browser Use.
|
||||
"""
|
||||
|
||||
from browser_use.telemetry.service import ProductTelemetry
|
||||
from browser_use.telemetry.views import (
|
||||
BaseTelemetryEvent,
|
||||
CLITelemetryEvent,
|
||||
MCPClientTelemetryEvent,
|
||||
MCPServerTelemetryEvent,
|
||||
)
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
# Type stubs for lazy imports
|
||||
if TYPE_CHECKING:
|
||||
from browser_use.telemetry.service import ProductTelemetry
|
||||
from browser_use.telemetry.views import (
|
||||
BaseTelemetryEvent,
|
||||
CLITelemetryEvent,
|
||||
MCPClientTelemetryEvent,
|
||||
MCPServerTelemetryEvent,
|
||||
)
|
||||
|
||||
# Lazy imports mapping
|
||||
_LAZY_IMPORTS = {
|
||||
'ProductTelemetry': ('browser_use.telemetry.service', 'ProductTelemetry'),
|
||||
'BaseTelemetryEvent': ('browser_use.telemetry.views', 'BaseTelemetryEvent'),
|
||||
'CLITelemetryEvent': ('browser_use.telemetry.views', 'CLITelemetryEvent'),
|
||||
'MCPClientTelemetryEvent': ('browser_use.telemetry.views', 'MCPClientTelemetryEvent'),
|
||||
'MCPServerTelemetryEvent': ('browser_use.telemetry.views', 'MCPServerTelemetryEvent'),
|
||||
}
|
||||
|
||||
|
||||
def __getattr__(name: str):
|
||||
"""Lazy import mechanism for telemetry components."""
|
||||
if name in _LAZY_IMPORTS:
|
||||
module_path, attr_name = _LAZY_IMPORTS[name]
|
||||
try:
|
||||
from importlib import import_module
|
||||
|
||||
module = import_module(module_path)
|
||||
attr = getattr(module, attr_name)
|
||||
# Cache the imported attribute in the module's globals
|
||||
globals()[name] = attr
|
||||
return attr
|
||||
except ImportError as e:
|
||||
raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e
|
||||
|
||||
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
|
||||
|
||||
|
||||
__all__ = [
|
||||
'BaseTelemetryEvent',
|
||||
'ProductTelemetry',
|
||||
'CLITelemetryEvent',
|
||||
'MCPClientTelemetryEvent',
|
||||
'MCPServerTelemetryEvent',
|
||||
'CLITelemetryEvent',
|
||||
]
|
||||
|
||||
@@ -31,10 +31,10 @@ dependencies = [
|
||||
"typing-extensions>=4.12.2",
|
||||
"uuid7>=0.1.0",
|
||||
"authlib>=1.6.0",
|
||||
"google-genai>=1.21.1",
|
||||
"openai>=1.81.0",
|
||||
"anthropic>=0.54.0",
|
||||
"groq>=0.28.0",
|
||||
"google-genai>=1.26.0",
|
||||
"openai>=1.97.0",
|
||||
"anthropic>=0.58.2",
|
||||
"groq>=0.30.0",
|
||||
"ollama>=0.5.1",
|
||||
"google-api-python-client>=2.174.0",
|
||||
"google-auth>=2.40.3",
|
||||
|
||||
@@ -328,8 +328,30 @@ class TestControllerIntegration:
|
||||
assert result.extracted_content is not None
|
||||
assert 'Waiting for' in result.extracted_content
|
||||
|
||||
# Verify that at least 1 second has passed
|
||||
assert end_time - start_time >= 0.9 # Allow some timing margin
|
||||
# Verify that less than 0.1 second has passed (because we deducted 3 seconds to account for the llm call)
|
||||
assert end_time - start_time <= 0.1 # Allow some timing margin
|
||||
|
||||
# longer wait
|
||||
# Create wait action for 5 seconds - fix to use a dictionary
|
||||
wait_action = {'wait': {'seconds': 5}} # Corrected format
|
||||
|
||||
# Record start time
|
||||
start_time = time.time()
|
||||
|
||||
# Execute wait action
|
||||
result = await controller.act(WaitActionModel(**wait_action), browser_session)
|
||||
|
||||
# Record end time
|
||||
end_time = time.time()
|
||||
|
||||
# Verify the result
|
||||
assert isinstance(result, ActionResult)
|
||||
assert result.extracted_content is not None
|
||||
assert 'Waiting for' in result.extracted_content
|
||||
|
||||
# Verify that we took 2 sec (5s-3s (llm call)= 2s)
|
||||
assert end_time - start_time <= 2.1 # Allow some timing margin
|
||||
assert end_time - start_time >= 1.9 # Allow some timing margin
|
||||
|
||||
async def test_go_back_action(self, controller, browser_session, base_url):
|
||||
"""Test that go_back action navigates to the previous page."""
|
||||
|
||||
Reference in New Issue
Block a user