diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index 9d9f751ce..46bfb424c 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -206,6 +206,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): message_compaction: MessageCompactionSettings | bool | None = True, max_clickable_elements_length: int = 40000, _url_shortening_limit: int = 25, + enable_signal_handler: bool = True, **kwargs, ): # Validate llm_screenshot_size @@ -421,6 +422,9 @@ class Agent(Generic[Context, AgentStructuredOutput]): if self.settings.message_compaction and self.settings.message_compaction.compaction_llm: self.token_cost_service.register_llm(self.settings.message_compaction.compaction_llm) + # Store signal handler setting (not part of AgentSettings as it's runtime behavior) + self.enable_signal_handler = enable_signal_handler + # Initialize state self.state = injected_agent_state or AgentState() @@ -2494,6 +2498,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): resume_callback=self.resume, custom_exit_callback=on_force_exit_log_telemetry, # Pass the new telemetrycallback exit_on_second_int=True, + disabled=not self.enable_signal_handler, ) signal_handler.register() diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index e4f42098f..701e39a7a 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -909,6 +909,7 @@ class BrowserSession(BaseModel): target_id, timeout=event.timeout_ms / 1000 if event.timeout_ms is not None else None, wait_until=event.wait_until, + nav_timeout=event.event_timeout, ) # Close any extension options pages that might have opened @@ -952,11 +953,13 @@ class BrowserSession(BaseModel): target_id: str, timeout: float | None = None, wait_until: str = 'load', + nav_timeout: float | None = None, ) -> None: """Navigate to URL and wait for page readiness using CDP lifecycle events. Polls stored lifecycle events (registered once per session in SessionManager). wait_until controls the minimum acceptable signal: 'commit', 'domcontentloaded', 'load', 'networkidle'. + nav_timeout controls the timeout for the CDP Page.navigate() call itself (defaults to 20.0s). """ cdp_session = await self.get_or_create_cdp_session(target_id, focus=False) @@ -973,7 +976,9 @@ class BrowserSession(BaseModel): nav_start_time = asyncio.get_event_loop().time() # Wrap Page.navigate() with timeout — heavy sites can block here for 10s+ - nav_timeout = 20.0 + # Use nav_timeout parameter if provided, otherwise default to 20.0 + if nav_timeout is None: + nav_timeout = 20.0 try: nav_result = await asyncio.wait_for( cdp_session.cdp_client.send.Page.navigate( diff --git a/browser_use/browser/watchdogs/security_watchdog.py b/browser_use/browser/watchdogs/security_watchdog.py index f95a26260..176f2c5a5 100644 --- a/browser_use/browser/watchdogs/security_watchdog.py +++ b/browser_use/browser/watchdogs/security_watchdog.py @@ -68,7 +68,6 @@ class SecurityWatchdog(BaseWatchdog): await session.cdp_client.send.Page.navigate(params={'url': 'about:blank'}, session_id=session.session_id) self.logger.info(f'⛔️ Navigated to about:blank after blocked URL: {event.url}') except Exception as e: - pass self.logger.error(f'⛔️ Failed to navigate to about:blank: {type(e).__name__} {e}') async def on_TabCreatedEvent(self, event: TabCreatedEvent) -> None: diff --git a/browser_use/llm/aws/chat_bedrock.py b/browser_use/llm/aws/chat_bedrock.py index 610379f90..3796db472 100644 --- a/browser_use/llm/aws/chat_bedrock.py +++ b/browser_use/llm/aws/chat_bedrock.py @@ -9,6 +9,7 @@ from browser_use.llm.aws.serializer import AWSBedrockMessageSerializer from browser_use.llm.base import BaseChatModel from browser_use.llm.exceptions import ModelProviderError, ModelRateLimitError from browser_use.llm.messages import BaseMessage +from browser_use.llm.schema import SchemaOptimizer from browser_use.llm.views import ChatInvokeCompletion, ChatInvokeUsage if TYPE_CHECKING: @@ -116,27 +117,14 @@ class ChatAWSBedrock(BaseChatModel): def _format_tools_for_request(self, output_format: type[BaseModel]) -> list[dict[str, Any]]: """Format a Pydantic model as a tool for structured output.""" - schema = output_format.model_json_schema() - - # Convert Pydantic schema to Bedrock tool format - properties = {} - required = [] - - for prop_name, prop_info in schema.get('properties', {}).items(): - properties[prop_name] = { - 'type': prop_info.get('type', 'string'), - 'description': prop_info.get('description', ''), - } - - # Add required fields - required = schema.get('required', []) + schema = SchemaOptimizer.create_optimized_json_schema(output_format) return [ { 'toolSpec': { 'name': f'extract_{output_format.__name__.lower()}', 'description': f'Extract information in the format of {output_format.__name__}', - 'inputSchema': {'json': {'type': 'object', 'properties': properties, 'required': required}}, + 'inputSchema': {'json': schema}, } } ] diff --git a/browser_use/llm/google/chat.py b/browser_use/llm/google/chat.py index 6ffe3c40b..3965266ce 100644 --- a/browser_use/llm/google/chat.py +++ b/browser_use/llm/google/chat.py @@ -85,7 +85,7 @@ class ChatGoogle(BaseChatModel): # Model configuration model: VerifiedGeminiModels | str - temperature: float | None = 0.5 + temperature: float | None = None top_p: float | None = None seed: int | None = None thinking_budget: int | None = None # for Gemini 2.5: -1 for dynamic (default), 0 disables, or token count @@ -222,6 +222,8 @@ class ChatGoogle(BaseChatModel): # Apply model-specific configuration (these can override config) if self.temperature is not None: config['temperature'] = self.temperature + else: + config['temperature'] = 1.0 if 'gemini-3' in self.model else 0.5 # Add system instruction if present if system_instruction: diff --git a/browser_use/skill_cli/daemon.py b/browser_use/skill_cli/daemon.py index 32a3b77aa..27b521410 100644 --- a/browser_use/skill_cli/daemon.py +++ b/browser_use/skill_cli/daemon.py @@ -11,8 +11,8 @@ import argparse import asyncio import json import logging +import os import signal -import sys from pathlib import Path from typing import TYPE_CHECKING @@ -94,12 +94,23 @@ class Daemon: return self._session async def _watch_browser(self) -> None: - """Poll BrowserSession.is_cdp_connected every 2s. Shutdown when browser dies.""" + """Poll BrowserSession.is_cdp_connected every 2s. Shutdown when browser dies. + + Skips checks while the BrowserSession is reconnecting. If reconnection fails, + next poll will see is_cdp_connected=False and trigger shutdown. + """ while self.running: await asyncio.sleep(2.0) - if self._session and not self._session.browser_session.is_cdp_connected: + if not self._session: + continue + bs = self._session.browser_session + # Don't shut down while a reconnection attempt is in progress + if bs.is_reconnecting: + continue + if not bs.is_cdp_connected: logger.info('Browser disconnected, shutting down daemon') - await self.shutdown() + if not self._shutdown_task or self._shutdown_task.done(): + self._shutdown_task = asyncio.create_task(self.shutdown()) return async def handle_connection( @@ -208,8 +219,6 @@ class Daemon: Stale sockets are cleaned up by is_daemon_alive() and by the next daemon's startup (unlink before bind). """ - import os - from browser_use.skill_cli.utils import get_pid_path, get_socket_path # Setup signal handlers @@ -297,10 +306,13 @@ class Daemon: try: # Only kill the browser if the daemon launched it. # For external connections (--connect, --cdp-url, cloud), just disconnect. + # Timeout ensures daemon exits even if CDP calls hang on a dead connection if self.cdp_url or self.use_cloud: - await self._session.browser_session.stop() + await asyncio.wait_for(self._session.browser_session.stop(), timeout=10.0) else: - await self._session.browser_session.kill() + await asyncio.wait_for(self._session.browser_session.kill(), timeout=10.0) + except TimeoutError: + logger.warning('Browser cleanup timed out after 10s, forcing exit') except Exception as e: logger.warning(f'Error closing session: {e}') self._session = None @@ -334,13 +346,19 @@ def main() -> None: session=args.session, ) + exit_code = 0 try: asyncio.run(daemon.run()) except KeyboardInterrupt: logger.info('Interrupted') except Exception as e: logger.exception(f'Daemon error: {e}') - sys.exit(1) + exit_code = 1 + finally: + # asyncio.run() may hang trying to cancel lingering tasks + # Force-exit to prevent the daemon from becoming an orphan + logger.info('Daemon process exiting') + os._exit(exit_code) if __name__ == '__main__': diff --git a/browser_use/skill_cli/install.sh b/browser_use/skill_cli/install.sh index 757dd55a4..b0ebf382c 100755 --- a/browser_use/skill_cli/install.sh +++ b/browser_use/skill_cli/install.sh @@ -380,14 +380,13 @@ configure_path() { local bin_path=$(get_venv_bin_dir) local local_bin="$HOME/.local/bin" - # Detect shell - if [ -n "$BASH_VERSION" ]; then - shell_rc="$HOME/.bashrc" - elif [ -n "$ZSH_VERSION" ]; then - shell_rc="$HOME/.zshrc" - else - shell_rc="$HOME/.profile" - fi + # Detect user's login shell (not the running shell, since this script + # is typically executed via "curl ... | bash" which always sets BASH_VERSION) + case "$(basename "$SHELL")" in + zsh) shell_rc="$HOME/.zshrc" ;; + bash) shell_rc="$HOME/.bashrc" ;; + *) shell_rc="$HOME/.profile" ;; + esac # Check if already in PATH (browser-use-env matches both /bin and /Scripts) if grep -q "browser-use-env" "$shell_rc" 2>/dev/null; then @@ -455,11 +454,12 @@ validate() { # ============================================================================= print_next_steps() { - # Detect shell for source command - local shell_rc=".bashrc" - if [ -n "$ZSH_VERSION" ]; then - shell_rc=".zshrc" - fi + # Detect shell for source command (must match configure_path logic) + case "$(basename "$SHELL")" in + zsh) local shell_rc=".zshrc" ;; + bash) local shell_rc=".bashrc" ;; + *) local shell_rc=".profile" ;; + esac echo "" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" diff --git a/browser_use/utils.py b/browser_use/utils.py index 1baac45d5..5661c9f34 100644 --- a/browser_use/utils.py +++ b/browser_use/utils.py @@ -77,6 +77,7 @@ class SignalHandler: - Management of event loop state across signals - Standardized handling of first and second Ctrl+C presses - Cross-platform compatibility (with simplified behavior on Windows) + - Option to disable signal handling for embedding in applications that manage their own signals """ def __init__( @@ -87,6 +88,7 @@ class SignalHandler: custom_exit_callback: Callable[[], None] | None = None, exit_on_second_int: bool = True, interruptible_task_patterns: list[str] | None = None, + disabled: bool = False, ): """ Initialize the signal handler. @@ -99,6 +101,8 @@ class SignalHandler: exit_on_second_int: Whether to exit on second SIGINT (Ctrl+C) interruptible_task_patterns: List of patterns to match task names that should be canceled on first Ctrl+C (default: ['step', 'multi_act', 'get_next_action']) + disabled: If True, signal handling is disabled and register() is a no-op. + Useful when embedding browser-use in applications that manage their own signals. """ self.loop = loop or asyncio.get_event_loop() self.pause_callback = pause_callback @@ -107,6 +111,7 @@ class SignalHandler: self.exit_on_second_int = exit_on_second_int self.interruptible_task_patterns = interruptible_task_patterns or ['step', 'multi_act', 'get_next_action'] self.is_windows = platform.system() == 'Windows' + self.disabled = disabled # Initialize loop state attributes self._initialize_loop_state() @@ -121,7 +126,13 @@ class SignalHandler: setattr(self.loop, 'waiting_for_input', False) def register(self) -> None: - """Register signal handlers for SIGINT and SIGTERM.""" + """Register signal handlers for SIGINT and SIGTERM. + + If disabled=True was passed to __init__, this method does nothing. + """ + if self.disabled: + return + try: if self.is_windows: # On Windows, use simple signal handling with immediate exit on Ctrl+C @@ -146,7 +157,13 @@ class SignalHandler: pass def unregister(self) -> None: - """Unregister signal handlers and restore original handlers if possible.""" + """Unregister signal handlers and restore original handlers if possible. + + If disabled=True was passed to __init__, this method does nothing. + """ + if self.disabled: + return + try: if self.is_windows: # On Windows, just restore the original SIGINT handler