diff --git a/browser_use/browser/_cdp_timeout.py b/browser_use/browser/_cdp_timeout.py new file mode 100644 index 000000000..55c74225c --- /dev/null +++ b/browser_use/browser/_cdp_timeout.py @@ -0,0 +1,107 @@ +"""Per-CDP-request timeout wrapper around cdp_use.CDPClient. + +cdp_use's `send_raw()` awaits a future that only resolves when the browser +sends a matching response. If the server goes silent mid-session (observed +failure mode against remote cloud browsers: WebSocket stays "alive" at the +TCP/keepalive layer while the browser container is dead or the proxy has +lost its upstream) the future never resolves and the whole agent hangs. + +This module provides a thin subclass that wraps each `send_raw()` in +`asyncio.wait_for`. Any CDP method that doesn't get a response within the +cap raises `TimeoutError`, which propagates through existing +error-handling paths in browser-use instead of hanging indefinitely. + +Configure the cap via: +- `BROWSER_USE_CDP_TIMEOUT_S` env var (process-wide default) +- `TimeoutWrappedCDPClient(..., cdp_request_timeout_s=...)` constructor arg + +Default (60s) is generous for slow operations like `Page.captureScreenshot` +or `Page.printToPDF` on heavy pages, but well below the 180s agent step +timeout and the typical outer agent watchdog. +""" + +from __future__ import annotations + +import asyncio +import logging +import math +import os +from typing import Any + +from cdp_use import CDPClient + +logger = logging.getLogger(__name__) + +_CDP_TIMEOUT_FALLBACK_S = 60.0 + + +def _parse_env_cdp_timeout(raw: str | None) -> float: + """Parse BROWSER_USE_CDP_TIMEOUT_S defensively. + + Accepts only finite positive values; everything else falls back to the + hardcoded default with a warning. Mirrors the guard on + BROWSER_USE_ACTION_TIMEOUT_S in tools/service.py — a bad env value here + would otherwise make every CDP call time out immediately (nan) or never + (inf / negative / zero). + """ + if raw is None or raw == '': + return _CDP_TIMEOUT_FALLBACK_S + try: + parsed = float(raw) + except ValueError: + logger.warning( + 'Invalid BROWSER_USE_CDP_TIMEOUT_S=%r; falling back to %.0fs', + raw, + _CDP_TIMEOUT_FALLBACK_S, + ) + return _CDP_TIMEOUT_FALLBACK_S + if not math.isfinite(parsed) or parsed <= 0: + logger.warning( + 'BROWSER_USE_CDP_TIMEOUT_S=%r is not a finite positive number; falling back to %.0fs', + raw, + _CDP_TIMEOUT_FALLBACK_S, + ) + return _CDP_TIMEOUT_FALLBACK_S + return parsed + + +DEFAULT_CDP_REQUEST_TIMEOUT_S: float = _parse_env_cdp_timeout(os.getenv('BROWSER_USE_CDP_TIMEOUT_S')) + + +class TimeoutWrappedCDPClient(CDPClient): + """CDPClient subclass that enforces a per-request timeout on send_raw. + + Any CDP method that doesn't receive a response within `cdp_request_timeout_s` + raises `TimeoutError` instead of hanging forever. This turns silent-hang + failure modes (cloud proxy alive, browser dead) into fast observable errors. + """ + + def __init__( + self, + *args: Any, + cdp_request_timeout_s: float | None = None, + **kwargs: Any, + ) -> None: + super().__init__(*args, **kwargs) + self._cdp_request_timeout_s: float = ( + cdp_request_timeout_s if cdp_request_timeout_s is not None else DEFAULT_CDP_REQUEST_TIMEOUT_S + ) + + async def send_raw( + self, + method: str, + params: Any | None = None, + session_id: str | None = None, + ) -> dict[str, Any]: + try: + return await asyncio.wait_for( + super().send_raw(method=method, params=params, session_id=session_id), + timeout=self._cdp_request_timeout_s, + ) + except TimeoutError as e: + # Raise a plain TimeoutError so existing `except TimeoutError` + # handlers in browser-use / tools treat this uniformly. + raise TimeoutError( + f'CDP method {method!r} did not respond within {self._cdp_request_timeout_s:.0f}s. ' + f'The browser may be unresponsive (silent WebSocket — container crashed or proxy lost upstream).' + ) from e diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index b6dbb88e2..188e05299 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -20,6 +20,7 @@ from cdp_use.cdp.target.commands import CreateTargetParameters from pydantic import BaseModel, ConfigDict, Field, PrivateAttr from uuid_extensions import uuid7str +from browser_use.browser._cdp_timeout import TimeoutWrappedCDPClient from browser_use.browser.cloud.cloud import CloudBrowserAuthError, CloudBrowserClient, CloudBrowserError # CDP logging is now handled by setup_logging() in logging_config.py @@ -1770,7 +1771,7 @@ class BrowserSession(BaseModel): from browser_use.utils import get_browser_use_version headers.setdefault('User-Agent', f'browser-use/{get_browser_use_version()}') - self._cdp_client_root = CDPClient( + self._cdp_client_root = TimeoutWrappedCDPClient( self.cdp_url, additional_headers=headers or None, max_ws_frame_size=200 * 1024 * 1024, # Use 200MB limit to handle pages with very large DOMs @@ -2068,7 +2069,7 @@ class BrowserSession(BaseModel): from browser_use.utils import get_browser_use_version headers.setdefault('User-Agent', f'browser-use/{get_browser_use_version()}') - self._cdp_client_root = CDPClient( + self._cdp_client_root = TimeoutWrappedCDPClient( self.cdp_url, additional_headers=headers or None, max_ws_frame_size=200 * 1024 * 1024, diff --git a/tests/ci/test_cdp_timeout.py b/tests/ci/test_cdp_timeout.py new file mode 100644 index 000000000..b2da259db --- /dev/null +++ b/tests/ci/test_cdp_timeout.py @@ -0,0 +1,84 @@ +"""Regression tests for TimeoutWrappedCDPClient. + +cdp_use.CDPClient.send_raw awaits a future that only resolves when the browser +sends a matching response. When the server goes silent (observed against cloud +browsers whose WebSocket stays connected at TCP/keepalive layer but never +replies), send_raw hangs forever. The wrapper turns that hang into a fast +TimeoutError. +""" + +from __future__ import annotations + +import asyncio +import time + +import pytest + +from browser_use.browser._cdp_timeout import ( + DEFAULT_CDP_REQUEST_TIMEOUT_S, + TimeoutWrappedCDPClient, + _parse_env_cdp_timeout, +) + + +class _HangingClient(TimeoutWrappedCDPClient): + """Wrapper whose parent send_raw never returns — simulates a silent server.""" + + def __init__(self, cdp_request_timeout_s: float) -> None: + # Skip real CDPClient.__init__ — we only exercise the timeout wrapper. + self._cdp_request_timeout_s = cdp_request_timeout_s + self.call_count = 0 + + async def _parent_send_raw(self, *_args, **_kwargs): + self.call_count += 1 + await asyncio.sleep(30) # Way longer than any test timeout. + return {} + + async def send_raw(self, method, params=None, session_id=None): + # Inline version of TimeoutWrappedCDPClient.send_raw using our _parent_send_raw + # (avoids needing a real WebSocket). + try: + return await asyncio.wait_for( + self._parent_send_raw(method=method, params=params, session_id=session_id), + timeout=self._cdp_request_timeout_s, + ) + except TimeoutError as e: + raise TimeoutError(f'CDP method {method!r} did not respond within {self._cdp_request_timeout_s:.0f}s.') from e + + +@pytest.mark.asyncio +async def test_send_raw_times_out_on_silent_server(): + """A CDP method that gets no response must raise TimeoutError within the cap.""" + client = _HangingClient(cdp_request_timeout_s=0.5) + + start = time.monotonic() + with pytest.raises(TimeoutError) as exc: + await client.send_raw('Target.getTargets') + elapsed = time.monotonic() - start + + assert client.call_count == 1 + # Returned within the cap (plus a small scheduling margin), not after the + # full 30s sleep. + assert elapsed < 2.0, f'wrapper did not enforce timeout; took {elapsed:.2f}s' + assert 'Target.getTargets' in str(exc.value) + assert '0s' in str(exc.value) or '1s' in str(exc.value) + + +def test_default_cdp_timeout_is_reasonable(): + """Default must give headroom above typical slow CDP calls but stay below + the 180s agent step_timeout so hangs surface before step-level kills.""" + assert 10.0 <= DEFAULT_CDP_REQUEST_TIMEOUT_S <= 120.0, ( + f'Default CDP timeout ({DEFAULT_CDP_REQUEST_TIMEOUT_S}s) is outside the sensible 10–120s range' + ) + + +def test_parse_env_rejects_malformed_values(): + """Mirrors the defensive parse used for BROWSER_USE_ACTION_TIMEOUT_S.""" + for bad in ('', 'nan', 'NaN', 'inf', '-inf', '0', '-5', 'abc'): + assert _parse_env_cdp_timeout(bad) == 60.0, f'Expected fallback for {bad!r}' + + # Finite positive values take effect. + assert _parse_env_cdp_timeout('30') == 30.0 + assert _parse_env_cdp_timeout('15.5') == 15.5 + # None (env var not set) also falls back. + assert _parse_env_cdp_timeout(None) == 60.0