updated _navigate_and_wait

This commit is contained in:
Saurav Panda
2026-02-21 19:45:54 -08:00
parent 3d7c71f4a6
commit 6713afc636
3 changed files with 236 additions and 38 deletions

View File

@@ -119,7 +119,7 @@ class NavigateToUrlEvent(BaseEvent[None]):
# existing_tab: PageHandle | None = None # TODO
# time limits enforced by bubus, not exposed to LLM:
event_timeout: float | None = Field(default_factory=lambda: _get_timeout('TIMEOUT_NavigateToUrlEvent', 15.0)) # seconds
event_timeout: float | None = Field(default_factory=lambda: _get_timeout('TIMEOUT_NavigateToUrlEvent', 30.0)) # seconds
class ClickElementEvent(ElementSelectedEvent[dict[str, Any] | None]):

View File

@@ -850,7 +850,7 @@ class BrowserSession(BaseModel):
await self.event_bus.dispatch(NavigationStartedEvent(target_id=target_id, url=event.url))
# Navigate to URL with proper lifecycle waiting
await self._navigate_and_wait(event.url, target_id)
await self._navigate_and_wait(event.url, target_id, wait_until=event.wait_until)
# Close any extension options pages that might have opened
await self._close_extension_options_pages()
@@ -887,17 +887,17 @@ class BrowserSession(BaseModel):
await self.event_bus.dispatch(AgentFocusChangedEvent(target_id=target_id, url=event.url))
raise
async def _navigate_and_wait(self, url: str, target_id: str, timeout: float | None = None) -> None:
async def _navigate_and_wait(
self,
url: str,
target_id: str,
timeout: float | None = None,
wait_until: str = 'load',
) -> None:
"""Navigate to URL and wait for page readiness using CDP lifecycle events.
Two-strategy approach optimized for speed with robust fallback:
1. networkIdle - Returns ASAP when no network activity (~50-200ms for cached pages)
2. load - Fallback when page has ongoing network activity (all resources loaded)
This gives us instant returns for cached content while being robust for dynamic pages.
NO handler registration here - handlers are registered ONCE per session in SessionManager.
We poll stored events instead to avoid handler accumulation.
Polls stored lifecycle events (registered once per session in SessionManager).
wait_until controls the minimum acceptable signal: 'commit', 'domcontentloaded', 'load', 'networkidle'.
"""
cdp_session = await self.get_or_create_cdp_session(target_id, focus=False)
@@ -909,28 +909,37 @@ class BrowserSession(BaseModel):
if url.startswith('http') and current_url.startswith('http')
else False
)
timeout = 2.0 if same_domain else 4.0
timeout = 3.0 if same_domain else 8.0
# Start performance tracking
nav_start_time = asyncio.get_event_loop().time()
nav_result = await cdp_session.cdp_client.send.Page.navigate(
params={'url': url, 'transitionType': 'address_bar'},
session_id=cdp_session.session_id,
)
# Wrap Page.navigate() with timeout — heavy sites can block here for 10s+
nav_timeout = 20.0
try:
nav_result = await asyncio.wait_for(
cdp_session.cdp_client.send.Page.navigate(
params={'url': url, 'transitionType': 'address_bar'},
session_id=cdp_session.session_id,
),
timeout=nav_timeout,
)
except TimeoutError:
duration_ms = (asyncio.get_event_loop().time() - nav_start_time) * 1000
self.logger.warning(f'⚠️ Page.navigate() timed out after {nav_timeout}s ({duration_ms:.0f}ms) for {url}')
return
# Check for immediate navigation errors
if nav_result.get('errorText'):
raise RuntimeError(f'Navigation failed: {nav_result["errorText"]}')
# Track this specific navigation
if wait_until == 'commit':
duration_ms = (asyncio.get_event_loop().time() - nav_start_time) * 1000
self.logger.debug(f'✅ Page ready for {url} (commit, {duration_ms:.0f}ms)')
return
navigation_id = nav_result.get('loaderId')
start_time = asyncio.get_event_loop().time()
seen_events = []
# Poll stored lifecycle events
seen_events = [] # Track events for timeout diagnostics
# Check if session has lifecycle monitoring enabled
if not hasattr(cdp_session, '_lifecycle_events'):
raise RuntimeError(
f'❌ Lifecycle monitoring not enabled for {cdp_session.target_id[:8]}! '
@@ -938,42 +947,37 @@ class BrowserSession(BaseModel):
f'Session: {cdp_session}'
)
# Poll for lifecycle events until timeout
poll_interval = 0.05 # Poll every 50ms
# Acceptable events by readiness level (higher is always acceptable)
acceptable_events: set[str] = {'networkIdle'}
if wait_until in ('load', 'domcontentloaded'):
acceptable_events.add('load')
if wait_until == 'domcontentloaded':
acceptable_events.add('DOMContentLoaded')
poll_interval = 0.05
while (asyncio.get_event_loop().time() - start_time) < timeout:
# Check stored events
try:
# Get recent events matching our navigation
for event_data in list(cdp_session._lifecycle_events):
event_name = event_data.get('name')
event_loader_id = event_data.get('loaderId')
# Track events
event_str = f'{event_name}(loader={event_loader_id[:8] if event_loader_id else "none"})'
if event_str not in seen_events:
seen_events.append(event_str)
# Only respond to events from our navigation (or accept all if no loaderId)
if event_loader_id and navigation_id and event_loader_id != navigation_id:
continue
if event_name == 'networkIdle':
if event_name in acceptable_events:
duration_ms = (asyncio.get_event_loop().time() - nav_start_time) * 1000
self.logger.debug(f'✅ Page ready for {url} (networkIdle, {duration_ms:.0f}ms)')
return
elif event_name == 'load':
duration_ms = (asyncio.get_event_loop().time() - nav_start_time) * 1000
self.logger.debug(f'✅ Page ready for {url} (load, {duration_ms:.0f}ms)')
self.logger.debug(f'✅ Page ready for {url} ({event_name}, {duration_ms:.0f}ms)')
return
except Exception as e:
self.logger.debug(f'Error polling lifecycle events: {e}')
# Wait before next poll
await asyncio.sleep(poll_interval)
# Timeout - continue anyway with detailed diagnostics
duration_ms = (asyncio.get_event_loop().time() - nav_start_time) * 1000
if not seen_events:
self.logger.error(

View File

@@ -0,0 +1,194 @@
"""
Test navigation on heavy/slow-loading pages (e.g. e-commerce PDPs).
Reproduces the issue where navigating to heavy pages like stevemadden.com PDPs
fails due to NavigateToUrlEvent timing out.
Usage:
uv run pytest tests/ci/browser/test_navigation_slow_pages.py -v -s
"""
import asyncio
import time
import pytest
from pytest_httpserver import HTTPServer
from werkzeug import Response
from browser_use.agent.service import Agent
from browser_use.browser import BrowserSession
from browser_use.browser.events import NavigateToUrlEvent
from browser_use.browser.profile import BrowserProfile
from tests.ci.conftest import create_mock_llm
HEAVY_PDP_HTML = """
<!DOCTYPE html>
<html>
<head><title>Frosting Black Velvet - Steve Madden</title></head>
<body>
<h1>FROSTING</h1>
<p class="price">$129.95</p>
<button id="add-to-cart">ADD TO BAG</button>
</body>
</html>
"""
@pytest.fixture(scope='session')
def heavy_page_server():
server = HTTPServer()
server.start()
def slow_initial_response(request):
time.sleep(6)
return Response(HEAVY_PDP_HTML, content_type='text/html')
server.expect_request('/slow-server-pdp').respond_with_handler(slow_initial_response)
def redirect_step1(request):
return Response('', status=302, headers={'Location': f'http://{server.host}:{server.port}/redirect-step2'})
def redirect_step2(request):
return Response('', status=302, headers={'Location': f'http://{server.host}:{server.port}/redirect-final'})
def redirect_final(request):
time.sleep(3)
return Response(HEAVY_PDP_HTML, content_type='text/html')
server.expect_request('/redirect-step1').respond_with_handler(redirect_step1)
server.expect_request('/redirect-step2').respond_with_handler(redirect_step2)
server.expect_request('/redirect-final').respond_with_handler(redirect_final)
server.expect_request('/fast-dom-slow-load').respond_with_data(HEAVY_PDP_HTML, content_type='text/html')
server.expect_request('/quick-page').respond_with_data(
'<html><body><h1>Quick Page</h1></body></html>', content_type='text/html'
)
yield server
server.stop()
@pytest.fixture(scope='session')
def heavy_base_url(heavy_page_server):
return f'http://{heavy_page_server.host}:{heavy_page_server.port}'
@pytest.fixture(scope='function')
async def browser_session():
session = BrowserSession(
browser_profile=BrowserProfile(headless=True, user_data_dir=None, keep_alive=True)
)
await session.start()
yield session
await session.kill()
def _nav_actions(url: str, msg: str = 'Done') -> list[str]:
"""Helper to build a navigate-then-done action sequence."""
return [
f"""
{{
"thinking": "Navigate to the page",
"evaluation_previous_goal": "Starting task",
"memory": "Navigating",
"next_goal": "Navigate",
"action": [{{"navigate": {{"url": "{url}"}}}}]
}}
""",
f"""
{{
"thinking": "Page loaded",
"evaluation_previous_goal": "Navigation completed",
"memory": "Page loaded",
"next_goal": "Done",
"action": [{{"done": {{"text": "{msg}", "success": true}}}}]
}}
""",
]
class TestHeavyPageNavigation:
async def test_slow_server_response_completes(self, browser_session, heavy_base_url):
"""Navigation succeeds even when server takes 6s to respond."""
url = f'{heavy_base_url}/slow-server-pdp'
agent = Agent(
task=f'Navigate to {url}',
llm=create_mock_llm(actions=_nav_actions(url)),
browser_session=browser_session,
)
start = time.time()
history = await asyncio.wait_for(agent.run(max_steps=3), timeout=60)
assert len(history) > 0
assert history.final_result() is not None
assert time.time() - start >= 5, 'Should have waited for slow server'
async def test_redirect_chain_completes(self, browser_session, heavy_base_url):
"""Navigation handles multi-step redirects + slow final response."""
url = f'{heavy_base_url}/redirect-step1'
agent = Agent(
task=f'Navigate to {url}',
llm=create_mock_llm(actions=_nav_actions(url)),
browser_session=browser_session,
)
history = await asyncio.wait_for(agent.run(max_steps=3), timeout=60)
assert len(history) > 0
assert history.final_result() is not None
async def test_navigate_event_accepts_domcontentloaded(self, browser_session, heavy_base_url):
"""NavigateToUrlEvent with fast page should complete quickly via DOMContentLoaded/load."""
url = f'{heavy_base_url}/fast-dom-slow-load'
event = browser_session.event_bus.dispatch(NavigateToUrlEvent(url=url))
await asyncio.wait_for(event, timeout=15)
await event.event_result(raise_if_any=True, raise_if_none=False)
async def test_recovery_after_slow_navigation(self, browser_session, heavy_base_url):
"""Agent recovers and navigates to a fast page after a slow one."""
slow_url = f'{heavy_base_url}/slow-server-pdp'
quick_url = f'{heavy_base_url}/quick-page'
actions = [
f"""
{{
"thinking": "Navigate to slow page",
"evaluation_previous_goal": "Starting",
"memory": "Going to slow page",
"next_goal": "Navigate",
"action": [{{"navigate": {{"url": "{slow_url}"}}}}]
}}
""",
f"""
{{
"thinking": "Now navigate to quick page",
"evaluation_previous_goal": "Slow page loaded",
"memory": "Trying quick page",
"next_goal": "Navigate",
"action": [{{"navigate": {{"url": "{quick_url}"}}}}]
}}
""",
"""
{
"thinking": "Both done",
"evaluation_previous_goal": "Quick page loaded",
"memory": "Recovery successful",
"next_goal": "Done",
"action": [{"done": {"text": "Recovery succeeded", "success": true}}]
}
""",
]
agent = Agent(
task='Navigate to slow then quick page',
llm=create_mock_llm(actions=actions),
browser_session=browser_session,
)
history = await asyncio.wait_for(agent.run(max_steps=4), timeout=90)
assert len(history) >= 2
assert history.final_result() is not None
async def test_event_timeout_sufficient_for_heavy_pages(self, browser_session):
"""event_timeout should be >= 30s to handle slow servers + redirect chains."""
event = NavigateToUrlEvent(url='http://example.com')
assert event.event_timeout is not None
assert event.event_timeout >= 30.0, (
f'event_timeout={event.event_timeout}s is too low for heavy pages (need >= 30s)'
)