mirror of
https://github.com/browser-use/browser-use
synced 2026-04-22 17:45:09 +02:00
updated _navigate_and_wait
This commit is contained in:
@@ -119,7 +119,7 @@ class NavigateToUrlEvent(BaseEvent[None]):
|
||||
# existing_tab: PageHandle | None = None # TODO
|
||||
|
||||
# time limits enforced by bubus, not exposed to LLM:
|
||||
event_timeout: float | None = Field(default_factory=lambda: _get_timeout('TIMEOUT_NavigateToUrlEvent', 15.0)) # seconds
|
||||
event_timeout: float | None = Field(default_factory=lambda: _get_timeout('TIMEOUT_NavigateToUrlEvent', 30.0)) # seconds
|
||||
|
||||
|
||||
class ClickElementEvent(ElementSelectedEvent[dict[str, Any] | None]):
|
||||
|
||||
@@ -850,7 +850,7 @@ class BrowserSession(BaseModel):
|
||||
await self.event_bus.dispatch(NavigationStartedEvent(target_id=target_id, url=event.url))
|
||||
|
||||
# Navigate to URL with proper lifecycle waiting
|
||||
await self._navigate_and_wait(event.url, target_id)
|
||||
await self._navigate_and_wait(event.url, target_id, wait_until=event.wait_until)
|
||||
|
||||
# Close any extension options pages that might have opened
|
||||
await self._close_extension_options_pages()
|
||||
@@ -887,17 +887,17 @@ class BrowserSession(BaseModel):
|
||||
await self.event_bus.dispatch(AgentFocusChangedEvent(target_id=target_id, url=event.url))
|
||||
raise
|
||||
|
||||
async def _navigate_and_wait(self, url: str, target_id: str, timeout: float | None = None) -> None:
|
||||
async def _navigate_and_wait(
|
||||
self,
|
||||
url: str,
|
||||
target_id: str,
|
||||
timeout: float | None = None,
|
||||
wait_until: str = 'load',
|
||||
) -> None:
|
||||
"""Navigate to URL and wait for page readiness using CDP lifecycle events.
|
||||
|
||||
Two-strategy approach optimized for speed with robust fallback:
|
||||
1. networkIdle - Returns ASAP when no network activity (~50-200ms for cached pages)
|
||||
2. load - Fallback when page has ongoing network activity (all resources loaded)
|
||||
|
||||
This gives us instant returns for cached content while being robust for dynamic pages.
|
||||
|
||||
NO handler registration here - handlers are registered ONCE per session in SessionManager.
|
||||
We poll stored events instead to avoid handler accumulation.
|
||||
Polls stored lifecycle events (registered once per session in SessionManager).
|
||||
wait_until controls the minimum acceptable signal: 'commit', 'domcontentloaded', 'load', 'networkidle'.
|
||||
"""
|
||||
cdp_session = await self.get_or_create_cdp_session(target_id, focus=False)
|
||||
|
||||
@@ -909,28 +909,37 @@ class BrowserSession(BaseModel):
|
||||
if url.startswith('http') and current_url.startswith('http')
|
||||
else False
|
||||
)
|
||||
timeout = 2.0 if same_domain else 4.0
|
||||
timeout = 3.0 if same_domain else 8.0
|
||||
|
||||
# Start performance tracking
|
||||
nav_start_time = asyncio.get_event_loop().time()
|
||||
|
||||
nav_result = await cdp_session.cdp_client.send.Page.navigate(
|
||||
params={'url': url, 'transitionType': 'address_bar'},
|
||||
session_id=cdp_session.session_id,
|
||||
)
|
||||
# Wrap Page.navigate() with timeout — heavy sites can block here for 10s+
|
||||
nav_timeout = 20.0
|
||||
try:
|
||||
nav_result = await asyncio.wait_for(
|
||||
cdp_session.cdp_client.send.Page.navigate(
|
||||
params={'url': url, 'transitionType': 'address_bar'},
|
||||
session_id=cdp_session.session_id,
|
||||
),
|
||||
timeout=nav_timeout,
|
||||
)
|
||||
except TimeoutError:
|
||||
duration_ms = (asyncio.get_event_loop().time() - nav_start_time) * 1000
|
||||
self.logger.warning(f'⚠️ Page.navigate() timed out after {nav_timeout}s ({duration_ms:.0f}ms) for {url}')
|
||||
return
|
||||
|
||||
# Check for immediate navigation errors
|
||||
if nav_result.get('errorText'):
|
||||
raise RuntimeError(f'Navigation failed: {nav_result["errorText"]}')
|
||||
|
||||
# Track this specific navigation
|
||||
if wait_until == 'commit':
|
||||
duration_ms = (asyncio.get_event_loop().time() - nav_start_time) * 1000
|
||||
self.logger.debug(f'✅ Page ready for {url} (commit, {duration_ms:.0f}ms)')
|
||||
return
|
||||
|
||||
navigation_id = nav_result.get('loaderId')
|
||||
start_time = asyncio.get_event_loop().time()
|
||||
seen_events = []
|
||||
|
||||
# Poll stored lifecycle events
|
||||
seen_events = [] # Track events for timeout diagnostics
|
||||
|
||||
# Check if session has lifecycle monitoring enabled
|
||||
if not hasattr(cdp_session, '_lifecycle_events'):
|
||||
raise RuntimeError(
|
||||
f'❌ Lifecycle monitoring not enabled for {cdp_session.target_id[:8]}! '
|
||||
@@ -938,42 +947,37 @@ class BrowserSession(BaseModel):
|
||||
f'Session: {cdp_session}'
|
||||
)
|
||||
|
||||
# Poll for lifecycle events until timeout
|
||||
poll_interval = 0.05 # Poll every 50ms
|
||||
# Acceptable events by readiness level (higher is always acceptable)
|
||||
acceptable_events: set[str] = {'networkIdle'}
|
||||
if wait_until in ('load', 'domcontentloaded'):
|
||||
acceptable_events.add('load')
|
||||
if wait_until == 'domcontentloaded':
|
||||
acceptable_events.add('DOMContentLoaded')
|
||||
|
||||
poll_interval = 0.05
|
||||
while (asyncio.get_event_loop().time() - start_time) < timeout:
|
||||
# Check stored events
|
||||
try:
|
||||
# Get recent events matching our navigation
|
||||
for event_data in list(cdp_session._lifecycle_events):
|
||||
event_name = event_data.get('name')
|
||||
event_loader_id = event_data.get('loaderId')
|
||||
|
||||
# Track events
|
||||
event_str = f'{event_name}(loader={event_loader_id[:8] if event_loader_id else "none"})'
|
||||
if event_str not in seen_events:
|
||||
seen_events.append(event_str)
|
||||
|
||||
# Only respond to events from our navigation (or accept all if no loaderId)
|
||||
if event_loader_id and navigation_id and event_loader_id != navigation_id:
|
||||
continue
|
||||
|
||||
if event_name == 'networkIdle':
|
||||
if event_name in acceptable_events:
|
||||
duration_ms = (asyncio.get_event_loop().time() - nav_start_time) * 1000
|
||||
self.logger.debug(f'✅ Page ready for {url} (networkIdle, {duration_ms:.0f}ms)')
|
||||
return
|
||||
|
||||
elif event_name == 'load':
|
||||
duration_ms = (asyncio.get_event_loop().time() - nav_start_time) * 1000
|
||||
self.logger.debug(f'✅ Page ready for {url} (load, {duration_ms:.0f}ms)')
|
||||
self.logger.debug(f'✅ Page ready for {url} ({event_name}, {duration_ms:.0f}ms)')
|
||||
return
|
||||
|
||||
except Exception as e:
|
||||
self.logger.debug(f'Error polling lifecycle events: {e}')
|
||||
|
||||
# Wait before next poll
|
||||
await asyncio.sleep(poll_interval)
|
||||
|
||||
# Timeout - continue anyway with detailed diagnostics
|
||||
duration_ms = (asyncio.get_event_loop().time() - nav_start_time) * 1000
|
||||
if not seen_events:
|
||||
self.logger.error(
|
||||
|
||||
194
tests/ci/browser/test_navigation_slow_pages.py
Normal file
194
tests/ci/browser/test_navigation_slow_pages.py
Normal file
@@ -0,0 +1,194 @@
|
||||
"""
|
||||
Test navigation on heavy/slow-loading pages (e.g. e-commerce PDPs).
|
||||
|
||||
Reproduces the issue where navigating to heavy pages like stevemadden.com PDPs
|
||||
fails due to NavigateToUrlEvent timing out.
|
||||
|
||||
Usage:
|
||||
uv run pytest tests/ci/browser/test_navigation_slow_pages.py -v -s
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
import pytest
|
||||
from pytest_httpserver import HTTPServer
|
||||
from werkzeug import Response
|
||||
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.browser import BrowserSession
|
||||
from browser_use.browser.events import NavigateToUrlEvent
|
||||
from browser_use.browser.profile import BrowserProfile
|
||||
from tests.ci.conftest import create_mock_llm
|
||||
|
||||
|
||||
HEAVY_PDP_HTML = """
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head><title>Frosting Black Velvet - Steve Madden</title></head>
|
||||
<body>
|
||||
<h1>FROSTING</h1>
|
||||
<p class="price">$129.95</p>
|
||||
<button id="add-to-cart">ADD TO BAG</button>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
|
||||
def heavy_page_server():
|
||||
server = HTTPServer()
|
||||
server.start()
|
||||
|
||||
def slow_initial_response(request):
|
||||
time.sleep(6)
|
||||
return Response(HEAVY_PDP_HTML, content_type='text/html')
|
||||
|
||||
server.expect_request('/slow-server-pdp').respond_with_handler(slow_initial_response)
|
||||
|
||||
def redirect_step1(request):
|
||||
return Response('', status=302, headers={'Location': f'http://{server.host}:{server.port}/redirect-step2'})
|
||||
|
||||
def redirect_step2(request):
|
||||
return Response('', status=302, headers={'Location': f'http://{server.host}:{server.port}/redirect-final'})
|
||||
|
||||
def redirect_final(request):
|
||||
time.sleep(3)
|
||||
return Response(HEAVY_PDP_HTML, content_type='text/html')
|
||||
|
||||
server.expect_request('/redirect-step1').respond_with_handler(redirect_step1)
|
||||
server.expect_request('/redirect-step2').respond_with_handler(redirect_step2)
|
||||
server.expect_request('/redirect-final').respond_with_handler(redirect_final)
|
||||
|
||||
server.expect_request('/fast-dom-slow-load').respond_with_data(HEAVY_PDP_HTML, content_type='text/html')
|
||||
server.expect_request('/quick-page').respond_with_data(
|
||||
'<html><body><h1>Quick Page</h1></body></html>', content_type='text/html'
|
||||
)
|
||||
|
||||
yield server
|
||||
server.stop()
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
|
||||
def heavy_base_url(heavy_page_server):
|
||||
return f'http://{heavy_page_server.host}:{heavy_page_server.port}'
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
|
||||
async def browser_session():
|
||||
session = BrowserSession(
|
||||
browser_profile=BrowserProfile(headless=True, user_data_dir=None, keep_alive=True)
|
||||
)
|
||||
await session.start()
|
||||
yield session
|
||||
await session.kill()
|
||||
|
||||
|
||||
def _nav_actions(url: str, msg: str = 'Done') -> list[str]:
|
||||
"""Helper to build a navigate-then-done action sequence."""
|
||||
return [
|
||||
f"""
|
||||
{{
|
||||
"thinking": "Navigate to the page",
|
||||
"evaluation_previous_goal": "Starting task",
|
||||
"memory": "Navigating",
|
||||
"next_goal": "Navigate",
|
||||
"action": [{{"navigate": {{"url": "{url}"}}}}]
|
||||
}}
|
||||
""",
|
||||
f"""
|
||||
{{
|
||||
"thinking": "Page loaded",
|
||||
"evaluation_previous_goal": "Navigation completed",
|
||||
"memory": "Page loaded",
|
||||
"next_goal": "Done",
|
||||
"action": [{{"done": {{"text": "{msg}", "success": true}}}}]
|
||||
}}
|
||||
""",
|
||||
]
|
||||
|
||||
|
||||
class TestHeavyPageNavigation:
|
||||
|
||||
async def test_slow_server_response_completes(self, browser_session, heavy_base_url):
|
||||
"""Navigation succeeds even when server takes 6s to respond."""
|
||||
url = f'{heavy_base_url}/slow-server-pdp'
|
||||
agent = Agent(
|
||||
task=f'Navigate to {url}',
|
||||
llm=create_mock_llm(actions=_nav_actions(url)),
|
||||
browser_session=browser_session,
|
||||
)
|
||||
start = time.time()
|
||||
history = await asyncio.wait_for(agent.run(max_steps=3), timeout=60)
|
||||
assert len(history) > 0
|
||||
assert history.final_result() is not None
|
||||
assert time.time() - start >= 5, 'Should have waited for slow server'
|
||||
|
||||
async def test_redirect_chain_completes(self, browser_session, heavy_base_url):
|
||||
"""Navigation handles multi-step redirects + slow final response."""
|
||||
url = f'{heavy_base_url}/redirect-step1'
|
||||
agent = Agent(
|
||||
task=f'Navigate to {url}',
|
||||
llm=create_mock_llm(actions=_nav_actions(url)),
|
||||
browser_session=browser_session,
|
||||
)
|
||||
history = await asyncio.wait_for(agent.run(max_steps=3), timeout=60)
|
||||
assert len(history) > 0
|
||||
assert history.final_result() is not None
|
||||
|
||||
async def test_navigate_event_accepts_domcontentloaded(self, browser_session, heavy_base_url):
|
||||
"""NavigateToUrlEvent with fast page should complete quickly via DOMContentLoaded/load."""
|
||||
url = f'{heavy_base_url}/fast-dom-slow-load'
|
||||
event = browser_session.event_bus.dispatch(NavigateToUrlEvent(url=url))
|
||||
await asyncio.wait_for(event, timeout=15)
|
||||
await event.event_result(raise_if_any=True, raise_if_none=False)
|
||||
|
||||
async def test_recovery_after_slow_navigation(self, browser_session, heavy_base_url):
|
||||
"""Agent recovers and navigates to a fast page after a slow one."""
|
||||
slow_url = f'{heavy_base_url}/slow-server-pdp'
|
||||
quick_url = f'{heavy_base_url}/quick-page'
|
||||
actions = [
|
||||
f"""
|
||||
{{
|
||||
"thinking": "Navigate to slow page",
|
||||
"evaluation_previous_goal": "Starting",
|
||||
"memory": "Going to slow page",
|
||||
"next_goal": "Navigate",
|
||||
"action": [{{"navigate": {{"url": "{slow_url}"}}}}]
|
||||
}}
|
||||
""",
|
||||
f"""
|
||||
{{
|
||||
"thinking": "Now navigate to quick page",
|
||||
"evaluation_previous_goal": "Slow page loaded",
|
||||
"memory": "Trying quick page",
|
||||
"next_goal": "Navigate",
|
||||
"action": [{{"navigate": {{"url": "{quick_url}"}}}}]
|
||||
}}
|
||||
""",
|
||||
"""
|
||||
{
|
||||
"thinking": "Both done",
|
||||
"evaluation_previous_goal": "Quick page loaded",
|
||||
"memory": "Recovery successful",
|
||||
"next_goal": "Done",
|
||||
"action": [{"done": {"text": "Recovery succeeded", "success": true}}]
|
||||
}
|
||||
""",
|
||||
]
|
||||
agent = Agent(
|
||||
task='Navigate to slow then quick page',
|
||||
llm=create_mock_llm(actions=actions),
|
||||
browser_session=browser_session,
|
||||
)
|
||||
history = await asyncio.wait_for(agent.run(max_steps=4), timeout=90)
|
||||
assert len(history) >= 2
|
||||
assert history.final_result() is not None
|
||||
|
||||
async def test_event_timeout_sufficient_for_heavy_pages(self, browser_session):
|
||||
"""event_timeout should be >= 30s to handle slow servers + redirect chains."""
|
||||
event = NavigateToUrlEvent(url='http://example.com')
|
||||
assert event.event_timeout is not None
|
||||
assert event.event_timeout >= 30.0, (
|
||||
f'event_timeout={event.event_timeout}s is too low for heavy pages (need >= 30s)'
|
||||
)
|
||||
Reference in New Issue
Block a user