mirror of
https://github.com/browser-use/browser-use
synced 2026-05-06 17:52:15 +02:00
305 lines
9.4 KiB
Python
305 lines
9.4 KiB
Python
"""
|
|
Debug test for iframe scrolling issue where DOM tree only shows top elements after scrolling.
|
|
|
|
This test verifies that after scrolling inside an iframe, the selector_map correctly
|
|
contains lower input elements like City, State, Zip Code, etc.
|
|
"""
|
|
|
|
import asyncio
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Add parent directory to path to import browser_use modules
|
|
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
|
|
|
from browser_use.agent.service import Agent
|
|
from browser_use.agent.views import ActionModel, AgentOutput
|
|
from browser_use.browser import BrowserProfile, BrowserSession
|
|
from browser_use.browser.events import BrowserStateRequestEvent
|
|
from browser_use.controller.service import Controller
|
|
from browser_use.controller.views import GoToUrlAction, ScrollAction
|
|
|
|
# Import the mock LLM helper from conftest
|
|
from tests.ci.conftest import create_mock_llm
|
|
|
|
|
|
# (display name, keywords) pairs. A field counts as present when any keyword
# occurs, case-insensitively, in ``str(element)`` of some selector_map entry.
_EXPECTED_FIELD_KEYWORDS = (
    ("First Name", ("firstName", "first name")),
    ("Last Name", ("lastName", "last name")),
    ("Email", ("email",)),
    ("City", ("city",)),
    ("State", ("state",)),
    ("Zip", ("zip", "zipCode")),
)

# Fields the script expects to see in the selector map once the iframe has been scrolled.
_LOWER_FIELDS = ("City", "State", "Zip")


def _find_expected_fields(selector_map) -> dict:
    """Return ``{field name: bool}`` for every expected form field.

    Every expected field gets an entry (``True`` or ``False``) so that the
    report can show missing fields explicitly instead of omitting them.
    """
    rendered = [str(element).lower() for element in selector_map.values()]
    return {
        name: any(keyword.lower() in text for text in rendered for keyword in keywords)
        for name, keywords in _EXPECTED_FIELD_KEYWORDS
    }


async def debug_iframe_scrolling():
    """Debug iframe scrolling and DOM visibility issue.

    Flow:
      1. Start a visible browser session and navigate to the iframe test page
         directly through a ``Controller``.
      2. Run an ``Agent`` driven by a mock LLM that types into a field, scrolls
         inside the iframe and finishes; a DOM snapshot is captured after the
         ``input_text`` and ``scroll`` actions.
      3. Print a per-snapshot report and a comparison that states whether the
         lower form fields (City, State, Zip) are present after scrolling.

    The browser session is always killed and its event bus stopped on exit.
    """
    # Local import: only needed to extract the navigation parameters from the
    # first mock LLM response.
    import json

    print("Starting iframe scrolling debug test...")

    # Sequence of responses for the mock LLM, formatted as the LLM would return them.
    actions = [
        # First action: Navigate to the test URL
        """
        {
            "thinking": "Navigating to the iframe test page",
            "evaluation_previous_goal": null,
            "memory": "Starting test",
            "next_goal": "Navigate to the iframe test page",
            "action": [
                {
                    "go_to_url": {
                        "url": "https://browser-use.github.io/stress-tests/challenges/iframe-inception-level1.html",
                        "new_tab": false
                    }
                }
            ]
        }
        """,
        # Second action: Input text in the first name field (to verify we can interact)
        """
        {
            "thinking": "Inputting text in the first name field to test interaction",
            "evaluation_previous_goal": "Successfully navigated to the page",
            "memory": "Page loaded with nested iframes",
            "next_goal": "Type text in the first name field",
            "action": [
                {
                    "input_text": {
                        "index": 1,
                        "text": "TestName"
                    }
                }
            ]
        }
        """,
        # Third action: Scroll the iframe (element_index=2 should be the iframe)
        """
        {
            "thinking": "Scrolling inside the iframe to reveal lower form elements",
            "evaluation_previous_goal": "Successfully typed in first name field",
            "memory": "Typed TestName in first field",
            "next_goal": "Scroll inside the innermost iframe to see more form fields",
            "action": [
                {
                    "scroll": {
                        "down": true,
                        "num_pages": 1.0,
                        "index": 2
                    }
                }
            ]
        }
        """,
        # Fourth action: Done
        """
        {
            "thinking": "Completed scrolling, ready to inspect DOM",
            "evaluation_previous_goal": "Successfully scrolled inside iframe",
            "memory": "Scrolled to reveal lower form fields",
            "next_goal": "Task completed",
            "action": [
                {
                    "done": {
                        "text": "Scrolling completed",
                        "success": true
                    }
                }
            ]
        }
        """,
    ]

    # Create browser session with headless=False so we can see what's happening
    browser_session = BrowserSession(
        browser_profile=BrowserProfile(
            headless=False,  # Set to False to see the browser
            user_data_dir=None,  # Use temporary directory
            keep_alive=True,
            enable_default_extensions=True,
            cross_origin_iframes=True,  # Enable cross-origin iframe support
        )
    )

    try:
        await browser_session.start()
        print("Browser session started")

        async def capture_dom_state(label: str) -> dict:
            """Request the current browser state and summarise its selector_map.

            Returns a dict with ``label``, ``total_elements``, ``found_elements``
            and ``selector_map``, or ``{"label", "error"}`` when no selector map
            is available.
            """
            print(f"\n📸 Capturing DOM state: {label}")
            state_event = browser_session.event_bus.dispatch(
                BrowserStateRequestEvent(
                    include_dom=True,
                    include_screenshot=False,
                    cache_clickable_elements_hashes=True,
                    include_recent_events=False,
                )
            )
            browser_state = await state_event.event_result()

            if not (browser_state and browser_state.dom_state and browser_state.dom_state.selector_map):
                return {"label": label, "error": "No DOM state available"}

            selector_map = browser_state.dom_state.selector_map
            return {
                "label": label,
                "total_elements": len(selector_map),
                "found_elements": _find_expected_fields(selector_map),
                "selector_map": selector_map,
            }

        print("\n" + "=" * 80)
        print("PHASE 1: INITIAL PAGE LOAD")
        print("=" * 80)

        controller = Controller()

        # The first mock response is a complete agent-output document, not an
        # action. Validating it against the field-less base ActionModel would
        # not yield a go_to_url payload, so build a concrete action model that
        # carries just the navigation parameters.
        class GoToUrlActionModel(ActionModel):
            go_to_url: GoToUrlAction

        goto_params = json.loads(actions[0])["action"][0]["go_to_url"]
        goto_action = GoToUrlActionModel(go_to_url=GoToUrlAction(**goto_params))
        await controller.act(goto_action, browser_session)
        await asyncio.sleep(2)  # Wait for page to fully load

        initial_state = await capture_dom_state("INITIAL (after page load)")

        print("\n" + "=" * 80)
        print("PHASE 2: EXECUTING ACTIONS")
        print("=" * 80)

        # Hook into the controller so a DOM snapshot is taken after each
        # input_text / scroll action executed by the agent.
        states = []
        original_act = controller.act

        async def wrapped_act(*args, **kwargs):
            """Delegate to the real ``act`` and snapshot the DOM afterwards.

            Accepts any calling convention so it works whether the action is
            passed positionally or as the ``action`` keyword.
            """
            result = await original_act(*args, **kwargs)
            action = kwargs.get("action", args[0] if args else None)
            if getattr(action, "input_text", None):
                await asyncio.sleep(1)  # Give time for DOM to update
                states.append(await capture_dom_state("AFTER INPUT_TEXT"))
            elif getattr(action, "scroll", None):
                await asyncio.sleep(2)  # Give more time after scroll
                states.append(await capture_dom_state("AFTER SCROLL"))
            return result

        controller.act = wrapped_act

        # Navigation was already performed above, so the agent only replays the
        # remaining responses. The hooked controller must be handed to the
        # agent, otherwise the agent uses its own controller and the hook
        # never runs.
        agent = Agent(
            task="Input text and scroll inside the iframe",
            llm=create_mock_llm(actions=actions[1:]),
            browser_session=browser_session,
            controller=controller,
        )

        result = await agent.run()
        print(f"\nAgent completed with result: {result}")

        print("\n" + "=" * 80)
        print("PHASE 3: ANALYSIS OF DOM STATES")
        print("=" * 80)

        all_states = [initial_state] + states

        for state in all_states:
            if "error" in state:
                print(f"\n❌ {state['label']}: {state['error']}")
                continue
            print(f"\n📊 {state['label']}:")
            print(f" Total elements: {state['total_elements']}")
            print(" Found elements:")
            for elem_name, found in state["found_elements"].items():
                status = "✓" if found else "✗"
                print(f" {status} {elem_name}")

        print("\n" + "=" * 80)
        print("COMPARISON SUMMARY")
        print("=" * 80)

        if len(all_states) >= 3:
            initial, after_input, after_scroll = all_states[:3]

            print("\nElement count changes:")
            print(f" Initial: {initial.get('total_elements', 0)} elements")
            print(f" After input_text: {after_input.get('total_elements', 0)} elements")
            print(f" After scroll: {after_scroll.get('total_elements', 0)} elements")

            # Check if lower form fields appear after scroll
            if "found_elements" in after_scroll:
                missing_fields = [
                    field for field in _LOWER_FIELDS if not after_scroll["found_elements"].get(field, False)
                ]
                if missing_fields:
                    print("\n⚠️ BUG CONFIRMED: Lower form fields missing after scroll:")
                    for field in missing_fields:
                        print(f" ✗ {field}")
                    print("\nThis confirms that scrolling inside iframes does not update the DOM tree properly.")
                else:
                    print("\n✅ SUCCESS: All lower form fields are visible after scrolling!")
        else:
            # Make it obvious when snapshots are missing instead of ending silently.
            print(f"\n⚠️ Expected 3 DOM snapshots but captured {len(all_states)}; skipping comparison.")

        # Show first few elements from final state for debugging
        if states and "selector_map" in states[-1]:
            print("\n" + "=" * 80)
            print("DEBUG: First 5 elements in final selector_map")
            print("=" * 80)
            final_map = states[-1]["selector_map"]
            for index, element in list(final_map.items())[:5]:
                elem_preview = str(element)[:150]
                print(f"\n [{index}]: {elem_preview}...")

        # Keep browser open for manual inspection if needed
        print("\n" + "=" * 80)
        print("Test complete. Browser will remain open for 10 seconds for inspection...")
        print("=" * 80)
        await asyncio.sleep(10)

    finally:
        print("\nCleaning up...")
        try:
            await browser_session.kill()
        finally:
            # Stop the event bus even if killing the browser failed.
            await browser_session.event_bus.stop(clear=True, timeout=5)
        print("Browser session closed")
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the debug test only when executed as a script, not on import.
    asyncio.run(debug_iframe_scrolling())