diff --git a/tests/old/test_cross_origin_iframe_unified_tree.py b/tests/old/test_cross_origin_iframe_unified_tree.py
new file mode 100644
index 000000000..998728c62
--- /dev/null
+++ b/tests/old/test_cross_origin_iframe_unified_tree.py
@@ -0,0 +1,407 @@
+import asyncio
+
+import pytest
+from pytest_httpserver import HTTPServer
+
+from browser_use import BrowserSession
+from browser_use.browser import BrowserProfile
+
+
+@pytest.fixture
+def cross_origin_iframe_html():
+ """HTML pages for testing cross-origin iframe handling."""
+
+ # Main page (served on one port)
+ main_page = """
+
+
+
Main Page
+
+ Main Page Content
+
+
+
+
+
+
+ Some text after iframe
+ Main Link
+
+
+ """
+
+ # First iframe content (served on different port)
+ frame1_content = """
+
+
+ Frame 1
+
+ Frame 1 Content
+
+
+
+
+
+
+ """
+
+ # Second nested iframe content (served on yet another port/path)
+ frame2_content = """
+
+
+ Frame 2
+
+ Frame 2 Content (Nested)
+
+
+
+
+
+
+
+
+
+
+ """
+
+ # Third level nested iframe
+ frame3_content = """
+
+
+ Frame 3
+
+ Frame 3 Content (Deeply Nested)
+
+
+
+ """
+
+ return {'main': main_page, 'frame1': frame1_content, 'frame2': frame2_content, 'frame3': frame3_content}
+
+
+@pytest.fixture
+def setup_cross_origin_servers(httpserver: HTTPServer, cross_origin_iframe_html):
+ """Set up multiple HTTP servers to simulate cross-origin iframes."""
+ import socket
+
+ from pytest_httpserver import HTTPServer as HTTPServerClass
+
+ # Main server (already provided by httpserver fixture)
+ main_port = httpserver.port
+
+ # Create additional servers for cross-origin simulation
+ # Find available ports
+ def get_free_port():
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+ s.bind(('', 0))
+ s.listen(1)
+ port = s.getsockname()[1]
+ return port
+
+ frame1_port = get_free_port()
+ frame2_port = get_free_port()
+ frame3_port = get_free_port()
+
+ # Create additional HTTP servers
+ frame1_server = HTTPServerClass(host='127.0.0.1', port=frame1_port)
+ frame2_server = HTTPServerClass(host='127.0.0.1', port=frame2_port)
+ frame3_server = HTTPServerClass(host='127.0.0.1', port=frame3_port)
+
+ frame1_server.start()
+ frame2_server.start()
+ frame3_server.start()
+
+ # URLs for each frame
+ main_url = f'http://127.0.0.1:{main_port}'
+ frame1_url = f'http://127.0.0.1:{frame1_port}/frame1'
+ frame2_url = f'http://127.0.0.1:{frame2_port}/frame2'
+ frame3_url = f'http://127.0.0.1:{frame3_port}/frame3'
+
+ # Set up routes with proper iframe URLs injected
+ httpserver.expect_request('/').respond_with_data(
+ cross_origin_iframe_html['main'].format(frame1_url=frame1_url), content_type='text/html'
+ )
+
+ frame1_server.expect_request('/frame1').respond_with_data(
+ cross_origin_iframe_html['frame1'].format(frame2_url=frame2_url), content_type='text/html'
+ )
+
+ frame2_server.expect_request('/frame2').respond_with_data(
+ cross_origin_iframe_html['frame2'].format(frame3_url=frame3_url), content_type='text/html'
+ )
+
+ frame3_server.expect_request('/frame3').respond_with_data(cross_origin_iframe_html['frame3'], content_type='text/html')
+
+ yield {
+ 'main_url': main_url,
+ 'servers': {'main': httpserver, 'frame1': frame1_server, 'frame2': frame2_server, 'frame3': frame3_server},
+ }
+
+ # Cleanup
+ frame1_server.stop()
+ frame2_server.stop()
+ frame3_server.stop()
+
+
+@pytest.mark.asyncio
+async def test_cross_origin_iframe_detection_current_behavior(setup_cross_origin_servers):
+ """Test current behavior with cross-origin iframes - demonstrates the problem."""
+ servers_info = setup_cross_origin_servers
+ main_url = servers_info['main_url']
+
+ profile = BrowserProfile(headless=True)
+ session = BrowserSession(browser_profile=profile)
+ try:
+ await session.start()
+ page = await session.get_current_page()
+ await page.goto(main_url)
+
+ # Wait for all iframes to load
+ await page.wait_for_load_state('networkidle')
+ await asyncio.sleep(1) # Extra wait for nested iframes
+
+ # Get the current DOM tree (without CDP)
+ from browser_use.dom.service import DomService
+
+ dom_service = DomService(page, logger=session.logger)
+ dom_state = await dom_service.get_clickable_elements(highlight_elements=True)
+
+ # Extract all interactive elements
+ interactive_elements = []
+
+ def collect_interactive_elements(node, elements_list):
+ """Recursively collect all interactive elements."""
+ if hasattr(node, 'is_interactive') and node.is_interactive:
+ elements_list.append(
+ {
+ 'tag': node.tag_name,
+ 'xpath': node.xpath,
+ 'highlight_index': node.highlight_index,
+ 'attributes': node.attributes,
+ }
+ )
+
+ if hasattr(node, 'children'):
+ for child in node.children:
+ collect_interactive_elements(child, elements_list)
+
+ collect_interactive_elements(dom_state.element_tree, interactive_elements)
+
+ # Check what elements were found
+ found_ids = {elem['attributes'].get('id') for elem in interactive_elements if 'id' in elem['attributes']}
+ print(f'\nFound elements: {found_ids}')
+
+ # Expected elements in main frame only (current behavior)
+ main_frame_ids = {'main-button', 'main-input', 'main-link'}
+
+ # Elements that SHOULD be found but won't be due to cross-origin
+ cross_origin_ids = {
+ 'frame1-input',
+ 'frame1-select',
+ 'frame1-submit',
+ 'frame2-email',
+ 'frame2-textarea',
+ 'frame2-button',
+ 'frame3-password',
+ 'frame3-checkbox',
+ 'frame3-button',
+ }
+
+ # Current behavior: only main frame elements are found
+ assert main_frame_ids.issubset(found_ids), f'Main frame elements missing: {main_frame_ids - found_ids}'
+
+ # This assertion will fail - demonstrating the problem
+ missing_cross_origin = cross_origin_ids - found_ids
+ print(f'\nMissing cross-origin elements: {missing_cross_origin}')
+ assert not missing_cross_origin, f'Cross-origin elements not detected: {missing_cross_origin}'
+ finally:
+ await session.close()
+
+
+@pytest.mark.asyncio
+async def test_cross_origin_iframe_unified_tree_with_cdp(setup_cross_origin_servers):
+ """Test that unified tree can detect all elements across cross-origin iframes using CDP."""
+ servers_info = setup_cross_origin_servers
+ main_url = servers_info['main_url']
+
+ profile = BrowserProfile(headless=True)
+ session = BrowserSession(browser_profile=profile)
+ try:
+ await session.start()
+ page = await session.get_current_page()
+ await page.goto(main_url)
+
+ # Wait for all iframes to load
+ await page.wait_for_load_state('networkidle')
+ await asyncio.sleep(1)
+
+ # This is what we want to implement - get unified tree with CDP
+ # For now, let's test what CDP can access
+
+ # Create CDP session for main frame
+ cdp_session = await page.context.new_cdp_session(page)
+
+ # Enable required domains
+ await cdp_session.send('Accessibility.enable')
+ await cdp_session.send('DOM.enable')
+
+ # Get accessibility tree
+ ax_tree = await cdp_session.send('Accessibility.getFullAXTree')
+
+ # Check if we can see iframe content
+ def count_nodes(node, depth=0):
+ """Count nodes in accessibility tree."""
+ count = 1
+ indent = ' ' * depth
+ role = node.get('role', {}).get('value', 'unknown')
+ name = node.get('name', {}).get('value', '')
+ print(f'{indent}{role}: {name}')
+
+ for child_id in node.get('childIds', []):
+ # In a real implementation, we'd need to look up the child node
+ count += 1
+
+ return count
+
+ if 'root' in ax_tree:
+ print('\nAccessibility tree from main frame:')
+ node_count = count_nodes(ax_tree['root'])
+ print(f'\nTotal nodes in main frame: {node_count}')
+
+ # Try to access other frames
+ print(f'\nTotal frames found: {len(page.frames)}')
+ for i, frame in enumerate(page.frames):
+ print(f'Frame {i}: {frame.url}')
+
+ # For cross-origin frames, we need separate CDP sessions
+ if frame != page.main_frame:
+ try:
+ frame_cdp = await page.context.new_cdp_session(frame)
+ await frame_cdp.send('Accessibility.enable')
+ frame_ax_tree = await frame_cdp.send('Accessibility.getFullAXTree')
+ print(f' - Successfully got accessibility tree for frame {i}')
+ await frame_cdp.detach()
+ except Exception as e:
+ print(f' - Failed to access frame {i}: {e}')
+
+ # This test demonstrates what we need to implement:
+ # 1. Create CDP sessions for each frame
+ # 2. Get accessibility trees from all frames
+ # 3. Merge them with unique IDs
+ # 4. Build unified XPaths that cross frame boundaries
+ finally:
+ await session.close()
+
+
+@pytest.mark.asyncio
+async def test_desired_unified_tree_behavior(setup_cross_origin_servers):
+ """Test demonstrating the desired behavior with unified tree."""
+ servers_info = setup_cross_origin_servers
+ main_url = servers_info['main_url']
+
+ profile = BrowserProfile(headless=True)
+ session = BrowserSession(browser_profile=profile)
+ try:
+ await session.start()
+ page = await session.get_current_page()
+ await page.goto(main_url)
+
+ # Wait for all iframes to load
+ await page.wait_for_load_state('networkidle')
+ await asyncio.sleep(1)
+
+ # What we want to achieve:
+ # 1. Get unified tree that includes all frames
+ unified_tree = {
+ 'tag': 'body',
+ 'encoded_id': 'f0:n1',
+ 'children': [
+ {'tag': 'h1', 'encoded_id': 'f0:n2'},
+ {'tag': 'button', 'id': 'main-button', 'encoded_id': 'f0:n3'},
+ {'tag': 'input', 'id': 'main-input', 'encoded_id': 'f0:n4'},
+ {
+ 'tag': 'iframe',
+ 'id': 'frame1',
+ 'encoded_id': 'f0:n5',
+ 'frame_ordinal': 1,
+ 'children': [
+ # Frame 1 content with f1: prefix
+ {'tag': 'h2', 'encoded_id': 'f1:n1'},
+ {'tag': 'input', 'id': 'frame1-input', 'encoded_id': 'f1:n2'},
+ {'tag': 'select', 'id': 'frame1-select', 'encoded_id': 'f1:n3'},
+ {'tag': 'button', 'id': 'frame1-submit', 'encoded_id': 'f1:n4'},
+ {
+ 'tag': 'iframe',
+ 'id': 'frame2',
+ 'encoded_id': 'f1:n5',
+ 'frame_ordinal': 2,
+ 'children': [
+ # Frame 2 content with f2: prefix
+ {'tag': 'input', 'id': 'frame2-email', 'encoded_id': 'f2:n1'},
+ {'tag': 'textarea', 'id': 'frame2-textarea', 'encoded_id': 'f2:n2'},
+ {'tag': 'button', 'id': 'frame2-button', 'encoded_id': 'f2:n3'},
+ {
+ 'tag': 'iframe',
+ 'id': 'frame3',
+ 'encoded_id': 'f2:n4',
+ 'frame_ordinal': 3,
+ 'children': [
+ # Frame 3 content with f3: prefix
+ {'tag': 'input', 'id': 'frame3-password', 'encoded_id': 'f3:n1'},
+ {'tag': 'input', 'id': 'frame3-checkbox', 'encoded_id': 'f3:n2'},
+ {'tag': 'button', 'id': 'frame3-button', 'encoded_id': 'f3:n3'},
+ ],
+ },
+ ],
+ },
+ ],
+ },
+ {'tag': 'p', 'encoded_id': 'f0:n6'},
+ {'tag': 'a', 'id': 'main-link', 'encoded_id': 'f0:n7'},
+ ],
+ }
+
+ # 2. Deep XPaths that include frame traversal
+ expected_xpaths = {
+ 'main-button': '//button[@id="main-button"]',
+ 'frame1-input': '//iframe[@id="frame1"]//input[@id="frame1-input"]',
+ 'frame2-email': '//iframe[@id="frame1"]//iframe[@id="frame2"]//input[@id="frame2-email"]',
+ 'frame3-button': '//iframe[@id="frame1"]//iframe[@id="frame2"]//iframe[@id="frame3"]//button[@id="frame3-button"]',
+ }
+
+ # 3. Ability to click elements in any frame
+ # await session.click_element_by_encoded_id('f3:n3') # Click button in deepest frame
+
+ print('\nThis test demonstrates the desired unified tree structure')
+ print('with encoded IDs and deep XPaths for cross-frame navigation')
+ finally:
+ await session.close()