Fix cross-origin iframe DOM retrieval (#1965)

This commit is contained in:
Nick Sweeting
2025-06-19 23:01:49 -07:00
committed by GitHub
4 changed files with 186 additions and 48 deletions

View File

@@ -1480,8 +1480,7 @@ class BrowserSession(BaseModel):
"""
page = await self.get_current_page()
try:
await page.evaluate(
"""
script = """
try {
// Remove the highlight container and all its contents
const container = document.getElementById('playwright-highlight-container');
@@ -1498,7 +1497,13 @@ class BrowserSession(BaseModel):
console.error('Failed to remove highlights:', e);
}
"""
)
await page.evaluate(script)
for iframe in page.frames:
if iframe.url and iframe.url != page.url and not iframe.url.startswith('data:'):
await iframe.evaluate(script)
except Exception as e:
self.logger.debug(f'⚠️ Failed to remove highlights (this is usually ok): {type(e).__name__}: {e}')
# Don't raise the error since this is not critical functionality

View File

@@ -4,10 +4,11 @@
focusHighlightIndex: -1,
viewportExpansion: 0,
debugMode: false,
initialIndex: 0,
}
) => {
const { doHighlightElements, focusHighlightIndex, viewportExpansion, debugMode } = args;
let highlightIndex = 0; // Reset highlight index
const { doHighlightElements, focusHighlightIndex, viewportExpansion, debugMode, initialIndex } = args;
let highlightIndex = initialIndex; // Reset highlight index
// Add timing stack to handle recursion
const TIMING_STACK = {
@@ -210,7 +211,7 @@
*/
const DOM_HASH_MAP = {};
const ID = { current: 0 };
const ID = { current: initialIndex };
const HIGHLIGHT_CONTAINER_ID = "playwright-highlight-container";
@@ -1354,6 +1355,7 @@
if (domElement) nodeData.children.push(domElement);
}
}
nodeData.hasIframeContent = true;
} catch (e) {
console.warn("Unable to access iframe:", e);
}

View File

@@ -22,6 +22,42 @@ from browser_use.utils import time_execution_async
# height: int
@dataclass
class PageFrameEvaluationResult:
url: str
result: dict
name: str | None = None
id: str | None = None
@property
def known_frame_urls(self) -> list[str]:
return [
v.get('attributes', {}).get('src')
for v in self.map.values()
if v.get('hasIframeContent') and v.get('attributes', {}).get('src')
]
@property
def map(self) -> dict:
return self.result.get('map', {})
@property
def map_size(self) -> int:
return len(self.map)
@property
def perf_metrics(self) -> dict:
return self.result.get('perfMetrics', {})
@property
def short_url(self) -> str:
return self.url[:50] + '...' if len(self.url) > 50 else self.url
@property
def root_id(self) -> str | None:
return self.result.get('rootId')
class DomService:
logger: logging.Logger
@@ -94,73 +130,160 @@ class DomService:
'focusHighlightIndex': focus_element,
'viewportExpansion': viewport_expansion,
'debugMode': debug_mode,
'initialIndex': 0,
}
try:
eval_page: dict = await self.page.evaluate(self.js_code, args)
page_eval_result = PageFrameEvaluationResult(
url=self.page.url,
result=eval_page,
)
except Exception as e:
self.logger.error('Error evaluating JavaScript: %s', e)
raise
frames = [page_eval_result]
total_map_size = page_eval_result.map_size
known_frame_urls = page_eval_result.known_frame_urls
# TODO: only look in iframes from enabled_domains
for iframe in self.page.frames:
if (
iframe.url
and iframe.url != self.page.url
and not iframe.url.startswith('data:')
and iframe.url not in known_frame_urls
):
try:
frame_element = await iframe.frame_element()
except Exception as e:
self.logger.error('Error getting frame element for iframe %s: %s', iframe.url, e)
continue
if not await frame_element.is_visible():
continue
args['initialIndex'] = total_map_size # continue indexing from the last index
try:
name = await frame_element.get_attribute('name')
id = await frame_element.get_attribute('id')
iframe_eval_result = await iframe.evaluate(self.js_code, args)
frame = PageFrameEvaluationResult(
url=iframe.url,
result=iframe_eval_result,
name=name,
id=id,
)
frames.append(frame)
known_frame_urls.append(iframe.url)
known_frame_urls.extend(frame.known_frame_urls)
total_map_size += frame.map_size
except Exception as e:
self.logger.error('Error evaluating JavaScript in iframe %s: %s', iframe.url, e)
continue
# Only log performance metrics in debug mode
if debug_mode and 'perfMetrics' in eval_page:
perf = eval_page['perfMetrics']
if debug_mode and len(frames) > 1:
for index, frame in enumerate(frames):
perf = frame.perf_metrics
if perf:
# Get key metrics for summary
total_nodes = perf.get('nodeMetrics', {}).get('totalNodes', 0)
# processed_nodes = perf.get('nodeMetrics', {}).get('processedNodes', 0)
# Get key metrics for summary
total_nodes = perf.get('nodeMetrics', {}).get('totalNodes', 0)
# processed_nodes = perf.get('nodeMetrics', {}).get('processedNodes', 0)
# Count interactive elements from the DOM map
interactive_count = 0
for node_data in frame.map.values():
if isinstance(node_data, dict) and node_data.get('isInteractive'):
interactive_count += 1
# Count interactive elements from the DOM map
interactive_count = 0
if 'map' in eval_page:
for node_data in eval_page['map'].values():
if isinstance(node_data, dict) and node_data.get('isInteractive'):
interactive_count += 1
# Create concise summary
self.logger.debug(
f'🔎 Ran buildDOMTree.js interactive element detection on{" iframe" if index > 0 else ""}: %s interactive=%d/%d\n',
frame.short_url,
interactive_count,
total_nodes,
# processed_nodes,
)
# Create concise summary
url_short = self.page.url[:50] + '...' if len(self.page.url) > 50 else self.page.url
self.logger.debug(
'🔎 Ran buildDOMTree.js interactive element detection on: %s interactive=%d/%d\n',
url_short,
interactive_count,
total_nodes,
# processed_nodes,
)
return await self._construct_dom_tree(eval_page)
return await self._construct_dom_tree(frames)
@time_execution_async('--construct_dom_tree')
async def _construct_dom_tree(
self,
eval_page: dict,
frames: list[PageFrameEvaluationResult],
) -> tuple[DOMElementNode, SelectorMap]:
js_node_map = eval_page['map']
js_root_id = eval_page['rootId']
# The first entry in frames is the main page, and it contains the rootId
js_root_id = frames[0].root_id
if js_root_id is None:
raise ValueError('No rootId found in the evaluated page structure')
selector_map = {}
node_map = {}
selector_map: SelectorMap = {}
node_map: dict[str, DOMBaseNode] = {}
for id, node_data in js_node_map.items():
node, children_ids = self._parse_node(node_data)
if node is None:
continue
for frame in frames:
js_node_map = frame.map
for id, node_data in js_node_map.items():
node, children_ids = self._parse_node(node_data)
if node is None:
continue
node_map[id] = node
node_map[id] = node
if isinstance(node, DOMElementNode) and node.highlight_index is not None:
selector_map[node.highlight_index] = node
if isinstance(node, DOMElementNode) and node.highlight_index is not None:
selector_map[node.highlight_index] = node
# NOTE: We know that we are building the tree bottom up
# and all children are already processed.
if isinstance(node, DOMElementNode):
for child_id in children_ids:
if child_id not in node_map:
# NOTE: We know that we are building the tree bottom up
# and all children are already processed.
if isinstance(node, DOMElementNode):
for child_id in children_ids:
if child_id not in node_map:
continue
child_node = node_map[child_id]
child_node.parent = node
node.children.append(child_node)
# For each child iframe, we need to set the parent of the root element to the iframe element.
for frame in frames[1:]:
content_root_node = node_map.get(frame.root_id)
if content_root_node:
# Find the iframe element in the main page
iframe_element_node = next(
(
node
for node in node_map.values()
if isinstance(node, DOMElementNode)
and node.is_iframe_element(url=frame.url, name=frame.name, id=frame.id)
),
None,
)
if iframe_element_node:
if not iframe_element_node.children:
iframe_element_node.children = [content_root_node]
content_root_node.parent = iframe_element_node
continue
else:
self.logger.warning(
'Iframe element %s already has children, skipping',
frame.short_url,
)
else:
self.logger.warning(
'Could not find iframe element for %s in the main page DOM',
frame.short_url,
)
child_node = node_map[child_id]
# If the frame could not be attached (no matching iframe element, or it already has children), remove the frame's nodes from the maps.
for id in frame.map.keys():
node = node_map.get(id)
# Remove the node from the selector map if it has a highlight index
if isinstance(node, DOMElementNode) and node.highlight_index is not None and node.highlight_index in selector_map:
del selector_map[node.highlight_index]
child_node.parent = node
node.children.append(child_node)
del node_map[id]
html_to_dict = node_map[str(js_root_id)]

View File

@@ -230,6 +230,14 @@ class DOMElementNode(DOMBaseNode):
process_node(self, 0)
return '\n'.join(formatted_text)
def is_iframe_element(self, url: str, name: str | None = None, id: str | None = None) -> bool:
return (
self.tag_name.lower() == 'iframe'
and self.attributes.get('src') == url
and (name is None or self.attributes.get('name') == name)
and (id is None or self.attributes.get('id') == id)
)
SelectorMap = dict[int, DOMElementNode]