From 13d5468aa29eefcf41a227157fea33256843fa97 Mon Sep 17 00:00:00 2001 From: Ilya Biryukov Date: Thu, 12 Jun 2025 14:07:21 -0700 Subject: [PATCH 1/3] Fix cross-origin iframe DOM retrieval --- browser_use/browser/session.py | 11 +- browser_use/dom/buildDomTree.js | 8 +- browser_use/dom/service.py | 204 +++++++++++++++++++++++++------- browser_use/dom/views.py | 5 + 4 files changed, 179 insertions(+), 49 deletions(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index 46c9b5969..e43f47cd3 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -1389,8 +1389,7 @@ class BrowserSession(BaseModel): """ page = await self.get_current_page() try: - await page.evaluate( - """ + script = """ try { // Remove the highlight container and all its contents const container = document.getElementById('playwright-highlight-container'); @@ -1407,7 +1406,13 @@ class BrowserSession(BaseModel): console.error('Failed to remove highlights:', e); } """ - ) + + await page.evaluate(script) + + for iframe in page.frames: + if iframe.url and iframe.url != page.url and not iframe.url.startswith('data:'): + await iframe.evaluate(script) + except Exception as e: self.logger.debug(f'⚠️ Failed to remove highlights (this is usually ok): {type(e).__name__}: {e}') # Don't raise the error since this is not critical functionality diff --git a/browser_use/dom/buildDomTree.js b/browser_use/dom/buildDomTree.js index 1abf4ac4e..13d49fa16 100644 --- a/browser_use/dom/buildDomTree.js +++ b/browser_use/dom/buildDomTree.js @@ -4,10 +4,11 @@ focusHighlightIndex: -1, viewportExpansion: 0, debugMode: false, + initialIndex: 0, } ) => { - const { doHighlightElements, focusHighlightIndex, viewportExpansion, debugMode } = args; - let highlightIndex = 0; // Reset highlight index + const { doHighlightElements, focusHighlightIndex, viewportExpansion, debugMode, initialIndex } = args; + let highlightIndex = initialIndex; // Reset highlight index // Add timing stack to handle recursion const TIMING_STACK = { @@ -210,7 +211,7 @@ */ const DOM_HASH_MAP = {}; - const ID = { current: 0 }; + const ID = { current: initialIndex }; const HIGHLIGHT_CONTAINER_ID = "playwright-highlight-container"; @@ -1354,6 +1355,7 @@ if (domElement) nodeData.children.push(domElement); } } + nodeData.hasIframeContent = true; } catch (e) { console.warn("Unable to access iframe:", e); } diff --git a/browser_use/dom/service.py b/browser_use/dom/service.py index 31b2244d9..23dd337eb 100644 --- a/browser_use/dom/service.py +++ b/browser_use/dom/service.py @@ -22,6 +22,41 @@ class ViewportInfo: width: int height: int +@dataclass +class PageFrameEvaluationResult: + url: str + result: dict + name: str | None = None + id: str | None = None + + @property + def known_frame_urls(self) -> list[str]: + return [ + v.get('attributes', {}).get('src') for v + in self.map.values() + if v.get('hasIframeContent') and v.get('attributes', {}).get('src') + ] + + @property + def map(self) -> dict: + return self.result.get('map', {}) + + @property + def map_size(self) -> int: + return len(self.map) + + @property + def perf_metrics(self) -> dict: + return self.result.get('perfMetrics', {}) + + @property + def short_url(self) -> str: + return self.url[:50] + '...' if len(self.url) > 50 else self.url + + @property + def root_id(self) -> str | None: + return self.result.get('rootId') + class DomService: logger: logging.Logger @@ -95,73 +130,156 @@ class DomService: 'focusHighlightIndex': focus_element, 'viewportExpansion': viewport_expansion, 'debugMode': debug_mode, + 'initialIndex': 0, } try: eval_page: dict = await self.page.evaluate(self.js_code, args) + page_eval_result = PageFrameEvaluationResult( + url=self.page.url, + result=eval_page, + ) except Exception as e: self.logger.error('Error evaluating JavaScript: %s', e) raise + frames = [page_eval_result] + total_map_size = page_eval_result.map_size + + known_frame_urls = page_eval_result.known_frame_urls + # TODO: only look in iframes from enabled_domains + for iframe in self.page.frames: + if (iframe.url and + iframe.url != self.page.url and + not iframe.url.startswith('data:') and + iframe.url not in known_frame_urls): + + try: + frame_element = await iframe.frame_element() + except Exception as e: + self.logger.error('Error getting frame element for iframe %s: %s', iframe.url, e) + continue + + if not await frame_element.is_visible(): + continue + + args['initialIndex'] = total_map_size # continue indexing from the last index + try: + name = await frame_element.get_attribute('name') + id = await frame_element.get_attribute('id') + iframe_eval_result = await iframe.evaluate(self.js_code, args) + frame = PageFrameEvaluationResult( + url=iframe.url, + result=iframe_eval_result, + name=name, + id=id, + ) + frames.append(frame) + known_frame_urls.append(iframe.url) + known_frame_urls.extend(frame.known_frame_urls) + total_map_size += frame.map_size + except Exception as e: + self.logger.error('Error evaluating JavaScript in iframe %s: %s', iframe.url, e) + continue + # Only log performance metrics in debug mode - if debug_mode and 'perfMetrics' in eval_page: - perf = eval_page['perfMetrics'] + if debug_mode and len(frames) > 1: + for index, frame in enumerate(frames): + perf = frame.perf_metrics + if perf: + # Get key metrics for summary + total_nodes = perf.get('nodeMetrics', {}).get('totalNodes', 0) + # processed_nodes = perf.get('nodeMetrics', {}).get('processedNodes', 0) - # Get key metrics for summary - total_nodes = perf.get('nodeMetrics', {}).get('totalNodes', 0) - # processed_nodes = perf.get('nodeMetrics', {}).get('processedNodes', 0) + # Count interactive elements from the DOM map + interactive_count = 0 + for node_data in frame.map.values(): + if isinstance(node_data, dict) and node_data.get('isInteractive'): + interactive_count += 1 - # Count interactive elements from the DOM map - interactive_count = 0 - if 'map' in eval_page: - for node_data in eval_page['map'].values(): - if isinstance(node_data, dict) and node_data.get('isInteractive'): - interactive_count += 1 - - # Create concise summary - url_short = self.page.url[:50] + '...' if len(self.page.url) > 50 else self.page.url - self.logger.debug( - '🔎 Ran buildDOMTree.js interactive element detection on: %s interactive=%d/%d\n', - url_short, - interactive_count, - total_nodes, - # processed_nodes, - ) - - return await self._construct_dom_tree(eval_page) + # Create concise summary + self.logger.debug( + f'🔎 Ran buildDOMTree.js interactive element detection on{" iframe" if index > 0 else ""}: %s interactive=%d/%d\n', + frame.short_url, + interactive_count, + total_nodes, + # processed_nodes, + ) + + return await self._construct_dom_tree(frames) @time_execution_async('--construct_dom_tree') async def _construct_dom_tree( self, - eval_page: dict, + frames: list[PageFrameEvaluationResult], ) -> tuple[DOMElementNode, SelectorMap]: - js_node_map = eval_page['map'] - js_root_id = eval_page['rootId'] + # The first page in eval_pages is the main page, and it contains the rootId + js_root_id = frames[0].root_id + if js_root_id is None: + raise ValueError('No rootId found in the evaluated page structure') - selector_map = {} - node_map = {} + selector_map: SelectorMap = {} + node_map: dict[str, DOMBaseNode] = {} - for id, node_data in js_node_map.items(): - node, children_ids = self._parse_node(node_data) - if node is None: - continue + for frame in frames: + js_node_map = frame.map + for id, node_data in js_node_map.items(): + node, children_ids = self._parse_node(node_data) + if node is None: + continue - node_map[id] = node + node_map[id] = node - if isinstance(node, DOMElementNode) and node.highlight_index is not None: - selector_map[node.highlight_index] = node + if isinstance(node, DOMElementNode) and node.highlight_index is not None: + selector_map[node.highlight_index] = node - # NOTE: We know that we are building the tree bottom up - # and all children are already processed. - if isinstance(node, DOMElementNode): - for child_id in children_ids: - if child_id not in node_map: + # NOTE: We know that we are building the tree bottom up + # and all children are already processed. + if isinstance(node, DOMElementNode): + for child_id in children_ids: + if child_id not in node_map: + continue + + child_node = node_map[child_id] + + child_node.parent = node + node.children.append(child_node) + + # For each child iframe, we need to set the parent of the root element to the iframe element. + for frame in frames[1:]: + content_root_node = node_map.get(frame.root_id) + if content_root_node: + # Find the iframe element in the main page + iframe_element_node = next( + (node for node in node_map.values() + if isinstance(node, DOMElementNode) and + node.is_iframe_element(url=frame.url, name=frame.name, id=frame.id)), + None + ) + if iframe_element_node: + if not iframe_element_node.children: + iframe_element_node.children=[content_root_node] + content_root_node.parent = iframe_element_node continue + else: + self.logger.warning( + 'Iframe element %s already has children, skipping', + frame.short_url, + ) + else: + self.logger.warning( + 'Could not find iframe element for %s in the main page DOM', + frame.short_url, + ) - child_node = node_map[child_id] + # If we could not find the iframe element, remove the frame's nodes from the maps. + for id in frame.map.keys(): + node = node_map.get(id) + # Remove the node from the selector map if it has a highlight index + if isinstance(node, DOMElementNode) and node.highlight_index is not None and node.highlight_index in selector_map: + del selector_map[node.highlight_index] - child_node.parent = node - node.children.append(child_node) + del node_map[id] html_to_dict = node_map[str(js_root_id)] diff --git a/browser_use/dom/views.py b/browser_use/dom/views.py index e96723068..911669983 100644 --- a/browser_use/dom/views.py +++ b/browser_use/dom/views.py @@ -233,6 +233,11 @@ class DOMElementNode(DOMBaseNode): process_node(self, 0) return '\n'.join(formatted_text) + def is_iframe_element(self, url: str, name: str | None = None, id: str | None = None) -> bool: + return (self.tag_name.lower() == 'iframe' and + self.attributes.get('src') == url and + (name is None or self.attributes.get('name') == name) and + (id is None or self.attributes.get('id') == id)) SelectorMap = dict[int, DOMElementNode] From d20a3b55d6a2ffe1bae79b4e2ad8ffe98a707400 Mon Sep 17 00:00:00 2001 From: Ilya Biryukov Date: Thu, 12 Jun 2025 17:30:59 -0700 Subject: [PATCH 2/3] Fix pre-commit lint issues and compile error in multiple_agents_same_browser --- browser_use/dom/service.py | 43 +++++++++------- browser_use/dom/views.py | 11 ++-- .../features/multiple_agents_same_browser.py | 50 ++++++++++++++++++- 3 files changed, 80 insertions(+), 24 deletions(-) diff --git a/browser_use/dom/service.py b/browser_use/dom/service.py index 23dd337eb..d31f1673f 100644 --- a/browser_use/dom/service.py +++ b/browser_use/dom/service.py @@ -22,6 +22,7 @@ class ViewportInfo: width: int height: int + @dataclass class PageFrameEvaluationResult: url: str @@ -32,27 +33,27 @@ class PageFrameEvaluationResult: @property def known_frame_urls(self) -> list[str]: return [ - v.get('attributes', {}).get('src') for v - in self.map.values() + v.get('attributes', {}).get('src') + for v in self.map.values() if v.get('hasIframeContent') and v.get('attributes', {}).get('src') ] - + @property def map(self) -> dict: return self.result.get('map', {}) - + @property def map_size(self) -> int: return len(self.map) - + @property - def perf_metrics(self) -> dict: + def perf_metrics(self) -> dict: return self.result.get('perfMetrics', {}) - + @property def short_url(self) -> str: return self.url[:50] + '...' if len(self.url) > 50 else self.url - + @property def root_id(self) -> str | None: return self.result.get('rootId') @@ -149,11 +150,12 @@ class DomService: known_frame_urls = page_eval_result.known_frame_urls # TODO: only look in iframes from enabled_domains for iframe in self.page.frames: - if (iframe.url and - iframe.url != self.page.url and - not iframe.url.startswith('data:') and - iframe.url not in known_frame_urls): - + if ( + iframe.url + and iframe.url != self.page.url + and not iframe.url.startswith('data:') + and iframe.url not in known_frame_urls + ): try: frame_element = await iframe.frame_element() except Exception as e: @@ -205,7 +207,7 @@ class DomService: total_nodes, # processed_nodes, ) - + return await self._construct_dom_tree(frames) @time_execution_async('--construct_dom_tree') @@ -251,14 +253,17 @@ class DomService: if content_root_node: # Find the iframe element in the main page iframe_element_node = next( - (node for node in node_map.values() - if isinstance(node, DOMElementNode) and - node.is_iframe_element(url=frame.url, name=frame.name, id=frame.id)), - None + ( + node + for node in node_map.values() + if isinstance(node, DOMElementNode) + and node.is_iframe_element(url=frame.url, name=frame.name, id=frame.id) + ), + None, ) if iframe_element_node: if not iframe_element_node.children: - iframe_element_node.children=[content_root_node] + iframe_element_node.children = [content_root_node] content_root_node.parent = iframe_element_node continue else: diff --git a/browser_use/dom/views.py b/browser_use/dom/views.py index 911669983..0d2a01ede 100644 --- a/browser_use/dom/views.py +++ b/browser_use/dom/views.py @@ -234,10 +234,13 @@ class DOMElementNode(DOMBaseNode): return '\n'.join(formatted_text) def is_iframe_element(self, url: str, name: str | None = None, id: str | None = None) -> bool: - return (self.tag_name.lower() == 'iframe' and - self.attributes.get('src') == url and - (name is None or self.attributes.get('name') == name) and - (id is None or self.attributes.get('id') == id)) + return ( + self.tag_name.lower() == 'iframe' + and self.attributes.get('src') == url + and (name is None or self.attributes.get('name') == name) + and (id is None or self.attributes.get('id') == id) + ) + SelectorMap = dict[int, DOMElementNode] diff --git a/examples/features/multiple_agents_same_browser.py b/examples/features/multiple_agents_same_browser.py index db59f0b7d..2da1ef017 120000 --- a/examples/features/multiple_agents_same_browser.py +++ b/examples/features/multiple_agents_same_browser.py @@ -1 +1,49 @@ -../browser/multiple_agents_same_browser.py \ No newline at end of file +# See also ../browser/multiple_agents_same_browser.py + +import asyncio +import os +import sys + +sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + +from dotenv import load_dotenv + +load_dotenv() + + +from langchain_openai import ChatOpenAI + +from browser_use import Agent +from browser_use.browser.session import BrowserSession + + +async def main(): + browser_session = BrowserSession( + keep_alive=True, + user_data_dir=None, + headless=False, + ) + await browser_session.start() + + current_agent = None + llm = ChatOpenAI(model='gpt-4o') + + task1 = 'find todays weather on San Francisco and extract it as json' + task2 = 'find todays weather in Zurich and extract it as json' + + agent1 = Agent( + task=task1, + browser_session=browser_session, + llm=llm, + ) + agent2 = Agent( + task=task2, + browser_session=browser_session, + llm=llm, + ) + + await asyncio.gather(agent1.run(), agent2.run()) + await browser_session.kill() + + +asyncio.run(main()) From e1b3ff9e9de918997b15110800153fa7456c1382 Mon Sep 17 00:00:00 2001 From: Ilya Biryukov Date: Thu, 12 Jun 2025 17:40:40 -0700 Subject: [PATCH 3/3] Revert changes to examples/features/multiple_agents_same_browser.py --- .../features/multiple_agents_same_browser.py | 50 +------------------ 1 file changed, 1 insertion(+), 49 deletions(-) diff --git a/examples/features/multiple_agents_same_browser.py b/examples/features/multiple_agents_same_browser.py index 2da1ef017..db59f0b7d 120000 --- a/examples/features/multiple_agents_same_browser.py +++ b/examples/features/multiple_agents_same_browser.py @@ -1,49 +1 @@ -# See also ../browser/multiple_agents_same_browser.py - -import asyncio -import os -import sys - -sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) - -from dotenv import load_dotenv - -load_dotenv() - - -from langchain_openai import ChatOpenAI - -from browser_use import Agent -from browser_use.browser.session import BrowserSession - - -async def main(): - browser_session = BrowserSession( - keep_alive=True, - user_data_dir=None, - headless=False, - ) - await browser_session.start() - - current_agent = None - llm = ChatOpenAI(model='gpt-4o') - - task1 = 'find todays weather on San Francisco and extract it as json' - task2 = 'find todays weather in Zurich and extract it as json' - - agent1 = Agent( - task=task1, - browser_session=browser_session, - llm=llm, - ) - agent2 = Agent( - task=task2, - browser_session=browser_session, - llm=llm, - ) - - await asyncio.gather(agent1.run(), agent2.run()) - await browser_session.kill() - - -asyncio.run(main()) +../browser/multiple_agents_same_browser.py \ No newline at end of file