From 10448eb97af003766c79e68c1e838f8ef469a39a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Wed, 16 Jul 2025 20:27:19 +0200 Subject: [PATCH 01/56] Load addblocker and cookie banner --- browser_use/browser/profile.py | 163 +++++++++++++++++++++++++++++---- 1 file changed, 147 insertions(+), 16 deletions(-) diff --git a/browser_use/browser/profile.py b/browser_use/browser/profile.py index b8ada7f1d..ce20f2a4b 100644 --- a/browser_use/browser/profile.py +++ b/browser_use/browser/profile.py @@ -558,6 +558,10 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro description='List of allowed domains for navigation e.g. ["*.google.com", "https://example.com", "chrome-extension://*"]', ) keep_alive: bool | None = Field(default=None, description='Keep browser alive after agent run.') + enable_default_extensions: bool = Field( + default=False, + description="Enable default extensions for ad blocking (uBlock Origin) and cookie handling (I don't care about cookies). 
Extensions are automatically downloaded and loaded when enabled.", + ) window_size: ViewportSize | None = Field( default=None, description='Browser window size to use when headless=False.', @@ -620,6 +624,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro window_size['width'] = window_size['width'] or self.window_width or 1280 window_size['height'] = window_size['height'] or self.window_height or 1100 self.window_size = window_size + return self @model_validator(mode='after') @@ -699,12 +704,154 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro if self.window_position else [] ), + *(self._get_extension_args() if self.enable_default_extensions else []), ] # convert to dict and back to dedupe and merge duplicate args final_args_list = BrowserLaunchArgs.args_as_list(BrowserLaunchArgs.args_as_dict(pre_conversion_args)) return final_args_list + def _get_extension_args(self) -> list[str]: + """Get Chrome args for enabling default extensions (ad blocker and cookie handler).""" + extension_paths = self._ensure_default_extensions_downloaded() + + args = [ + '--enable-extensions', + '--disable-extensions-file-access-check', + '--disable-extensions-http-throttling', + '--enable-extension-activity-logging', + ] + + if extension_paths: + args.append(f'--load-extension={",".join(extension_paths)}') + + return args + + def _ensure_default_extensions_downloaded(self) -> list[str]: + """ + Ensure default extensions are downloaded and cached locally. + Returns list of paths to extension directories. 
+ """ + from pathlib import Path + + # Extension definitions + extensions = [ + { + 'name': 'uBlock Origin', + 'id': 'cjpalhdlnbpafiamejdnhcphjbkeiagm', + 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dcjpalhdlnbpafiamejdnhcphjbkeiagm%26uc', + }, + { + 'name': "I don't care about cookies", + 'id': 'fihnjjcciajhdojfnbdddfaoknhalnja', + 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dfihnjjcciajhdojfnbdddfaoknhalnja%26uc', + }, + ] + + # Create extensions cache directory + cache_dir = Path.home() / '.browser-use' / 'extensions' + cache_dir.mkdir(parents=True, exist_ok=True) + + extension_paths = [] + + for ext in extensions: + ext_dir = cache_dir / ext['id'] + crx_file = cache_dir / f'{ext["id"]}.crx' + + # Check if extension is already extracted + if ext_dir.exists() and (ext_dir / 'manifest.json').exists(): + extension_paths.append(str(ext_dir)) + continue + + try: + # Download extension if not cached + if not crx_file.exists(): + logger.info(f'๐Ÿ“ฆ Downloading {ext["name"]} extension...') + self._download_extension(ext['url'], crx_file) + + # Extract extension + if crx_file.exists(): + logger.info(f'๐Ÿ“‚ Extracting {ext["name"]} extension...') + self._extract_extension(crx_file, ext_dir) + extension_paths.append(str(ext_dir)) + + except Exception as e: + logger.warning(f'โš ๏ธ Failed to setup {ext["name"]} extension: {e}') + continue + + if extension_paths: + logger.info(f'โœ… Default extensions ready: {len(extension_paths)} extensions loaded') + else: + logger.warning('โš ๏ธ No default extensions could be loaded') + + return extension_paths + + def _download_extension(self, url: str, output_path: Path) -> None: + """Download extension .crx file.""" + import urllib.request + + try: + with urllib.request.urlopen(url) as response: + with open(output_path, 'wb') as f: + f.write(response.read()) + except Exception as e: + raise 
Exception(f'Failed to download extension: {e}') + + def _extract_extension(self, crx_path: Path, extract_dir: Path) -> None: + """Extract .crx file to directory.""" + import os + import zipfile + + # Remove existing directory + if extract_dir.exists(): + import shutil + + shutil.rmtree(extract_dir) + + extract_dir.mkdir(parents=True, exist_ok=True) + + try: + # CRX files are ZIP files with a header, try to extract as ZIP + with zipfile.ZipFile(crx_path, 'r') as zip_ref: + zip_ref.extractall(extract_dir) + + # Verify manifest exists + if not (extract_dir / 'manifest.json').exists(): + raise Exception('No manifest.json found in extension') + + except zipfile.BadZipFile: + # CRX files have a header before the ZIP data + # Skip the CRX header and extract the ZIP part + with open(crx_path, 'rb') as f: + # Read CRX header to find ZIP start + magic = f.read(4) + if magic != b'Cr24': + raise Exception('Invalid CRX file format') + + version = int.from_bytes(f.read(4), 'little') + if version == 2: + pubkey_len = int.from_bytes(f.read(4), 'little') + sig_len = int.from_bytes(f.read(4), 'little') + f.seek(16 + pubkey_len + sig_len) # Skip to ZIP data + elif version == 3: + header_len = int.from_bytes(f.read(4), 'little') + f.seek(12 + header_len) # Skip to ZIP data + + # Extract ZIP data + zip_data = f.read() + + # Write ZIP data to temp file and extract + import tempfile + + with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as temp_zip: + temp_zip.write(zip_data) + temp_zip.flush() + + with zipfile.ZipFile(temp_zip.name, 'r') as zip_ref: + zip_ref.extractall(extract_dir) + + os.unlink(temp_zip.name) + def kwargs_for_launch_persistent_context(self) -> BrowserLaunchPersistentContextArgs: """Return the kwargs for BrowserType.launch().""" return BrowserLaunchPersistentContextArgs(**self.model_dump(exclude={'args'}), args=self.get_args()) @@ -721,22 +868,6 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro """Return the kwargs for 
BrowserType.connect_over_cdp().""" return BrowserLaunchArgs(**self.model_dump(exclude={'args'}), args=self.get_args()) - # def preinstall_extensions(self) -> None: - # """Preinstall the extensions.""" - - # # create the local unpacked extensions dir - # extensions_dir = self.user_data_dir / 'Extensions' - # extensions_dir.mkdir(parents=True, exist_ok=True) - - # # download from the chrome web store using the chrome web store api - # for extension_id in self.extension_ids_to_preinstall: - # extension_path = extensions_dir / f'{extension_id}.crx' - # if extension_path.exists(): - # logger.warning(f'โš ๏ธ Extension {extension_id} is already installed, skipping preinstall.') - # else: - # logger.info(f'๐Ÿ” Preinstalling extension {extension_id}...') - # # TODO: copy this from ArchiveBox implementation - @observe_debug(ignore_input=True, ignore_output=True, name='detect_display_configuration') def detect_display_configuration(self) -> None: """ From ce5ba6bc064194cc5af46daea14d2a3d3c30a0cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Wed, 16 Jul 2025 20:29:07 +0200 Subject: [PATCH 02/56] Install by default --- browser_use/browser/profile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/browser_use/browser/profile.py b/browser_use/browser/profile.py index ce20f2a4b..3bc9aa972 100644 --- a/browser_use/browser/profile.py +++ b/browser_use/browser/profile.py @@ -559,7 +559,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro ) keep_alive: bool | None = Field(default=None, description='Keep browser alive after agent run.') enable_default_extensions: bool = Field( - default=False, + default=True, description="Enable default extensions for ad blocking (uBlock Origin) and cookie handling (I don't care about cookies). 
Extensions are automatically downloaded and loaded when enabled.", ) window_size: ViewportSize | None = Field( From bb62b9d6094ca6e01c1773bb21474c522b72181d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Fri, 18 Jul 2025 17:24:17 +0200 Subject: [PATCH 03/56] Include lmnr debug --- browser_use/browser/session.py | 1 + 1 file changed, 1 insertion(+) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index 17b5b7667..c8afaf964 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -837,6 +837,7 @@ class BrowserSession(BaseModel): atexit.register(shudown_playwright) + @observe_debug(ignore_input=True, ignore_output=True, name='setup_browser_via_passed_objects') async def setup_browser_via_passed_objects(self) -> None: """Override to customize the set up of the connection to an existing browser""" From 3ae32e509229c1c71a643d7198484bbb9369fb18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Fri, 18 Jul 2025 17:40:56 +0200 Subject: [PATCH 04/56] Remove Remove_highlights only use it before we screenshot --- browser_use/agent/service.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index 7d586cce4..f8ce9fbeb 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -1381,12 +1381,6 @@ class Agent(Generic[Context, AgentStructuredOutput]): cached_selector_map = await self.browser_session.get_selector_map() cached_path_hashes = {e.hash.branch_path_hash for e in cached_selector_map.values()} - try: - await self.browser_session.remove_highlights() - except TimeoutError: - # we don't care if this times out - self.logger.debug('Timeout to remove highlights') - for i, action in enumerate(actions): # DO NOT ALLOW TO CALL `done` AS A SINGLE ACTION if i > 0 and 
action.model_dump(exclude_unset=True).get('done') is not None: From 15dd27f2b5bd3441ee5b76ec2d3b62641467b35d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Fri, 18 Jul 2025 18:11:45 +0200 Subject: [PATCH 05/56] Dont take screenshots in multact and move get_browser_state_with_recovery to the browser session --- browser_use/agent/service.py | 32 +++++------------- browser_use/browser/session.py | 60 +++++++++++++++++++++++++++------- 2 files changed, 56 insertions(+), 36 deletions(-) diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index f8ce9fbeb..197e49352 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -641,6 +641,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): self.task = new_task self._message_manager.add_new_task(new_task) + @observe_debug(ignore_input=True, ignore_output=True, name='_raise_if_stopped_or_paused') async def _raise_if_stopped_or_paused(self) -> None: """Utility function that raises an InterruptedError if the agent is stopped or paused.""" @@ -652,24 +653,6 @@ class Agent(Generic[Context, AgentStructuredOutput]): # self.logger.debug('Agent paused after getting state') raise InterruptedError - @observe_debug(ignore_input=True, ignore_output=True, name='get_browser_state_with_recovery') - async def _get_browser_state_with_recovery(self, cache_clickable_elements_hashes: bool = True) -> BrowserStateSummary: - """Get browser state with multiple fallback strategies for error recovery""" - - assert self.browser_session is not None, 'BrowserSession is not set up' - - # Try 1: Full state summary (current implementation) - like main branch - try: - return await self.browser_session.get_state_summary(cache_clickable_elements_hashes) - except Exception as e: - if self.state.last_result is None: - self.state.last_result = [] - self.state.last_result.append(ActionResult(error=str(e))) - self.logger.warning(f'Full state 
retrieval failed: {type(e).__name__}: {e}') - - self.logger.warning('๐Ÿ”„ Falling back to minimal state summary') - return await self.browser_session.get_minimal_state_summary() - @observe(name='agent.step', ignore_output=True, ignore_input=True) @time_execution_async('--step') async def step(self, step_info: AgentStepInfo | None = None) -> None: @@ -704,7 +687,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): assert self.browser_session is not None, 'BrowserSession is not set up' self.logger.debug(f'๐ŸŒ Step {self.state.n_steps + 1}: Getting browser state...') - browser_state_summary = await self._get_browser_state_with_recovery(cache_clickable_elements_hashes=True) + browser_state_summary = await self.browser_session.get_browser_state_with_recovery(cache_clickable_elements_hashes=True) current_page = await self.browser_session.get_current_page() # Check for new downloads after getting browser state (catches PDF auto-downloads and previous step downloads) @@ -1389,7 +1372,9 @@ class Agent(Generic[Context, AgentStructuredOutput]): break if action.get_index() is not None and i != 0: - new_browser_state_summary = await self.browser_session.get_state_summary(cache_clickable_elements_hashes=False) + new_browser_state_summary = await self.browser_session.get_browser_state_with_recovery( + cache_clickable_elements_hashes=False, include_screenshot=False + ) new_selector_map = new_browser_state_summary.selector_map # Detect index change after previous action @@ -1446,9 +1431,6 @@ class Agent(Generic[Context, AgentStructuredOutput]): if results[-1].is_done or results[-1].error or i == len(actions) - 1: break - await asyncio.sleep(self.browser_profile.wait_between_actions) - # hash all elements. 
if it is a subset of cached_state its fine - else break (new elements on page) - except Exception as e: # Handle any exceptions during action execution self.logger.error(f'Action {i + 1} failed: {type(e).__name__}: {e}') @@ -1526,7 +1508,9 @@ class Agent(Generic[Context, AgentStructuredOutput]): async def _execute_history_step(self, history_item: AgentHistory, delay: float) -> list[ActionResult]: """Execute a single step from history with element validation""" assert self.browser_session is not None, 'BrowserSession is not set up' - state = await self.browser_session.get_state_summary(cache_clickable_elements_hashes=False) + state = await self.browser_session.get_browser_state_with_recovery( + cache_clickable_elements_hashes=False, include_screenshot=False + ) if not state or not history_item.model_output: raise ValueError('Invalid state or model output') updated_actions = [] diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index c8afaf964..e4632721c 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -3053,7 +3053,9 @@ class BrowserSession(BaseModel): @observe_debug(ignore_input=True, ignore_output=True) @time_execution_async('--get_state_summary') @require_healthy_browser(usable_page=True, reopen_page=True) - async def get_state_summary(self, cache_clickable_elements_hashes: bool) -> BrowserStateSummary: + async def get_state_summary( + self, cache_clickable_elements_hashes: bool, include_screenshot: bool = True + ) -> BrowserStateSummary: self.logger.debug('๐Ÿ”„ Starting get_state_summary...') """Get a summary of the current browser state @@ -3066,9 +3068,12 @@ class BrowserSession(BaseModel): If True, cache the clickable elements hashes for the current state. This is used to calculate which elements are new to the LLM since the last message, which helps reduce token usage. + include_screenshot: bool + If True, include screenshot in the state summary. 
Set to False to improve performance + when screenshots are not needed (e.g., in multi_act element validation). """ await self._wait_for_page_and_frames_load() - updated_state = await self._get_updated_state() + updated_state = await self._get_updated_state(include_screenshot=include_screenshot) # Find out which elements are new # Do this only if url has not changed @@ -3143,7 +3148,7 @@ class BrowserSession(BaseModel): ) @observe_debug(ignore_input=True, ignore_output=True, name='get_updated_state') - async def _get_updated_state(self, focus_element: int = -1) -> BrowserStateSummary: + async def _get_updated_state(self, focus_element: int = -1, include_screenshot: bool = True) -> BrowserStateSummary: """Update and return state.""" # Check if current page is still valid, if not switch to another available page @@ -3229,13 +3234,16 @@ class BrowserSession(BaseModel): # ) # ) - try: - self.logger.debug('๐Ÿ“ธ Capturing screenshot...') - # Reasonable timeout for screenshot - screenshot_b64 = await self.take_screenshot() - # self.logger.debug('โœ… Screenshot completed') - except Exception as e: - self.logger.warning(f'โŒ Screenshot failed for {_log_pretty_url(page.url)}: {type(e).__name__} {e}') + if include_screenshot: + try: + self.logger.debug('๐Ÿ“ธ Capturing screenshot...') + # Reasonable timeout for screenshot + screenshot_b64 = await self.take_screenshot() + # self.logger.debug('โœ… Screenshot completed') + except Exception as e: + self.logger.warning(f'โŒ Screenshot failed for {_log_pretty_url(page.url)}: {type(e).__name__} {e}') + screenshot_b64 = None + else: screenshot_b64 = None # Get comprehensive page information @@ -4475,7 +4483,9 @@ class BrowserSession(BaseModel): @observe_debug(ignore_input=True, ignore_output=True, name='get_state_summary_with_fallback') @require_healthy_browser(usable_page=True, reopen_page=True) @time_execution_async('--get_state_summary_with_fallback') - async def get_state_summary_with_fallback(self, 
cache_clickable_elements_hashes: bool = True) -> BrowserStateSummary: + async def get_state_summary_with_fallback( + self, cache_clickable_elements_hashes: bool = True, include_screenshot: bool = True + ) -> BrowserStateSummary: """Get browser state with fallback to minimal state on errors This method first tries to get a full state summary. If that fails, @@ -4485,6 +4495,8 @@ class BrowserSession(BaseModel): ----------- cache_clickable_elements_hashes: bool If True, cache the clickable elements hashes for the current state. + include_screenshot: bool + If True, include screenshot in the state summary. Returns: -------- @@ -4492,7 +4504,7 @@ class BrowserSession(BaseModel): """ # Try 1: Full state summary (current implementation) try: - return await self.get_state_summary(cache_clickable_elements_hashes) + return await self.get_state_summary(cache_clickable_elements_hashes, include_screenshot=include_screenshot) except Exception as e: self.logger.warning(f'Full state retrieval failed: {type(e).__name__}: {e}') self.logger.warning('๐Ÿ”„ Falling back to minimal state summary') @@ -4500,6 +4512,30 @@ class BrowserSession(BaseModel): # Try 2: Minimal state summary as fallback return await self.get_minimal_state_summary() + @observe_debug(ignore_input=True, ignore_output=True, name='get_browser_state_with_recovery') + async def get_browser_state_with_recovery( + self, cache_clickable_elements_hashes: bool = True, include_screenshot: bool = True + ) -> BrowserStateSummary: + """Get browser state with multiple fallback strategies for error recovery + + Parameters: + ----------- + cache_clickable_elements_hashes: bool + If True, cache the clickable elements hashes for the current state. + include_screenshot: bool + If True, include screenshot in the state summary. Set to False to improve performance + when screenshots are not needed (e.g., in multi_act element validation). 
+ """ + + # Try 1: Full state summary (current implementation) - like main branch + try: + return await self.get_state_summary(cache_clickable_elements_hashes, include_screenshot=include_screenshot) + except Exception as e: + self.logger.warning(f'Full state retrieval failed: {type(e).__name__}: {e}') + + self.logger.warning('๐Ÿ”„ Falling back to minimal state summary') + return await self.get_minimal_state_summary() + async def _is_pdf_viewer(self, page: Page) -> bool: """ Check if the current page is displaying a PDF in Chrome's PDF viewer. From d84e2c1d132cf7d4ade502d2b97674186cd7f17e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Fri, 18 Jul 2025 18:45:50 +0200 Subject: [PATCH 06/56] Remove 2 sec waiting while spin up --- browser_use/browser/session.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index e4632721c..04e738a04 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -879,6 +879,7 @@ class BrowserSession(BaseModel): self.logger.info(f'๐ŸŽญ Connected to existing user-provided browser: {self.browser_context}') self._set_browser_keep_alive(True) # we connected to an existing browser, dont kill it at the end + @observe_debug(ignore_input=True, ignore_output=True, name='setup_browser_via_browser_pid') async def setup_browser_via_browser_pid(self) -> None: """if browser_pid is provided, calcuclate its CDP URL by looking for --remote-debugging-port=... 
in its CLI args, then connect to it""" @@ -923,8 +924,7 @@ class BrowserSession(BaseModel): # Wait for CDP port to become available (Chrome might still be starting) import httpx - # Add initial delay to give Chrome time to start up before first check - await asyncio.sleep(2) + # No initial sleep needed - the polling loop below handles waiting if Chrome isn't ready yet async with httpx.AsyncClient() as client: for i in range(30): # 30 second timeout @@ -1011,6 +1011,7 @@ class BrowserSession(BaseModel): ) self._set_browser_keep_alive(True) # we connected to an existing browser, dont kill it at the end + @observe_debug(ignore_input=True, ignore_output=True, name='setup_browser_via_wss_url') async def setup_browser_via_wss_url(self) -> None: """check for a passed wss_url, connect to a remote playwright browser server via WSS""" @@ -1045,6 +1046,7 @@ class BrowserSession(BaseModel): ) self._set_browser_keep_alive(True) # we connected to an existing browser, dont kill it at the end + @observe_debug(ignore_input=True, ignore_output=True, name='setup_new_browser_context') @retry(wait=1, retries=2, timeout=45, semaphore_limit=1, semaphore_scope='self', semaphore_lax=False) async def setup_new_browser_context(self) -> None: """Launch a new browser and browser_context""" @@ -1060,6 +1062,7 @@ class BrowserSession(BaseModel): pass await self._unsafe_setup_new_browser_context() + @observe_debug(ignore_input=True, ignore_output=True, name='_unsafe_setup_new_browser_context') async def _unsafe_setup_new_browser_context(self) -> None: """Unsafe browser context setup without retry protection.""" From fc6d5e0d24d8b1126793b4dbbb7ef68b57c17496 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Fri, 18 Jul 2025 18:59:09 +0200 Subject: [PATCH 07/56] Optimize browser setup by running configuration tasks in parallel and skip network wait for new tab pages --- browser_use/browser/session.py | 18 ++++++++++++++---- 1 file 
changed, 14 insertions(+), 4 deletions(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index 04e738a04..7434efd56 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -384,10 +384,13 @@ class BrowserSession(BaseModel): # Ensure we have a context assert self.browser_context, f'Failed to create BrowserContext for browser={self.browser}' - # Configure browser - await self._setup_viewports() - await self._setup_current_page_change_listeners() - await self._start_context_tracing() + # Configure browser - run some setup tasks in parallel for speed + await asyncio.gather( + self._setup_viewports(), + self._setup_current_page_change_listeners(), + self._start_context_tracing(), + return_exceptions=True, + ) self.initialized = True return self @@ -2810,6 +2813,13 @@ class BrowserSession(BaseModel): # Wait for page load page = await self.get_current_page() + + # Skip network waiting for new tab pages (about:blank, chrome://new-tab-page, etc.) 
+ # These pages load instantly and don't need network idle time + if is_new_tab_page(page.url): + self.logger.debug(f'โšก Skipping page load wait for new tab page: {page.url}') + return + try: await self._wait_for_stable_network() From 0809c3956f294b65a1ef2ccbf79ef668fa926b94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Fri, 18 Jul 2025 22:32:56 +0200 Subject: [PATCH 08/56] 0.5 sec timeout --- browser_use/browser/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index 7434efd56..3dc5cbc5d 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -992,7 +992,7 @@ class BrowserSession(BaseModel): except (httpx.ConnectError, httpx.TimeoutException): if i == 0: self.logger.debug(f'โณ Waiting for Chrome CDP port {debug_port} to become available...') - await asyncio.sleep(1) + await asyncio.sleep(0.5) else: self.logger.error(f'โŒ Chrome CDP port {debug_port} did not become available after 30 seconds') self.browser_pid = None From 041cc4470068d02e2a8d84b2df3341c546b9a4ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Fri, 18 Jul 2025 22:44:02 +0200 Subject: [PATCH 09/56] Retries more with less timeout --- browser_use/browser/session.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index 3dc5cbc5d..e3f0cb4b5 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -930,7 +930,7 @@ class BrowserSession(BaseModel): # No initial sleep needed - the polling loop below handles waiting if Chrome isn't ready yet async with httpx.AsyncClient() as client: - for i in range(30): # 30 second timeout + for i in range(30): # timeout # First check if the Chrome process has exited try: chrome_process = 
psutil.Process(pid=self.browser_pid) @@ -1050,7 +1050,7 @@ class BrowserSession(BaseModel): self._set_browser_keep_alive(True) # we connected to an existing browser, dont kill it at the end @observe_debug(ignore_input=True, ignore_output=True, name='setup_new_browser_context') - @retry(wait=1, retries=2, timeout=45, semaphore_limit=1, semaphore_scope='self', semaphore_lax=False) + @retry(wait=0.1, retries=5, timeout=45, semaphore_limit=1, semaphore_scope='self', semaphore_lax=False) async def setup_new_browser_context(self) -> None: """Launch a new browser and browser_context""" # Double-check after semaphore acquisition to prevent duplicate browser launches From c688056d3775082953a268fe7513bbd2bc417815 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Fri, 18 Jul 2025 23:09:41 +0200 Subject: [PATCH 10/56] Wait before get state so that we can check require_healthy_browser for get_state and dont check page in _get_updated_state --- browser_use/browser/session.py | 44 ++-------------------------------- browser_use/mcp/server.py | 2 +- 2 files changed, 3 insertions(+), 43 deletions(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index e3f0cb4b5..5adb55f2b 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -2022,7 +2022,6 @@ class BrowserSession(BaseModel): await page.wait_for_selector(selector, state='visible', timeout=timeout) @observe_debug(name='remove_highlights', ignore_output=True, ignore_input=True) - @require_healthy_browser(usable_page=True, reopen_page=True) @time_execution_async('--remove_highlights') @retry(timeout=2, retries=0) async def remove_highlights(self): @@ -3085,7 +3084,7 @@ class BrowserSession(BaseModel): If True, include screenshot in the state summary. Set to False to improve performance when screenshots are not needed (e.g., in multi_act element validation). 
""" - await self._wait_for_page_and_frames_load() + updated_state = await self._get_updated_state(include_screenshot=include_screenshot) # Find out which elements are new @@ -3167,14 +3166,6 @@ class BrowserSession(BaseModel): # Check if current page is still valid, if not switch to another available page page = await self.get_current_page() - try: - # Test if page is still accessible - # NOTE: This also happens on invalid urls like www.sadfdsafdssdafd.com - await asyncio.wait_for(page.evaluate('1'), timeout=2.5) - except Exception as e: - self.logger.debug(f'๐Ÿ‘‹ Current page is not accessible: {type(e).__name__}: {e}') - raise BrowserError('Page is not accessible') - try: self.logger.debug('๐Ÿงน Removing highlights...') try: @@ -4493,38 +4484,6 @@ class BrowserSession(BaseModel): except Exception as e: self.logger.debug(f'โŒ Failed to show ๐Ÿ“€ DVD loading animation: {type(e).__name__}: {e}') - @observe_debug(ignore_input=True, ignore_output=True, name='get_state_summary_with_fallback') - @require_healthy_browser(usable_page=True, reopen_page=True) - @time_execution_async('--get_state_summary_with_fallback') - async def get_state_summary_with_fallback( - self, cache_clickable_elements_hashes: bool = True, include_screenshot: bool = True - ) -> BrowserStateSummary: - """Get browser state with fallback to minimal state on errors - - This method first tries to get a full state summary. If that fails, - it falls back to a minimal state summary to allow basic navigation. - - Parameters: - ----------- - cache_clickable_elements_hashes: bool - If True, cache the clickable elements hashes for the current state. - include_screenshot: bool - If True, include screenshot in the state summary. 
- - Returns: - -------- - BrowserStateSummary: Either full state or minimal fallback state - """ - # Try 1: Full state summary (current implementation) - try: - return await self.get_state_summary(cache_clickable_elements_hashes, include_screenshot=include_screenshot) - except Exception as e: - self.logger.warning(f'Full state retrieval failed: {type(e).__name__}: {e}') - self.logger.warning('๐Ÿ”„ Falling back to minimal state summary') - - # Try 2: Minimal state summary as fallback - return await self.get_minimal_state_summary() - @observe_debug(ignore_input=True, ignore_output=True, name='get_browser_state_with_recovery') async def get_browser_state_with_recovery( self, cache_clickable_elements_hashes: bool = True, include_screenshot: bool = True @@ -4539,6 +4498,7 @@ class BrowserSession(BaseModel): If True, include screenshot in the state summary. Set to False to improve performance when screenshots are not needed (e.g., in multi_act element validation). """ + await self._wait_for_page_and_frames_load() # Try 1: Full state summary (current implementation) - like main branch try: diff --git a/browser_use/mcp/server.py b/browser_use/mcp/server.py index 84a0e840b..55d35ee82 100644 --- a/browser_use/mcp/server.py +++ b/browser_use/mcp/server.py @@ -659,7 +659,7 @@ class BrowserUseServer: if not self.browser_session: return 'Error: No browser session active' - state = await self.browser_session.get_state_summary(cache_clickable_elements_hashes=False) + state = await self.browser_session.get_browser_state_with_recovery(cache_clickable_elements_hashes=False) result = { 'url': state.url, From c153a9bed85ff2df72458f46943fcb411b7b0ecd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Fri, 18 Jul 2025 23:42:37 +0200 Subject: [PATCH 11/56] Better error message for llm --- browser_use/browser/session.py | 5 +++-- browser_use/dom/service.py | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git 
a/browser_use/browser/session.py b/browser_use/browser/session.py index 5adb55f2b..f652aa6a2 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -2075,7 +2075,8 @@ class BrowserSession(BaseModel): element_handle = await self.get_locate_element(element_node) if element_handle is None: - raise Exception(f'Element: {repr(element_node)} not found') + self.logger.debug(f'Element: {repr(element_node)} not found') + raise Exception('Element not found') async def perform_click(click_func): """Performs the actual click, handling both download and navigation scenarios.""" @@ -2169,7 +2170,7 @@ class BrowserSession(BaseModel): except URLNotAllowedError as e: raise e except Exception as e: - raise Exception(f'Failed to click element: {repr(element_node)}. Error: {str(e)}') + raise Exception(f'Failed to click element. Error: {str(e)}') @time_execution_async('--get_tabs_info') @retry(timeout=6, retries=1) diff --git a/browser_use/dom/service.py b/browser_use/dom/service.py index dc059732a..1bc0000a3 100644 --- a/browser_use/dom/service.py +++ b/browser_use/dom/service.py @@ -15,6 +15,7 @@ from browser_use.dom.views import ( SelectorMap, ViewportInfo, ) +from browser_use.observability import observe_debug from browser_use.utils import is_new_tab_page, time_execution_async # @dataclass @@ -34,6 +35,7 @@ class DomService: self.js_code = resources.files('browser_use.dom.dom_tree').joinpath('index.js').read_text() # region - Clickable elements + @observe_debug(ignore_input=True, ignore_output=True, name='get_clickable_elements') @time_execution_async('--get_clickable_elements') async def get_clickable_elements( self, From 5f77942f3eb13d735f24a061c3d9b68fa718a568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 00:10:11 +0200 Subject: [PATCH 12/56] Enhance debugging by adding observe_debug decorators to _click_element_node and get_locate_element methods --- 
browser_use/browser/session.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index f652aa6a2..81743d1e9 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -2062,6 +2062,7 @@ class BrowserSession(BaseModel): @require_healthy_browser(usable_page=True, reopen_page=True) @time_execution_async('--click_element_node') + @observe_debug(ignore_input=True, name='click_element_node') async def _click_element_node(self, element_node: DOMElementNode) -> str | None: """ Optimized method to click an element using xpath. @@ -3842,6 +3843,7 @@ class BrowserSession(BaseModel): @require_healthy_browser(usable_page=True, reopen_page=True) @time_execution_async('--get_locate_element') + @observe_debug(ignore_input=True, name='get_locate_element') async def get_locate_element(self, element: DOMElementNode) -> ElementHandle | None: page = await self.get_current_page() current_frame = page From 28c80a595a91ccf8514d4d2f711fd48cde4a440d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 01:14:28 +0200 Subject: [PATCH 13/56] Improve error handling during browser setup by checking for exceptions in parallel task results and providing clearer error messages. 
--- browser_use/browser/session.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index 81743d1e9..3066d8b35 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -385,13 +385,19 @@ class BrowserSession(BaseModel): assert self.browser_context, f'Failed to create BrowserContext for browser={self.browser}' # Configure browser - run some setup tasks in parallel for speed - await asyncio.gather( + setup_results = await asyncio.gather( self._setup_viewports(), self._setup_current_page_change_listeners(), self._start_context_tracing(), return_exceptions=True, ) + # Check for exceptions in setup results + for i, result in enumerate(setup_results): + if isinstance(result, Exception): + setup_task_names = ['_setup_viewports', '_setup_current_page_change_listeners', '_start_context_tracing'] + raise Exception(f'Browser setup failed in {setup_task_names[i]}: {result}') from result + self.initialized = True return self From 7a80d2a1ab5d4affdcc066921eea29f6e704f948 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 01:14:37 +0200 Subject: [PATCH 14/56] Refactor service.py by removing unused imports to streamline the codebase. --- browser_use/sync/service.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/browser_use/sync/service.py b/browser_use/sync/service.py index 053011831..6e6729a03 100644 --- a/browser_use/sync/service.py +++ b/browser_use/sync/service.py @@ -3,11 +3,9 @@ Cloud sync service for sending events to the Browser Use cloud. 
""" import asyncio -import json import logging import shutil -import anyio import httpx from bubus import BaseEvent From b7b05d12a145fb2dfd8fc2796bf58a9d531a5aa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 01:28:24 +0200 Subject: [PATCH 15/56] Better waiting --- browser_use/browser/session.py | 140 +++++++++++++++++++++++++++++++-- 1 file changed, 135 insertions(+), 5 deletions(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index 3066d8b35..545d8f628 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -2808,12 +2808,137 @@ class BrowserSession(BaseModel): if elapsed > 1: self.logger.debug(f'๐Ÿ’ค Page network traffic calmed down after {now - start_time:.2f} seconds') - @observe_debug(ignore_input=True, ignore_output=True, name='wait_for_page_and_frames_load') - async def _wait_for_page_and_frames_load(self, timeout_overwrite: float | None = None): + async def _wait_for_dom_stability(self, max_wait: float = 3.0, stability_time: float = 0.5): """ - Ensures page is fully loaded before continuing. - Waits for either network to be idle or minimum WAIT_TIME, whichever is longer. + Wait for DOM mutations to stop, indicating the page has stabilized. 
+ + This prevents race conditions where: + - User types in input โ†’ network idle โ†’ but dropdown still loading via JS + - Click happens โ†’ page updates โ†’ but animations/transitions still running + - Form submission โ†’ success response โ†’ but redirect/updates still pending + + Parameters: + ----------- + max_wait: float + Maximum time to wait for stability (prevents infinite waiting) + stability_time: float + How long DOM must be stable before considering it "done" + """ + page = await self.get_current_page() + + # JavaScript to monitor DOM mutations + monitor_script = """ + () => { + return new Promise((resolve) => { + let mutationCount = 0; + let lastMutationTime = Date.now(); + let stabilityTimeout; + const maxWaitMs = arguments[0] * 1000; + const stabilityMs = arguments[1] * 1000; + + // Create mutation observer + const observer = new MutationObserver((mutations) => { + // Filter out irrelevant mutations (like style changes from highlights) + const relevantMutations = mutations.filter(mutation => { + // Ignore changes to highlighting-related attributes + if (mutation.type === 'attributes') { + const attrName = mutation.attributeName; + if (attrName && ( + attrName.includes('highlight') || + attrName.includes('border') || + attrName.includes('outline') || + attrName === 'style' && mutation.target.style.outline + )) { + return false; + } + } + return true; + }); + + if (relevantMutations.length > 0) { + mutationCount += relevantMutations.length; + lastMutationTime = Date.now(); + + // Clear existing stability timeout + if (stabilityTimeout) { + clearTimeout(stabilityTimeout); + } + + // Set new stability timeout + stabilityTimeout = setTimeout(() => { + observer.disconnect(); + resolve({ + stable: true, + mutationCount, + waitTime: Date.now() - lastMutationTime + }); + }, stabilityMs); + } + }); + + // Start observing + observer.observe(document.body, { + childList: true, + subtree: true, + attributes: true, + attributeFilter: ['class', 'style', 'hidden', 
'disabled', 'aria-expanded'] + }); + + // Max wait timeout + setTimeout(() => { + observer.disconnect(); + if (stabilityTimeout) clearTimeout(stabilityTimeout); + resolve({ + stable: false, + mutationCount, + waitTime: maxWaitMs, + timeout: true + }); + }, maxWaitMs); + + // Initial stability timeout (in case DOM is already stable) + stabilityTimeout = setTimeout(() => { + observer.disconnect(); + resolve({ + stable: true, + mutationCount: 0, + waitTime: stabilityMs, + initiallyStable: true + }); + }, stabilityMs); + }); + } + """ + + try: + start_time = time.time() + result = await page.evaluate(monitor_script, max_wait, stability_time) + + elapsed = time.time() - start_time + if result.get('stable'): + if result.get('initiallyStable'): + self.logger.debug('๐ŸŽฏ DOM was already stable') + else: + self.logger.debug(f'๐ŸŽฏ DOM stabilized after {elapsed:.2f}s ({result.get("mutationCount", 0)} mutations)') + else: + self.logger.debug(f'โฐ DOM stability timeout after {elapsed:.2f}s ({result.get("mutationCount", 0)} mutations)') + + except Exception as e: + self.logger.debug(f'๐Ÿ” DOM stability check failed: {type(e).__name__}: {e}') + + @observe_debug(ignore_input=True, ignore_output=True, name='wait_for_page_and_frames_load') + async def _wait_for_page_and_frames_load(self, timeout_overwrite: float | None = None, wait_for_dom_stability: bool = True): + """ + Ensures page is fully loaded and stable before continuing. + Waits for network idle, DOM stability, and minimum WAIT_TIME. Also checks if the loaded URL is allowed. + + Parameters: + ----------- + timeout_overwrite: float | None + Override the minimum wait time + wait_for_dom_stability: bool + If True, also wait for DOM mutations to stop (prevents race conditions with dropdowns, etc.) 
""" # Start timing start_time = time.time() @@ -2830,6 +2955,10 @@ class BrowserSession(BaseModel): try: await self._wait_for_stable_network() + # Wait for DOM stability if requested (prevents race conditions with JS-driven changes) + if wait_for_dom_stability: + await self._wait_for_dom_stability() + # Check if the loaded URL is allowed await self._check_and_handle_navigation(page) except URLNotAllowedError as e: @@ -3496,6 +3625,7 @@ class BrowserSession(BaseModel): 'Browser is unable to load any new about:blank pages (something is very wrong or browser is extremely overloaded)' ) + @observe_debug(ignore_input=True, name='recover_unresponsive_page') async def _recover_unresponsive_page(self, calling_method: str, timeout_ms: int | None = None) -> None: """Recover from an unresponsive page by closing and reopening it.""" self.logger.warning(f'โš ๏ธ Page JS engine became unresponsive in {calling_method}(), attempting recovery...') @@ -4507,10 +4637,10 @@ class BrowserSession(BaseModel): If True, include screenshot in the state summary. Set to False to improve performance when screenshots are not needed (e.g., in multi_act element validation). 
""" - await self._wait_for_page_and_frames_load() # Try 1: Full state summary (current implementation) - like main branch try: + await self._wait_for_page_and_frames_load() return await self.get_state_summary(cache_clickable_elements_hashes, include_screenshot=include_screenshot) except Exception as e: self.logger.warning(f'Full state retrieval failed: {type(e).__name__}: {e}') From b7d29f54e8e68b1a22691dc0760c31c4f682d2a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 01:40:41 +0200 Subject: [PATCH 16/56] Wait inside multiact --- browser_use/agent/service.py | 91 ++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 40 deletions(-) diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index 82a7fa800..00b13f2ef 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -1361,52 +1361,63 @@ class Agent(Generic[Context, AgentStructuredOutput]): results: list[ActionResult] = [] assert self.browser_session is not None, 'BrowserSession is not set up' - cached_selector_map = await self.browser_session.get_selector_map() - cached_path_hashes = {e.hash.branch_path_hash for e in cached_selector_map.values()} - - for i, action in enumerate(actions): - # DO NOT ALLOW TO CALL `done` AS A SINGLE ACTION - if i > 0 and action.model_dump(exclude_unset=True).get('done') is not None: - msg = f'Done action is allowed only as a single action - stopped after action {i} / {len(actions)}.' 
- logger.info(msg) + cached_selector_map = {} + cached_path_hashes = set() + # check all actions if any has index, if so, get the selector map + for action in actions: + if action.get_index() is not None: + cached_selector_map = await self.browser_session.get_selector_map() + cached_path_hashes = {e.hash.branch_path_hash for e in cached_selector_map.values()} break - if action.get_index() is not None and i != 0: - new_browser_state_summary = await self.browser_session.get_browser_state_with_recovery( - cache_clickable_elements_hashes=False, include_screenshot=False - ) - new_selector_map = new_browser_state_summary.selector_map - - # Detect index change after previous action - orig_target = cached_selector_map.get(action.get_index()) # type: ignore - orig_target_hash = orig_target.hash.branch_path_hash if orig_target else None - new_target = new_selector_map.get(action.get_index()) # type: ignore - new_target_hash = new_target.hash.branch_path_hash if new_target else None - if orig_target_hash != new_target_hash: - msg = f'Element index changed after action {i} / {len(actions)}, because page changed.' + # loop over actions and execute them + for i, action in enumerate(actions): + if i > 0: + # ONLY ALLOW TO CALL `done` IF IT IS A SINGLE ACTION + if action.model_dump(exclude_unset=True).get('done') is not None: + msg = f'Done action is allowed only as a single action - stopped after action {i} / {len(actions)}.' logger.info(msg) - results.append( - ActionResult( - extracted_content=msg, - include_in_memory=True, - long_term_memory=msg, - ) - ) break - new_path_hashes = {e.hash.branch_path_hash for e in new_selector_map.values()} - if check_for_new_elements and not new_path_hashes.issubset(cached_path_hashes): - # next action requires index but there are new elements on the page - msg = f'Something new appeared after action {i} / {len(actions)}, following actions are NOT executed and should be retried.' 
- logger.info(msg) - results.append( - ActionResult( - extracted_content=msg, - include_in_memory=True, - long_term_memory=msg, - ) + if action.get_index() is not None: + new_browser_state_summary = await self.browser_session.get_browser_state_with_recovery( + cache_clickable_elements_hashes=False, include_screenshot=False ) - break + new_selector_map = new_browser_state_summary.selector_map + + # Detect index change after previous action + orig_target = cached_selector_map.get(action.get_index()) # type: ignore + orig_target_hash = orig_target.hash.branch_path_hash if orig_target else None + new_target = new_selector_map.get(action.get_index()) # type: ignore + new_target_hash = new_target.hash.branch_path_hash if new_target else None + if orig_target_hash != new_target_hash: + msg = f'Element index changed after action {i} / {len(actions)}, because page changed.' + logger.info(msg) + results.append( + ActionResult( + extracted_content=msg, + include_in_memory=True, + long_term_memory=msg, + ) + ) + break + + new_path_hashes = {e.hash.branch_path_hash for e in new_selector_map.values()} + if check_for_new_elements and not new_path_hashes.issubset(cached_path_hashes): + # next action requires index but there are new elements on the page + msg = f'Something new appeared after action {i} / {len(actions)}, following actions are NOT executed and should be retried.' 
+ logger.info(msg) + results.append( + ActionResult( + extracted_content=msg, + include_in_memory=True, + long_term_memory=msg, + ) + ) + break + + # wait between actions + await asyncio.sleep(self.browser_profile.wait_between_actions) try: await self._raise_if_stopped_or_paused() From f347c8b472843ece4dabaf80a4f5c386d38dd8bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 01:49:45 +0200 Subject: [PATCH 17/56] Refactor DOM stability check in BrowserSession to use element count for stability verification, reducing max wait time and improving reliability of checks. --- browser_use/browser/session.py | 135 +++++++++------------------------ 1 file changed, 34 insertions(+), 101 deletions(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index 545d8f628..410596dd7 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -2808,120 +2808,53 @@ class BrowserSession(BaseModel): if elapsed > 1: self.logger.debug(f'๐Ÿ’ค Page network traffic calmed down after {now - start_time:.2f} seconds') - async def _wait_for_dom_stability(self, max_wait: float = 3.0, stability_time: float = 0.5): + async def _wait_for_dom_stability(self, max_wait: float = 2.0, stability_checks: int = 3): """ - Wait for DOM mutations to stop, indicating the page has stabilized. + Wait for DOM to stabilize by checking element counts remain constant. 
This prevents race conditions where: - - User types in input โ†’ network idle โ†’ but dropdown still loading via JS - - Click happens โ†’ page updates โ†’ but animations/transitions still running - - Form submission โ†’ success response โ†’ but redirect/updates still pending + - User types in input โ†’ dropdown appears after state capture + - Click happens โ†’ content updates after state capture + - Form submission โ†’ UI changes after state capture Parameters: ----------- max_wait: float - Maximum time to wait for stability (prevents infinite waiting) - stability_time: float - How long DOM must be stable before considering it "done" + Maximum time to wait for stability + stability_checks: int + Number of consistent checks needed to consider DOM stable """ page = await self.get_current_page() - # JavaScript to monitor DOM mutations - monitor_script = """ - () => { - return new Promise((resolve) => { - let mutationCount = 0; - let lastMutationTime = Date.now(); - let stabilityTimeout; - const maxWaitMs = arguments[0] * 1000; - const stabilityMs = arguments[1] * 1000; - - // Create mutation observer - const observer = new MutationObserver((mutations) => { - // Filter out irrelevant mutations (like style changes from highlights) - const relevantMutations = mutations.filter(mutation => { - // Ignore changes to highlighting-related attributes - if (mutation.type === 'attributes') { - const attrName = mutation.attributeName; - if (attrName && ( - attrName.includes('highlight') || - attrName.includes('border') || - attrName.includes('outline') || - attrName === 'style' && mutation.target.style.outline - )) { - return false; - } - } - return true; - }); - - if (relevantMutations.length > 0) { - mutationCount += relevantMutations.length; - lastMutationTime = Date.now(); - - // Clear existing stability timeout - if (stabilityTimeout) { - clearTimeout(stabilityTimeout); - } - - // Set new stability timeout - stabilityTimeout = setTimeout(() => { - observer.disconnect(); - 
resolve({ - stable: true, - mutationCount, - waitTime: Date.now() - lastMutationTime - }); - }, stabilityMs); - } - }); - - // Start observing - observer.observe(document.body, { - childList: true, - subtree: true, - attributes: true, - attributeFilter: ['class', 'style', 'hidden', 'disabled', 'aria-expanded'] - }); - - // Max wait timeout - setTimeout(() => { - observer.disconnect(); - if (stabilityTimeout) clearTimeout(stabilityTimeout); - resolve({ - stable: false, - mutationCount, - waitTime: maxWaitMs, - timeout: true - }); - }, maxWaitMs); - - // Initial stability timeout (in case DOM is already stable) - stabilityTimeout = setTimeout(() => { - observer.disconnect(); - resolve({ - stable: true, - mutationCount: 0, - waitTime: stabilityMs, - initiallyStable: true - }); - }, stabilityMs); - }); - } - """ - try: start_time = time.time() - result = await page.evaluate(monitor_script, max_wait, stability_time) + check_interval = 0.2 # Check every 200ms + consistent_checks = 0 + last_element_count = 0 + + while time.time() - start_time < max_wait: + try: + # Get current element count as a simple stability metric + current_count = await page.evaluate("() => document.querySelectorAll('*').length") + + if current_count == last_element_count: + consistent_checks += 1 + if consistent_checks >= stability_checks: + elapsed = time.time() - start_time + self.logger.debug(f'๐ŸŽฏ DOM stabilized after {elapsed:.2f}s (element count: {current_count})') + return + else: + consistent_checks = 0 + last_element_count = current_count + + await asyncio.sleep(check_interval) + + except Exception: + # If page becomes inaccessible, consider it stable + break elapsed = time.time() - start_time - if result.get('stable'): - if result.get('initiallyStable'): - self.logger.debug('๐ŸŽฏ DOM was already stable') - else: - self.logger.debug(f'๐ŸŽฏ DOM stabilized after {elapsed:.2f}s ({result.get("mutationCount", 0)} mutations)') - else: - self.logger.debug(f'โฐ DOM stability timeout after 
{elapsed:.2f}s ({result.get("mutationCount", 0)} mutations)') + self.logger.debug(f'โฐ DOM stability timeout after {elapsed:.2f}s') except Exception as e: self.logger.debug(f'๐Ÿ” DOM stability check failed: {type(e).__name__}: {e}') @@ -2938,7 +2871,7 @@ class BrowserSession(BaseModel): timeout_overwrite: float | None Override the minimum wait time wait_for_dom_stability: bool - If True, also wait for DOM mutations to stop (prevents race conditions with dropdowns, etc.) + If True, wait for DOM mutations to stop (prevents race conditions with dropdowns, etc.) """ # Start timing start_time = time.time() From b24f111f7ea56bd902ec8b8184bd0ece74ffef8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 02:08:25 +0200 Subject: [PATCH 18/56] Remove _wait_for_page_and_frames_load --- browser_use/browser/session.py | 59 +--------------------------------- 1 file changed, 1 insertion(+), 58 deletions(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index 410596dd7..e22a1d699 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -2808,59 +2808,8 @@ class BrowserSession(BaseModel): if elapsed > 1: self.logger.debug(f'๐Ÿ’ค Page network traffic calmed down after {now - start_time:.2f} seconds') - async def _wait_for_dom_stability(self, max_wait: float = 2.0, stability_checks: int = 3): - """ - Wait for DOM to stabilize by checking element counts remain constant. 
- - This prevents race conditions where: - - User types in input โ†’ dropdown appears after state capture - - Click happens โ†’ content updates after state capture - - Form submission โ†’ UI changes after state capture - - Parameters: - ----------- - max_wait: float - Maximum time to wait for stability - stability_checks: int - Number of consistent checks needed to consider DOM stable - """ - page = await self.get_current_page() - - try: - start_time = time.time() - check_interval = 0.2 # Check every 200ms - consistent_checks = 0 - last_element_count = 0 - - while time.time() - start_time < max_wait: - try: - # Get current element count as a simple stability metric - current_count = await page.evaluate("() => document.querySelectorAll('*').length") - - if current_count == last_element_count: - consistent_checks += 1 - if consistent_checks >= stability_checks: - elapsed = time.time() - start_time - self.logger.debug(f'๐ŸŽฏ DOM stabilized after {elapsed:.2f}s (element count: {current_count})') - return - else: - consistent_checks = 0 - last_element_count = current_count - - await asyncio.sleep(check_interval) - - except Exception: - # If page becomes inaccessible, consider it stable - break - - elapsed = time.time() - start_time - self.logger.debug(f'โฐ DOM stability timeout after {elapsed:.2f}s') - - except Exception as e: - self.logger.debug(f'๐Ÿ” DOM stability check failed: {type(e).__name__}: {e}') - @observe_debug(ignore_input=True, ignore_output=True, name='wait_for_page_and_frames_load') - async def _wait_for_page_and_frames_load(self, timeout_overwrite: float | None = None, wait_for_dom_stability: bool = True): + async def _wait_for_page_and_frames_load(self, timeout_overwrite: float | None = None): """ Ensures page is fully loaded and stable before continuing. Waits for network idle, DOM stability, and minimum WAIT_TIME. 
@@ -2870,8 +2819,6 @@ class BrowserSession(BaseModel): ----------- timeout_overwrite: float | None Override the minimum wait time - wait_for_dom_stability: bool - If True, wait for DOM mutations to stop (prevents race conditions with dropdowns, etc.) """ # Start timing start_time = time.time() @@ -2888,10 +2835,6 @@ class BrowserSession(BaseModel): try: await self._wait_for_stable_network() - # Wait for DOM stability if requested (prevents race conditions with JS-driven changes) - if wait_for_dom_stability: - await self._wait_for_dom_stability() - # Check if the loaded URL is allowed await self._check_and_handle_navigation(page) except URLNotAllowedError as e: From 05fe25a45405cfeba7f609892c1ca194b5b9cc18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 02:13:12 +0200 Subject: [PATCH 19/56] Change cookie extension to i still dont care about cookies --- browser_use/browser/profile.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/browser_use/browser/profile.py b/browser_use/browser/profile.py index 3bc9aa972..4f5634118 100644 --- a/browser_use/browser/profile.py +++ b/browser_use/browser/profile.py @@ -560,7 +560,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro keep_alive: bool | None = Field(default=None, description='Keep browser alive after agent run.') enable_default_extensions: bool = Field( default=True, - description="Enable default extensions for ad blocking (uBlock Origin) and cookie handling (I don't care about cookies). Extensions are automatically downloaded and loaded when enabled.", + description="Enable default extensions for ad blocking (uBlock Origin) and cookie handling (I still don't care about cookies). 
Extensions are automatically downloaded and loaded when enabled.", ) window_size: ViewportSize | None = Field( default=None, @@ -742,9 +742,9 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dcjpalhdlnbpafiamejdnhcphjbkeiagm%26uc', }, { - 'name': "I don't care about cookies", - 'id': 'fihnjjcciajhdojfnbdddfaoknhalnja', - 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dfihnjjcciajhdojfnbdddfaoknhalnja%26uc', + 'name': "I still don't care about cookies", + 'id': 'edibdbjcniadpccecjdfdjjppcpchdlm', + 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dedibdbjcniadpccecjdfdjjppcpchdlm%26uc', }, ] From 59992e236a6f516ce54afe182a3f1e1dcb7716ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 02:29:31 +0200 Subject: [PATCH 20/56] Optimize browser extensions for automation - Update 'I don't care about cookies' to 'I still don't care about cookies' (community-maintained) - Add Decentraleyes for CDN tracking protection and performance - Add ClearURLs for automatic URL tracking parameter removal - Add FastForward for automatic link shortener bypass - Remove Privacy Badger (requires learning over multiple sessions) - Remove 'Don't track me Google' (redundant with other extensions) - Remove 'Block image' (requires manual activation, unusable in automation) Final extension set: 5 fully automatic extensions optimized for single-session browser automation --- browser_use/browser/profile.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/browser_use/browser/profile.py b/browser_use/browser/profile.py index 4f5634118..38de23b80 100644 --- a/browser_use/browser/profile.py +++ 
b/browser_use/browser/profile.py @@ -560,7 +560,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro keep_alive: bool | None = Field(default=None, description='Keep browser alive after agent run.') enable_default_extensions: bool = Field( default=True, - description="Enable default extensions for ad blocking (uBlock Origin) and cookie handling (I still don't care about cookies). Extensions are automatically downloaded and loaded when enabled.", + description="Enable default extensions: ad blocking (uBlock Origin), cookie handling (I still don't care about cookies), privacy protection (Decentraleyes, ClearURLs), and navigation optimization (FastForward). All extensions work automatically without manual intervention. Extensions are automatically downloaded and loaded when enabled.", ) window_size: ViewportSize | None = Field( default=None, @@ -746,6 +746,21 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro 'id': 'edibdbjcniadpccecjdfdjjppcpchdlm', 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dedibdbjcniadpccecjdfdjjppcpchdlm%26uc', }, + { + 'name': 'Decentraleyes', + 'id': 'ldpochfccmkkmhdbclfhpagapcfdljkj', + 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dldpochfccmkkmhdbclfhpagapcfdljkj%26uc', + }, + { + 'name': 'ClearURLs', + 'id': 'lckanjgmijmafbedllaakclkaicjfmnk', + 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dlckanjgmijmafbedllaakclkaicjfmnk%26uc', + }, + { + 'name': 'FastForward', + 'id': 'icallnadddjmdinamnolclfjanhfoafe', + 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dicallnadddjmdinamnolclfjanhfoafe%26uc', + }, ] # Create extensions cache directory @@ -780,7 +795,9 @@ class BrowserProfile(BrowserConnectArgs, 
BrowserLaunchPersistentContextArgs, Bro continue if extension_paths: - logger.info(f'โœ… Default extensions ready: {len(extension_paths)} extensions loaded') + logger.info( + f"โœ… Default extensions ready: {len(extension_paths)} extensions loaded (uBlock Origin, I still don't care about cookies, Decentraleyes, ClearURLs, FastForward)" + ) else: logger.warning('โš ๏ธ No default extensions could be loaded') From 48b1c5d202f8da2435ff81951bd7962eb896af3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 03:25:37 +0200 Subject: [PATCH 21/56] No extension --- browser_use/browser/profile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/browser_use/browser/profile.py b/browser_use/browser/profile.py index 38de23b80..b8cf8600a 100644 --- a/browser_use/browser/profile.py +++ b/browser_use/browser/profile.py @@ -559,7 +559,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro ) keep_alive: bool | None = Field(default=None, description='Keep browser alive after agent run.') enable_default_extensions: bool = Field( - default=True, + default=False, description="Enable default extensions: ad blocking (uBlock Origin), cookie handling (I still don't care about cookies), privacy protection (Decentraleyes, ClearURLs), and navigation optimization (FastForward). All extensions work automatically without manual intervention. 
Extensions are automatically downloaded and loaded when enabled.", ) window_size: ViewportSize | None = Field( From 65f4c01bcefe6922f5ee09aa1d49f63256a47d25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 11:02:21 +0200 Subject: [PATCH 22/56] Disable extension by default comparison --- browser_use/browser/profile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/browser_use/browser/profile.py b/browser_use/browser/profile.py index 4f5634118..17812f909 100644 --- a/browser_use/browser/profile.py +++ b/browser_use/browser/profile.py @@ -559,7 +559,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro ) keep_alive: bool | None = Field(default=None, description='Keep browser alive after agent run.') enable_default_extensions: bool = Field( - default=True, + default=False, description="Enable default extensions for ad blocking (uBlock Origin) and cookie handling (I still don't care about cookies). 
Extensions are automatically downloaded and loaded when enabled.", ) window_size: ViewportSize | None = Field( From c3f2fbba1e563e488898696181f6a55c9e4228fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 11:48:27 +0200 Subject: [PATCH 23/56] True extension --- browser_use/browser/profile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/browser_use/browser/profile.py b/browser_use/browser/profile.py index 17812f909..4f5634118 100644 --- a/browser_use/browser/profile.py +++ b/browser_use/browser/profile.py @@ -559,7 +559,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro ) keep_alive: bool | None = Field(default=None, description='Keep browser alive after agent run.') enable_default_extensions: bool = Field( - default=False, + default=True, description="Enable default extensions for ad blocking (uBlock Origin) and cookie handling (I still don't care about cookies). 
Extensions are automatically downloaded and loaded when enabled.", ) window_size: ViewportSize | None = Field( From e0da77b545f2b15c7ee85e687bb0e1400ce6be4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 15:41:04 +0200 Subject: [PATCH 24/56] Disable extensions by default --- browser_use/browser/profile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/browser_use/browser/profile.py b/browser_use/browser/profile.py index 4f5634118..17812f909 100644 --- a/browser_use/browser/profile.py +++ b/browser_use/browser/profile.py @@ -559,7 +559,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro ) keep_alive: bool | None = Field(default=None, description='Keep browser alive after agent run.') enable_default_extensions: bool = Field( - default=True, + default=False, description="Enable default extensions for ad blocking (uBlock Origin) and cookie handling (I still don't care about cookies). Extensions are automatically downloaded and loaded when enabled.", ) window_size: ViewportSize | None = Field( From 0ea8ccbff63af8a76391cebe33bf283fa69827eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 15:53:19 +0200 Subject: [PATCH 25/56] Add Chrome flags to suppress extension welcome tabs - Add --disable-extensions-http-throttling to prevent extension communication issues - Add --disable-extension-activity-logging to reduce extension logging overhead - Add --extensions-on-chrome-urls to allow extensions on chrome:// pages - Add --disable-default-apps to prevent default app welcome screens These flags help prevent extensions from opening welcome/onboarding tabs that interfere with automation. 
--- browser_use/browser/profile.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/browser_use/browser/profile.py b/browser_use/browser/profile.py index f92d767f0..5bc34ea61 100644 --- a/browser_use/browser/profile.py +++ b/browser_use/browser/profile.py @@ -169,6 +169,11 @@ CHROME_DEFAULT_ARGS = [ '--disable-desktop-notifications', '--noerrdialogs', '--silent-debugger-extension-api', + # Extension welcome tab suppression for automation + '--disable-extensions-http-throttling', + '--disable-extension-activity-logging', + '--extensions-on-chrome-urls', + '--disable-default-apps', f'--disable-features={",".join(CHROME_DISABLED_COMPONENTS)}', ] From 1a522b225159520b993410c5214327cf89322a1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 15:56:32 +0200 Subject: [PATCH 26/56] Simplify to essential extensions only to prevent welcome tabs - Remove Decentraleyes (nice-to-have performance boost, but uBlock Origin covers most tracking) - Remove ClearURLs (URL cleaning, but not critical for automation) - Remove FastForward (likely causing welcome tabs, most automation doesn't hit link shorteners) - Keep only uBlock Origin (essential ad/tracker blocking) and I still don't care about cookies (essential cookie banner handling) - Set default back to True since these 2 core extensions should not cause welcome tab issues - Update descriptions to reflect minimal essential set This gives maximum automation reliability with minimal extension overhead. 
--- browser_use/browser/profile.py | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/browser_use/browser/profile.py b/browser_use/browser/profile.py index 5bc34ea61..efad9df38 100644 --- a/browser_use/browser/profile.py +++ b/browser_use/browser/profile.py @@ -564,8 +564,8 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro ) keep_alive: bool | None = Field(default=None, description='Keep browser alive after agent run.') enable_default_extensions: bool = Field( - default=False, - description="Enable default extensions for ad blocking (uBlock Origin) and cookie handling (I still don't care about cookies). Extensions are automatically downloaded and loaded when enabled.", + default=True, + description="Enable minimal essential extensions: ad blocking (uBlock Origin) and cookie handling (I still don't care about cookies). These core extensions work automatically without welcome tabs. Extensions are automatically downloaded and loaded when enabled.", ) window_size: ViewportSize | None = Field( default=None, @@ -739,7 +739,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro """ from pathlib import Path - # Extension definitions + # Extension definitions - minimal essential set to avoid welcome tab issues extensions = [ { 'name': 'uBlock Origin', @@ -751,21 +751,6 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro 'id': 'edibdbjcniadpccecjdfdjjppcpchdlm', 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dedibdbjcniadpccecjdfdjjppcpchdlm%26uc', }, - { - 'name': 'Decentraleyes', - 'id': 'ldpochfccmkkmhdbclfhpagapcfdljkj', - 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dldpochfccmkkmhdbclfhpagapcfdljkj%26uc', - }, - { - 'name': 'ClearURLs', - 'id': 'lckanjgmijmafbedllaakclkaicjfmnk', - 'url': 
'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dlckanjgmijmafbedllaakclkaicjfmnk%26uc', - }, - { - 'name': 'FastForward', - 'id': 'icallnadddjmdinamnolclfjanhfoafe', - 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dicallnadddjmdinamnolclfjanhfoafe%26uc', - }, ] # Create extensions cache directory @@ -801,7 +786,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro if extension_paths: logger.info( - f"โœ… Default extensions ready: {len(extension_paths)} extensions loaded (uBlock Origin, I still don't care about cookies, Decentraleyes, ClearURLs, FastForward)" + f"โœ… Essential extensions ready: {len(extension_paths)} extensions loaded (uBlock Origin, I still don't care about cookies)" ) else: logger.warning('โš ๏ธ No default extensions could be loaded') From 42753241cf17b9952d9961a2048474b0abc10584 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 16:00:35 +0200 Subject: [PATCH 27/56] Add ClearURLs and Reader View extensions for improved extraction - Add ClearURLs for automatic URL tracking parameter removal - Add Reader View (Mozilla Readability-based) for content extraction - Reader View requires manual activation but can be triggered via Playwright - Update descriptions to reflect automation-optimized extension set - These additions improve content extraction capabilities while maintaining clean automation --- browser_use/browser/profile.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/browser_use/browser/profile.py b/browser_use/browser/profile.py index efad9df38..31cd779e5 100644 --- a/browser_use/browser/profile.py +++ b/browser_use/browser/profile.py @@ -565,7 +565,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro keep_alive: bool | None = 
Field(default=None, description='Keep browser alive after agent run.') enable_default_extensions: bool = Field( default=True, - description="Enable minimal essential extensions: ad blocking (uBlock Origin) and cookie handling (I still don't care about cookies). These core extensions work automatically without welcome tabs. Extensions are automatically downloaded and loaded when enabled.", + description="Enable automation-optimized extensions: ad blocking (uBlock Origin), cookie handling (I still don't care about cookies), URL cleaning (ClearURLs), and content extraction (Reader View). Note: Reader View requires manual activation or Playwright automation to trigger. Extensions are automatically downloaded and loaded when enabled.", ) window_size: ViewportSize | None = Field( default=None, @@ -739,7 +739,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro """ from pathlib import Path - # Extension definitions - minimal essential set to avoid welcome tab issues + # Extension definitions - optimized for automation and content extraction extensions = [ { 'name': 'uBlock Origin', @@ -751,6 +751,16 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro 'id': 'edibdbjcniadpccecjdfdjjppcpchdlm', 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dedibdbjcniadpccecjdfdjjppcpchdlm%26uc', }, + { + 'name': 'ClearURLs', + 'id': 'lckanjgmijmafbedllaakclkaicjfmnk', + 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dlckanjgmijmafbedllaakclkaicjfmnk%26uc', + }, + { + 'name': 'Reader View', + 'id': 'ecabifbgmdmgdllomnfinbmaellmclnh', + 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Decabifbgmdmgdllomnfinbmaellmclnh%26uc', + }, ] # Create extensions cache directory @@ -786,7 +796,7 @@ class BrowserProfile(BrowserConnectArgs, 
BrowserLaunchPersistentContextArgs, Bro if extension_paths: logger.info( - f"โœ… Essential extensions ready: {len(extension_paths)} extensions loaded (uBlock Origin, I still don't care about cookies)" + f"โœ… Extensions ready: {len(extension_paths)} extensions loaded (uBlock Origin, I still don't care about cookies, ClearURLs, Reader View)" ) else: logger.warning('โš ๏ธ No default extensions could be loaded') From 7b87d3324f523db7307bc29e32cc84b397d6c752 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 16:04:19 +0200 Subject: [PATCH 28/56] Only keep ClearURLs --- browser_use/browser/profile.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/browser_use/browser/profile.py b/browser_use/browser/profile.py index 31cd779e5..036b18194 100644 --- a/browser_use/browser/profile.py +++ b/browser_use/browser/profile.py @@ -756,11 +756,6 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro 'id': 'lckanjgmijmafbedllaakclkaicjfmnk', 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dlckanjgmijmafbedllaakclkaicjfmnk%26uc', }, - { - 'name': 'Reader View', - 'id': 'ecabifbgmdmgdllomnfinbmaellmclnh', - 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Decabifbgmdmgdllomnfinbmaellmclnh%26uc', - }, ] # Create extensions cache directory From ec43c5c81ee7a53d4bbc54a11f9453a3a4465419 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 16:06:53 +0200 Subject: [PATCH 29/56] Fix misleading extension log message - Track actual loaded extension names instead of hardcoding them - Log message now accurately reflects which extensions loaded successfully - Count and names will match even if some extensions fail to load - Fixes bug where log mentioned 'Reader View' 
which was removed from extension list --- browser_use/browser/profile.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/browser_use/browser/profile.py b/browser_use/browser/profile.py index 036b18194..2a3b84551 100644 --- a/browser_use/browser/profile.py +++ b/browser_use/browser/profile.py @@ -763,6 +763,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro cache_dir.mkdir(parents=True, exist_ok=True) extension_paths = [] + loaded_extension_names = [] for ext in extensions: ext_dir = cache_dir / ext['id'] @@ -771,6 +772,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro # Check if extension is already extracted if ext_dir.exists() and (ext_dir / 'manifest.json').exists(): extension_paths.append(str(ext_dir)) + loaded_extension_names.append(ext['name']) continue try: @@ -784,15 +786,14 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro logger.info(f'๐Ÿ“‚ Extracting {ext["name"]} extension...') self._extract_extension(crx_file, ext_dir) extension_paths.append(str(ext_dir)) + loaded_extension_names.append(ext['name']) except Exception as e: logger.warning(f'โš ๏ธ Failed to setup {ext["name"]} extension: {e}') continue if extension_paths: - logger.info( - f"โœ… Extensions ready: {len(extension_paths)} extensions loaded (uBlock Origin, I still don't care about cookies, ClearURLs, Reader View)" - ) + logger.info(f'โœ… Extensions ready: {len(extension_paths)} extensions loaded ({", ".join(loaded_extension_names)})') else: logger.warning('โš ๏ธ No default extensions could be loaded') From bbd34cab80986b06327e8d9eed18e25f4e494886 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 16:07:52 +0200 Subject: [PATCH 30/56] Fix two extension-related bugs 1. 
Remove Reader View from description - extension not actually loaded - Updated enable_default_extensions description to only mention extensions that are actually loaded - Removed misleading reference to Reader View functionality 2. Resolve Chrome flag conflict for extension activity logging - Removed --disable-extension-activity-logging from CHROME_DEFAULT_ARGS - Keeps --enable-extension-activity-logging in _get_extension_args() when extensions enabled - Eliminates contradictory flags that caused unpredictable logging behavior --- browser_use/browser/profile.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/browser_use/browser/profile.py b/browser_use/browser/profile.py index 2a3b84551..e3b9afdb9 100644 --- a/browser_use/browser/profile.py +++ b/browser_use/browser/profile.py @@ -171,7 +171,6 @@ CHROME_DEFAULT_ARGS = [ '--silent-debugger-extension-api', # Extension welcome tab suppression for automation '--disable-extensions-http-throttling', - '--disable-extension-activity-logging', '--extensions-on-chrome-urls', '--disable-default-apps', f'--disable-features={",".join(CHROME_DISABLED_COMPONENTS)}', @@ -565,7 +564,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro keep_alive: bool | None = Field(default=None, description='Keep browser alive after agent run.') enable_default_extensions: bool = Field( default=True, - description="Enable automation-optimized extensions: ad blocking (uBlock Origin), cookie handling (I still don't care about cookies), URL cleaning (ClearURLs), and content extraction (Reader View). Note: Reader View requires manual activation or Playwright automation to trigger. Extensions are automatically downloaded and loaded when enabled.", + description="Enable automation-optimized extensions: ad blocking (uBlock Origin), cookie handling (I still don't care about cookies), and URL cleaning (ClearURLs). All extensions work automatically without manual intervention. 
Extensions are automatically downloaded and loaded when enabled.", ) window_size: ViewportSize | None = Field( default=None, From e43363cb0ba2e5800da0a834c390ac6a90254f4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 18:56:50 +0200 Subject: [PATCH 31/56] Set maximum 1 min timeout for llm call --- browser_use/agent/service.py | 12 +++++++++++- browser_use/agent/views.py | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index 00b13f2ef..97dd322fd 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -183,6 +183,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): calculate_cost: bool = False, display_files_in_done_text: bool = True, include_tool_call_examples: bool = False, + llm_timeout: int = 60, **kwargs, ): # Check for deprecated planner parameters @@ -259,6 +260,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): extend_planner_system_message=None, # Always None now (deprecated) calculate_cost=calculate_cost, include_tool_call_examples=include_tool_call_examples, + llm_timeout=llm_timeout, ) # Token cost service @@ -731,7 +733,15 @@ class Agent(Generic[Context, AgentStructuredOutput]): f'๐Ÿค– Step {self.state.n_steps + 1}: Calling LLM with {len(input_messages)} messages (model: {self.llm.model})...' ) - model_output = await self._get_model_output_with_retry(input_messages) + try: + model_output = await asyncio.wait_for( + self._get_model_output_with_retry(input_messages), timeout=self.settings.llm_timeout + ) + except asyncio.TimeoutError: + raise TimeoutError( + f'LLM call timed out after {self.settings.llm_timeout} seconds. Generate less tokens and try again.' 
+ ) + self.state.last_model_output = model_output # Check again for paused/stopped state after getting model output diff --git a/browser_use/agent/views.py b/browser_use/agent/views.py index 0eb3079cc..e521675cc 100644 --- a/browser_use/agent/views.py +++ b/browser_use/agent/views.py @@ -64,6 +64,7 @@ class AgentSettings(BaseModel): extend_planner_system_message: str | None = None calculate_cost: bool = False include_tool_call_examples: bool = False + llm_timeout: int = 60 # Timeout in seconds for LLM calls class AgentState(BaseModel): From 6285bfeca0f97445373d9c881c2b399f51d01ef1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 19:36:49 +0200 Subject: [PATCH 32/56] Enable max tokens for openai and top_p --- browser_use/llm/openai/chat.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/browser_use/llm/openai/chat.py b/browser_use/llm/openai/chat.py index 96ce849fa..48be7993e 100644 --- a/browser_use/llm/openai/chat.py +++ b/browser_use/llm/openai/chat.py @@ -50,6 +50,8 @@ class ChatOpenAI(BaseChatModel): default_query: Mapping[str, object] | None = None http_client: httpx.AsyncClient | None = None _strict_response_validation: bool = False + max_completion_tokens: int | None = None + top_p: float | None = None # Static @property @@ -150,6 +152,12 @@ class ChatOpenAI(BaseChatModel): if self.temperature is not None: model_params['temperature'] = self.temperature + if self.max_completion_tokens is not None: + model_params['max_completion_tokens'] = self.max_completion_tokens + + if self.top_p is not None: + model_params['top_p'] = self.top_p + if output_format is None: # Return string response response = await self.get_client().chat.completions.create( From 01f58547065862c8bec65c30013d34719ec4b5b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 19:40:21 +0200 Subject: [PATCH 33/56] Inlcude 
timeout per step at 3min --- browser_use/agent/service.py | 6 ++++-- browser_use/agent/views.py | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index 97dd322fd..38a4f1863 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -184,6 +184,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): display_files_in_done_text: bool = True, include_tool_call_examples: bool = False, llm_timeout: int = 60, + step_timeout: int = 180, **kwargs, ): # Check for deprecated planner parameters @@ -261,6 +262,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): calculate_cost=calculate_cost, include_tool_call_examples=include_tool_call_examples, llm_timeout=llm_timeout, + step_timeout=step_timeout, ) # Token cost service @@ -1239,7 +1241,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): try: await asyncio.wait_for( self.step(step_info), - timeout=300, # 5 minute step timeout - more generous for slow LLM calls + timeout=self.settings.step_timeout, ) self.logger.debug(f'โœ… Completed step {step + 1}/{max_steps}') except TimeoutError: @@ -1247,7 +1249,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): error_msg = f'Step {step + 1} timed out after 300 seconds' self.logger.error(f'โฐ {error_msg}') self.state.consecutive_failures += 1 - self.state.last_result = [ActionResult(error=error_msg, include_in_memory=True)] + self.state.last_result = [ActionResult(error=error_msg)] if on_step_end is not None: await on_step_end(self) diff --git a/browser_use/agent/views.py b/browser_use/agent/views.py index e521675cc..c5a72f457 100644 --- a/browser_use/agent/views.py +++ b/browser_use/agent/views.py @@ -65,6 +65,7 @@ class AgentSettings(BaseModel): calculate_cost: bool = False include_tool_call_examples: bool = False llm_timeout: int = 60 # Timeout in seconds for LLM calls + step_timeout: int = 180 # Timeout in seconds for each step class AgentState(BaseModel): From 
0e64c846ae24a32c160ccd60d5cbec49c79f65b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 19:50:13 +0200 Subject: [PATCH 34/56] Refactor wait action to adjust for initial wait time and simplify ActionResult return - Updated the wait action to account for the initial 3 seconds already waited, ensuring accurate logging of total wait time. - Simplified the ActionResult return by removing unnecessary parameters. --- browser_use/controller/service.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/browser_use/controller/service.py b/browser_use/controller/service.py index ff9a180b1..503fa83eb 100644 --- a/browser_use/controller/service.py +++ b/browser_use/controller/service.py @@ -130,23 +130,23 @@ class Controller(Generic[Context]): await browser_session.go_back() msg = '๐Ÿ”™ Navigated back' logger.info(msg) - return ActionResult(extracted_content=msg, include_in_memory=True, long_term_memory='Navigated back') + return ActionResult(extracted_content=msg) - # wait for x seconds - - @self.registry.action('Wait for x seconds default 3 (max 10 seconds)') + @self.registry.action( + 'Wait for x seconds default 3 (max 10 seconds). This can be used to wait until the page is fully loaded.' 
+ ) async def wait(seconds: int = 3): # Cap wait time at maximum 10 seconds - actual_seconds = min(max(seconds, 0), 10) + # 3 seconds already waited by calling the llm + actual_seconds = min(max(seconds - 3, 0), 10) + if actual_seconds != seconds: - msg = f'๐Ÿ•’ Waiting for {actual_seconds} seconds (capped from {seconds} seconds, max 10 seconds)' + msg = f'๐Ÿ•’ Waiting for {actual_seconds + 3} seconds (capped from {seconds} seconds, max 10 seconds)' else: - msg = f'๐Ÿ•’ Waiting for {actual_seconds} seconds' + msg = f'๐Ÿ•’ Waiting for {actual_seconds + 3} seconds' logger.info(msg) await asyncio.sleep(actual_seconds) - return ActionResult( - extracted_content=msg, include_in_memory=True, long_term_memory=f'Waited for {actual_seconds} seconds' - ) + return ActionResult(extracted_content=msg) # Element Interaction Actions From f436338207e4fb728f10b264a6d08ed3588ae5d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 20:32:59 +0200 Subject: [PATCH 35/56] Adjust timeout settings in BrowserSession for improved performance - Reduced the retry timeout from 6 seconds to 3 seconds. - Decreased the page title retrieval timeout from 3 seconds to 2 seconds. - Enhanced timeout handling logic to prioritize user-defined timeouts while ensuring a minimum of 3000ms. --- browser_use/browser/session.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index e22a1d699..882d70dd3 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -2180,7 +2180,7 @@ class BrowserSession(BaseModel): raise Exception(f'Failed to click element. 
Error: {str(e)}') @time_execution_async('--get_tabs_info') - @retry(timeout=6, retries=1) + @retry(timeout=3, retries=1) @require_healthy_browser(usable_page=False, reopen_page=False) async def get_tabs_info(self) -> list[TabInfo]: """Get information about all tabs""" @@ -2188,7 +2188,7 @@ class BrowserSession(BaseModel): tabs_info = [] for page_id, page in enumerate(self.browser_context.pages): try: - title = await asyncio.wait_for(page.title(), timeout=3.0) + title = await asyncio.wait_for(page.title(), timeout=2.0) tab_info = TabInfo(page_id=page_id, url=page.url, title=title) except Exception: # page.title() can hang forever on tabs that are crashed/disappeared/about:blank @@ -2269,8 +2269,14 @@ class BrowserSession(BaseModel): # Check if URL is allowed if not self._is_url_allowed(normalized_url): raise BrowserError(f'โ›”๏ธ Navigation to non-allowed URL: {normalized_url}') - - timeout_ms = min(3000, int(timeout_ms or self.browser_profile.default_navigation_timeout or 12000)) + # If timeout_ms is not None, use it (even if 0); else try profile.default_navigation_timeout (even if 0); else 12000 + if timeout_ms is not None: + user_timeout_ms = int(timeout_ms) + elif self.browser_profile.default_navigation_timeout is not None: + user_timeout_ms = int(self.browser_profile.default_navigation_timeout) + else: + user_timeout_ms = 12000 + timeout_ms = min(3000, user_timeout_ms) # Handle new tab creation if new_tab: From 724dc15b290070e2e2fbd3e252f28a994cca900a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 20:36:56 +0200 Subject: [PATCH 36/56] Fix timeout exception handling in Agent class - Changed the exception type from asyncio.TimeoutError to TimeoutError for better clarity in timeout handling. - Updated the error message to suggest generating fewer tokens in case of a timeout. 
--- browser_use/agent/service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index 38a4f1863..5d542d365 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -739,7 +739,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): model_output = await asyncio.wait_for( self._get_model_output_with_retry(input_messages), timeout=self.settings.llm_timeout ) - except asyncio.TimeoutError: + except TimeoutError: raise TimeoutError( f'LLM call timed out after {self.settings.llm_timeout} seconds. Generate less tokens and try again.' ) From 067b249da2cdbbea9b5e314d6c96f8a2310a9206 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 20:53:22 +0200 Subject: [PATCH 37/56] More tracking --- browser_use/agent/service.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index 5d542d365..88245dba1 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -728,6 +728,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): await self._handle_final_step(step_info) return browser_state_summary + @observe_debug(ignore_input=True, name='get_next_action') async def _get_next_action(self, browser_state_summary: BrowserStateSummary) -> None: """Execute LLM interaction with retry logic and handle callbacks""" input_messages = self._message_manager.get_messages() @@ -980,6 +981,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): return text.strip() @time_execution_async('--get_next_action') + @observe_debug(ignore_input=True, ignore_output=True, name='get_model_output') async def get_model_output(self, input_messages: list[BaseMessage]) -> AgentOutput: """Get next action from LLM based on current state""" From c7f266d52a8db092dbcbce54481b16b5af16b4aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= 
<67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 20:55:19 +0200 Subject: [PATCH 38/56] Improved error for timeout --- browser_use/agent/service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index 88245dba1..1e2169de5 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -742,7 +742,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): ) except TimeoutError: raise TimeoutError( - f'LLM call timed out after {self.settings.llm_timeout} seconds. Generate less tokens and try again.' + f'LLM call timed out after {self.settings.llm_timeout} seconds. Keep your thinking and output short.' ) self.state.last_model_output = model_output From 1531907b40959a65de77c14cfe138fec3ef87451 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sat, 19 Jul 2025 20:56:30 +0200 Subject: [PATCH 39/56] Enhance timeout handling in BrowserSession methods - Added timeout parameters to scroll_into_view_if_needed, click, type, and fill methods to improve reliability and prevent potential hangs. - Introduced debug observation for get_dom_element_by_index and _input_text_element_node methods to aid in tracking and debugging. 
--- browser_use/browser/session.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index 882d70dd3..d829f37dd 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -2061,6 +2061,7 @@ class BrowserSession(BaseModel): # Don't raise the error since this is not critical functionality @require_healthy_browser(usable_page=True, reopen_page=True) + @observe_debug(ignore_output=True, name='get_dom_element_by_index') async def get_dom_element_by_index(self, index: int) -> DOMElementNode | None: """Get DOM element by index.""" selector_map = await self.get_selector_map() @@ -3915,7 +3916,7 @@ class BrowserSession(BaseModel): if element_handle: is_visible = await self._is_visible(element_handle) if is_visible: - await element_handle.scroll_into_view_if_needed() + await element_handle.scroll_into_view_if_needed(timeout=1_000) return element_handle return None except Exception as e: @@ -3931,7 +3932,7 @@ class BrowserSession(BaseModel): if element_handle: is_visible = await self._is_visible(element_handle) if is_visible: - await element_handle.scroll_into_view_if_needed() + await element_handle.scroll_into_view_if_needed(timeout=1_000) return element_handle except Exception as xpath_e: self.logger.error( @@ -3958,7 +3959,7 @@ class BrowserSession(BaseModel): if element_handle: is_visible = await self._is_visible(element_handle) if is_visible: - await element_handle.scroll_into_view_if_needed() + await element_handle.scroll_into_view_if_needed(timeout=1_000) return element_handle return None except Exception as e: @@ -3979,7 +3980,7 @@ class BrowserSession(BaseModel): if element_handle: is_visible = await self._is_visible(element_handle) if is_visible: - await element_handle.scroll_into_view_if_needed() + await element_handle.scroll_into_view_if_needed(timeout=1_000) return element_handle return None except Exception as e: @@ -4023,7 +4024,7 @@ 
class BrowserSession(BaseModel): is_visible = await self._is_visible(element_handle) if is_visible: - await element_handle.scroll_into_view_if_needed() + await element_handle.scroll_into_view_if_needed(timeout=1_000) return element_handle except Exception as e: self.logger.error( @@ -4033,6 +4034,7 @@ class BrowserSession(BaseModel): @require_healthy_browser(usable_page=True, reopen_page=True) @time_execution_async('--input_text_element_node') + @observe_debug(ignore_input=True, name='input_text_element_node') async def _input_text_element_node(self, element_node: DOMElementNode, text: str): """ Input text into an element with proper error handling and state management. @@ -4056,7 +4058,7 @@ class BrowserSession(BaseModel): # let's first try to click and type try: await element_handle.evaluate('el => {el.textContent = ""; el.value = "";}') - await element_handle.click() + await element_handle.click(timeout=2_000) # Add 2 second timeout await asyncio.sleep(0.1) # Increased sleep time page = await self.get_current_page() await page.keyboard.type(text) @@ -4078,9 +4080,9 @@ class BrowserSession(BaseModel): try: if (await is_contenteditable.json_value() or tag_name == 'input') and not (readonly or disabled): await element_handle.evaluate('el => {el.textContent = ""; el.value = "";}') - await element_handle.type(text, delay=5) + await element_handle.type(text, delay=5, timeout=5_000) # Add 5 second timeout else: - await element_handle.fill(text) + await element_handle.fill(text, timeout=3_000) # Add 3 second timeout except Exception as e: self.logger.error(f'Error during input text into element: {type(e).__name__}: {e}') raise BrowserError(f'Failed to input text into element: {repr(element_node)}') From f7bd2c98e9d93e2374039756a26ab24ca87b24e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sun, 20 Jul 2025 15:23:14 +0200 Subject: [PATCH 40/56] Run tests again --- browser_use/browser/session.py | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index d829f37dd..e403d3c18 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -2300,7 +2300,7 @@ class BrowserSession(BaseModel): # Navigate to URL try: - # Use asyncio.wait to prevent hanging on slow page loads + # Use asyncio.wait to prevent hanging on a slow page loads # Don't cap the timeout - respect what was requested self.logger.debug(f'🧭 Starting navigation to {_log_pretty_url(normalized_url)} with timeout {timeout_ms}ms') nav_task = asyncio.create_task(page.goto(normalized_url, wait_until='load', timeout=timeout_ms)) From 34590509fa609ded95fe4d61134016fd2d37839e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sun, 20 Jul 2025 15:45:12 +0200 Subject: [PATCH 41/56] Change log --- browser_use/agent/service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index 1e2169de5..bca8193b6 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -1248,7 +1248,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): self.logger.debug(f'✅ Completed step {step + 1}/{max_steps}') except TimeoutError: # Handle step timeout gracefully - error_msg = f'Step {step + 1} timed out after 300 seconds' + error_msg = f'Step {step + 1} timed out after {self.settings.step_timeout} seconds' self.logger.error(f'⏰ {error_msg}') self.state.consecutive_failures += 1 self.state.last_result = [ActionResult(error=error_msg)] From cbf2a783e25ea8c7bc0f253a637284741daf5dca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sun, 20 Jul 2025 15:50:13 +0200 Subject: [PATCH 42/56] Switch decorators order --- browser_use/browser/session.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff
--git a/browser_use/browser/session.py b/browser_use/browser/session.py index e403d3c18..1214b18e1 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -2060,16 +2060,16 @@ class BrowserSession(BaseModel): self.logger.debug(f'⚠️ Failed to remove highlights (this is usually ok): {type(e).__name__}: {e}') # Don't raise the error since this is not critical functionality - @require_healthy_browser(usable_page=True, reopen_page=True) @observe_debug(ignore_output=True, name='get_dom_element_by_index') + @require_healthy_browser(usable_page=True, reopen_page=True) async def get_dom_element_by_index(self, index: int) -> DOMElementNode | None: """Get DOM element by index.""" selector_map = await self.get_selector_map() return selector_map.get(index) - @require_healthy_browser(usable_page=True, reopen_page=True) @time_execution_async('--click_element_node') @observe_debug(ignore_input=True, name='click_element_node') + @require_healthy_browser(usable_page=True, reopen_page=True) async def _click_element_node(self, element_node: DOMElementNode) -> str | None: """ Optimized method to click an element using xpath. 
From 2552825786ee6ef27e5926e81298e04802760425 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sun, 20 Jul 2025 15:59:20 +0200 Subject: [PATCH 43/56] Update wait test --- tests/ci/test_controller.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/tests/ci/test_controller.py b/tests/ci/test_controller.py index 2778a125f..e36046d42 100644 --- a/tests/ci/test_controller.py +++ b/tests/ci/test_controller.py @@ -328,8 +328,33 @@ class TestControllerIntegration: assert result.extracted_content is not None assert 'Waiting for' in result.extracted_content - # Verify that at least 1 second has passed - assert end_time - start_time >= 0.9 # Allow some timing margin + # Verify that less than 0.1 second has passed (because we substract 3 seconds to account for the llm call) + assert end_time - start_time <= 0.1 # Allow some timing margin + + # longer wait + # Create wait action for 1 second - fix to use a dictionary + wait_action = {'wait': {'seconds': 5}} # Corrected format + + class WaitActionModel(ActionModel): + wait: dict | None = None + + # Record start time + start_time = time.time() + + # Execute wait action + result = await controller.act(WaitActionModel(**wait_action), browser_session) + + # Record end time + end_time = time.time() + + # Verify the result + assert isinstance(result, ActionResult) + assert result.extracted_content is not None + assert 'Waiting for' in result.extracted_content + + # Verify that we took 2 sec (5s-3s (llm call)= 2s) + assert end_time - start_time <= 2.1 # Allow some timing margin + assert end_time - start_time >= 1.9 # Allow some timing margin async def test_go_back_action(self, controller, browser_session, base_url): """Test that go_back action navigates to the previous page.""" From c96121a98bc70e6fc7644f2f2a67c1028bc36d02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= 
<67061560+MagMueller@users.noreply.github.com> Date: Sun, 20 Jul 2025 16:02:11 +0200 Subject: [PATCH 44/56] Update wait test --- tests/ci/test_controller.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/ci/test_controller.py b/tests/ci/test_controller.py index e36046d42..0b23dabff 100644 --- a/tests/ci/test_controller.py +++ b/tests/ci/test_controller.py @@ -335,9 +335,6 @@ class TestControllerIntegration: # Create wait action for 1 second - fix to use a dictionary wait_action = {'wait': {'seconds': 5}} # Corrected format - class WaitActionModel(ActionModel): - wait: dict | None = None - # Record start time start_time = time.time() From b8067da3d4b2509715c3afbe4f41dcdc9d75570b Mon Sep 17 00:00:00 2001 From: mertunsall Date: Sun, 20 Jul 2025 16:10:38 +0200 Subject: [PATCH 45/56] no screenshots if vision is disabled --- browser_use/agent/service.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index 00b13f2ef..2fd2a7d84 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -278,7 +278,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): self._set_browser_use_version_and_source(source) self.initial_actions = self._convert_initial_actions(initial_actions) if initial_actions else None - # Verify we can connect to the LLM and setup the tool calling method + # Verify we can connect to the model self._verify_and_setup_llm() # TODO: move this logic to the LLMs @@ -687,7 +687,9 @@ class Agent(Generic[Context, AgentStructuredOutput]): assert self.browser_session is not None, 'BrowserSession is not set up' self.logger.debug(f'๐ŸŒ Step {self.state.n_steps + 1}: Getting browser state...') - browser_state_summary = await self.browser_session.get_browser_state_with_recovery(cache_clickable_elements_hashes=True) + browser_state_summary = await self.browser_session.get_browser_state_with_recovery( + cache_clickable_elements_hashes=True, 
include_screenshot=self.settings.use_vision + ) current_page = await self.browser_session.get_current_page() # Check for new downloads after getting browser state (catches PDF auto-downloads and previous step downloads) From af04b713d5ef306c19b550c5208094f087128f8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sun, 20 Jul 2025 16:14:42 +0200 Subject: [PATCH 46/56] Fix lint errors --- browser_use/controller/service.py | 3 ++- tests/ci/test_controller.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/browser_use/controller/service.py b/browser_use/controller/service.py index 503fa83eb..4f5bfc142 100644 --- a/browser_use/controller/service.py +++ b/browser_use/controller/service.py @@ -137,7 +137,8 @@ class Controller(Generic[Context]): ) async def wait(seconds: int = 3): # Cap wait time at maximum 10 seconds - # 3 seconds already waited by calling the llm + # Reduce the wait time by 3 seconds to account for the llm call which takes at least 3 seconds + # So if the model decides to wait for 5 seconds, the llm call took at least 3 seconds, so we only need to wait for 2 seconds actual_seconds = min(max(seconds - 3, 0), 10) if actual_seconds != seconds: diff --git a/tests/ci/test_controller.py b/tests/ci/test_controller.py index 0b23dabff..94de1b428 100644 --- a/tests/ci/test_controller.py +++ b/tests/ci/test_controller.py @@ -328,7 +328,7 @@ class TestControllerIntegration: assert result.extracted_content is not None assert 'Waiting for' in result.extracted_content - # Verify that less than 0.1 second has passed (because we substract 3 seconds to account for the llm call) + # Verify that less than 0.1 second has passed (because we deducted 3 seconds to account for the llm call) assert end_time - start_time <= 0.1 # Allow some timing margin # longer wait From 5b13195f560254ec97ced921f1ff18b7a1ddce38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= 
<67061560+MagMueller@users.noreply.github.com> Date: Sun, 20 Jul 2025 16:24:41 +0200 Subject: [PATCH 47/56] Increase default temperature to 0.2 to avoid infinite generation --- browser_use/llm/openai/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/browser_use/llm/openai/chat.py b/browser_use/llm/openai/chat.py index 48be7993e..d96b98c60 100644 --- a/browser_use/llm/openai/chat.py +++ b/browser_use/llm/openai/chat.py @@ -35,7 +35,7 @@ class ChatOpenAI(BaseChatModel): model: ChatModel | str # Model params - temperature: float | None = None + temperature: float | None = 0.2 reasoning_effort: ReasoningEffort = 'low' # Client initialization parameters From 7a619c3612de5ab75a80e5429b6afb8a4809bc69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sun, 20 Jul 2025 16:34:27 +0200 Subject: [PATCH 48/56] Refactor wait time logging in Controller to simplify message formatting --- browser_use/controller/service.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/browser_use/controller/service.py b/browser_use/controller/service.py index 4f5bfc142..c5f1e210b 100644 --- a/browser_use/controller/service.py +++ b/browser_use/controller/service.py @@ -140,11 +140,7 @@ class Controller(Generic[Context]): # Reduce the wait time by 3 seconds to account for the llm call which takes at least 3 seconds # So if the model decides to wait for 5 seconds, the llm call took at least 3 seconds, so we only need to wait for 2 seconds actual_seconds = min(max(seconds - 3, 0), 10) - - if actual_seconds != seconds: - msg = f'🕒 Waiting for {actual_seconds + 3} seconds (capped from {seconds} seconds, max 10 seconds)' - else: - msg = f'🕒 Waiting for {actual_seconds + 3} seconds' + msg = f'🕒 Waiting for {actual_seconds + 3} seconds' logger.info(msg) await asyncio.sleep(actual_seconds) return ActionResult(extracted_content=msg) From ce5bfe6195f9148950595f850bb8c34fc6072d92 Mon Sep
17 00:00:00 2001 From: mertunsall Date: Sun, 20 Jul 2025 22:58:34 +0200 Subject: [PATCH 49/56] lazy imports and bump some versions --- browser_use/__init__.py | 81 +++++++++++++++++++++++++------ browser_use/agent/service.py | 6 ++- browser_use/browser/__init__.py | 43 ++++++++++++++-- browser_use/browser/session.py | 11 ++++- browser_use/llm/__init__.py | 59 +++++++++++++++++----- browser_use/llm/aws/__init__.py | 33 ++++++++++++- browser_use/mcp/__init__.py | 44 ++++++++++++----- browser_use/telemetry/__init__.py | 48 +++++++++++++++--- pyproject.toml | 8 +-- 9 files changed, 275 insertions(+), 58 deletions(-) diff --git a/browser_use/__init__.py b/browser_use/__init__.py index 1635b1e79..0c079efe7 100644 --- a/browser_use/__init__.py +++ b/browser_use/__init__.py @@ -1,4 +1,5 @@ import os +from typing import TYPE_CHECKING from browser_use.logging_config import setup_logging @@ -13,21 +14,6 @@ else: # Monkeypatch BaseSubprocessTransport.__del__ to handle closed event loops gracefully from asyncio import base_subprocess -from browser_use.agent.prompts import SystemPrompt -from browser_use.agent.service import Agent -from browser_use.agent.views import ActionModel, ActionResult, AgentHistoryList -from browser_use.browser import Browser, BrowserConfig, BrowserContext, BrowserContextConfig, BrowserProfile, BrowserSession -from browser_use.controller.service import Controller -from browser_use.dom.service import DomService -from browser_use.llm import ( - ChatAnthropic, - ChatAzureOpenAI, - ChatGoogle, - ChatGroq, - ChatOllama, - ChatOpenAI, -) - _original_del = base_subprocess.BaseSubprocessTransport.__del__ @@ -50,6 +36,71 @@ def _patched_del(self): base_subprocess.BaseSubprocessTransport.__del__ = _patched_del +# Type stubs for lazy imports - fixes linter warnings +if TYPE_CHECKING: + from browser_use.agent.prompts import SystemPrompt + from browser_use.agent.service import Agent + from browser_use.agent.views import ActionModel, ActionResult, AgentHistoryList + 
from browser_use.browser import Browser, BrowserConfig, BrowserContext, BrowserContextConfig, BrowserProfile, BrowserSession + from browser_use.controller.service import Controller + from browser_use.dom.service import DomService + from browser_use.llm.anthropic.chat import ChatAnthropic + from browser_use.llm.azure.chat import ChatAzureOpenAI + from browser_use.llm.google.chat import ChatGoogle + from browser_use.llm.groq.chat import ChatGroq + from browser_use.llm.ollama.chat import ChatOllama + from browser_use.llm.openai.chat import ChatOpenAI + + +# Lazy imports mapping - only import when actually accessed +_LAZY_IMPORTS = { + # Agent service (heavy due to dependencies) + 'Agent': ('browser_use.agent.service', 'Agent'), + # System prompt (moderate weight due to agent.views imports) + 'SystemPrompt': ('browser_use.agent.prompts', 'SystemPrompt'), + # Agent views (very heavy - over 1 second!) + 'ActionModel': ('browser_use.agent.views', 'ActionModel'), + 'ActionResult': ('browser_use.agent.views', 'ActionResult'), + 'AgentHistoryList': ('browser_use.agent.views', 'AgentHistoryList'), + # Browser components (heavy due to playwright/patchright) + 'Browser': ('browser_use.browser', 'Browser'), + 'BrowserConfig': ('browser_use.browser', 'BrowserConfig'), + 'BrowserSession': ('browser_use.browser', 'BrowserSession'), + 'BrowserProfile': ('browser_use.browser', 'BrowserProfile'), + 'BrowserContext': ('browser_use.browser', 'BrowserContext'), + 'BrowserContextConfig': ('browser_use.browser', 'BrowserContextConfig'), + # Controller (moderate weight) + 'Controller': ('browser_use.controller.service', 'Controller'), + # DOM service (moderate weight) + 'DomService': ('browser_use.dom.service', 'DomService'), + # Chat models (very heavy imports) + 'ChatOpenAI': ('browser_use.llm.openai.chat', 'ChatOpenAI'), + 'ChatGoogle': ('browser_use.llm.google.chat', 'ChatGoogle'), + 'ChatAnthropic': ('browser_use.llm.anthropic.chat', 'ChatAnthropic'), + 'ChatGroq': 
('browser_use.llm.groq.chat', 'ChatGroq'), + 'ChatAzureOpenAI': ('browser_use.llm.azure.chat', 'ChatAzureOpenAI'), + 'ChatOllama': ('browser_use.llm.ollama.chat', 'ChatOllama'), +} + + +def __getattr__(name: str): + """Lazy import mechanism - only import modules when they're actually accessed.""" + if name in _LAZY_IMPORTS: + module_path, attr_name = _LAZY_IMPORTS[name] + try: + from importlib import import_module + + module = import_module(module_path) + attr = getattr(module, attr_name) + # Cache the imported attribute in the module's globals + globals()[name] = attr + return attr + except ImportError as e: + raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e + + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") + + __all__ = [ 'Agent', 'Browser', diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index 2fd2a7d84..da243ab7e 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -34,7 +34,8 @@ from bubus import EventBus from pydantic import ValidationError from uuid_extensions import uuid7str -from browser_use.agent.gif import create_history_gif +# Lazy import for gif to avoid heavy agent.views import at startup +# from browser_use.agent.gif import create_history_gif from browser_use.agent.message_manager.service import ( MessageManager, ) @@ -1329,6 +1330,9 @@ class Agent(Generic[Context, AgentStructuredOutput]): if isinstance(self.settings.generate_gif, str): output_path = self.settings.generate_gif + # Lazy import gif module to avoid heavy startup cost + from browser_use.agent.gif import create_history_gif + create_history_gif(task=self.task, history=self.state.history, output_path=output_path) # Emit output file generated event for GIF diff --git a/browser_use/browser/__init__.py b/browser_use/browser/__init__.py index eaea05808..d6c13f2d6 100644 --- a/browser_use/browser/__init__.py +++ b/browser_use/browser/__init__.py @@ -1,6 +1,41 @@ -from .browser import Browser, 
BrowserConfig -from .context import BrowserContext, BrowserContextConfig -from .profile import BrowserProfile -from .session import BrowserSession +from typing import TYPE_CHECKING + +# Type stubs for lazy imports +if TYPE_CHECKING: + from .browser import Browser, BrowserConfig + from .context import BrowserContext, BrowserContextConfig + from .profile import BrowserProfile + from .session import BrowserSession + +# Lazy imports mapping for heavy browser components +_LAZY_IMPORTS = { + 'Browser': ('.browser', 'Browser'), + 'BrowserConfig': ('.browser', 'BrowserConfig'), + 'BrowserContext': ('.context', 'BrowserContext'), + 'BrowserContextConfig': ('.context', 'BrowserContextConfig'), + 'BrowserProfile': ('.profile', 'BrowserProfile'), + 'BrowserSession': ('.session', 'BrowserSession'), +} + + +def __getattr__(name: str): + """Lazy import mechanism for heavy browser components.""" + if name in _LAZY_IMPORTS: + module_path, attr_name = _LAZY_IMPORTS[name] + try: + from importlib import import_module + + # Use relative import for current package + full_module_path = f'browser_use.browser{module_path}' + module = import_module(full_module_path) + attr = getattr(module, attr_name) + # Cache the imported attribute in the module's globals + globals()[name] = attr + return attr + except ImportError as e: + raise ImportError(f'Failed to import {name} from {full_module_path}: {e}') from e + + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") + __all__ = ['Browser', 'BrowserConfig', 'BrowserContext', 'BrowserContextConfig', 'BrowserSession', 'BrowserProfile'] diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index e22a1d699..b4cfa5665 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -51,8 +51,10 @@ from browser_use.browser.views import ( TabInfo, URLNotAllowedError, ) -from browser_use.dom.clickable_element_processor.service import ClickableElementProcessor -from browser_use.dom.service import 
DomService + +# Lazy imports for heavy DOM services to improve startup time +# from browser_use.dom.clickable_element_processor.service import ClickableElementProcessor +# from browser_use.dom.service import DomService from browser_use.dom.views import DOMElementNode, SelectorMap from browser_use.utils import ( is_new_tab_page, @@ -3103,6 +3105,9 @@ class BrowserSession(BaseModel): # Find out which elements are new # Do this only if url has not changed if cache_clickable_elements_hashes: + # Lazy import heavy DOM service + from browser_use.dom.clickable_element_processor.service import ClickableElementProcessor + # if we are on the same url as the last state, we can use the cached hashes if self._cached_clickable_element_hashes and self._cached_clickable_element_hashes.url == updated_state.url: # Pointers, feel free to edit in place @@ -3195,6 +3200,8 @@ class BrowserSession(BaseModel): self.logger.debug(f'PDF auto-download check failed: {type(e).__name__}: {e}') self.logger.debug('🌳 Starting DOM processing...') + from browser_use.dom.service import DomService + dom_service = DomService(page, logger=self.logger) try: content = await asyncio.wait_for( diff --git a/browser_use/llm/__init__.py b/browser_use/llm/__init__.py index 59169ba34..f409f1839 100644 --- a/browser_use/llm/__init__.py +++ b/browser_use/llm/__init__.py @@ -4,14 +4,10 @@ We have switched all of our code from langchain to openai.types.chat.chat_comple For easier transition we have """ -from browser_use.llm.anthropic.chat import ChatAnthropic -from browser_use.llm.aws.chat_anthropic import ChatAnthropicBedrock -from browser_use.llm.aws.chat_bedrock import ChatAWSBedrock -from browser_use.llm.azure.chat import ChatAzureOpenAI +from typing import TYPE_CHECKING + +# Lightweight imports that are commonly used from browser_use.llm.base import BaseChatModel -from browser_use.llm.deepseek.chat import ChatDeepSeek -from browser_use.llm.google.chat import ChatGoogle -from browser_use.llm.groq.chat import
ChatGroq from browser_use.llm.messages import ( AssistantMessage, BaseMessage, @@ -27,11 +23,52 @@ from browser_use.llm.messages import ( from browser_use.llm.messages import ( ContentPartTextParam as ContentText, ) -from browser_use.llm.ollama.chat import ChatOllama -from browser_use.llm.openai.chat import ChatOpenAI -from browser_use.llm.openrouter.chat import ChatOpenRouter -# Make better names for the message +# Type stubs for lazy imports +if TYPE_CHECKING: + from browser_use.llm.anthropic.chat import ChatAnthropic + from browser_use.llm.aws.chat_anthropic import ChatAnthropicBedrock + from browser_use.llm.aws.chat_bedrock import ChatAWSBedrock + from browser_use.llm.azure.chat import ChatAzureOpenAI + from browser_use.llm.deepseek.chat import ChatDeepSeek + from browser_use.llm.google.chat import ChatGoogle + from browser_use.llm.groq.chat import ChatGroq + from browser_use.llm.ollama.chat import ChatOllama + from browser_use.llm.openai.chat import ChatOpenAI + from browser_use.llm.openrouter.chat import ChatOpenRouter + +# Lazy imports mapping for heavy chat models +_LAZY_IMPORTS = { + 'ChatAnthropic': ('browser_use.llm.anthropic.chat', 'ChatAnthropic'), + 'ChatAnthropicBedrock': ('browser_use.llm.aws.chat_anthropic', 'ChatAnthropicBedrock'), + 'ChatAWSBedrock': ('browser_use.llm.aws.chat_bedrock', 'ChatAWSBedrock'), + 'ChatAzureOpenAI': ('browser_use.llm.azure.chat', 'ChatAzureOpenAI'), + 'ChatDeepSeek': ('browser_use.llm.deepseek.chat', 'ChatDeepSeek'), + 'ChatGoogle': ('browser_use.llm.google.chat', 'ChatGoogle'), + 'ChatGroq': ('browser_use.llm.groq.chat', 'ChatGroq'), + 'ChatOllama': ('browser_use.llm.ollama.chat', 'ChatOllama'), + 'ChatOpenAI': ('browser_use.llm.openai.chat', 'ChatOpenAI'), + 'ChatOpenRouter': ('browser_use.llm.openrouter.chat', 'ChatOpenRouter'), +} + + +def __getattr__(name: str): + """Lazy import mechanism for heavy chat model imports.""" + if name in _LAZY_IMPORTS: + module_path, attr_name = _LAZY_IMPORTS[name] + try: + from 
importlib import import_module + + module = import_module(module_path) + attr = getattr(module, attr_name) + # Cache the imported attribute in the module's globals + globals()[name] = attr + return attr + except ImportError as e: + raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e + + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") + __all__ = [ # Message types -> for easier transition from langchain diff --git a/browser_use/llm/aws/__init__.py b/browser_use/llm/aws/__init__.py index 69afb3a95..cb2def920 100644 --- a/browser_use/llm/aws/__init__.py +++ b/browser_use/llm/aws/__init__.py @@ -1,5 +1,34 @@ -from browser_use.llm.aws.chat_anthropic import ChatAnthropicBedrock -from browser_use.llm.aws.chat_bedrock import ChatAWSBedrock +from typing import TYPE_CHECKING + +# Type stubs for lazy imports +if TYPE_CHECKING: + from browser_use.llm.aws.chat_anthropic import ChatAnthropicBedrock + from browser_use.llm.aws.chat_bedrock import ChatAWSBedrock + +# Lazy imports mapping for AWS chat models +_LAZY_IMPORTS = { + 'ChatAnthropicBedrock': ('browser_use.llm.aws.chat_anthropic', 'ChatAnthropicBedrock'), + 'ChatAWSBedrock': ('browser_use.llm.aws.chat_bedrock', 'ChatAWSBedrock'), +} + + +def __getattr__(name: str): + """Lazy import mechanism for AWS chat models.""" + if name in _LAZY_IMPORTS: + module_path, attr_name = _LAZY_IMPORTS[name] + try: + from importlib import import_module + + module = import_module(module_path) + attr = getattr(module, attr_name) + # Cache the imported attribute in the module's globals + globals()[name] = attr + return attr + except ImportError as e: + raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e + + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") + __all__ = [ 'ChatAWSBedrock', diff --git a/browser_use/mcp/__init__.py b/browser_use/mcp/__init__.py index 08c759377..fe97674b4 100644 --- a/browser_use/mcp/__init__.py +++ 
b/browser_use/mcp/__init__.py @@ -3,16 +3,38 @@ This module provides integration with MCP servers and clients for browser automation. """ -from browser_use.mcp.client import MCPClient -from browser_use.mcp.controller import MCPToolWrapper +from typing import TYPE_CHECKING + +# Type stubs for lazy imports +if TYPE_CHECKING: + from browser_use.mcp.client import MCPClient + from browser_use.mcp.controller import MCPToolWrapper + from browser_use.mcp.server import BrowserUseServer + +# Lazy imports mapping +_LAZY_IMPORTS = { + 'MCPClient': ('browser_use.mcp.client', 'MCPClient'), + 'MCPToolWrapper': ('browser_use.mcp.controller', 'MCPToolWrapper'), + 'BrowserUseServer': ('browser_use.mcp.server', 'BrowserUseServer'), +} + + +def __getattr__(name: str): + """Lazy import to avoid importing heavy modules when not needed.""" + if name in _LAZY_IMPORTS: + module_path, attr_name = _LAZY_IMPORTS[name] + try: + from importlib import import_module + + module = import_module(module_path) + attr = getattr(module, attr_name) + # Cache the imported attribute in the module's globals + globals()[name] = attr + return attr + except ImportError as e: + raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e + + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") + __all__ = ['MCPClient', 'MCPToolWrapper', 'BrowserUseServer'] - - -def __getattr__(name): - """Lazy import to avoid importing server module when only client is needed.""" - if name == 'BrowserUseServer': - from browser_use.mcp.server import BrowserUseServer - - return BrowserUseServer - raise AttributeError(f"module '{__name__}' has no attribute '{name}'") diff --git a/browser_use/telemetry/__init__.py b/browser_use/telemetry/__init__.py index 40282dc9e..222bc9ba1 100644 --- a/browser_use/telemetry/__init__.py +++ b/browser_use/telemetry/__init__.py @@ -2,18 +2,50 @@ Telemetry for Browser Use. 
""" -from browser_use.telemetry.service import ProductTelemetry -from browser_use.telemetry.views import ( - BaseTelemetryEvent, - CLITelemetryEvent, - MCPClientTelemetryEvent, - MCPServerTelemetryEvent, -) +from typing import TYPE_CHECKING + +# Type stubs for lazy imports +if TYPE_CHECKING: + from browser_use.telemetry.service import ProductTelemetry + from browser_use.telemetry.views import ( + BaseTelemetryEvent, + CLITelemetryEvent, + MCPClientTelemetryEvent, + MCPServerTelemetryEvent, + ) + +# Lazy imports mapping +_LAZY_IMPORTS = { + 'ProductTelemetry': ('browser_use.telemetry.service', 'ProductTelemetry'), + 'BaseTelemetryEvent': ('browser_use.telemetry.views', 'BaseTelemetryEvent'), + 'CLITelemetryEvent': ('browser_use.telemetry.views', 'CLITelemetryEvent'), + 'MCPClientTelemetryEvent': ('browser_use.telemetry.views', 'MCPClientTelemetryEvent'), + 'MCPServerTelemetryEvent': ('browser_use.telemetry.views', 'MCPServerTelemetryEvent'), +} + + +def __getattr__(name: str): + """Lazy import mechanism for telemetry components.""" + if name in _LAZY_IMPORTS: + module_path, attr_name = _LAZY_IMPORTS[name] + try: + from importlib import import_module + + module = import_module(module_path) + attr = getattr(module, attr_name) + # Cache the imported attribute in the module's globals + globals()[name] = attr + return attr + except ImportError as e: + raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e + + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") + __all__ = [ 'BaseTelemetryEvent', 'ProductTelemetry', + 'CLITelemetryEvent', 'MCPClientTelemetryEvent', 'MCPServerTelemetryEvent', - 'CLITelemetryEvent', ] diff --git a/pyproject.toml b/pyproject.toml index 47fc212dd..9d3cbb632 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,10 +31,10 @@ dependencies = [ "typing-extensions>=4.12.2", "uuid7>=0.1.0", "authlib>=1.6.0", - "google-genai>=1.21.1", - "openai>=1.81.0", - "anthropic>=0.54.0", - "groq>=0.28.0", + 
"google-genai>=1.26.0", + "openai>=1.97.0", + "anthropic>=0.58.2", + "groq>=0.30.0", "ollama>=0.5.1", "google-api-python-client>=2.174.0", "google-auth>=2.40.3", From d9e6348a679fee529e1e9e83df2ef591fd401209 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sun, 20 Jul 2025 23:11:23 +0200 Subject: [PATCH 50/56] Include frequency_penalty 0.05 --- browser_use/llm/openai/chat.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/browser_use/llm/openai/chat.py b/browser_use/llm/openai/chat.py index d96b98c60..edfb75d7a 100644 --- a/browser_use/llm/openai/chat.py +++ b/browser_use/llm/openai/chat.py @@ -20,6 +20,16 @@ from browser_use.llm.views import ChatInvokeCompletion, ChatInvokeUsage T = TypeVar('T', bound=BaseModel) ReasoningModels: list[ChatModel | str] = ['o4-mini', 'o3', 'o3-mini', 'o1', 'o1-pro', 'o3-pro'] +UnsupportedReasoningParams: list[str] = [ + 'temperature', + 'frequency_penalty', + 'top_p', + 'presence_penalty', + 'logprobs', + 'top_logprobs', + 'logit_bias', + 'max_tokens', +] @dataclass @@ -36,6 +46,7 @@ class ChatOpenAI(BaseChatModel): # Model params temperature: float | None = 0.2 + frequency_penalty: float | None = 0.05 reasoning_effort: ReasoningEffort = 'low' # Client initialization parameters @@ -50,7 +61,7 @@ class ChatOpenAI(BaseChatModel): default_query: Mapping[str, object] | None = None http_client: httpx.AsyncClient | None = None _strict_response_validation: bool = False - max_completion_tokens: int | None = None + max_completion_tokens: int | None = 8000 top_p: float | None = None # Static @@ -146,18 +157,24 @@ class ChatOpenAI(BaseChatModel): try: model_params: dict[str, Any] = {} - if self.model in ReasoningModels: - model_params['reasoning_effort'] = self.reasoning_effort if self.temperature is not None: model_params['temperature'] = self.temperature + if self.frequency_penalty is not None: + model_params['frequency_penalty'] = 
self.frequency_penalty + if self.max_completion_tokens is not None: model_params['max_completion_tokens'] = self.max_completion_tokens if self.top_p is not None: model_params['top_p'] = self.top_p + if self.model in ReasoningModels: + model_params['reasoning_effort'] = self.reasoning_effort + for param in UnsupportedReasoningParams: + model_params.pop(param, None) + if output_format is None: # Return string response response = await self.get_client().chat.completions.create( From 5ed62da25f0c1ad2bdcbb1a4a40594e86c1d7afd Mon Sep 17 00:00:00 2001 From: mertunsall Date: Sun, 20 Jul 2025 23:14:07 +0200 Subject: [PATCH 51/56] restored this to main --- browser_use/mcp/__init__.py | 44 ++++++++++--------------------------- 1 file changed, 11 insertions(+), 33 deletions(-) diff --git a/browser_use/mcp/__init__.py b/browser_use/mcp/__init__.py index fe97674b4..08c759377 100644 --- a/browser_use/mcp/__init__.py +++ b/browser_use/mcp/__init__.py @@ -3,38 +3,16 @@ This module provides integration with MCP servers and clients for browser automation. 
""" -from typing import TYPE_CHECKING - -# Type stubs for lazy imports -if TYPE_CHECKING: - from browser_use.mcp.client import MCPClient - from browser_use.mcp.controller import MCPToolWrapper - from browser_use.mcp.server import BrowserUseServer - -# Lazy imports mapping -_LAZY_IMPORTS = { - 'MCPClient': ('browser_use.mcp.client', 'MCPClient'), - 'MCPToolWrapper': ('browser_use.mcp.controller', 'MCPToolWrapper'), - 'BrowserUseServer': ('browser_use.mcp.server', 'BrowserUseServer'), -} - - -def __getattr__(name: str): - """Lazy import to avoid importing heavy modules when not needed.""" - if name in _LAZY_IMPORTS: - module_path, attr_name = _LAZY_IMPORTS[name] - try: - from importlib import import_module - - module = import_module(module_path) - attr = getattr(module, attr_name) - # Cache the imported attribute in the module's globals - globals()[name] = attr - return attr - except ImportError as e: - raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e - - raise AttributeError(f"module '{__name__}' has no attribute '{name}'") - +from browser_use.mcp.client import MCPClient +from browser_use.mcp.controller import MCPToolWrapper __all__ = ['MCPClient', 'MCPToolWrapper', 'BrowserUseServer'] + + +def __getattr__(name): + """Lazy import to avoid importing server module when only client is needed.""" + if name == 'BrowserUseServer': + from browser_use.mcp.server import BrowserUseServer + + return BrowserUseServer + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") From d695a5692396be187b854ec1a7055cdee86484e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sun, 20 Jul 2025 23:53:00 +0200 Subject: [PATCH 52/56] fix temperature for reasoning models --- browser_use/llm/openai/chat.py | 14 ++------------ examples/simple.py | 30 ++++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/browser_use/llm/openai/chat.py 
b/browser_use/llm/openai/chat.py index edfb75d7a..4883478b1 100644 --- a/browser_use/llm/openai/chat.py +++ b/browser_use/llm/openai/chat.py @@ -20,16 +20,6 @@ from browser_use.llm.views import ChatInvokeCompletion, ChatInvokeUsage T = TypeVar('T', bound=BaseModel) ReasoningModels: list[ChatModel | str] = ['o4-mini', 'o3', 'o3-mini', 'o1', 'o1-pro', 'o3-pro'] -UnsupportedReasoningParams: list[str] = [ - 'temperature', - 'frequency_penalty', - 'top_p', - 'presence_penalty', - 'logprobs', - 'top_logprobs', - 'logit_bias', - 'max_tokens', -] @dataclass @@ -172,8 +162,8 @@ class ChatOpenAI(BaseChatModel): if self.model in ReasoningModels: model_params['reasoning_effort'] = self.reasoning_effort - for param in UnsupportedReasoningParams: - model_params.pop(param, None) + model_params['temperature'] = 1 + model_params['frequency_penalty'] = 0 if output_format is None: # Return string response diff --git a/examples/simple.py b/examples/simple.py index f9bfe8132..3204f280e 100644 --- a/examples/simple.py +++ b/examples/simple.py @@ -10,21 +10,39 @@ from dotenv import load_dotenv load_dotenv() +try: + from lmnr import Instruments, Laminar + + Laminar.initialize(project_api_key=os.getenv('LMNR_PROJECT_API_KEY'), disabled_instruments={Instruments.BROWSER_USE}) +except Exception: + print('Error initializing Laminar') + pass from browser_use import Agent # Initialize the model llm = ChatOpenAI( - model='gpt-4.1-mini', + model='o4-mini', + temperature=0.2, +) + +# Optimized task with more specific instructions +task = """ +Navigate to the Qatar Airways homepage and search for flights from Doha to Paris departing in the upcoming week; then list the available fare classes and prices. +Only use https://www.qatarairways.com/ to achieve the task. Don't go to any other site. 
The task is achievable with just navigation from this site.""" + +task = ' call done directly and repeat the word "New" 100 times' +# Performance optimizations +agent = Agent( + task=task, + llm=llm, ) -task = 'Find the founders of browser-use' -agent = Agent(task=task, llm=llm) - - async def main(): - await agent.run() + history = await agent.run(max_steps=10) + # token usage + print(history.usage) if __name__ == '__main__': From 330e4d9ab5559ca2bf7ade3a3d73b0fe30616c19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sun, 20 Jul 2025 23:54:03 +0200 Subject: [PATCH 53/56] Simple example --- examples/simple.py | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/examples/simple.py b/examples/simple.py index 3204f280e..f9bfe8132 100644 --- a/examples/simple.py +++ b/examples/simple.py @@ -10,39 +10,21 @@ from dotenv import load_dotenv load_dotenv() -try: - from lmnr import Instruments, Laminar - - Laminar.initialize(project_api_key=os.getenv('LMNR_PROJECT_API_KEY'), disabled_instruments={Instruments.BROWSER_USE}) -except Exception: - print('Error initializing Laminar') - pass from browser_use import Agent # Initialize the model llm = ChatOpenAI( - model='o4-mini', - temperature=0.2, + model='gpt-4.1-mini', ) -# Optimized task with more specific instructions -task = """ -Navigate to the Qatar Airways homepage and search for flights from Doha to Paris departing in the upcoming week; then list the available fare classes and prices. -Only use https://www.qatarairways.com/ to achieve the task. Don't go to any other site. 
The task is achievable with just navigation from this site.""" -task = ' call done directly and repeat the word "New" 100 times' -# Performance optimizations -agent = Agent( - task=task, - llm=llm, -) +task = 'Find the founders of browser-use' +agent = Agent(task=task, llm=llm) async def main(): - history = await agent.run(max_steps=10) - # token usage - print(history.usage) + await agent.run() if __name__ == '__main__': From f901a6931f8ea0b492a803ef4db4df1fac949aa0 Mon Sep 17 00:00:00 2001 From: mertunsall Date: Sun, 20 Jul 2025 23:55:25 +0200 Subject: [PATCH 54/56] add init to make things library --- browser_use/agent/__init__.py | 4 ++++ browser_use/agent/message_manager/__init__.py | 3 +++ browser_use/controller/registry/__init__.py | 1 + browser_use/dom/clickable_element_processor/__init__.py | 1 + browser_use/dom/history_tree_processor/__init__.py | 3 +++ browser_use/llm/anthropic/__init__.py | 5 +++++ browser_use/llm/azure/__init__.py | 5 +++++ browser_use/llm/deepseek/__init__.py | 5 +++++ browser_use/llm/groq/__init__.py | 5 +++++ browser_use/llm/ollama/__init__.py | 5 +++++ browser_use/llm/openai/__init__.py | 5 +++++ browser_use/llm/openrouter/__init__.py | 5 +++++ 12 files changed, 47 insertions(+) create mode 100644 browser_use/agent/__init__.py create mode 100644 browser_use/agent/message_manager/__init__.py create mode 100644 browser_use/controller/registry/__init__.py create mode 100644 browser_use/dom/clickable_element_processor/__init__.py create mode 100644 browser_use/dom/history_tree_processor/__init__.py create mode 100644 browser_use/llm/anthropic/__init__.py create mode 100644 browser_use/llm/azure/__init__.py create mode 100644 browser_use/llm/deepseek/__init__.py create mode 100644 browser_use/llm/groq/__init__.py create mode 100644 browser_use/llm/ollama/__init__.py create mode 100644 browser_use/llm/openai/__init__.py create mode 100644 browser_use/llm/openrouter/__init__.py diff --git a/browser_use/agent/__init__.py 
b/browser_use/agent/__init__.py new file mode 100644 index 000000000..294f8d115 --- /dev/null +++ b/browser_use/agent/__init__.py @@ -0,0 +1,4 @@ +"""Browser automation agent module.""" + +# Main agent components are imported lazily via browser_use.__init__.py +# This file just makes the directory a proper Python package diff --git a/browser_use/agent/message_manager/__init__.py b/browser_use/agent/message_manager/__init__.py new file mode 100644 index 000000000..31056ea56 --- /dev/null +++ b/browser_use/agent/message_manager/__init__.py @@ -0,0 +1,3 @@ +"""Message manager for browser automation agent.""" + +# Message manager components diff --git a/browser_use/controller/registry/__init__.py b/browser_use/controller/registry/__init__.py new file mode 100644 index 000000000..d7bc0ca68 --- /dev/null +++ b/browser_use/controller/registry/__init__.py @@ -0,0 +1 @@ +"""Action registry for browser automation controller.""" diff --git a/browser_use/dom/clickable_element_processor/__init__.py b/browser_use/dom/clickable_element_processor/__init__.py new file mode 100644 index 000000000..36758da78 --- /dev/null +++ b/browser_use/dom/clickable_element_processor/__init__.py @@ -0,0 +1 @@ +"""Clickable element processor for DOM manipulation.""" diff --git a/browser_use/dom/history_tree_processor/__init__.py b/browser_use/dom/history_tree_processor/__init__.py new file mode 100644 index 000000000..63d6f0752 --- /dev/null +++ b/browser_use/dom/history_tree_processor/__init__.py @@ -0,0 +1,3 @@ +"""History tree processor for DOM manipulation.""" + +# History tree processor components diff --git a/browser_use/llm/anthropic/__init__.py b/browser_use/llm/anthropic/__init__.py new file mode 100644 index 000000000..d3d680c40 --- /dev/null +++ b/browser_use/llm/anthropic/__init__.py @@ -0,0 +1,5 @@ +"""Anthropic LLM integration for browser-use.""" + +from browser_use.llm.anthropic.chat import ChatAnthropic + +__all__ = ['ChatAnthropic'] diff --git a/browser_use/llm/azure/__init__.py 
b/browser_use/llm/azure/__init__.py new file mode 100644 index 000000000..91bcb8e01 --- /dev/null +++ b/browser_use/llm/azure/__init__.py @@ -0,0 +1,5 @@ +"""Azure OpenAI LLM integration for browser-use.""" + +from browser_use.llm.azure.chat import ChatAzureOpenAI + +__all__ = ['ChatAzureOpenAI'] diff --git a/browser_use/llm/deepseek/__init__.py b/browser_use/llm/deepseek/__init__.py new file mode 100644 index 000000000..cb61b710a --- /dev/null +++ b/browser_use/llm/deepseek/__init__.py @@ -0,0 +1,5 @@ +"""DeepSeek LLM integration for browser-use.""" + +from browser_use.llm.deepseek.chat import ChatDeepSeek + +__all__ = ['ChatDeepSeek'] diff --git a/browser_use/llm/groq/__init__.py b/browser_use/llm/groq/__init__.py new file mode 100644 index 000000000..7ac86b72a --- /dev/null +++ b/browser_use/llm/groq/__init__.py @@ -0,0 +1,5 @@ +"""Groq LLM integration for browser-use.""" + +from browser_use.llm.groq.chat import ChatGroq + +__all__ = ['ChatGroq'] diff --git a/browser_use/llm/ollama/__init__.py b/browser_use/llm/ollama/__init__.py new file mode 100644 index 000000000..3ff9cbe59 --- /dev/null +++ b/browser_use/llm/ollama/__init__.py @@ -0,0 +1,5 @@ +"""Ollama LLM integration for browser-use.""" + +from browser_use.llm.ollama.chat import ChatOllama + +__all__ = ['ChatOllama'] diff --git a/browser_use/llm/openai/__init__.py b/browser_use/llm/openai/__init__.py new file mode 100644 index 000000000..df5c72585 --- /dev/null +++ b/browser_use/llm/openai/__init__.py @@ -0,0 +1,5 @@ +"""OpenAI LLM integration for browser-use.""" + +from browser_use.llm.openai.chat import ChatOpenAI + +__all__ = ['ChatOpenAI'] diff --git a/browser_use/llm/openrouter/__init__.py b/browser_use/llm/openrouter/__init__.py new file mode 100644 index 000000000..717b75925 --- /dev/null +++ b/browser_use/llm/openrouter/__init__.py @@ -0,0 +1,5 @@ +"""OpenRouter LLM integration for browser-use.""" + +from browser_use.llm.openrouter.chat import ChatOpenRouter + +__all__ = ['ChatOpenRouter'] From 
a5bc83f7664228b4af89f4bc85af955c2d88cb7d Mon Sep 17 00:00:00 2001 From: Mert Unsal Date: Sun, 20 Jul 2025 23:57:53 +0200 Subject: [PATCH 55/56] Revert "add init to make things library" --- browser_use/agent/__init__.py | 4 ---- browser_use/agent/message_manager/__init__.py | 3 --- browser_use/controller/registry/__init__.py | 1 - browser_use/dom/clickable_element_processor/__init__.py | 1 - browser_use/dom/history_tree_processor/__init__.py | 3 --- browser_use/llm/anthropic/__init__.py | 5 ----- browser_use/llm/azure/__init__.py | 5 ----- browser_use/llm/deepseek/__init__.py | 5 ----- browser_use/llm/groq/__init__.py | 5 ----- browser_use/llm/ollama/__init__.py | 5 ----- browser_use/llm/openai/__init__.py | 5 ----- browser_use/llm/openrouter/__init__.py | 5 ----- 12 files changed, 47 deletions(-) delete mode 100644 browser_use/agent/__init__.py delete mode 100644 browser_use/agent/message_manager/__init__.py delete mode 100644 browser_use/controller/registry/__init__.py delete mode 100644 browser_use/dom/clickable_element_processor/__init__.py delete mode 100644 browser_use/dom/history_tree_processor/__init__.py delete mode 100644 browser_use/llm/anthropic/__init__.py delete mode 100644 browser_use/llm/azure/__init__.py delete mode 100644 browser_use/llm/deepseek/__init__.py delete mode 100644 browser_use/llm/groq/__init__.py delete mode 100644 browser_use/llm/ollama/__init__.py delete mode 100644 browser_use/llm/openai/__init__.py delete mode 100644 browser_use/llm/openrouter/__init__.py diff --git a/browser_use/agent/__init__.py b/browser_use/agent/__init__.py deleted file mode 100644 index 294f8d115..000000000 --- a/browser_use/agent/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -"""Browser automation agent module.""" - -# Main agent components are imported lazily via browser_use.__init__.py -# This file just makes the directory a proper Python package diff --git a/browser_use/agent/message_manager/__init__.py b/browser_use/agent/message_manager/__init__.py deleted 
file mode 100644 index 31056ea56..000000000 --- a/browser_use/agent/message_manager/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -"""Message manager for browser automation agent.""" - -# Message manager components diff --git a/browser_use/controller/registry/__init__.py b/browser_use/controller/registry/__init__.py deleted file mode 100644 index d7bc0ca68..000000000 --- a/browser_use/controller/registry/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Action registry for browser automation controller.""" diff --git a/browser_use/dom/clickable_element_processor/__init__.py b/browser_use/dom/clickable_element_processor/__init__.py deleted file mode 100644 index 36758da78..000000000 --- a/browser_use/dom/clickable_element_processor/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Clickable element processor for DOM manipulation.""" diff --git a/browser_use/dom/history_tree_processor/__init__.py b/browser_use/dom/history_tree_processor/__init__.py deleted file mode 100644 index 63d6f0752..000000000 --- a/browser_use/dom/history_tree_processor/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -"""History tree processor for DOM manipulation.""" - -# History tree processor components diff --git a/browser_use/llm/anthropic/__init__.py b/browser_use/llm/anthropic/__init__.py deleted file mode 100644 index d3d680c40..000000000 --- a/browser_use/llm/anthropic/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Anthropic LLM integration for browser-use.""" - -from browser_use.llm.anthropic.chat import ChatAnthropic - -__all__ = ['ChatAnthropic'] diff --git a/browser_use/llm/azure/__init__.py b/browser_use/llm/azure/__init__.py deleted file mode 100644 index 91bcb8e01..000000000 --- a/browser_use/llm/azure/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Azure OpenAI LLM integration for browser-use.""" - -from browser_use.llm.azure.chat import ChatAzureOpenAI - -__all__ = ['ChatAzureOpenAI'] diff --git a/browser_use/llm/deepseek/__init__.py b/browser_use/llm/deepseek/__init__.py deleted file mode 100644 
index cb61b710a..000000000 --- a/browser_use/llm/deepseek/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""DeepSeek LLM integration for browser-use.""" - -from browser_use.llm.deepseek.chat import ChatDeepSeek - -__all__ = ['ChatDeepSeek'] diff --git a/browser_use/llm/groq/__init__.py b/browser_use/llm/groq/__init__.py deleted file mode 100644 index 7ac86b72a..000000000 --- a/browser_use/llm/groq/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Groq LLM integration for browser-use.""" - -from browser_use.llm.groq.chat import ChatGroq - -__all__ = ['ChatGroq'] diff --git a/browser_use/llm/ollama/__init__.py b/browser_use/llm/ollama/__init__.py deleted file mode 100644 index 3ff9cbe59..000000000 --- a/browser_use/llm/ollama/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Ollama LLM integration for browser-use.""" - -from browser_use.llm.ollama.chat import ChatOllama - -__all__ = ['ChatOllama'] diff --git a/browser_use/llm/openai/__init__.py b/browser_use/llm/openai/__init__.py deleted file mode 100644 index df5c72585..000000000 --- a/browser_use/llm/openai/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""OpenAI LLM integration for browser-use.""" - -from browser_use.llm.openai.chat import ChatOpenAI - -__all__ = ['ChatOpenAI'] diff --git a/browser_use/llm/openrouter/__init__.py b/browser_use/llm/openrouter/__init__.py deleted file mode 100644 index 717b75925..000000000 --- a/browser_use/llm/openrouter/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""OpenRouter LLM integration for browser-use.""" - -from browser_use.llm.openrouter.chat import ChatOpenRouter - -__all__ = ['ChatOpenRouter'] From a63fa12f9655265668896f284f9989f4d7e0fc4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Sun, 20 Jul 2025 23:59:58 +0200 Subject: [PATCH 56/56] Enhance logging for crashed page recovery in session.py Updated the logging messages in the require_healthy_browser function to include the current page URL during recovery attempts. 
This change improves the clarity of debug and warning logs by ensuring the page URL is accurately reported, even when the page is unresponsive. --- browser_use/browser/session.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index 8310a4fa3..b068f952d 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -162,12 +162,14 @@ def require_healthy_browser(usable_page=True, reopen_page=True): await self._recover_unresponsive_page( func.__name__, timeout_ms=int(self.browser_profile.default_navigation_timeout or 5000) + 5_000 ) + page_url = self.agent_current_page.url if self.agent_current_page else 'unknown page' self.logger.debug( - f'🤕 Crashed page recovery finished, attempting to continue with {func.__name__}() on {_log_pretty_url(self.agent_current_page.url)}...' + f'🤕 Crashed page recovery finished, attempting to continue with {func.__name__}() on {_log_pretty_url(page_url)}...' ) except Exception as e: + page_url = self.agent_current_page.url if self.agent_current_page else 'unknown page' self.logger.warning( - f'❌ Crashed page recovery failed, could not run {func.__name__}(), page is stuck unresponsive on {_log_pretty_url(self.agent_current_page.url)}...' + f'❌ Crashed page recovery failed, could not run {func.__name__}(), page is stuck unresponsive on {_log_pretty_url(page_url)}...' ) raise # Re-raise to let retry decorator / callsite handle it