diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index 98b01e8c3..a015a381f 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -1139,23 +1139,21 @@ class Agent(Generic[Context, AgentStructuredOutput]): """Extract URL from task string using naive pattern matching.""" import re + # Remove email addresses from task before looking for URLs + task_without_emails = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', '', task) + # Look for common URL patterns patterns = [ r'https?://[^\s<>"\']+', # Full URLs with http/https r'(?:www\.)?[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.[a-zA-Z]{2,}(?:/[^\s<>"\']*)?', # Domain names with subdomains and optional paths ] - # Email pattern to exclude - email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b' - found_urls = [] for pattern in patterns: - matches = re.finditer(pattern, task) + matches = re.finditer(pattern, task_without_emails) for match in matches: url = match.group(0) - # Skip if this looks like an email address - if re.search(email_pattern, url): - continue + # Remove trailing punctuation that's not part of URLs url = re.sub(r'[.,;:!?()\[\]]+$', '', url) # Add https:// if missing diff --git a/browser_use/agent/system_prompt.md b/browser_use/agent/system_prompt.md index e57f288f2..83640b832 100644 --- a/browser_use/agent/system_prompt.md +++ b/browser_use/agent/system_prompt.md @@ -129,50 +129,24 @@ The `done` action is your opportunity to terminate and share your findings with - You are allowed to use a maximum of {max_actions} actions per step. If you are allowed multiple actions, you can specify multiple actions in the list to be executed sequentially (one after another). -- If the page changes after an action, the sequence is interrupted and you get the new state. You can see this in your agent history when this happens. +- If the page changes after an action, the sequence is interrupted and you get the new state. 
-**IMPORTANT: Be More Efficient with Multi-Action Outputs** +You can output multiple actions in one step. Try to be efficient where it makes sense. Do not predict actions which do not make sense for the current page. -Maximize efficiency by combining related actions in one step instead of doing them separately: - -**Highly Recommended Action Combinations:** -- `click_element_by_index` + `extract_structured_data` → Click element and immediately extract information -- `go_to_url` + `extract_structured_data` → Navigate and extract data in one step +**Recommended Action Combinations:** - `input_text` + `click_element_by_index` → Fill form field and submit/search in one step -- `click_element_by_index` + `input_text` → Click input field and fill it immediately -- `click_element_by_index` + `click_element_by_index` → Navigate through multi-step flows (when safe) +- `input_text` + `input_text` → Fill multiple form fields +- `click_element_by_index` + `click_element_by_index` → Navigate through multi-step flows (when the page does not navigate between clicks) +- `scroll` with num_pages 10 + `extract_structured_data` → Scroll to the bottom of the page to load more content before extracting structured data - File operations + browser actions -**Examples of Efficient Combinations:** -```json -"action": [ - {{"click_element_by_index": {{"index": 15}}}}, - {{"extract_structured_data": {{"query": "Extract the first 3 headlines", "extract_links": false}}}} -] -``` - -```json -"action": [ - {{"input_text": {{"index": 23, "text": "laptop"}}}}, - {{"click_element_by_index": {{"index": 24}}}} -] -``` - -```json -"action": [ - {{"go_to_url": {{"url": "https://example.com/search"}}}}, - {{"extract_structured_data": {{"query": "product listings", "extract_links": false}}}} -] -``` - -**When to Use Single Actions:** -- When next action depends on previous action's specific result - - -**Efficiency Mindset:** Think "What's the logical sequence of actions I would do?" 
and group them together when safe. +Do not try multiple different paths in one step. Always have one clear goal per step. +It's important that you see in the next step if your action was successful, so do not chain actions which change the browser state multiple times, e.g. +- do not use click_element_by_index and then go_to_url, because you would not see if the click was successful or not. +- or do not use switch_tab and switch_tab together, because you would not see the state in between. diff --git a/browser_use/agent/system_prompt_flash.md b/browser_use/agent/system_prompt_flash.md index 0b63287d1..a2f9257fc 100644 --- a/browser_use/agent/system_prompt_flash.md +++ b/browser_use/agent/system_prompt_flash.md @@ -130,48 +130,22 @@ If you are allowed multiple actions, you can specify multiple actions in the lis - If the page changes after an action, the sequence is interrupted and you get the new state. You can see this in your agent history when this happens. - -**IMPORTANT: Be More Efficient with Multi-Action Outputs** +You can output multiple actions in one step. Try to be efficient where it makes sense. Do not predict actions which do not make sense for the current page. 
-Maximize efficiency by combining related actions in one step instead of doing them separately: - -**Highly Recommended Action Combinations:** -- `click_element_by_index` + `extract_structured_data` → Click element and immediately extract information -- `go_to_url` + `extract_structured_data` → Navigate and extract data in one step +**Recommended Action Combinations:** - `input_text` + `click_element_by_index` → Fill form field and submit/search in one step -- `click_element_by_index` + `input_text` → Click input field and fill it immediately -- `click_element_by_index` + `click_element_by_index` → Navigate through multi-step flows (when safe) +- `input_text` + `input_text` → Fill multiple form fields +- `click_element_by_index` + `click_element_by_index` → Navigate through multi-step flows (when the page does not navigate between clicks) +- `scroll` with num_pages 10 + `extract_structured_data` → Scroll to the bottom of the page to load more content before extracting structured data - File operations + browser actions -**Examples of Efficient Combinations:** -```json -"action": [ - {{"click_element_by_index": {{"index": 15}}}}, - {{"extract_structured_data": {{"query": "Extract the first 3 headlines", "extract_links": false}}}} -] -``` - -```json -"action": [ - {{"input_text": {{"index": 23, "text": "laptop"}}}}, - {{"click_element_by_index": {{"index": 24}}}} -] -``` - -```json -"action": [ - {{"go_to_url": {{"url": "https://example.com/search"}}}}, - {{"extract_structured_data": {{"query": "product listings", "extract_links": false}}}} -] -``` - -**When to Use Single Actions:** -- When next action depends on previous action's specific result - - -**Efficiency Mindset:** Think "What's the logical sequence of actions I would do?" and group them together when safe. +Do not try multiple different paths in one step. Always have one clear goal per step. 
+Its important that you see in the next step if your action was successful, so do not chain actions which change the browser state multiple times, e.g. +- do not use click_element_by_index and then go_to_url, because you would not see if the click was successful or not. +- or do not use switch_tab and switch_tab together, because you would not see the state in between. + Be clear and concise in your decision-making. Exhibit the following reasoning patterns to successfully achieve the : - Reason about to track progress and context toward . diff --git a/browser_use/agent/system_prompt_no_thinking.md b/browser_use/agent/system_prompt_no_thinking.md index c55c8f2d1..a79cb569b 100644 --- a/browser_use/agent/system_prompt_no_thinking.md +++ b/browser_use/agent/system_prompt_no_thinking.md @@ -133,45 +133,19 @@ If you are allowed multiple actions, you can specify multiple actions in the lis -**IMPORTANT: Be More Efficient with Multi-Action Outputs** +You can output multiple actions in one step. Try to be efficient where it makes sense. Do not predict actions which do not make sense for the current page. 
-Maximize efficiency by combining related actions in one step instead of doing them separately: - -**Highly Recommended Action Combinations:** -- `click_element_by_index` + `extract_structured_data` → Click element and immediately extract information -- `go_to_url` + `extract_structured_data` → Navigate and extract data in one step +**Recommended Action Combinations:** - `input_text` + `click_element_by_index` → Fill form field and submit/search in one step -- `click_element_by_index` + `input_text` → Click input field and fill it immediately -- `click_element_by_index` + `click_element_by_index` → Navigate through multi-step flows (when safe) +- `input_text` + `input_text` → Fill multiple form fields +- `click_element_by_index` + `click_element_by_index` → Navigate through multi-step flows (when the page does not navigate between clicks) +- `scroll` with num_pages 10 + `extract_structured_data` → Scroll to the bottom of the page to load more content before extracting structured data - File operations + browser actions -**Examples of Efficient Combinations:** -```json -"action": [ - {{"click_element_by_index": {{"index": 15}}}}, - {{"extract_structured_data": {{"query": "Extract the first 3 headlines", "extract_links": false}}}} -] -``` - -```json -"action": [ - {{"input_text": {{"index": 23, "text": "laptop"}}}}, - {{"click_element_by_index": {{"index": 24}}}} -] -``` - -```json -"action": [ - {{"go_to_url": {{"url": "https://example.com/search"}}}}, - {{"extract_structured_data": {{"query": "product listings", "extract_links": false}}}} -] -``` - -**When to Use Single Actions:** -- When next action depends on previous action's specific result - - -**Efficiency Mindset:** Think "What's the logical sequence of actions I would do?" and group them together when safe. +Do not try multiple different paths in one step. Always have one clear goal per step. 
+Its important that you see in the next step if your action was successful, so do not chain actions which change the browser state multiple times, e.g. +- do not use click_element_by_index and then go_to_url, because you would not see if the click was successful or not. +- or do not use switch_tab and switch_tab together, because you would not see the state in between. diff --git a/browser_use/browser/profile.py b/browser_use/browser/profile.py index 86e4eada5..4fa41b1ad 100644 --- a/browser_use/browser/profile.py +++ b/browser_use/browser/profile.py @@ -558,6 +558,11 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro default=True, description="Enable automation-optimized extensions: ad blocking (uBlock Origin), cookie handling (I still don't care about cookies), and URL cleaning (ClearURLs). All extensions work automatically without manual intervention. Extensions are automatically downloaded and loaded when enabled.", ) + cookie_whitelist_domains: list[str] = Field( + default_factory=lambda: ['nature.com', 'qatarairways.com'], + description='List of domains to whitelist in the "I still don\'t care about cookies" extension, preventing automatic cookie banner handling on these sites.', + ) + window_size: ViewportSize | None = Field( default=None, description='Browser window size to use when headless=False.', @@ -753,27 +758,29 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro """ # Extension definitions - optimized for automation and content extraction + # Combines uBlock Origin (ad blocking) + "I still don't care about cookies" (cookie banner handling) extensions = [ { 'name': 'uBlock Origin', 'id': 'cjpalhdlnbpafiamejdnhcphjbkeiagm', - 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dcjpalhdlnbpafiamejdnhcphjbkeiagm%26uc', + 'url': 
'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=133&acceptformat=crx3&x=id%3Dcjpalhdlnbpafiamejdnhcphjbkeiagm%26uc', }, { 'name': "I still don't care about cookies", 'id': 'edibdbjcniadpccecjdfdjjppcpchdlm', - 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dedibdbjcniadpccecjdfdjjppcpchdlm%26uc', + 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=133&acceptformat=crx3&x=id%3Dedibdbjcniadpccecjdfdjjppcpchdlm%26uc', }, { 'name': 'ClearURLs', 'id': 'lckanjgmijmafbedllaakclkaicjfmnk', - 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dlckanjgmijmafbedllaakclkaicjfmnk%26uc', + 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=133&acceptformat=crx3&x=id%3Dlckanjgmijmafbedllaakclkaicjfmnk%26uc', }, # { # 'name': 'Captcha Solver: Auto captcha solving service', # 'id': 'pgojnojmmhpofjgdmaebadhbocahppod', # 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dpgojnojmmhpofjgdmaebadhbocahppod%26uc', # }, + # Consent-O-Matic disabled - using uBlock Origin's cookie lists instead for simplicity # { # 'name': 'Consent-O-Matic', # 'id': 'mdjildafknihdffpkfmmpnpoiajfjnjd', @@ -816,6 +823,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro # Extract extension logger.info(f'📂 Extracting {ext["name"]} extension...') self._extract_extension(crx_file, ext_dir) + extension_paths.append(str(ext_dir)) loaded_extension_names.append(ext['name']) @@ -823,6 +831,11 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro logger.warning(f'⚠️ Failed to setup {ext["name"]} extension: {e}') continue + # Apply minimal patch to cookie extension with configurable whitelist + for i, path in enumerate(extension_paths): + if loaded_extension_names[i] 
== "I still don't care about cookies": + self._apply_minimal_extension_patch(Path(path), self.cookie_whitelist_domains) + if extension_paths: logger.debug(f'[BrowserProfile] 🧩 Extensions loaded ({len(extension_paths)}): [{", ".join(loaded_extension_names)}]') else: @@ -830,6 +843,70 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro return extension_paths + def _apply_minimal_extension_patch(self, ext_dir: Path, whitelist_domains: list[str]) -> None: + """Minimal patch: pre-populate chrome.storage.local with configurable domain whitelist.""" + try: + bg_path = ext_dir / 'data' / 'background.js' + if not bg_path.exists(): + return + + with open(bg_path, encoding='utf-8') as f: + content = f.read() + + # Create the whitelisted domains object for JavaScript with proper indentation + whitelist_entries = [f' "{domain}": true' for domain in whitelist_domains] + whitelist_js = '{\n' + ',\n'.join(whitelist_entries) + '\n }' + + # Find the initialize() function and inject storage setup before updateSettings() + # The actual function uses 2-space indentation, not tabs + old_init = """async function initialize(checkInitialized, magic) { + if (checkInitialized && initialized) { + return; + } + loadCachedRules(); + await updateSettings(); + await recreateTabList(magic); + initialized = true; +}""" + + # New function with configurable whitelist initialization + new_init = f"""// Pre-populate storage with configurable domain whitelist if empty +async function ensureWhitelistStorage() {{ + const result = await chrome.storage.local.get({{ settings: null }}); + if (!result.settings) {{ + const defaultSettings = {{ + statusIndicators: true, + whitelistedDomains: {whitelist_js} + }}; + await chrome.storage.local.set({{ settings: defaultSettings }}); + }} +}} + +async function initialize(checkInitialized, magic) {{ + if (checkInitialized && initialized) {{ + return; + }} + loadCachedRules(); + await ensureWhitelistStorage(); // Add storage 
initialization + await updateSettings(); + await recreateTabList(magic); + initialized = true; +}}""" + + if old_init in content: + content = content.replace(old_init, new_init) + + with open(bg_path, 'w', encoding='utf-8') as f: + f.write(content) + + domain_list = ', '.join(whitelist_domains) + logger.info(f'[BrowserProfile] ✅ Cookie extension: {domain_list} pre-populated in storage') + else: + logger.debug('[BrowserProfile] Initialize function not found for patching') + + except Exception as e: + logger.debug(f'[BrowserProfile] Could not patch extension storage: {e}') + def _download_extension(self, url: str, output_path: Path) -> None: """Download extension .crx file.""" import urllib.request diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index 5e6e9d578..c85631b23 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -268,6 +268,7 @@ class BrowserSession(BaseModel): filter_highlight_ids: bool | None = None, auto_download_pdfs: bool | None = None, profile_directory: str | None = None, + cookie_whitelist_domains: list[str] | None = None, ): # Following the same pattern as AgentSettings in service.py # Only pass non-None values to avoid validation errors @@ -630,6 +631,9 @@ class BrowserSession(BaseModel): # # Wait a bit to ensure page starts loading # await asyncio.sleep(0.5) + # Close any extension options pages that might have opened + await self._close_extension_options_pages() + # Dispatch navigation complete self.logger.debug(f'Dispatching NavigationCompleteEvent for {event.url} (tab #{target_id[-4:]})') await self.event_bus.dispatch( @@ -1589,21 +1593,29 @@ class BrowserSession(BaseModel): except Exception as e: self.logger.warning(f'Failed to remove highlights: {e}') - # Try again with simpler script if the complex one fails - try: - simple_script = """ - const highlights = document.querySelectorAll('[data-browser-use-highlight]'); - highlights.forEach(el => el.remove()); - const container = 
document.getElementById('browser-use-debug-highlights'); - if (container) container.remove(); - """ - cdp_session = await self.get_or_create_cdp_session() - await cdp_session.cdp_client.send.Runtime.evaluate( - params={'expression': simple_script}, session_id=cdp_session.session_id - ) - self.logger.debug('Fallback highlight removal completed') - except Exception as fallback_error: - self.logger.error(f'Both highlight removal attempts failed: {fallback_error}') + + async def _close_extension_options_pages(self) -> None: + """Close any extension options/welcome pages that have opened.""" + try: + # Get all open pages + targets = await self._cdp_get_all_pages() + + for target in targets: + target_url = target.get('url', '') + target_id = target.get('targetId', '') + + # Check if this is an extension options/welcome page + if 'chrome-extension://' in target_url and ( + 'options.html' in target_url or 'welcome.html' in target_url or 'onboarding.html' in target_url + ): + self.logger.info(f'[BrowserSession] 🚫 Closing extension options page: {target_url}') + try: + await self._cdp_close_page(target_id) + except Exception as e: + self.logger.debug(f'[BrowserSession] Could not close extension page {target_id}: {e}') + + except Exception as e: + self.logger.debug(f'[BrowserSession] Error closing extension options pages: {e}') @property def downloaded_files(self) -> list[str]: diff --git a/browser_use/browser/watchdogs/dom_watchdog.py b/browser_use/browser/watchdogs/dom_watchdog.py index 33cad6257..4362801b6 100644 --- a/browser_use/browser/watchdogs/dom_watchdog.py +++ b/browser_use/browser/watchdogs/dom_watchdog.py @@ -255,16 +255,16 @@ class DOMWatchdog(BaseWatchdog): # Get target title safely try: self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: Getting page title...') - title = await asyncio.wait_for(self.browser_session.get_current_page_title(), timeout=2.0) + title = await asyncio.wait_for(self.browser_session.get_current_page_title(), timeout=1.0) 
self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Got title: {title}') except Exception as e: self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Failed to get title: {e}') title = 'Page' - # Get comprehensive page info from CDP + # Get comprehensive page info from CDP with timeout try: self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: Getting page info from CDP...') - page_info = await self._get_page_info() + page_info = await asyncio.wait_for(self._get_page_info(), timeout=1.0) self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Got page info from CDP: {page_info}') except Exception as e: self.logger.debug( diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py index b43eb37ff..a0ec8661c 100644 --- a/browser_use/tools/service.py +++ b/browser_use/tools/service.py @@ -253,7 +253,7 @@ class Tools(Generic[Context]): # Element Interaction Actions @self.registry.action( - 'Click element by index, set while_holding_ctrl=True to open any resulting navigation in a new tab. Only click on indices that are inside your current browser_state. Never click or assume not existing indices.', + 'Click element by index. Only indices from your browser_state are allowed. Never use an index that is not inside your current browser_state. Set while_holding_ctrl=True to open any resulting navigation in a new tab.', param_model=ClickElementAction, ) async def click_element_by_index(params: ClickElementAction, browser_session: BrowserSession): @@ -302,7 +302,7 @@ class Tools(Generic[Context]): return ActionResult(error=error_msg) @self.registry.action( - 'Click and input text into a input interactive element. Only input text into indices that are inside your current browser_state. Never input text into indices that are not inside your current browser_state.', + 'Input text into an input interactive element. Only input text into indices that are inside your current browser_state. 
Never input text into indices that are not inside your current browser_state.', param_model=InputTextAction, ) async def input_text(params: InputTextAction, browser_session: BrowserSession, has_sensitive_data: bool = False): @@ -660,7 +660,9 @@ You will be given a query and the markdown of a webpage. raise RuntimeError(str(e)) @self.registry.action( - 'Scroll the page by specified number of pages (set down=True to scroll down, down=False to scroll up, num_pages=number of pages to scroll like 0.5 for half page, 1.0 for one page, etc.). Optional index parameter to scroll within a specific element or its scroll container (works well for dropdowns and custom UI components). Use index=0 or omit index to scroll the entire page.', + """Scroll the page by specified number of pages (set down=True to scroll down, down=False to scroll up, num_pages=number of pages to scroll like 0.5 for half page, 10.0 for ten pages, etc.). Optional index parameter to scroll within a specific element or its scroll container (works well for dropdowns and custom UI components). If you want to scroll the entire page, don't use index. + Instead of scrolling step after step, use a high number of pages at once like 10 to get to the bottom of the page. + """, param_model=ScrollAction, ) async def scroll(params: ScrollAction, browser_session: BrowserSession):