diff --git a/browser_use/agent/cloud_events.py b/browser_use/agent/cloud_events.py index 83ca52a66..ed7b3c4b3 100644 --- a/browser_use/agent/cloud_events.py +++ b/browser_use/agent/cloud_events.py @@ -188,7 +188,7 @@ class CreateAgentTaskEvent(BaseEvent): user_id: str = Field(max_length=255) # Added for authorization checks device_id: str | None = Field(None, max_length=255) # Device ID for auth lookup agent_session_id: str - llm_model: str = Field(max_length=100) # LLMModel enum value as string + llm_model: str = Field(max_length=200) # LLMModel enum value as string stopped: bool = False paused: bool = False task: str = Field(max_length=MAX_TASK_LENGTH) diff --git a/browser_use/agent/prompts.py b/browser_use/agent/prompts.py index 8c7aa7348..42ef64926 100644 --- a/browser_use/agent/prompts.py +++ b/browser_use/agent/prompts.py @@ -28,7 +28,7 @@ class SystemPrompt: self.use_thinking = use_thinking self.flash_mode = flash_mode prompt = '' - if override_system_message: + if override_system_message is not None: prompt = override_system_message else: self._load_prompt_template() @@ -265,7 +265,8 @@ class AgentMessagePrompt: # Check if current page is a PDF viewer and add appropriate message pdf_message = '' if self.browser_state.is_pdf_viewer: - pdf_message = 'PDF viewer cannot be rendered. In this page, DO NOT use the extract_structured_data action as PDF content cannot be rendered. Use the read_file action on the downloaded PDF in available_file_paths to read the full content.\n\n' + pdf_message = 'PDF viewer cannot be rendered. In this page, DO NOT use the extract_structured_data action as PDF content cannot be rendered. ' + pdf_message += 'Use the read_file action on the downloaded PDF in available_file_paths to read the full text content or scroll in the page to see images/figures if needed.\n\n' # Add recent events if available and requested recent_events_text = '' diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index 68d677b6d..93f95524c 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -624,7 +624,10 @@ class Agent(Generic[Context, AgentStructuredOutput]): self._message_manager.add_new_task(new_task) # Mark as follow-up task and recreate eventbus (gets shut down after each run) self.state.follow_up_task = True - self.eventbus = EventBus(name=f'Agent_{str(self.id)[-self.state.n_steps :]}') + agent_id_suffix = str(self.id)[-4:].replace('-', '_') + if agent_id_suffix and agent_id_suffix[0].isdigit(): + agent_id_suffix = 'a' + agent_id_suffix + self.eventbus = EventBus(name=f'Agent_{agent_id_suffix}') # Re-register cloud sync handler if it exists (if not disabled) if hasattr(self, 'cloud_sync') and self.cloud_sync and self.enable_cloud_sync: @@ -681,7 +684,6 @@ class Agent(Generic[Context, AgentStructuredOutput]): # Always take screenshots for all steps self.logger.debug('šŸ“ø Requesting browser state with include_screenshot=True') browser_state_summary = await self.browser_session.get_browser_state_summary( - cache_clickable_elements_hashes=True, include_screenshot=True, # always capture even if use_vision=False so that cloud sync is useful (it's fast now anyway) include_recent_events=self.include_recent_events, ) @@ -1662,7 +1664,6 @@ class Agent(Generic[Context, AgentStructuredOutput]): # This prevents stale element detection but doesn't refresh before execution if action.get_index() is not None and i != 0: new_browser_state_summary = await self.browser_session.get_browser_state_summary( - cache_clickable_elements_hashes=False, include_screenshot=False, ) new_selector_map = new_browser_state_summary.dom_state.selector_map @@ -1888,9 +1889,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): async def _execute_history_step(self, history_item: AgentHistory, delay: float) -> list[ActionResult]: """Execute a single step from history with element validation""" assert self.browser_session is not None, 'BrowserSession is not set up' - state = await self.browser_session.get_browser_state_summary( - cache_clickable_elements_hashes=False, include_screenshot=False - ) + state = await self.browser_session.get_browser_state_summary(include_screenshot=False) if not state or not history_item.model_output: raise ValueError('Invalid state or model output') updated_actions = [] diff --git a/browser_use/browser/events.py b/browser_use/browser/events.py index 15f388bed..a89d9412c 100644 --- a/browser_use/browser/events.py +++ b/browser_use/browser/events.py @@ -190,7 +190,6 @@ class BrowserStateRequestEvent(BaseEvent[BrowserStateSummary]): include_dom: bool = True include_screenshot: bool = True - cache_clickable_elements_hashes: bool = True include_recent_events: bool = False event_timeout: float | None = _get_timeout('TIMEOUT_BrowserStateRequestEvent', 30.0) # seconds diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index ca81b5c46..2891c1ccc 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -837,6 +837,17 @@ class BrowserSession(BaseModel): self.agent_focus = await self.get_or_create_cdp_session(target_id=last_target_id, focus=True) raise + # Dispatch NavigationCompleteEvent when tab focus changes + # This ensures PDF detection and downloads work when switching tabs + if event.target_id and event.url: + self.logger.debug(f'šŸ”„ Dispatching NavigationCompleteEvent for tab switch to {event.url[:50]}...') + await self.event_bus.dispatch( + NavigationCompleteEvent( + target_id=event.target_id, + url=event.url, + ) + ) + # self.logger.debug('šŸ”„ AgentFocusChangedEvent handler completed successfully') async def on_FileDownloadedEvent(self, event: FileDownloadedEvent) -> None: @@ -1061,7 +1072,6 @@ class BrowserSession(BaseModel): @observe_debug(ignore_input=True, ignore_output=True, name='get_browser_state_summary') async def get_browser_state_summary( self, - cache_clickable_elements_hashes: bool = True, include_screenshot: bool = True, cached: bool = False, include_recent_events: bool = False, @@ -1088,7 +1098,6 @@ class BrowserSession(BaseModel): BrowserStateRequestEvent( include_dom=True, include_screenshot=include_screenshot, - cache_clickable_elements_hashes=cache_clickable_elements_hashes, include_recent_events=include_recent_events, ) ), diff --git a/browser_use/browser/watchdogs/downloads_watchdog.py b/browser_use/browser/watchdogs/downloads_watchdog.py index f31d27b81..bc6f0d4c6 100644 --- a/browser_use/browser/watchdogs/downloads_watchdog.py +++ b/browser_use/browser/watchdogs/downloads_watchdog.py @@ -56,6 +56,7 @@ class DownloadsWatchdog(BaseWatchdog): _cdp_event_tasks: set[asyncio.Task] = PrivateAttr(default_factory=set) # Track CDP event handler tasks _cdp_downloads_info: dict[str, dict[str, Any]] = PrivateAttr(default_factory=dict) # Map guid -> info _use_js_fetch_for_local: bool = PrivateAttr(default=False) # Guard JS fetch path for local regular downloads + _session_pdf_urls: dict[str, str] = PrivateAttr(default_factory=dict) # URL -> path for PDFs downloaded this session async def on_BrowserLaunchEvent(self, event: BrowserLaunchEvent) -> None: self.logger.debug(f'[DownloadsWatchdog] Received BrowserLaunchEvent, EventBus ID: {id(self.event_bus)}') @@ -123,6 +124,7 @@ class DownloadsWatchdog(BaseWatchdog): self._sessions_with_listeners.clear() self._active_downloads.clear() self._pdf_viewer_cache.clear() + self._session_pdf_urls.clear() async def on_NavigationCompleteEvent(self, event: NavigationCompleteEvent) -> None: """Check for PDFs after navigation completes.""" @@ -801,13 +803,26 @@ class DownloadsWatchdog(BaseWatchdog): self.logger.debug(f'[DownloadsWatchdog] Generated filename: {pdf_filename}') - # Check if already downloaded by looking in the downloads directory + # Check if already downloaded in this session + self.logger.debug(f'[DownloadsWatchdog] PDF_URL: {pdf_url}, session_pdf_urls: {self._session_pdf_urls}') + if pdf_url in self._session_pdf_urls: + existing_path = self._session_pdf_urls[pdf_url] + self.logger.debug(f'[DownloadsWatchdog] PDF already downloaded in session: {existing_path}') + return existing_path + + # Generate unique filename if file exists from previous run downloads_dir = str(self.browser_session.browser_profile.downloads_path) - if os.path.exists(downloads_dir): - existing_files = os.listdir(downloads_dir) - if pdf_filename in existing_files: - self.logger.debug(f'[DownloadsWatchdog] PDF already downloaded: {pdf_filename}') - return None + os.makedirs(downloads_dir, exist_ok=True) + final_filename = pdf_filename + existing_files = os.listdir(downloads_dir) + if pdf_filename in existing_files: + # Generate unique name with (1), (2), etc. + base, ext = os.path.splitext(pdf_filename) + counter = 1 + while f'{base} ({counter}){ext}' in existing_files: + counter += 1 + final_filename = f'{base} ({counter}){ext}' + self.logger.debug(f'[DownloadsWatchdog] File exists, using: {final_filename}') self.logger.debug(f'[DownloadsWatchdog] Starting PDF download from: {pdf_url[:100]}...') @@ -858,12 +873,10 @@ class DownloadsWatchdog(BaseWatchdog): download_result = result.get('result', {}).get('value', {}) if download_result and download_result.get('data') and len(download_result['data']) > 0: - # Ensure unique filename - downloads_dir = str(self.browser_session.browser_profile.downloads_path) # Ensure downloads directory exists + downloads_dir = str(self.browser_session.browser_profile.downloads_path) os.makedirs(downloads_dir, exist_ok=True) - unique_filename = await self._get_unique_filename(downloads_dir, pdf_filename) - download_path = os.path.join(downloads_dir, unique_filename) + download_path = os.path.join(downloads_dir, final_filename) # Save the PDF asynchronously async with await anyio.open_file(download_path, 'wb') as f: @@ -886,13 +899,16 @@ class DownloadsWatchdog(BaseWatchdog): f'[DownloadsWatchdog] āœ… Auto-downloaded PDF ({cache_status}, {response_size:,} bytes): {download_path}' ) + # Store URL->path mapping for this session + self._session_pdf_urls[pdf_url] = download_path + # Emit file downloaded event - self.logger.debug(f'[DownloadsWatchdog] Dispatching FileDownloadedEvent for {unique_filename}') + self.logger.debug(f'[DownloadsWatchdog] Dispatching FileDownloadedEvent for {final_filename}') self.event_bus.dispatch( FileDownloadedEvent( url=pdf_url, path=download_path, - file_name=unique_filename, + file_name=final_filename, file_size=response_size, file_type='pdf', mime_type='application/pdf', diff --git a/browser_use/filesystem/file_system.py b/browser_use/filesystem/file_system.py index c0cb7eaa7..1f4fc3f18 100644 --- a/browser_use/filesystem/file_system.py +++ b/browser_use/filesystem/file_system.py @@ -272,7 +272,7 @@ class FileSystem: reader = pypdf.PdfReader(full_filename) num_pages = len(reader.pages) - MAX_PDF_PAGES = 10 + MAX_PDF_PAGES = 20 extra_pages = num_pages - MAX_PDF_PAGES extracted_text = '' for page in reader.pages[:MAX_PDF_PAGES]: diff --git a/browser_use/mcp/server.py b/browser_use/mcp/server.py index d9cec1e54..1074f9329 100644 --- a/browser_use/mcp/server.py +++ b/browser_use/mcp/server.py @@ -768,7 +768,7 @@ class BrowserUseServer: if not self.browser_session: return 'Error: No browser session active' - state = await self.browser_session.get_browser_state_summary(cache_clickable_elements_hashes=False) + state = await self.browser_session.get_browser_state_summary() result = { 'url': state.url, @@ -819,10 +819,15 @@ class BrowserUseServer: ExtractAction = create_model( 'ExtractAction', __base__=ActionModel, - extract_structured_data=(dict[str, Any], {'query': query, 'extract_links': extract_links}), + extract_structured_data=dict[str, Any], ) - action = ExtractAction() + # Use model_validate because Pyright does not understand the dynamic model + action = ExtractAction.model_validate( + { + 'extract_structured_data': {'query': query, 'extract_links': extract_links}, + } + ) action_result = await self.tools.act( action=action, browser_session=self.browser_session, diff --git a/docs/customize/agent/supported-models.mdx b/docs/customize/agent/supported-models.mdx index 499a14976..5b5883189 100644 --- a/docs/customize/agent/supported-models.mdx +++ b/docs/customize/agent/supported-models.mdx @@ -243,6 +243,33 @@ Required environment variables: ALIBABA_CLOUD= ``` +## ModelScope [example](https://github.com/browser-use/browser-use/blob/main/examples/models/modelscope_example.py) + +```python +from browser_use import Agent, ChatOpenAI +from dotenv import load_dotenv +import os + +load_dotenv() + +# Get API key from https://www.modelscope.cn/docs/model-service/API-Inference/intro +api_key = os.getenv('MODELSCOPE_API_KEY') +base_url = 'https://api-inference.modelscope.cn/v1/' + +llm = ChatOpenAI(model='Qwen/Qwen2.5-VL-72B-Instruct', api_key=api_key, base_url=base_url) + +agent = Agent( + task="Your task here", + llm=llm, + use_vision=True +) +``` + +Required environment variables: + +```bash .env +MODELSCOPE_API_KEY= +``` ## Other models (DeepSeek, Novita, X...) diff --git a/examples/custom-functions/parallel_agents.py b/examples/custom-functions/parallel_agents.py new file mode 100644 index 000000000..6a5958ea3 --- /dev/null +++ b/examples/custom-functions/parallel_agents.py @@ -0,0 +1,312 @@ +""" +Simple parallel multi-agent example. + +This launches multiple agents in parallel to work on different tasks simultaneously. +No complex orchestrator - just direct parallel execution. + +@file purpose: Demonstrates parallel multi-agent execution using asyncio +""" + +import asyncio +import os +import sys +from typing import List + +sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + +from dotenv import load_dotenv +load_dotenv() + +from browser_use import Agent +from browser_use.llm.google import ChatGoogle + +# ============================================================================ +# šŸ”§ SIMPLE CONFIGURATION - CHANGE THIS TO YOUR DESIRED TASK +# ============================================================================ + +MAIN_TASK = "find age of ronaldo and messi" + +# Simple test - let's start with just one person to see what happens +# MAIN_TASK = "find age of elon musk" + +# ============================================================================ + + +async def create_subtasks(main_task: str, llm) -> list[str]: + """ + Use LLM to break down main task into logical subtasks + + Real examples of how this works: + + Input: "what is the revenue of nvidia, microsoft, tesla" + Output: [ + "Find Nvidia's current revenue and financial data", + "Find Microsoft's current revenue and financial data", + "Find Tesla's current revenue and financial data" + ] + + Input: "what are ages of musk, altman, bezos, gates" + Output: [ + "Find Elon Musk's age and birth date", + "Find Sam Altman's age and birth date", + "Find Jeff Bezos's age and birth date", + "Find Bill Gates's age and birth date" + ] + + Input: "what is the population of tokyo, new york, london, paris" + Output: [ + "Find Tokyo's current population", + "Find New York's current population", + "Find London's current population", + "Find Paris's current population" + ] + + Input: "name top 10 yc companies by revenue" + Output: [ + "Research Y Combinator's top companies by revenue", + "Find revenue data for top YC companies", + "Compile list of top 10 YC companies by revenue" + ] + """ + + prompt = f""" + Break down this main task into individual, separate subtasks where each subtask focuses on ONLY ONE specific person, company, or item: + + Main task: {main_task} + + RULES: + - Each subtask must focus on ONLY ONE person/company/item + - Do NOT combine multiple people/companies/items in one subtask + - Each subtask should be completely independent + - If the main task mentions multiple items, create one subtask per item + + Return only the subtasks, one per line, without numbering or bullets. + Each line should focus on exactly ONE person/company/item. + """ + + try: + # Use the correct method for ChatGoogle + response = await llm.ainvoke(prompt) + + # Debug: Print the response type and content + print(f"DEBUG: Response type: {type(response)}") + print(f"DEBUG: Response content: {response}") + + # Handle different response types - ChatGoogle returns string content + if hasattr(response, 'content'): + content = response.content + elif isinstance(response, str): + content = response + elif hasattr(response, 'text'): + content = response.text + else: + # Convert to string if it's some other type + content = str(response) + + # Split by newlines and clean up + subtasks = [task.strip() for task in content.strip().split('\n') if task.strip()] + + # Remove any numbering or bullets that the LLM might add + cleaned_subtasks = [] + for task in subtasks: + # Remove common prefixes like "1. ", "- ", "* ", etc. + cleaned = task.lstrip('0123456789.-* ') + if cleaned: + cleaned_subtasks.append(cleaned) + + return cleaned_subtasks if cleaned_subtasks else simple_split_task(main_task) + except Exception as e: + print(f"Error creating subtasks: {e}") + # Fallback to simple split + return simple_split_task(main_task) + + +def simple_split_task(main_task: str) -> list[str]: + """Simple fallback: split task by common separators""" + task_lower = main_task.lower() + + # Try to split by common separators + if " and " in task_lower: + parts = main_task.split(" and ") + return [part.strip() for part in parts if part.strip()] + elif ", " in main_task: + parts = main_task.split(", ") + return [part.strip() for part in parts if part.strip()] + elif "," in main_task: + parts = main_task.split(",") + return [part.strip() for part in parts if part.strip()] + + # If no separators found, return the original task + return [main_task] + + +async def run_single_agent(task: str, llm, agent_id: int) -> tuple[int, str]: + """Run a single agent and return its result""" + print(f"šŸš€ Agent {agent_id} starting: {task}") + print(f" šŸ“ This agent will focus ONLY on: {task}") + print(f" 🌐 Creating isolated browser instance for agent {agent_id}") + + try: + # Create agent with its own browser session (separate browser instance) + from browser_use.browser import BrowserSession + from browser_use.browser.profile import BrowserProfile + import tempfile + + # Create a unique temp directory for this agent's browser data + temp_dir = tempfile.mkdtemp(prefix=f"browser_agent_{agent_id}_") + + # Create browser profile with custom user data directory and single tab focus + profile = BrowserProfile() + profile.user_data_dir = temp_dir + profile.headless = False # Set to True if you want headless mode + profile.keep_alive = False # Don't keep browser alive after task + + # Add custom args to prevent new tabs and popups + profile.args = [ + '--disable-popup-blocking', + '--disable-extensions', + '--disable-plugins', + '--disable-images', # Faster loading + '--no-first-run', + '--disable-default-apps', + '--disable-background-timer-throttling', + '--disable-backgrounding-occluded-windows', + '--disable-renderer-backgrounding', + ] + + # Create a new browser session for each agent with the custom profile + browser_session = BrowserSession(browser_profile=profile) + + # Debug: Check initial tab count + try: + await browser_session.start() + initial_tabs = await browser_session._cdp_get_all_pages() + print(f" šŸ“Š Agent {agent_id} initial tab count: {len(initial_tabs)}") + except Exception as e: + print(f" āš ļø Could not check initial tabs for agent {agent_id}: {e}") + + # Create agent with the dedicated browser session and disable auto URL detection + agent = Agent(task=task, llm=llm, browser_session=browser_session, preload=False) + + # Run the agent with timeout to prevent hanging + try: + result = await asyncio.wait_for(agent.run(), timeout=300) # 5 minute timeout + except asyncio.TimeoutError: + print(f"ā° Agent {agent_id} timed out after 5 minutes") + result = "Task timed out" + + # Debug: Check final tab count + try: + final_tabs = await browser_session._cdp_get_all_pages() + print(f" šŸ“Š Agent {agent_id} final tab count: {len(final_tabs)}") + for i, tab in enumerate(final_tabs): + print(f" Tab {i+1}: {tab.get('url', 'unknown')[:50]}...") + except Exception as e: + print(f" āš ļø Could not check final tabs for agent {agent_id}: {e}") + + # Extract clean result from the agent history + clean_result = extract_clean_result(result) + + # Close the browser session for this agent + try: + await browser_session.kill() + except Exception as e: + print(f"āš ļø Warning: Error closing browser for agent {agent_id}: {e}") + + print(f"āœ… Agent {agent_id} completed and browser closed: {task}") + + return agent_id, clean_result + + except Exception as e: + error_msg = f"Agent {agent_id} failed: {str(e)}" + print(f"āŒ {error_msg}") + return agent_id, error_msg + + +def extract_clean_result(agent_result) -> str: + """Extract clean result from agent history""" + try: + # Get the last result from the agent history + if hasattr(agent_result, 'all_results') and agent_result.all_results: + last_result = agent_result.all_results[-1] + if hasattr(last_result, 'extracted_content') and last_result.extracted_content: + return last_result.extracted_content + + # Fallback to string representation + return str(agent_result) + except Exception: + return "Result extraction failed" + + +async def run_parallel_agents(): + """Run multiple agents in parallel on different tasks""" + + # Use Gemini 1.5 Flash + llm = ChatGoogle(model="gemini-1.5-flash") + + # Main task to break down - use the simple configuration + main_task = MAIN_TASK + + print(f"šŸŽÆ Main task: {main_task}") + print("🧠 Creating subtasks using LLM...") + + # Create subtasks using LLM + subtasks = await create_subtasks(main_task, llm) + + print(f"šŸ“‹ Created {len(subtasks)} subtasks:") + for i, task in enumerate(subtasks, 1): + print(f" {i}. {task}") + + print(f"\nšŸ”„ Starting {len(subtasks)} agents in parallel...") + print(f"šŸ” Each agent will get its own browser instance with exactly ONE tab") + print(f"šŸ“Š Expected: {len(subtasks)} browser instances, {len(subtasks)} tabs total") + + # Create tasks for parallel execution + agent_tasks = [ + run_single_agent(task, llm, i+1) + for i, task in enumerate(subtasks) + ] + + # Run all agents in parallel using asyncio.gather + results = await asyncio.gather(*agent_tasks) + + # Print results + print("\n" + "="*60) + print("šŸ“Š PARALLEL EXECUTION RESULTS") + print("="*60) + + for agent_id, result in results: + print(f"\nšŸ¤– Agent {agent_id} result:") + print(f"Task: {subtasks[agent_id-1]}") + print(f"Result: {result}") + print("-" * 50) + + print(f"\nšŸŽ‰ All {len(subtasks)} parallel agents completed!") + + +def main(): + """Main function to run parallel agents""" + # Check if Google API key is available + api_key = os.getenv('GOOGLE_API_KEY') + if not api_key: + print('āŒ Error: GOOGLE_API_KEY environment variable not set') + print('Please set your Google API key to use parallel agents') + print('You can set it with: export GOOGLE_API_KEY="your-key-here"') + sys.exit(1) + + # Check if API key looks valid (Google API keys are typically 39 characters) + if len(api_key) < 20: + print(f'āš ļø Warning: GOOGLE_API_KEY seems too short ({len(api_key)} characters)') + print('Google API keys are typically 39 characters long') + print('Continuing anyway, but this might cause authentication issues...') + + print('šŸš€ Starting parallel multi-agent example...') + print(f'šŸ“ Task: {MAIN_TASK}') + print('This will dynamically create agents based on task complexity') + print('-' * 60) + + asyncio.run(run_parallel_agents()) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/models/modelscope_example.py b/examples/models/modelscope_example.py new file mode 100644 index 000000000..2be79cea4 --- /dev/null +++ b/examples/models/modelscope_example.py @@ -0,0 +1,34 @@ +""" +Simple try of the agent. + +@dev You need to add MODELSCOPE_API_KEY to your environment variables. +""" + +import asyncio +import os + +from dotenv import load_dotenv + +from browser_use import Agent, ChatOpenAI + +# dotenv +load_dotenv() + +api_key = os.getenv('MODELSCOPE_API_KEY', '') +if not api_key: + raise ValueError('MODELSCOPE_API_KEY is not set') + + +async def run_search(): + agent = Agent( + # task=('go to amazon.com, search for laptop'), + task=('go to google, search for modelscope'), + llm=ChatOpenAI(base_url='https://api-inference.modelscope.cn/v1/', model='Qwen/Qwen2.5-VL-72B-Instruct', api_key=api_key), + use_vision=False, + ) + + await agent.run() + + +if __name__ == '__main__': + asyncio.run(run_search()) diff --git a/tests/ci/test_browser_event_ClickElementEvent.py b/tests/ci/test_browser_event_ClickElementEvent.py index 6a8b62684..6c2fa03cf 100644 --- a/tests/ci/test_browser_event_ClickElementEvent.py +++ b/tests/ci/test_browser_event_ClickElementEvent.py @@ -143,7 +143,7 @@ class TestClickElementEvent: await asyncio.sleep(0.5) # Give page time to load # Initialize the DOM state to populate the selector map - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() # Get the selector map selector_map = await browser_session.get_selector_map() @@ -406,7 +406,7 @@ class TestClickElementEvent: await asyncio.sleep(0.5) # Get the clickable elements - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() selector_map = await browser_session.get_selector_map() # Find the inline element @@ -488,7 +488,7 @@ class TestClickElementEvent: await asyncio.sleep(0.5) # Get the clickable elements - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() selector_map = await browser_session.get_selector_map() # Find the block element inside inline @@ -576,7 +576,7 @@ class TestClickElementEvent: await asyncio.sleep(0.5) # Get the clickable elements - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() selector_map = await browser_session.get_selector_map() # Find the target element @@ -636,7 +636,7 @@ class TestClickElementEvent: await asyncio.sleep(0.5) # Get the clickable elements - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() selector_map = await browser_session.get_selector_map() # Find the file input @@ -699,7 +699,7 @@ class TestClickElementEvent: await asyncio.sleep(0.5) # Get the clickable elements - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() selector_map = await browser_session.get_selector_map() # Find the select element @@ -1098,7 +1098,7 @@ class TestClickElementEvent: await asyncio.sleep(0.5) # Initialize the DOM state to populate the selector map - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() # Get the selector map selector_map = await browser_session.get_selector_map() diff --git a/tests/ci/test_browser_event_GetDropdownOptionsEvent.py b/tests/ci/test_browser_event_GetDropdownOptionsEvent.py index d582bee96..3d3193e25 100644 --- a/tests/ci/test_browser_event_GetDropdownOptionsEvent.py +++ b/tests/ci/test_browser_event_GetDropdownOptionsEvent.py @@ -286,7 +286,7 @@ class TestGetDropdownOptionsEvent: await tools.act(GoToUrlActionModel(**goto_action), browser_session) # Initialize the DOM state to populate the selector map - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() # Get the selector map and find the select element selector_map = await browser_session.get_selector_map() @@ -344,7 +344,7 @@ class TestGetDropdownOptionsEvent: await tools.act(GoToUrlActionModel(**goto_action), browser_session) # Initialize the DOM state - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() # Get the selector map and find the ARIA menu selector_map = await browser_session.get_selector_map() @@ -406,7 +406,7 @@ class TestGetDropdownOptionsEvent: await tools.act(GoToUrlActionModel(**goto_action), browser_session) # Initialize the DOM state - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() # Get the selector map and find the custom dropdown selector_map = await browser_session.get_selector_map() @@ -495,7 +495,7 @@ class TestSelectDropdownOptionEvent: await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0) # Initialize the DOM state - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() # Get the selector map and find the select element selector_map = await browser_session.get_selector_map() @@ -543,7 +543,7 @@ class TestSelectDropdownOptionEvent: await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0) # Initialize the DOM state - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() # Get the selector map and find the ARIA menu selector_map = await browser_session.get_selector_map() @@ -595,7 +595,7 @@ class TestSelectDropdownOptionEvent: await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0) # Initialize the DOM state - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() # Get the selector map and find the custom dropdown selector_map = await browser_session.get_selector_map() @@ -643,7 +643,7 @@ class TestSelectDropdownOptionEvent: await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0) # Initialize the DOM state - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() # Get the selector map and find the select element selector_map = await browser_session.get_selector_map() diff --git a/tests/ci/test_browser_event_GetDropdownOptionsEvent_aria_menus.py b/tests/ci/test_browser_event_GetDropdownOptionsEvent_aria_menus.py index eddf4cb1a..54a1a07a1 100644 --- a/tests/ci/test_browser_event_GetDropdownOptionsEvent_aria_menus.py +++ b/tests/ci/test_browser_event_GetDropdownOptionsEvent_aria_menus.py @@ -165,7 +165,7 @@ class TestARIAMenuDropdown: await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0) # Initialize the DOM state to populate the selector map - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() # Get the selector map selector_map = await browser_session.get_selector_map() @@ -232,7 +232,7 @@ class TestARIAMenuDropdown: await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0) # Initialize the DOM state to populate the selector map - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() # Get the selector map selector_map = await browser_session.get_selector_map() @@ -302,7 +302,7 @@ class TestARIAMenuDropdown: await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0) # Initialize the DOM state to populate the selector map - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() # Get the selector map selector_map = await browser_session.get_selector_map() diff --git a/tests/ci/test_browser_event_NavigateToUrlEvent.py b/tests/ci/test_browser_event_NavigateToUrlEvent.py index 198c05701..a6a008d4c 100644 --- a/tests/ci/test_browser_event_NavigateToUrlEvent.py +++ b/tests/ci/test_browser_event_NavigateToUrlEvent.py @@ -97,7 +97,7 @@ class TestNavigateToUrlEvent: # Test that get_state_summary works try: - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() assert False, 'Expected throw error when navigating to non-existent page' except Exception as e: pass diff --git a/tests/ci/test_browser_session_element_cache.py b/tests/ci/test_browser_session_element_cache.py index 293ab5526..132b8c716 100644 --- a/tests/ci/test_browser_session_element_cache.py +++ b/tests/ci/test_browser_session_element_cache.py @@ -88,7 +88,7 @@ async def test_assumption_1_dom_processing_works(browser_session, httpserver): await event.event_result(raise_if_any=True, raise_if_none=False) # Trigger DOM processing - state = await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=False) + state = await browser_session.get_browser_state_summary() print('DOM processing result:') print(f' - Elements found: {len(state.dom_state.selector_map)}') @@ -109,7 +109,7 @@ async def test_assumption_2_cached_selector_map_persists(browser_session, httpse await event.event_result(raise_if_any=True, raise_if_none=False) # Trigger DOM processing and cache - state = await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=False) + state = await browser_session.get_browser_state_summary() initial_selector_map = dict(state.dom_state.selector_map) # Check if cached selector map is still available @@ -136,7 +136,7 @@ async def test_assumption_3_action_gets_same_selector_map(browser_session, tools await event.event_result(raise_if_any=True, raise_if_none=False) # Trigger DOM processing and cache - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=False) + await browser_session.get_browser_state_summary() cached_selector_map = await browser_session.get_selector_map() print('Pre-action state:') @@ -174,7 +174,7 @@ async def test_assumption_4_click_action_specific_issue(browser_session, tools, await event.event_result(raise_if_any=True, raise_if_none=False) # Trigger DOM processing and cache - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=False) + await browser_session.get_browser_state_summary() cached_selector_map = await browser_session.get_selector_map() print('Pre-click state:') @@ -224,7 +224,7 @@ async def test_assumption_5_multiple_get_selector_map_calls(browser_session, htt await event.event_result(raise_if_any=True, raise_if_none=False) # Trigger DOM processing and cache - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=False) + await browser_session.get_browser_state_summary() # Call get_selector_map multiple times map1 = await browser_session.get_selector_map() diff --git a/tests/ci/test_browser_watchdog_screenshots.py b/tests/ci/test_browser_watchdog_screenshots.py index 001493bf1..ec8900c5a 100644 --- a/tests/ci/test_browser_watchdog_screenshots.py +++ b/tests/ci/test_browser_watchdog_screenshots.py @@ -104,7 +104,7 @@ class TestHeadlessScreenshots: await event.event_result(raise_if_any=True, raise_if_none=False) # Get state summary - state = await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=False) + state = await browser_session.get_browser_state_summary() # Verify screenshot is included assert state.screenshot is not None @@ -143,7 +143,7 @@ class TestHeadlessScreenshots: # Browser should auto-create a new page on about:blank with animation # With AboutBlankWatchdog, about:blank pages now have animated content, so they should have screenshots - state = await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=False) + state = await browser_session.get_browser_state_summary() assert state.screenshot is not None, 'Screenshot should not be None for animated about:blank pages' assert state.url == 'about:blank' or state.url.startswith('chrome://'), f'Expected empty page but got {state.url}' @@ -153,7 +153,7 @@ class TestHeadlessScreenshots: await event.event_result(raise_if_any=True, raise_if_none=False) # Get state with screenshot - state = await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=False) + state = await browser_session.get_browser_state_summary() # Should have a screenshot now assert state.screenshot is not None, 'Screenshot should not be None for real pages' assert isinstance(state.screenshot, str) diff --git a/tests/ci/test_tools.py b/tests/ci/test_tools.py index 228367619..bd4378b4e 100644 --- a/tests/ci/test_tools.py +++ b/tests/ci/test_tools.py @@ -413,7 +413,7 @@ class TestToolsIntegration: await asyncio.sleep(1.0) # Initialize the DOM state to populate the selector map - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() # Get the selector map selector_map = await browser_session.get_selector_map() @@ -540,7 +540,7 @@ class TestToolsIntegration: await asyncio.sleep(1.0) # populate the selector map with highlight indices - await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True) + await browser_session.get_browser_state_summary() # Now get the selector map which should contain our dropdown selector_map = await browser_session.get_selector_map() diff --git a/tests/scripts/debug_iframe_scrolling.py b/tests/scripts/debug_iframe_scrolling.py index e2914eac9..7b02fba66 100644 --- a/tests/scripts/debug_iframe_scrolling.py +++ b/tests/scripts/debug_iframe_scrolling.py @@ -131,9 +131,7 @@ async def debug_iframe_scrolling(): """Capture DOM state and return analysis""" print(f'\nšŸ“ø Capturing DOM state: {label}') state_event = browser_session.event_bus.dispatch( - BrowserStateRequestEvent( - include_dom=True, include_screenshot=False, cache_clickable_elements_hashes=True, include_recent_events=False - ) + BrowserStateRequestEvent(include_dom=True, include_screenshot=False, include_recent_events=False) ) browser_state = await state_event.event_result()