"""Integration tests for ``Controller`` using a real browser and a local HTTP server."""

import asyncio
import time

import pytest
from pydantic import BaseModel
from pytest_httpserver import HTTPServer

from browser_use.agent.views import ActionModel, ActionResult
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContext
from browser_use.controller.service import Controller
from browser_use.controller.views import (
    ClickElementAction,
    CloseTabAction,
    DoneAction,
    DragDropAction,
    GoToUrlAction,
    InputTextAction,
    NoParamsAction,
    OpenTabAction,
    ScrollAction,
    SearchGoogleAction,
    SendKeysAction,
    SwitchTabAction,
)

# ---------------------------------------------------------------------------
# Test pages
#
# NOTE(review): the markup of every page below was reconstructed from the
# titles, headings, element ids and texts that the assertions in this file
# rely on. The scripts and layout are a best-effort rebuild; confirm them
# against the upstream fixtures before relying on exact DOM details.
# ---------------------------------------------------------------------------

_HOME_PAGE = (
    '<html><head><title>Test Home Page</title></head>'
    '<body><h1>Test Home Page</h1><p>Welcome to the test site</p></body></html>'
)

_PAGE_1 = (
    '<html><head><title>Test Page 1</title></head>'
    '<body><h1>Test Page 1</h1><p>This is test page 1</p></body></html>'
)

_PAGE_2 = (
    '<html><head><title>Test Page 2</title></head>'
    '<body><h1>Test Page 2</h1><p>This is test page 2</p></body></html>'
)

_SEARCH_RESULTS_PAGE = """
<html>
<head><title>Search Results</title></head>
<body>
    <h1>Search Results</h1>
    <div class="results">
        <div class="result">Result 1</div>
        <div class="result">Result 2</div>
        <div class="result">Result 3</div>
    </div>
</body>
</html>
"""

_SEARCH_FORM_PAGE = """
<html>
<head><title>Search Form</title></head>
<body>
    <h1>Search Form</h1>
    <form action="/search" method="get">
        <input type="text" id="searchbox" name="q" placeholder="Search...">
        <input type="submit" value="Search">
    </form>
</body>
</html>
"""

# The drag is driven by raw mouse events because the test feeds the controller
# plain page coordinates; the page moves #draggable into #zone2 on mouseup.
_DRAG_DROP_PAGE = """
<!DOCTYPE html>
<html>
<head>
    <title>Drag and Drop Test</title>
    <style>
        body { margin: 20px; font-family: sans-serif; }
        .dropzone {
            display: inline-block;
            vertical-align: top;
            width: 200px;
            height: 200px;
            margin-right: 40px;
            border: 2px dashed #888;
        }
        #draggable {
            width: 80px;
            height: 40px;
            margin: 20px;
            background: #4caf50;
            color: #fff;
            cursor: move;
            user-select: none;
        }
    </style>
</head>
<body>
    <h1>Drag and Drop Test</h1>
    <div id="zone1" class="dropzone">Zone 1<div id="draggable">Drag me</div></div>
    <div id="zone2" class="dropzone">Zone 2</div>
    <div id="status"></div>
    <div id="log">Event log:</div>
    <script>
        (function () {
            const draggable = document.getElementById('draggable');
            const zone2 = document.getElementById('zone2');
            const logEl = document.getElementById('log');
            const statusEl = document.getElementById('status');
            let dragging = false;

            function record(name) {
                logEl.textContent += ' ' + name;
            }

            draggable.addEventListener('mousedown', function (event) {
                dragging = true;
                record('mousedown');
                event.preventDefault();
            });

            document.addEventListener('mouseup', function (event) {
                if (!dragging) {
                    return;
                }
                dragging = false;
                record('mouseup');
                const rect = zone2.getBoundingClientRect();
                const inside =
                    event.clientX >= rect.left && event.clientX <= rect.right &&
                    event.clientY >= rect.top && event.clientY <= rect.bottom;
                if (inside) {
                    zone2.appendChild(draggable);
                    statusEl.textContent = 'Dropped in zone2';
                }
            });
        })();
    </script>
</body>
</html>
"""

# Entries in #result are separated with " | " so the Python literal needs no
# escaped newlines inside the embedded script.
_KEYBOARD_PAGE = """
<!DOCTYPE html>
<html>
<head><title>Keyboard Test</title></head>
<body>
    <h1>Keyboard Actions Test</h1>
    <form id="testForm">
        <input type="text" id="textInput" placeholder="Type here...">
        <textarea id="textarea" placeholder="Type here..."></textarea>
        <input type="button" id="submitButton" value="Submit">
    </form>
    <div id="result"></div>
    <script>
        (function () {
            const resultEl = document.getElementById('result');

            function record(entry) {
                resultEl.textContent += entry + ' | ';
            }

            document.addEventListener('focusin', function (event) {
                record('Focused on: ' + event.target.id);
            });

            document.addEventListener('keydown', function (event) {
                record('Keydown: ' + event.key);
                if ((event.ctrlKey || event.metaKey) && event.key.toLowerCase() === 'a') {
                    record('Ctrl+A detected');
                    setTimeout(function () {
                        const el = document.activeElement;
                        record('Selection length: ' + (el.selectionEnd - el.selectionStart));
                    }, 50);
                }
            });
        })();
    </script>
</body>
</html>
"""

_DROPDOWN_PAGE = """
<!DOCTYPE html>
<html>
<head><title>Dropdown Test</title></head>
<body>
    <h1>Dropdown Test</h1>
    <select id="test-dropdown" name="test-dropdown">
        <option value="">Please select</option>
        <option value="option1">First Option</option>
        <option value="option2">Second Option</option>
        <option value="option3">Third Option</option>
    </select>
</body>
</html>
"""

_CLICKABLE_PAGE = """
<!DOCTYPE html>
<html>
<head>
    <title>Click Test</title>
    <style>
        .clickable { margin: 10px; padding: 10px; border: 1px solid #ccc; cursor: pointer; }
    </style>
</head>
<body>
    <h1>Click Test</h1>
    <div class="clickable" id="button1" onclick="updateResult('Button 1 clicked')">Button 1</div>
    <div class="clickable" id="button2" onclick="updateResult('Button 2 clicked')">Button 2</div>
    <a href="#" class="clickable" onclick="updateResult('Link 1 clicked'); return false;">Link 1</a>
    <div id="result"></div>
    <script>
        function updateResult(text) {
            document.getElementById('result').textContent = text;
        }
    </script>
</body>
</html>
"""


# ---------------------------------------------------------------------------
# Single-action models
#
# Each model exposes exactly one action field so that ``Controller.act``
# receives one action per call. They are defined once here instead of being
# redeclared inside every test.
# ---------------------------------------------------------------------------


class _GoToUrlModel(ActionModel):
    go_to_url: GoToUrlAction | None = None


class _ScrollDownModel(ActionModel):
    scroll_down: ScrollAction | None = None


class _ScrollUpModel(ActionModel):
    scroll_up: ScrollAction | None = None


class _GoBackModel(ActionModel):
    go_back: NoParamsAction | None = None


class _ClickElementModel(ActionModel):
    click_element_by_index: ClickElementAction | None = None


class _InputTextModel(ActionModel):
    input_text: InputTextAction | None = None


class _WaitModel(ActionModel):
    wait: dict | None = None


class _OpenTabModel(ActionModel):
    open_tab: OpenTabAction | None = None


class _SwitchTabModel(ActionModel):
    switch_tab: SwitchTabAction | None = None


class _CloseTabModel(ActionModel):
    close_tab: CloseTabAction | None = None


class _SearchGoogleModel(ActionModel):
    search_google: SearchGoogleAction | None = None


class _DoneModel(ActionModel):
    done: DoneAction | None = None


class _DragDropModel(ActionModel):
    drag_drop: DragDropAction | None = None


class _SendKeysModel(ActionModel):
    send_keys: SendKeysAction | None = None


class _GetDropdownOptionsModel(ActionModel):
    get_dropdown_options: dict


class _SelectDropdownOptionModel(ActionModel):
    select_dropdown_option: dict


async def _navigate(controller, browser_context, url):
    """Run the ``go_to_url`` action for *url* and return the controller's result."""
    return await controller.act(_GoToUrlModel(go_to_url=GoToUrlAction(url=url)), browser_context)


async def _go_back(controller, browser_context):
    """Run the ``go_back`` action and return the controller's result."""
    return await controller.act(_GoBackModel(go_back=NoParamsAction()), browser_context)


async def _send_keys(controller, browser_context, keys):
    """Run the ``send_keys`` action with *keys* and return the controller's result."""
    return await controller.act(_SendKeysModel(send_keys=SendKeysAction(keys=keys)), browser_context)


def _describe_elements(selector_map):
    """Return ``"index: tag"`` strings for every selector-map entry (for failure messages)."""
    return [f'{idx}: {element.tag_name}' for idx, element in selector_map.items()]


async def _find_select_index(browser_context):
    """Return the selector-map index of the first ``<select>``; fail the test if there is none."""
    selector_map = await browser_context.get_selector_map()
    dropdown_index = None
    for idx, element in selector_map.items():
        if element.tag_name.lower() == 'select':
            dropdown_index = idx
            break
    assert dropdown_index is not None, (
        f'Could not find select element in selector map. Available elements: {_describe_elements(selector_map)}'
    )
    return dropdown_index


class TestControllerIntegration:
    """Integration tests for Controller using actual browser instances."""

    # ------------------------------------------------------------------
    # Fixtures
    # ------------------------------------------------------------------

    @pytest.fixture(scope='module')
    def event_loop(self):
        """Create and provide an event loop for async tests."""
        loop = asyncio.get_event_loop_policy().new_event_loop()
        yield loop
        loop.close()

    @pytest.fixture(scope='module')
    def http_server(self):
        """Create and provide a test HTTP server that serves static content."""
        server = HTTPServer()
        server.start()

        # Routes shared by several tests; individual tests add their own pages.
        server.expect_request('/').respond_with_data(_HOME_PAGE, content_type='text/html')
        server.expect_request('/page1').respond_with_data(_PAGE_1, content_type='text/html')
        server.expect_request('/page2').respond_with_data(_PAGE_2, content_type='text/html')
        server.expect_request('/search').respond_with_data(_SEARCH_RESULTS_PAGE, content_type='text/html')

        yield server
        server.stop()

    @pytest.fixture
    def base_url(self, http_server):
        """Return the base URL for the test HTTP server."""
        return f'http://{http_server.host}:{http_server.port}'

    @pytest.fixture(scope='module')
    async def browser(self, event_loop):
        """Create and provide a headless Browser instance, closed after the module's tests."""
        browser_instance = Browser(
            config=BrowserConfig(
                headless=True,
            )
        )
        yield browser_instance
        await browser_instance.close()

    @pytest.fixture
    async def browser_context(self, browser):
        """Create and provide a BrowserContext instance, closed after each test."""
        context = BrowserContext(browser=browser)
        yield context
        await context.close()

    @pytest.fixture
    def controller(self):
        """Create and provide a Controller instance."""
        return Controller()

    # ------------------------------------------------------------------
    # Navigation and basic actions
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_go_to_url_action(self, controller, browser_context, base_url):
        """Test that GoToUrlAction navigates to the specified URL."""
        result = await _navigate(controller, browser_context, f'{base_url}/page1')

        assert isinstance(result, ActionResult)
        assert f'Navigated to {base_url}/page1' in result.extracted_content

        # The browser itself must report the new location as well.
        page = await browser_context.get_current_page()
        assert f'{base_url}/page1' in page.url

    @pytest.mark.asyncio
    async def test_scroll_actions(self, controller, browser_context, base_url):
        """Test that scroll actions correctly scroll the page."""
        await _navigate(controller, browser_context, f'{base_url}/page1')

        result = await controller.act(_ScrollDownModel(scroll_down=ScrollAction(amount=200)), browser_context)
        assert isinstance(result, ActionResult)
        assert 'Scrolled down' in result.extracted_content

        result = await controller.act(_ScrollUpModel(scroll_up=ScrollAction(amount=100)), browser_context)
        assert isinstance(result, ActionResult)
        assert 'Scrolled up' in result.extracted_content

    @pytest.mark.asyncio
    async def test_registry_actions(self, controller, browser_context):
        """Test that the registry contains the expected default actions."""
        common_actions = [
            'go_to_url',
            'search_google',
            'click_element_by_index',
            'input_text',
            'scroll_down',
            'scroll_up',
            'go_back',
            'switch_tab',
            'open_tab',
            'close_tab',
            'wait',
        ]

        registered = controller.registry.registry.actions
        for action in common_actions:
            assert action in registered
            assert registered[action].function is not None
            assert registered[action].description is not None

    @pytest.mark.asyncio
    async def test_custom_action_registration(self, controller, browser_context, base_url):
        """Test registering a custom action and executing it."""

        class CustomParams(BaseModel):
            text: str

        @controller.action('Test custom action', param_model=CustomParams)
        async def custom_action(params: CustomParams, browser):
            page = await browser.get_current_page()
            return ActionResult(extracted_content=f'Custom action executed with: {params.text} on {page.url}')

        class CustomActionModel(ActionModel):
            custom_action: CustomParams | None = None

        # The custom action reports the current URL, so navigate somewhere first.
        await _navigate(controller, browser_context, f'{base_url}/page1')

        result = await controller.act(
            CustomActionModel(custom_action=CustomParams(text='test_value')), browser_context
        )

        assert isinstance(result, ActionResult)
        assert 'Custom action executed with: test_value on' in result.extracted_content
        assert f'{base_url}/page1' in result.extracted_content

    @pytest.mark.asyncio
    async def test_input_text_action(self, controller, browser_context, base_url, http_server):
        """Test that InputTextAction is processed by the controller.

        The element index is hard-coded, so the action may legitimately be
        rejected when that index is not in the selector map; both outcomes
        are accepted.
        """
        http_server.expect_request('/searchform').respond_with_data(_SEARCH_FORM_PAGE, content_type='text/html')
        await _navigate(controller, browser_context, f'{base_url}/searchform')

        mock_input_index = 1  # Not resolved from the DOM; see docstring.
        action = _InputTextModel(input_text=InputTextAction(index=mock_input_index, text='Python programming'))

        # Only the controller call is guarded: assertion failures about a
        # successful result must surface as themselves rather than being
        # swallowed by the broad ``except`` below.
        try:
            result = await controller.act(action, browser_context)
        except Exception as e:
            # Missing element index is the expected failure mode here.
            assert 'Element index' in str(e) or 'does not exist' in str(e)
        else:
            assert isinstance(result, ActionResult)
            assert 'Input' in result.extracted_content

    @pytest.mark.asyncio
    async def test_error_handling(self, controller, browser_context):
        """Test error handling when an action fails."""
        invalid_action = _ClickElementModel(click_element_by_index=ClickElementAction(index=9999))

        # Index 9999 cannot exist, so the controller is expected to raise.
        with pytest.raises(Exception) as excinfo:
            await controller.act(invalid_action, browser_context)

        assert 'does not exist' in str(excinfo.value) or 'Element with index' in str(excinfo.value)

    @pytest.mark.asyncio
    async def test_wait_action(self, controller, browser_context):
        """Test that the wait action correctly waits for the specified duration."""
        wait_action = _WaitModel(wait={'seconds': 1})

        # A monotonic clock cannot jump backwards or forwards under us the
        # way wall-clock time can.
        start_time = time.monotonic()
        result = await controller.act(wait_action, browser_context)
        elapsed = time.monotonic() - start_time

        assert isinstance(result, ActionResult)
        assert 'Waiting for' in result.extracted_content
        assert elapsed >= 0.9  # Allow some timing margin

    @pytest.mark.asyncio
    async def test_go_back_action(self, controller, browser_context, base_url):
        """Test that go_back action navigates to the previous page."""
        await _navigate(controller, browser_context, f'{base_url}/page1')
        page1 = await browser_context.get_current_page()
        first_url = page1.url
        print(f'First page URL: {first_url}')

        await _navigate(controller, browser_context, f'{base_url}/page2')
        page2 = await browser_context.get_current_page()
        second_url = page2.url
        print(f'Second page URL: {second_url}')
        assert f'{base_url}/page2' in second_url

        result = await _go_back(controller, browser_context)
        assert isinstance(result, ActionResult)
        assert 'Navigated back' in result.extracted_content

        # Give the history navigation time to settle before reading the URL.
        await asyncio.sleep(1)

        page3 = await browser_context.get_current_page()
        final_url = page3.url
        print(f'Final page URL after going back: {final_url}')

        # Going back from page2 must land on page1.
        assert f'{base_url}/page1' in final_url, f'Expected to return to page1 but got {final_url}'

    @pytest.mark.asyncio
    async def test_navigation_chain(self, controller, browser_context, base_url):
        """Test navigating through multiple pages and back through history."""
        # Home -> Page1 -> Page2
        urls = [f'{base_url}/', f'{base_url}/page1', f'{base_url}/page2']

        for url in urls:
            await _navigate(controller, browser_context, url)
            page = await browser_context.get_current_page()
            assert url in page.url

        # Walk back through history: Page2 -> Page1 -> Home.
        for expected_url in reversed(urls[:-1]):
            await _go_back(controller, browser_context)
            await asyncio.sleep(1)  # Wait for navigation to complete

            page = await browser_context.get_current_page()
            assert expected_url in page.url

    @pytest.mark.asyncio
    async def test_concurrent_tab_operations(self, controller, browser_context, base_url):
        """Test operations across multiple tabs."""
        urls = [f'{base_url}/page1', f'{base_url}/page2']

        # First tab
        await _navigate(controller, browser_context, urls[0])

        # Opening a tab also switches to it.
        await controller.act(_OpenTabModel(open_tab=OpenTabAction(url=urls[1])), browser_context)
        page = await browser_context.get_current_page()
        assert urls[1] in page.url

        # Switch back to the first tab.
        await controller.act(_SwitchTabModel(switch_tab=SwitchTabAction(page_id=0)), browser_context)
        page = await browser_context.get_current_page()
        assert urls[0] in page.url

        # Close the second tab; only the first must remain.
        await controller.act(_CloseTabModel(close_tab=CloseTabAction(page_id=1)), browser_context)
        tabs_info = await browser_context.get_tabs_info()
        assert len(tabs_info) == 1
        assert urls[0] in tabs_info[0].url

    @pytest.mark.asyncio
    async def test_excluded_actions(self, browser_context):
        """Test that excluded actions are not registered."""
        excluded_controller = Controller(exclude_actions=['search_google', 'open_tab'])
        registered = excluded_controller.registry.registry.actions

        assert 'search_google' not in registered
        assert 'open_tab' not in registered

        # Everything that was not excluded is still available.
        assert 'go_to_url' in registered
        assert 'click_element_by_index' in registered

    @pytest.mark.asyncio
    async def test_search_google_action(self, controller, browser_context, base_url):
        """Test the search_google action."""
        # NOTE(review): nothing here redirects the action to the local /search
        # route, so this test appears to need real network access - confirm.
        search_action = _SearchGoogleModel(search_google=SearchGoogleAction(query='Python web automation'))

        result = await controller.act(search_action, browser_context)

        assert isinstance(result, ActionResult)
        assert 'Searched for "Python web automation" in Google' in result.extracted_content

        # The query must show up in the URL the browser ended on.
        page = await browser_context.get_current_page()
        assert page.url is not None and 'Python' in page.url

    @pytest.mark.asyncio
    async def test_done_action(self, controller, browser_context, base_url):
        """Test that DoneAction completes a task and reports success or failure."""
        await _navigate(controller, browser_context, f'{base_url}/page1')

        # Success case
        success_done_message = 'Successfully completed task'
        result = await controller.act(
            _DoneModel(done=DoneAction(text=success_done_message, success=True)), browser_context
        )

        assert isinstance(result, ActionResult)
        assert success_done_message in result.extracted_content
        assert result.success is True
        assert result.is_done is True
        assert result.error is None

        # Failure case: the task is still done, but flagged unsuccessful.
        failed_done_message = 'Failed to complete task'
        result = await controller.act(
            _DoneModel(done=DoneAction(text=failed_done_message, success=False)), browser_context
        )

        assert isinstance(result, ActionResult)
        assert failed_done_message in result.extracted_content
        assert result.success is False
        assert result.is_done is True
        assert result.error is None

    # ------------------------------------------------------------------
    # Pointer and keyboard interaction
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_drag_drop_action(self, controller, browser_context, base_url, http_server):
        """Test that DragDropAction correctly drags and drops elements."""
        http_server.expect_request('/dragdrop').respond_with_data(_DRAG_DROP_PAGE, content_type='text/html')

        # Step 1: Navigate to the drag and drop test page
        goto_result = await _navigate(controller, browser_context, f'{base_url}/dragdrop')
        assert goto_result.error is None, f'Navigation failed: {goto_result.error}'
        assert f'Navigated to {base_url}/dragdrop' in goto_result.extracted_content

        page = await browser_context.get_current_page()
        title = await page.title()
        assert title == 'Drag and Drop Test', f'Page did not load correctly, got title: {title}'

        # Step 2: Verify initial state - draggable should be in zone1
        initial_parent = await page.evaluate('() => document.getElementById("draggable").parentElement.id')
        assert initial_parent == 'zone1', f'Element should start in zone1, but found in {initial_parent}'

        # Step 3: Get the centre points of the source element and the target zone
        element_info = await page.evaluate("""
            () => {
                const draggable = document.getElementById("draggable");
                const zone2 = document.getElementById("zone2");
                const draggableRect = draggable.getBoundingClientRect();
                const zone2Rect = zone2.getBoundingClientRect();
                return {
                    source: {
                        x: Math.round(draggableRect.left + draggableRect.width/2),
                        y: Math.round(draggableRect.top + draggableRect.height/2)
                    },
                    target: {
                        x: Math.round(zone2Rect.left + zone2Rect.width/2),
                        y: Math.round(zone2Rect.top + zone2Rect.height/2)
                    }
                };
            }
        """)
        print(f'Source element position: {element_info["source"]}')
        print(f'Target position: {element_info["target"]}')

        # Step 4: Use the controller's coordinate-based DragDropAction
        drag_action = _DragDropModel(
            drag_drop=DragDropAction(
                coord_source_x=element_info['source']['x'],
                coord_source_y=element_info['source']['y'],
                coord_target_x=element_info['target']['x'],
                coord_target_y=element_info['target']['y'],
                steps=10,  # More steps for smoother movement
                delay_ms=10,  # Small delay for browser to process events
            )
        )
        result = await controller.act(drag_action, browser_context)

        # Step 5: Verify the controller action result
        assert result.error is None, f'Drag operation failed with error: {result.error}'
        assert result.is_done is False
        assert '🖱️ Dragged from' in result.extracted_content

        # Step 6: Read back where the element ended up
        final_parent = await page.evaluate('() => document.getElementById("draggable").parentElement.id')

        # Step 7: The page must have seen the drag start
        event_log = await page.evaluate('() => document.getElementById("log").textContent')
        print(f'Event log: {event_log}')
        assert 'mousedown' in event_log, 'No mousedown event detected'

        # Step 8: The element must now live in the target zone
        drag_succeeded = final_parent == 'zone2'
        assert drag_succeeded, "Drag and drop events weren't fired correctly"

    @pytest.mark.asyncio
    async def test_send_keys_action(self, controller, browser_context, base_url, http_server):
        """Test SendKeysAction against a controlled page served by the local HTTP server."""
        http_server.expect_request('/keyboard').respond_with_data(_KEYBOARD_PAGE, content_type='text/html')

        goto_result = await _navigate(controller, browser_context, f'{base_url}/keyboard')
        await asyncio.sleep(0.1)

        assert isinstance(goto_result, ActionResult)
        assert f'Navigated to {base_url}/keyboard' in goto_result.extracted_content
        assert goto_result.error is None
        assert goto_result.is_done is False

        page = await browser_context.get_current_page()
        title = await page.title()
        assert title == 'Keyboard Test'

        h1_text = await page.evaluate('() => document.querySelector("h1").textContent')
        assert h1_text == 'Keyboard Actions Test'

        # 1. Test Tab key to focus the first input
        tab_result = await _send_keys(controller, browser_context, 'Tab')
        await asyncio.sleep(0.1)

        assert isinstance(tab_result, ActionResult)
        assert 'Sent keys: Tab' in tab_result.extracted_content
        assert tab_result.error is None
        assert tab_result.is_done is False

        active_element_id = await page.evaluate('() => document.activeElement.id')
        assert active_element_id == 'textInput', f"Expected 'textInput' to be focused, got '{active_element_id}'"

        result_text = await page.locator('#result').text_content()
        assert 'Focused on: textInput' in result_text

        # 2. Type text into the input
        test_text = 'This is a test'
        type_result = await _send_keys(controller, browser_context, test_text)
        await asyncio.sleep(0.1)

        assert isinstance(type_result, ActionResult)
        assert f'Sent keys: {test_text}' in type_result.extracted_content
        assert type_result.error is None
        assert type_result.is_done is False

        input_value = await page.evaluate('() => document.getElementById("textInput").value')
        assert input_value == test_text, f"Expected input value '{test_text}', got '{input_value}'"

        # Every typed character must have produced a keydown entry.
        result_text = await page.locator('#result').text_content()
        for char in test_text:
            assert f'Keydown: {char}' in result_text, f"Missing key event for '{char}'"

        # 3. Test Ctrl+A for select all
        select_all_result = await _send_keys(controller, browser_context, 'ControlOrMeta+a')
        # Wait longer for selection to take effect
        await asyncio.sleep(1.0)

        assert isinstance(select_all_result, ActionResult)
        assert 'Sent keys: ControlOrMeta+a' in select_all_result.extracted_content
        assert select_all_result.error is None

        selection_length = await page.evaluate(
            '() => document.activeElement.selectionEnd - document.activeElement.selectionStart'
        )
        assert selection_length == len(test_text), f'Expected selection length {len(test_text)}, got {selection_length}'

        result_text = await page.locator('#result').text_content()
        assert 'Keydown: a' in result_text
        assert 'Ctrl+A detected' in result_text
        assert 'Selection length:' in result_text

        # 4. Test Tab to next field
        tab_result2 = await _send_keys(controller, browser_context, 'Tab')
        await asyncio.sleep(0.1)

        assert isinstance(tab_result2, ActionResult)
        assert 'Sent keys: Tab' in tab_result2.extracted_content
        assert tab_result2.error is None

        active_element_id = await page.evaluate('() => document.activeElement.id')
        assert active_element_id == 'textarea', f"Expected 'textarea' to be focused, got '{active_element_id}'"

        result_text = await page.locator('#result').text_content()
        assert 'Focused on: textarea' in result_text

        # 5. Type in the textarea
        textarea_text = 'Testing multiline\ninput text'
        textarea_result = await _send_keys(controller, browser_context, textarea_text)

        assert isinstance(textarea_result, ActionResult)
        assert f'Sent keys: {textarea_text}' in textarea_result.extracted_content
        assert textarea_result.error is None
        assert textarea_result.is_done is False

        textarea_value = await page.evaluate('() => document.getElementById("textarea").value')
        assert textarea_value == textarea_text, f"Expected textarea value '{textarea_text}', got '{textarea_value}'"

        # The embedded newline must have produced exactly two lines.
        lines = textarea_value.split('\n')
        assert len(lines) == 2, f'Expected 2 lines in textarea, got {len(lines)}'
        assert lines[0] == 'Testing multiline'
        assert lines[1] == 'input text'

        # Two more Tabs are expected to cycle focus back to the first input.
        # NOTE(review): this depends on the page having exactly one focusable
        # element after the textarea and on focus wrapping - confirm.
        await _send_keys(controller, browser_context, 'Tab')
        await _send_keys(controller, browser_context, 'Tab')

        active_element_id = await page.evaluate('() => document.activeElement.id')
        assert active_element_id == 'textInput', 'Tab cycling through form elements failed'

        # Tabbing around must not have altered the first input's content.
        input_value = await page.evaluate('() => document.getElementById("textInput").value')
        assert input_value == test_text, "Input value shouldn't have changed after tabbing"

    # ------------------------------------------------------------------
    # Dropdowns and clicking
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_get_dropdown_options(self, controller, browser_context, base_url, http_server):
        """Test that get_dropdown_options correctly retrieves options from a dropdown."""
        http_server.expect_request('/dropdown1').respond_with_data(_DROPDOWN_PAGE, content_type='text/html')
        await _navigate(controller, browser_context, f'{base_url}/dropdown1')

        page = await browser_context.get_current_page()
        await page.wait_for_load_state()

        # Initialize the DOM state to populate the selector map
        await browser_context.get_state(cache_clickable_elements_hashes=True)

        # Interact with the dropdown to ensure it's recognized, then refresh the state
        await page.click('select#test-dropdown')
        await browser_context.get_state(cache_clickable_elements_hashes=True)

        dropdown_index = await _find_select_index(browser_context)

        result = await controller.act(
            _GetDropdownOptionsModel(get_dropdown_options={'index': dropdown_index}), browser_context
        )

        expected_options = [
            {'index': 0, 'text': 'Please select', 'value': ''},
            {'index': 1, 'text': 'First Option', 'value': 'option1'},
            {'index': 2, 'text': 'Second Option', 'value': 'option2'},
            {'index': 3, 'text': 'Third Option', 'value': 'option3'},
        ]

        assert isinstance(result, ActionResult)

        # Every real option (the placeholder is skipped) must be listed.
        for option in expected_options[1:]:
            assert option['text'] in result.extracted_content, f"Option '{option['text']}' not found in result content"

        # The result must tell the caller how to use the option text.
        assert 'Use the exact text string in select_dropdown_option' in result.extracted_content

        # Cross-check against the options the DOM actually contains.
        dropdown_options = await page.evaluate("""
            () => {
                const select = document.getElementById('test-dropdown');
                return Array.from(select.options).map(opt => ({
                    text: opt.text,
                    value: opt.value
                }));
            }
        """)

        assert len(dropdown_options) == len(expected_options), (
            f'Expected {len(expected_options)} options, got {len(dropdown_options)}'
        )
        for i, expected in enumerate(expected_options):
            actual = dropdown_options[i]
            assert actual['text'] == expected['text'], (
                f"Option at index {i} has wrong text: expected '{expected['text']}', got '{actual['text']}'"
            )
            assert actual['value'] == expected['value'], (
                f"Option at index {i} has wrong value: expected '{expected['value']}', got '{actual['value']}'"
            )

    @pytest.mark.asyncio
    async def test_select_dropdown_option(self, controller, browser_context, base_url, http_server):
        """Test that select_dropdown_option correctly selects an option from a dropdown."""
        http_server.expect_request('/dropdown2').respond_with_data(_DROPDOWN_PAGE, content_type='text/html')
        await _navigate(controller, browser_context, f'{base_url}/dropdown2')

        page = await browser_context.get_current_page()
        await page.wait_for_load_state()

        # Populate the selector map with highlight indices
        await browser_context.get_state(cache_clickable_elements_hashes=True)

        dropdown_index = await _find_select_index(browser_context)

        result = await controller.act(
            _SelectDropdownOptionModel(select_dropdown_option={'index': dropdown_index, 'text': 'Second Option'}),
            browser_context,
        )

        assert isinstance(result, ActionResult)
        assert 'selected option' in result.extracted_content.lower()
        assert 'Second Option' in result.extracted_content

        # The DOM must reflect the selection as well.
        selected_value = await page.evaluate("document.getElementById('test-dropdown').value")
        assert selected_value == 'option2'  # Second Option has value "option2"

    @pytest.mark.asyncio
    async def test_click_element_by_index(self, controller, browser_context, base_url, http_server):
        """Test that click_element_by_index correctly clicks an element and handles different outcomes."""
        http_server.expect_request('/clickable').respond_with_data(_CLICKABLE_PAGE, content_type='text/html')
        await _navigate(controller, browser_context, f'{base_url}/clickable')

        page = await browser_context.get_current_page()
        await page.wait_for_load_state()

        # Initialize the DOM state to populate the selector map
        await browser_context.get_state(cache_clickable_elements_hashes=True)
        selector_map = await browser_context.get_selector_map()

        # Pick the first div carrying the "clickable" class.
        button_index = None
        button_text = None
        for idx, element in selector_map.items():
            if element.tag_name.lower() == 'div' and 'clickable' in str(element.attributes.get('class', '')):
                button_index = idx
                button_text = element.get_all_text_till_next_clickable_element(max_depth=2).strip()
                break

        assert button_index is not None, (
            f'Could not find clickable element in selector map. Available elements: {_describe_elements(selector_map)}'
        )

        expected_button_text = 'Button 1'
        expected_result_text = 'Button 1 clicked'

        assert expected_button_text in button_text, (
            f"Expected button text '{expected_button_text}' not found in '{button_text}'"
        )

        result = await controller.act(
            _ClickElementModel(click_element_by_index={'index': button_index}), browser_context
        )

        assert isinstance(result, ActionResult), 'Result should be an ActionResult instance'
        assert result.error is None, f'Expected no error but got: {result.error}'

        assert f'Clicked button with index {button_index}' in result.extracted_content, (
            f'Expected click confirmation in result content, got: {result.extracted_content}'
        )
        assert button_text in result.extracted_content, (
            f"Button text '{button_text}' not found in result content: {result.extracted_content}"
        )

        # The click must have had a visible effect on the page.
        result_text = await page.evaluate("document.getElementById('result').textContent")
        assert result_text == expected_result_text, f"Expected result text '{expected_result_text}', got '{result_text}'"