import asyncio

import pytest
from langchain_openai import ChatOpenAI
from pytest_httpserver import HTTPServer

from browser_use.agent.service import Agent
from browser_use.agent.views import AgentHistoryList
from browser_use.browser import BrowserProfile, BrowserSession


class TestCoreFunctionality:
    """Tests for core functionality of the Agent using real browser instances.

    Every test drives an ``Agent`` against pages served by a local
    ``pytest_httpserver`` instance, then checks both the action names recorded
    in the returned history and, where applicable, the resulting browser state.
    """

    @pytest.fixture(scope='module')
    def event_loop(self):
        """Create and provide an event loop for async tests."""
        loop = asyncio.get_event_loop_policy().new_event_loop()
        yield loop
        loop.close()

    @pytest.fixture(scope='module')
    def http_server(self):
        """Create and provide a test HTTP server that serves static content.

        Routes registered here: ``/``, ``/page1``, ``/page2`` and ``/search``.
        The server is stopped on teardown, and also when registering a route
        fails before the fixture value is handed to a test.
        """
        server = HTTPServer()
        server.start()
        try:
            # Add routes for common test pages.
            # NOTE(review): the markup below was reconstructed from the visible
            # page text; confirm it against the intended fixtures.
            server.expect_request('/').respond_with_data(
                '<html><head><title>Test Home Page</title></head>'
                '<body><h1>Test Home Page</h1>'
                '<p>Welcome to the test site</p></body></html>',
                content_type='text/html',
            )
            server.expect_request('/page1').respond_with_data(
                '<html><head><title>Test Page 1</title></head>'
                '<body><h1>Test Page 1</h1>'
                '<p>This is test page 1</p>'
                '<a href="/page2">Link to Page 2</a></body></html>',
                content_type='text/html',
            )
            server.expect_request('/page2').respond_with_data(
                '<html><head><title>Test Page 2</title></head>'
                '<body><h1>Test Page 2</h1>'
                '<p>This is test page 2</p>'
                '<a href="/page1">Back to Page 1</a></body></html>',
                content_type='text/html',
            )
            # test_input_text reads the value of the element with id
            # "search-box", so the page has to provide it. The input is
            # deliberately not wrapped in a <form>: pressing Enter must not
            # reload the page and clear the typed value.
            server.expect_request('/search').respond_with_data(
                """
                <html>
                <head><title>Search Results</title></head>
                <body>
                    <h1>Search Results</h1>
                    <input id="search-box" type="text" placeholder="Search">
                    <div class="results">
                        <div class="result">Result 1</div>
                        <div class="result">Result 2</div>
                        <div class="result">Result 3</div>
                    </div>
                </body>
                </html>
                """,
                content_type='text/html',
            )
            yield server
        finally:
            server.stop()

    @pytest.fixture
    def base_url(self, http_server):
        """Return the base URL for the test HTTP server."""
        return f'http://{http_server.host}:{http_server.port}'

    @pytest.fixture(scope='module')
    async def browser_session(self, event_loop):
        """Create and provide a BrowserSession instance with security disabled.

        The session is shared by every test in the module and stopped on
        teardown.
        """
        browser_session = BrowserSession(
            browser_profile=BrowserProfile(
                headless=True,
                disable_security=True,
            )
        )
        yield browser_session
        await browser_session.stop()

    @pytest.fixture
    def llm(self):
        """Initialize language model for testing with minimal settings."""
        return ChatOpenAI(
            model='gpt-4o',
            temperature=0.0,
        )

    @pytest.mark.asyncio
    async def test_search_google(self, llm, browser_session, base_url):
        """Test 'Search Google' action using a mock search page."""
        agent = Agent(
            task=f"Go to '{base_url}/search' and search for 'OpenAI'.",
            llm=llm,
            browser_session=browser_session,
        )
        history: AgentHistoryList = await agent.run(max_steps=3)
        action_names = history.action_names()

        assert 'go_to_url' in action_names
        # Either typing into the page or clicking an element counts as a
        # search interaction.
        assert any(
            'input_text' in action or 'click_element_by_index' in action
            for action in action_names
        )

    @pytest.mark.asyncio
    async def test_go_to_url(self, llm, browser_session, base_url):
        """Test 'Navigate to URL' action."""
        agent = Agent(
            task=f"Navigate to '{base_url}/page1'.",
            llm=llm,
            browser_session=browser_session,
        )
        history = await agent.run(max_steps=2)
        action_names = history.action_names()

        assert 'go_to_url' in action_names

        # Verify we're on the correct page
        page = await browser_session.get_current_page()
        assert f'{base_url}/page1' in page.url

    @pytest.mark.asyncio
    async def test_go_back(self, llm, browser_session, base_url):
        """Test 'Go back' action."""
        # First navigate to page1, then to page2, then go back
        agent = Agent(
            task=f"Go to '{base_url}/page1', then go to '{base_url}/page2', then go back.",
            llm=llm,
            browser_session=browser_session,
        )
        history = await agent.run(max_steps=4)
        action_names = history.action_names()

        assert 'go_to_url' in action_names
        assert 'go_back' in action_names

        # Verify we're back on page1
        page = await browser_session.get_current_page()
        assert f'{base_url}/page1' in page.url

    @pytest.mark.asyncio
    async def test_click_element(self, llm, browser_session, base_url):
        """Test 'Click element' action."""
        agent = Agent(
            task=f"Go to '{base_url}/page1' and click on the link to Page 2.",
            llm=llm,
            browser_session=browser_session,
        )
        history = await agent.run(max_steps=3)
        action_names = history.action_names()

        assert 'go_to_url' in action_names
        assert 'click_element_by_index' in action_names

        # Verify we're now on page2 after clicking the link
        page = await browser_session.get_current_page()
        assert f'{base_url}/page2' in page.url

    @pytest.mark.asyncio
    async def test_input_text(self, llm, browser_session, base_url):
        """Test 'Input text' action."""
        agent = Agent(
            task=f"Go to '{base_url}/search' and input 'OpenAI' into the search box.",
            llm=llm,
            browser_session=browser_session,
        )
        history = await agent.run(max_steps=3)
        action_names = history.action_names()

        assert 'go_to_url' in action_names
        assert 'input_text' in action_names

        # Verify text was entered in the search box
        page = await browser_session.get_current_page()
        search_value = await page.evaluate("document.getElementById('search-box').value")
        assert 'OpenAI' in search_value

    @pytest.mark.asyncio
    async def test_switch_tab(self, llm, browser_session, base_url):
        """Test 'Switch tab' action."""
        agent = Agent(
            task=f"Open '{base_url}/page1' in the current tab, then open a new tab with '{base_url}/page2', then switch back to the first tab.",
            llm=llm,
            browser_session=browser_session,
        )
        history = await agent.run(max_steps=4)
        action_names = history.action_names()

        assert 'go_to_url' in action_names
        assert 'open_tab' in action_names
        assert 'switch_tab' in action_names

        # Verify we're back on the first tab with page1
        page = await browser_session.get_current_page()
        assert f'{base_url}/page1' in page.url

    @pytest.mark.asyncio
    async def test_open_new_tab(self, llm, browser_session, base_url):
        """Test 'Open new tab' action."""
        agent = Agent(
            task=f"Open a new tab and go to '{base_url}/page2'.",
            llm=llm,
            browser_session=browser_session,
        )
        history = await agent.run(max_steps=2)
        action_names = history.action_names()

        assert 'open_tab' in action_names

        # Verify we have at least two tabs
        tabs_info = await browser_session.get_tabs_info()
        assert len(tabs_info) >= 2

        # Verify the current page is page2
        page = await browser_session.get_current_page()
        assert f'{base_url}/page2' in page.url

    @pytest.mark.asyncio
    async def test_extract_page_content(self, llm, browser_session, base_url):
        """Test 'Extract page content' action."""
        agent = Agent(
            task=f"Go to '{base_url}/page1' and extract the page content.",
            llm=llm,
            browser_session=browser_session,
        )
        history = await agent.run(max_steps=3)
        action_names = history.action_names()

        assert 'go_to_url' in action_names
        assert 'extract_content' in action_names

        # Verify the extracted content includes some expected text. Every step
        # is inspected rather than only the last one: when the agent finishes
        # with a separate final step, the extraction result lives in an
        # earlier history entry.
        extracted_content = None
        for step in history.history:
            for action_result in step.result:
                content = action_result.extracted_content
                if content and 'This is test page 1' in content:
                    extracted_content = content
                    break
            if extracted_content is not None:
                break

        assert extracted_content is not None, 'Expected content not found in extraction'

    @pytest.mark.asyncio
    async def test_done_action(self, llm, browser_session, base_url):
        """Test 'Complete task' action."""
        agent = Agent(
            task=f"Navigate to '{base_url}/page1' and signal that the task is done.",
            llm=llm,
            browser_session=browser_session,
        )
        history = await agent.run(max_steps=3)
        action_names = history.action_names()

        assert 'go_to_url' in action_names
        assert 'done' in action_names

        # Verify the task was marked as successful
        assert history.is_successful()

    @pytest.mark.asyncio
    async def test_scroll_down(self, llm, browser_session, base_url, http_server):
        """Test 'Scroll down' action and validate that the page actually scrolled."""
        # Create a test page with scrollable content.
        # NOTE(review): the styling was reconstructed; the page only needs to
        # be taller than the viewport so that window.scrollY can increase.
        http_server.expect_request('/scroll-test').respond_with_data(
            """
            <html>
            <head>
                <title>Scroll Test</title>
                <style>
                    body { height: 3000px; }
                    .marker { position: absolute; }
                    #top { top: 0; }
                    #middle { top: 1000px; }
                    #bottom { top: 2000px; }
                </style>
            </head>
            <body>
                <div id="top" class="marker">Top of the page</div>
                <div id="middle" class="marker">Middle of the page</div>
                <div id="bottom" class="marker">Bottom of the page</div>
            </body>
            </html>
            """,
            content_type='text/html',
        )

        agent = Agent(
            task=f"Go to '{base_url}/scroll-test' and scroll down the page.",
            llm=llm,
            browser_session=browser_session,
        )

        # First go to the page
        await agent.run(max_steps=1)
        page = await browser_session.get_current_page()

        # Get initial scroll position
        initial_scroll_position = await page.evaluate('window.scrollY')

        # Execute a few more steps to allow for scrolling
        await agent.run(max_steps=2)

        # Get final scroll position
        final_scroll_position = await page.evaluate('window.scrollY')

        # Verify that scrolling occurred
        assert final_scroll_position > initial_scroll_position, 'Page did not scroll down'

        # Verify the action was executed
        history = agent.state.history
        action_names = history.action_names()
        assert 'scroll_down' in action_names