mirror of
https://github.com/browser-use/browser-use
synced 2026-05-06 17:52:15 +02:00
179 lines
6.0 KiB
Python
179 lines
6.0 KiB
Python
"""Tests for LLM model initialization and basic functionality.
|
|
|
|
This test verifies that all supported LLM models from examples/models/ can be initialized
|
|
and execute a simple button click task. Each model is tested in parallel.
|
|
"""
|
|
|
|
import os
|
|
|
|
import pytest
|
|
|
|
from browser_use.agent.service import Agent
|
|
from browser_use.browser.profile import BrowserProfile
|
|
from browser_use.browser.session import BrowserSession
|
|
from browser_use.llm.anthropic.chat import ChatAnthropic
|
|
from browser_use.llm.azure.chat import ChatAzureOpenAI
|
|
from browser_use.llm.browser_use.chat import ChatBrowserUse
|
|
from browser_use.llm.google.chat import ChatGoogle
|
|
from browser_use.llm.openai.chat import ChatOpenAI
|
|
|
|
# Define models to test based on the examples in examples/models/
|
|
# Each tuple is (model_class, model_name, required_api_key_env_var, extra_kwargs)
|
|
MODELS_TO_TEST = [
|
|
# OpenAI models (from gpt-4.1.py, gpt-5-mini.py)
|
|
pytest.param(ChatOpenAI, 'gpt-4.1-mini', 'OPENAI_API_KEY', {}, id='openai_gpt_4_1_mini'),
|
|
# Anthropic models (from claude-4-sonnet.py)
|
|
pytest.param(ChatAnthropic, 'claude-sonnet-4-0', 'ANTHROPIC_API_KEY', {}, id='anthropic_claude_sonnet_4_0'),
|
|
# Google models (from gemini.py)
|
|
pytest.param(ChatGoogle, 'gemini-flash-latest', 'GOOGLE_API_KEY', {}, id='google_gemini_flash_latest'),
|
|
|
|
# Azure OpenAI (from azure_openai.py) - needs both API key and endpoint
|
|
pytest.param(
|
|
ChatAzureOpenAI,
|
|
'gpt-4.1-mini',
|
|
'AZURE_OPENAI_KEY',
|
|
{'azure_endpoint': 'AZURE_OPENAI_ENDPOINT'}, # Special marker - will be resolved at runtime
|
|
id='azure_gpt_4_1_mini',
|
|
),
|
|
# Browser Use LLM (from browser_use_llm.py)
|
|
pytest.param(ChatBrowserUse, 'bu-latest', 'BROWSER_USE_API_KEY', {}, id='browser_use_bu_latest'),
|
|
|
|
# OpenRouter (from openrouter.py)
|
|
pytest.param(
|
|
ChatOpenAI,
|
|
'x-ai/grok-4-fast',
|
|
'OPENROUTER_API_KEY',
|
|
{'base_url': 'https://openrouter.ai/api/v1'},
|
|
id='openrouter_grok_4_mini',
|
|
)
|
|
]
|
|
|
|
|
|
@pytest.mark.parametrize('model_class,model_name,api_key_env,extra_kwargs', MODELS_TO_TEST)
|
|
async def test_llm_model_button_click(model_class, model_name, api_key_env, extra_kwargs, httpserver):
|
|
"""Test that each LLM model can click a button.
|
|
|
|
This test verifies:
|
|
1. Model can be initialized with API key
|
|
2. Agent can navigate and click a button
|
|
3. Button click is verified by checking page state change
|
|
4. Completes within max 2 steps
|
|
"""
|
|
# Check if running in GitHub Actions
|
|
in_github_actions = os.getenv('GITHUB_ACTIONS') == 'true'
|
|
|
|
# Handle API key validation (except Ollama which is local)
|
|
if api_key_env is not None:
|
|
api_key = os.getenv(api_key_env)
|
|
if not api_key:
|
|
if in_github_actions:
|
|
pytest.fail(f'{api_key_env} not set - failing in GitHub Actions')
|
|
else:
|
|
pytest.skip(f'{api_key_env} not set')
|
|
else:
|
|
api_key = None
|
|
|
|
# Handle Azure endpoint validation at runtime
|
|
if model_class is ChatAzureOpenAI:
|
|
azure_endpoint = os.getenv('AZURE_OPENAI_ENDPOINT')
|
|
if not azure_endpoint:
|
|
if in_github_actions:
|
|
pytest.fail('AZURE_OPENAI_ENDPOINT not set - failing in GitHub Actions')
|
|
else:
|
|
pytest.skip('AZURE_OPENAI_ENDPOINT not set')
|
|
# Replace the marker with actual endpoint value
|
|
extra_kwargs = {**extra_kwargs, 'azure_endpoint': azure_endpoint}
|
|
|
|
# Create HTML page with a button that changes page content when clicked
|
|
html = """
|
|
<!DOCTYPE html>
|
|
<html>
|
|
<head><title>Button Test</title></head>
|
|
<body>
|
|
<h1>Button Click Test</h1>
|
|
<button id="test-button" onclick="document.getElementById('result').innerText='SUCCESS'">
|
|
Click Me
|
|
</button>
|
|
<div id="result">NOT_CLICKED</div>
|
|
</body>
|
|
</html>
|
|
"""
|
|
httpserver.expect_request('/').respond_with_data(html, content_type='text/html')
|
|
|
|
# Create LLM instance with extra kwargs if provided
|
|
llm_kwargs = {'model': model_name}
|
|
if api_key is not None:
|
|
llm_kwargs['api_key'] = api_key
|
|
llm_kwargs.update(extra_kwargs)
|
|
llm = model_class(**llm_kwargs)
|
|
|
|
# Create browser session
|
|
browser = BrowserSession(
|
|
browser_profile=BrowserProfile(
|
|
headless=True,
|
|
user_data_dir=None, # Use temporary directory
|
|
)
|
|
)
|
|
|
|
try:
|
|
# Start browser
|
|
await browser.start()
|
|
|
|
# Create agent with button click task (URL in task triggers auto-navigation)
|
|
test_url = httpserver.url_for('/')
|
|
agent = Agent(
|
|
task=f'{test_url} - Click the button',
|
|
llm=llm,
|
|
browser_session=browser,
|
|
max_steps=2, # Max 2 steps as per requirements
|
|
)
|
|
|
|
# Run the agent
|
|
result = await agent.run()
|
|
|
|
# Verify task completed
|
|
assert result is not None
|
|
assert len(result.history) > 0
|
|
|
|
# Verify button was clicked by checking page state across any step
|
|
# (Don't enforce strict step count since auto-navigation and optional done actions vary by model)
|
|
button_clicked = False
|
|
for step in result.history:
|
|
# Check state_message which contains browser state with page text
|
|
if step.state_message and 'SUCCESS' in step.state_message:
|
|
button_clicked = True
|
|
break
|
|
|
|
# Check if SUCCESS appears in any step (indicating button was clicked)
|
|
assert button_clicked, 'Button was not clicked - SUCCESS not found in any page state'
|
|
|
|
finally:
|
|
# Clean up browser session
|
|
await browser.kill()
|
|
await browser.event_bus.stop(clear=True, timeout=5)
|
|
|
|
|
|
async def test_all_models_discoverable():
|
|
"""Test that all models defined in MODELS_TO_TEST are valid.
|
|
|
|
This is a simple sanity check that doesn't require API keys.
|
|
"""
|
|
assert len(MODELS_TO_TEST) > 0, 'No models defined in MODELS_TO_TEST'
|
|
|
|
# Verify each model parameter is a tuple of the right length
|
|
for param in MODELS_TO_TEST:
|
|
assert len(param.values) == 4, f'Invalid model parameter: {param}'
|
|
model_class, model_name, api_key_env, extra_kwargs = param.values
|
|
|
|
# Verify model_class is a class
|
|
assert callable(model_class), f'model_class {model_class} is not callable'
|
|
|
|
# Verify model_name is a string
|
|
assert isinstance(model_name, str), f'model_name {model_name} is not a string'
|
|
|
|
# Verify api_key_env is a string or None (for local models like Ollama)
|
|
assert api_key_env is None or isinstance(api_key_env, str), f'api_key_env {api_key_env} must be string or None'
|
|
|
|
# Verify extra_kwargs is a dict
|
|
assert isinstance(extra_kwargs, dict), f'extra_kwargs {extra_kwargs} is not a dict'
|