Files
browser-use/tests/ci/models/test_llm_models.py
Magnus Müller ba79f4df88 Linter
2025-10-25 09:59:30 -07:00

179 lines
6.0 KiB
Python

"""Tests for LLM model initialization and basic functionality.
This test verifies that all supported LLM models from examples/models/ can be initialized
and execute a simple button click task. Each model is tested in parallel.
"""
import os
import pytest
from browser_use.agent.service import Agent
from browser_use.browser.profile import BrowserProfile
from browser_use.browser.session import BrowserSession
from browser_use.llm.anthropic.chat import ChatAnthropic
from browser_use.llm.azure.chat import ChatAzureOpenAI
from browser_use.llm.browser_use.chat import ChatBrowserUse
from browser_use.llm.google.chat import ChatGoogle
from browser_use.llm.openai.chat import ChatOpenAI
# Models exercised by the parametrized button-click test, based on the examples in examples/models/.
# Each pytest.param carries four positional values, in this order:
#   (model_class, model_name, required_api_key_env_var, extra_kwargs)
# and an `id` keyword that sets the readable test ID for that case.
MODELS_TO_TEST = [
    # OpenAI models (from gpt-4.1.py, gpt-5-mini.py)
    pytest.param(ChatOpenAI, 'gpt-4.1-mini', 'OPENAI_API_KEY', {}, id='openai_gpt_4_1_mini'),
    # Anthropic models (from claude-4-sonnet.py)
    pytest.param(ChatAnthropic, 'claude-sonnet-4-0', 'ANTHROPIC_API_KEY', {}, id='anthropic_claude_sonnet_4_0'),
    # Google models (from gemini.py)
    pytest.param(ChatGoogle, 'gemini-flash-latest', 'GOOGLE_API_KEY', {}, id='google_gemini_flash_latest'),
    # Azure OpenAI (from azure_openai.py) - needs both API key and endpoint
    pytest.param(
        ChatAzureOpenAI,
        'gpt-4.1-mini',
        'AZURE_OPENAI_KEY',
        # Placeholder only: the value is the *name* of the environment variable.
        # The button-click test overwrites it with the real endpoint at runtime.
        {'azure_endpoint': 'AZURE_OPENAI_ENDPOINT'},  # Special marker - will be resolved at runtime
        id='azure_gpt_4_1_mini',
    ),
    # Browser Use LLM (from browser_use_llm.py)
    pytest.param(ChatBrowserUse, 'bu-latest', 'BROWSER_USE_API_KEY', {}, id='browser_use_bu_latest'),
    # OpenRouter (from openrouter.py) - reuses ChatOpenAI with a custom base_url
    # NOTE(review): the id says "grok_4_mini" but the model name is 'x-ai/grok-4-fast' - confirm which is intended.
    pytest.param(
        ChatOpenAI,
        'x-ai/grok-4-fast',
        'OPENROUTER_API_KEY',
        {'base_url': 'https://openrouter.ai/api/v1'},
        id='openrouter_grok_4_mini',
    )
]
def _require_env(var_name, in_github_actions):
    """Return the value of environment variable ``var_name``, or abort the test.

    When the variable is unset or empty the current test is failed if
    ``in_github_actions`` is true and skipped otherwise, so that a missing
    secret is loud in CI but does not break local runs.
    """
    value = os.getenv(var_name)
    if value:
        return value
    if in_github_actions:
        pytest.fail(f'{var_name} not set - failing in GitHub Actions')
    pytest.skip(f'{var_name} not set')


@pytest.mark.parametrize('model_class,model_name,api_key_env,extra_kwargs', MODELS_TO_TEST)
async def test_llm_model_button_click(model_class, model_name, api_key_env, extra_kwargs, httpserver):
    """Test that each LLM model can click a button.

    This test verifies:
    1. Model can be initialized with API key
    2. Agent can navigate and click a button
    3. Button click is verified by checking page state change
    4. The agent is limited to at most 2 steps (``max_steps=2``)

    Args:
        model_class: Chat model class to instantiate.
        model_name: Value passed to the model class as ``model``.
        api_key_env: Name of the environment variable holding the API key,
            or ``None`` when the model needs no key.
        extra_kwargs: Additional keyword arguments for the model class.
        httpserver: Fixture used to serve the local test page.
    """
    # Check if running in GitHub Actions
    in_github_actions = os.getenv('GITHUB_ACTIONS') == 'true'

    # Resolve the API key; models without a key env var (api_key_env is None) run without one
    api_key = _require_env(api_key_env, in_github_actions) if api_key_env is not None else None

    # Azure additionally needs its endpoint, resolved from the environment at runtime
    if model_class is ChatAzureOpenAI:
        azure_endpoint = _require_env('AZURE_OPENAI_ENDPOINT', in_github_actions)
        # Build a new dict so the shared parametrize value is never mutated
        extra_kwargs = {**extra_kwargs, 'azure_endpoint': azure_endpoint}

    # Create HTML page with a button that changes page content when clicked
    html = """
<!DOCTYPE html>
<html>
<head><title>Button Test</title></head>
<body>
<h1>Button Click Test</h1>
<button id="test-button" onclick="document.getElementById('result').innerText='SUCCESS'">
Click Me
</button>
<div id="result">NOT_CLICKED</div>
</body>
</html>
"""
    httpserver.expect_request('/').respond_with_data(html, content_type='text/html')

    # Create LLM instance; extra kwargs are applied last so they can override defaults
    llm_kwargs = {'model': model_name}
    if api_key is not None:
        llm_kwargs['api_key'] = api_key
    llm_kwargs.update(extra_kwargs)
    llm = model_class(**llm_kwargs)

    # Create browser session
    browser = BrowserSession(
        browser_profile=BrowserProfile(
            headless=True,
            user_data_dir=None,  # Use temporary directory
        )
    )

    try:
        # Start browser
        await browser.start()

        # Create agent with button click task (URL in task triggers auto-navigation)
        test_url = httpserver.url_for('/')
        agent = Agent(
            task=f'{test_url} - Click the button',
            llm=llm,
            browser_session=browser,
            max_steps=2,  # Max 2 steps as per requirements
        )

        # Run the agent
        result = await agent.run()

        # Verify task completed
        assert result is not None
        assert len(result.history) > 0

        # Verify button was clicked by checking page state across any step
        # (Don't enforce strict step count since auto-navigation and optional done actions vary by model)
        button_clicked = any(
            step.state_message and 'SUCCESS' in step.state_message for step in result.history
        )
        assert button_clicked, 'Button was not clicked - SUCCESS not found in any page state'
    finally:
        # Clean up browser session; the event bus is stopped even if kill() raises
        try:
            await browser.kill()
        finally:
            await browser.event_bus.stop(clear=True, timeout=5)
async def test_all_models_discoverable():
    """Sanity-check the shape of every entry in MODELS_TO_TEST.

    Needs no API keys: it only inspects the parameter values, checking that
    each entry holds four values of the expected kinds.
    """
    assert len(MODELS_TO_TEST) > 0, 'No models defined in MODELS_TO_TEST'

    for param in MODELS_TO_TEST:
        values = param.values

        # Every entry must unpack into exactly four fields
        assert len(values) == 4, f'Invalid model parameter: {param}'
        model_class, model_name, api_key_env, extra_kwargs = values

        # The model class has to be something we can call to build an instance
        assert callable(model_class), f'model_class {model_class} is not callable'

        # The model name is passed through as a plain string
        assert isinstance(model_name, str), f'model_name {model_name} is not a string'

        # None is allowed for models that need no API key (e.g. local ones)
        key_env_ok = api_key_env is None or isinstance(api_key_env, str)
        assert key_env_ok, f'api_key_env {api_key_env} must be string or None'

        # Extra constructor arguments are always given as a dict
        assert isinstance(extra_kwargs, dict), f'extra_kwargs {extra_kwargs} is not a dict'