mirror of
https://github.com/browser-use/browser-use
synced 2026-04-22 17:45:09 +02:00
The reinstall hint previously suggested only --full, but --remote-only is a lighter-weight option when users only need cloud browser support.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
336 lines
9.3 KiB
Python
336 lines
9.3 KiB
Python
"""Agent task command handler."""
|
|
|
|
import logging
|
|
import os
|
|
from typing import Any
|
|
|
|
from browser_use.skill_cli.api_key import APIKeyRequired, require_api_key
|
|
from browser_use.skill_cli.sessions import SessionInfo
|
|
|
|
# Module-level logger, named after this module so log records can be filtered per command.
logger = logging.getLogger(__name__)
|
|
|
|
# Cloud-only flags that only work in remote mode
|
|
CLOUD_ONLY_FLAGS = [
    # Parameter names as they appear in the params dict (underscored form).
    # Order matters: it is the order flags are listed in the error message.
    'session_id', 'proxy_country', 'wait', 'stream', 'flash',
    'keep_alive', 'thinking', 'start_url', 'metadata', 'secret',
    'allowed_domain', 'skill_id', 'structured_output', 'judge',
    'judge_ground_truth',
]
|
|
|
|
|
|
async def handle(session: SessionInfo, params: dict[str, Any]) -> Any:
    """Dispatch an agent run command to the cloud or the local executor.

    Routing is decided by ``session.browser_mode``:

    - ``'remote'``: verifies a Browser-Use API key is available, then runs
      the task through the Cloud API.
    - anything else: runs the task with the local agent, after rejecting
      any cloud-only flags that were supplied.

    Args:
        session: Session whose ``browser_mode`` selects the execution path.
        params: Parsed command parameters; ``'task'`` is required.

    Returns:
        The executor's result, or ``{'success': False, 'error': ...}`` when
        the task is missing, the API key is unavailable, or cloud-only
        flags were used in local mode.
    """
    if not params.get('task'):
        return {'success': False, 'error': 'No task provided'}

    if session.browser_mode == 'remote':
        # Cloud execution is gated on the Browser-Use API key.
        try:
            require_api_key('Cloud agent tasks')
        except APIKeyRequired as exc:
            return {'success': False, 'error': str(exc)}
        return await _handle_cloud_task(params)

    # Local mode: any truthy cloud-only parameter is a usage error.
    rejected = [name for name in CLOUD_ONLY_FLAGS if params.get(name)]
    if not rejected:
        return await _handle_local_task(session, params)

    # Imported lazily: only needed on this error path.
    from browser_use.skill_cli.install_config import is_mode_available

    # Render parameter names back into their CLI spelling (--some-flag).
    flags_str = ', '.join('--' + name.replace('_', '-') for name in rejected)

    if is_mode_available('remote'):
        # Remote is installed; the user only has to select it.
        message = f'Cloud-only flags used in local mode: {flags_str}\nUse --browser remote to enable cloud features.'
    else:
        # Remote is not installed; point at both reinstall variants.
        message = (
            f'Cloud-only flags require remote mode: {flags_str}\n'
            f'Remote mode is not installed. Reinstall to enable:\n'
            f' curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- --remote-only\n'
            f' curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- --full'
        )
    return {'success': False, 'error': message}
|
|
|
|
|
|
async def _handle_cloud_task(params: dict[str, Any]) -> Any:
    """Run a task through the Cloud API and report the outcome as a dict.

    A cloud session is created first when a profile or proxy country was
    requested and no existing ``session_id`` was given. Without ``wait``
    the function returns as soon as the task is created; with ``wait`` it
    polls until the task completes and includes status, output and cost.

    NOTE(review): earlier documentation said a US proxy is used by default.
    This function only forwards ``proxy_country`` when the caller set it, so
    any default must be applied elsewhere - confirm.

    Args:
        params: Parsed command parameters; ``'task'`` must be present.

    Returns:
        A result dict with ``'success'`` plus task/session identifiers, or
        ``{'success': False, 'error': ..., 'task': ...}`` on any failure.
    """
    from browser_use.skill_cli.commands import cloud_session, cloud_task

    task = params['task']

    # --vision takes precedence over --no-vision; None means neither was given.
    if params.get('vision'):
        vision: bool | None = True
    else:
        vision = False if params.get('no_vision') else None

    # Repeated key=value options become dicts (or None when absent/empty).
    metadata = _parse_key_value_list(params.get('metadata'))
    secrets = _parse_key_value_list(params.get('secret'))

    # Session-related options; only what the user explicitly set.
    session_id = params.get('session_id')
    profile_id = params.get('profile')
    proxy_country = params.get('proxy_country')

    try:
        logger.info(f'Creating cloud task: {task}')

        # A profile or proxy needs a session to attach to; create one unless
        # the caller already pointed at an existing session.
        needs_new_session = bool(profile_id or proxy_country) and not session_id
        if needs_new_session:
            created = cloud_session.create_session(
                profile_id=profile_id,
                proxy_country=proxy_country,
                keep_alive=params.get('keep_alive'),
            )
            session_id = created.id
            logger.info(f'Created cloud session: {session_id}')

        # Forward the task options exactly as given (unset ones stay None).
        task_response = cloud_task.create_task(
            task=task,
            llm=params.get('llm'),
            session_id=session_id,
            max_steps=params.get('max_steps'),
            flash_mode=params.get('flash'),
            thinking=params.get('thinking'),
            vision=vision,
            start_url=params.get('start_url'),
            metadata=metadata,
            secrets=secrets,
            allowed_domains=params.get('allowed_domain'),
            skill_ids=params.get('skill_id'),
            structured_output=params.get('structured_output'),
            judge=params.get('judge'),
            judge_ground_truth=params.get('judge_ground_truth'),
        )

        task_id = task_response.id
        response_session_id = task_response.session_id

        if not task_id:
            return {
                'success': False,
                'error': 'Cloud API did not return a task ID',
                'task': task,
            }

        logger.info(f'Cloud task created: {task_id}')

        if params.get('wait'):
            # --wait: block until the task reaches a terminal state.
            logger.info('Waiting for task completion...')
            outcome = await cloud_task.poll_until_complete(task_id, stream=params.get('stream', False))
            return {
                'success': True,
                'task': task,
                'task_id': task_id,
                'session_id': response_session_id,
                'status': outcome.status,
                'output': outcome.output,
                'cost': outcome.cost,
                'done': outcome.status == 'finished',
            }

        # Fire-and-forget: hand back the identifiers for later status checks.
        return {
            'success': True,
            'task_id': task_id,
            'session_id': response_session_id,
            'message': 'Task started. Use "browser-use task status <task_id>" to check progress.',
        }

    except Exception as exc:
        # Command boundary: log with traceback and report the failure as data.
        logger.exception(f'Cloud task failed: {exc}')
        return {
            'success': False,
            'error': str(exc),
            'task': task,
        }
|
|
|
|
|
|
def _parse_key_value_list(items: list[str] | None) -> dict[str, str | None] | None:
|
|
"""Parse a list of 'key=value' strings into a dict."""
|
|
if not items:
|
|
return None
|
|
result: dict[str, str | None] = {}
|
|
for item in items:
|
|
if '=' in item:
|
|
key, value = item.split('=', 1)
|
|
result[key] = value
|
|
return result if result else None
|
|
|
|
|
|
async def _handle_local_task(session: SessionInfo, params: dict[str, Any]) -> Any:
    """Run a task locally with the browser-use agent.

    Args:
        session: Session providing the ``browser_session`` the agent drives.
        params: Parsed command parameters. ``'task'`` is required;
            ``'max_steps'`` and ``'llm'`` (model override) are optional.

    Returns:
        On success, a dict with ``'success'``, ``'task'``, ``'steps'``,
        ``'result'`` and ``'done'``. On failure (no usable LLM, or any
        exception while importing/running the agent), a dict with
        ``'success': False`` and an ``'error'`` message.
    """
    import inspect

    task = params['task']
    max_steps = params.get('max_steps')
    model = params.get('llm')  # Optional model override

    try:
        # Imported lazily so the agent stack is only loaded when a task runs.
        from browser_use.agent.service import Agent

        # get_llm is a plain (non-async) function in this module. Awaiting its
        # return value unconditionally raised TypeError for every call, which
        # the handler below then reported as a task failure. Await only when an
        # awaitable actually comes back, so both sync and async variants work.
        llm = get_llm(model=model)
        if inspect.isawaitable(llm):
            llm = await llm

        if llm is None:
            if model:
                return {
                    'success': False,
                    'error': f'Could not initialize model "{model}". '
                    f'Make sure the appropriate API key is set (OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY).',
                }
            return {
                'success': False,
                'error': 'No LLM configured. Set BROWSER_USE_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY',
            }

        # Create and run agent
        agent = Agent(
            task=task,
            llm=llm,
            browser_session=session.browser_session,
        )

        logger.info(f'Running local agent task: {task}')
        # Only pass max_steps when the user set it, so the agent's own
        # default applies otherwise.
        run_kwargs: dict[str, Any] = {}
        if max_steps is not None:
            run_kwargs['max_steps'] = max_steps
        result = await agent.run(**run_kwargs)

        # Extract result info
        final_result = result.final_result() if result else None

        return {
            'success': True,
            'task': task,
            'steps': len(result) if result else 0,
            'result': str(final_result) if final_result else None,
            'done': result.is_done() if result else False,
        }

    except Exception as e:
        # Command boundary: log with traceback and report the failure as data.
        logger.exception(f'Local agent task failed: {e}')
        return {
            'success': False,
            'error': str(e),
            'task': task,
        }
|
|
|
|
|
|
def _get_verified_models() -> dict[str, set[str]]:
    """Collect the model names enumerated by each provider's type definitions.

    Returns:
        A mapping with the keys ``'openai'``, ``'anthropic'``, ``'google'``
        and ``'browser-use'``, each holding a set of model names.
    """
    from typing import get_args

    from anthropic.types.model_param import ModelParam
    from openai.types.shared.chat_model import ChatModel

    from browser_use.llm.google.chat import VerifiedGeminiModels

    # Anthropic's ModelParam is Union[Literal[...], str]; the first member is
    # the Literal that carries the model names.
    anthropic_literal = get_args(ModelParam)[0]

    return {
        # OpenAI's ChatModel is itself a Literal of model names.
        'openai': set(get_args(ChatModel)),
        'anthropic': set(get_args(anthropic_literal)),
        # Google: names come from the project's VerifiedGeminiModels Literal.
        'google': set(get_args(VerifiedGeminiModels)),
        # Browser-Use cloud models are listed by hand.
        'browser-use': {'bu-latest', 'bu-1-0', 'bu-2-0'},
    }
|
|
|
|
|
|
# Lazily populated cache of provider name -> model names. Stays None until
# _get_provider_for_model() first fills it from _get_verified_models().
_VERIFIED_MODELS: dict[str, set[str]] | None = None
|
|
|
|
|
|
def _get_provider_for_model(model: str) -> str | None:
|
|
"""Determine the provider by checking SDK verified model lists."""
|
|
global _VERIFIED_MODELS
|
|
if _VERIFIED_MODELS is None:
|
|
_VERIFIED_MODELS = _get_verified_models()
|
|
|
|
for provider, models in _VERIFIED_MODELS.items():
|
|
if model in models:
|
|
return provider
|
|
|
|
return None
|
|
|
|
|
|
def get_llm(model: str | None = None) -> Any:
    """Build an LLM client from an explicit model name or the environment.

    This is a synchronous function; it returns the client (or ``None``)
    directly.

    Args:
        model: Optional model name. When given, the provider is resolved
            from the verified model lists and that provider's client is
            created for the model. When omitted, the first API key found
            in the environment decides the provider.

    Returns:
        A chat client for OpenAI, Anthropic, Google or Browser-Use, or
        ``None`` when the model is not in any verified list or no API key
        is set.
    """
    from browser_use.llm import ChatAnthropic, ChatBrowserUse, ChatGoogle, ChatOpenAI

    if model:
        clients_by_provider = {
            'openai': ChatOpenAI,
            'anthropic': ChatAnthropic,
            'google': ChatGoogle,
            'browser-use': ChatBrowserUse,
        }
        client_cls = clients_by_provider.get(_get_provider_for_model(model))
        if client_cls is None:
            logger.warning(f'Unknown model: {model}. Not in any verified model list.')
            return None
        return client_cls(model=model)

    # No model specified: probe API keys in priority order and use the
    # default model associated with the first key that is set.
    fallbacks = (
        ('BROWSER_USE_API_KEY', ChatBrowserUse, {}),
        ('OPENAI_API_KEY', ChatOpenAI, {'model': 'o3'}),
        ('ANTHROPIC_API_KEY', ChatAnthropic, {'model': 'claude-sonnet-4-0'}),
        ('GOOGLE_API_KEY', ChatGoogle, {'model': 'gemini-flash-latest'}),
    )
    for env_var, client_cls, client_kwargs in fallbacks:
        if os.environ.get(env_var):
            return client_cls(**client_kwargs)

    return None
|