Files
browser-use/browser_use/skill_cli/commands/agent.py
ShawnPana a1d20ed81c fix(cli): include --remote-only in remote mode error messages
Previously only suggested --full, but --remote-only is a lighter-weight
option when users only need cloud browser support.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-12 10:26:31 -08:00

336 lines
9.3 KiB
Python

"""Agent task command handler."""
import logging
import os
from typing import Any
from browser_use.skill_cli.api_key import APIKeyRequired, require_api_key
from browser_use.skill_cli.sessions import SessionInfo
logger = logging.getLogger(__name__)
# Parameter names that handle() treats as cloud-only.
# If any of these is truthy in params while the session is NOT in remote mode,
# handle() refuses to run the task and returns an error listing the offending
# flags. Names are stored with underscores; handle() renders them as
# `--dashed-flags` in the error message.
CLOUD_ONLY_FLAGS = [
'session_id',
'proxy_country',
'wait',
'stream',
'flash',
'keep_alive',
'thinking',
'start_url',
'metadata',
'secret',
'allowed_domain',
'skill_id',
'structured_output',
'judge',
'judge_ground_truth',
]
async def handle(session: SessionInfo, params: dict[str, Any]) -> Any:
    """Dispatch an agent run to the cloud or the local backend.

    Routing is decided by ``session.browser_mode``:
    - ``'remote'``: an API key is required (its absence is reported as an
      error dict), then the task is handed to the Cloud API handler.
    - anything else: the task runs with the local agent, unless a cloud-only
      flag was supplied, in which case an explanatory error dict is returned.

    Args:
        session: Current CLI session; only ``browser_mode`` is read here.
        params: Parsed command parameters; ``task`` must be non-empty.

    Returns:
        The selected handler's result dict, or ``{'success': False, 'error': ...}``.
    """
    if not params.get('task'):
        return {'success': False, 'error': 'No task provided'}

    if session.browser_mode == 'remote':
        # Cloud execution needs a Browser-Use API key.
        try:
            require_api_key('Cloud agent tasks')
        except APIKeyRequired as e:
            return {'success': False, 'error': str(e)}
        return await _handle_cloud_task(params)

    # Local mode: make sure no cloud-only flag slipped in.
    offending = [name for name in CLOUD_ONLY_FLAGS if params.get(name)]
    if not offending:
        return await _handle_local_task(session, params)

    from browser_use.skill_cli.install_config import is_mode_available

    flags_str = ', '.join(f'--{name.replace("_", "-")}' for name in offending)
    if is_mode_available('remote'):
        # Remote support is installed; the user only has to select it.
        message = f'Cloud-only flags used in local mode: {flags_str}\nUse --browser remote to enable cloud features.'
    else:
        # Remote support is missing from this install; show how to add it.
        message = (
            f'Cloud-only flags require remote mode: {flags_str}\n'
            f'Remote mode is not installed. Reinstall to enable:\n'
            f' curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- --remote-only\n'
            f' curl -fsSL https://browser-use.com/cli/install.sh | bash -s -- --full'
        )
    return {'success': False, 'error': message}
async def _handle_cloud_task(params: dict[str, Any]) -> Any:
    """Create a task through the Cloud API and optionally wait for it.

    A cloud session is created first when a profile or proxy country is
    requested and no ``session_id`` was given. Without ``wait`` the function
    returns as soon as the task exists; with ``wait`` it polls until the task
    completes and reports status, output and cost.

    NOTE(review): the previous docstring said a US proxy is used by default;
    no such default is set in this function - presumably it is applied by the
    cloud helpers or the API itself. Confirm.

    Args:
        params: Parsed command parameters; ``task`` is required.

    Returns:
        A result dict. Any exception raised while talking to the cloud is
        logged and reported as ``{'success': False, 'error': ..., 'task': ...}``.
    """
    from browser_use.skill_cli.commands import cloud_session, cloud_task

    task = params['task']

    # Tri-state vision setting: --vision -> True, --no-vision -> False,
    # neither -> None (left unspecified).
    vision: bool | None = True if params.get('vision') else (False if params.get('no_vision') else None)

    # Repeated key=value options become dicts (or None when absent).
    metadata = _parse_key_value_list(params.get('metadata'))
    secrets = _parse_key_value_list(params.get('secret'))

    # Session-related options; only forwarded when the user supplied them.
    session_id = params.get('session_id')
    profile_id = params.get('profile')
    proxy_country = params.get('proxy_country')

    try:
        logger.info(f'Creating cloud task: {task}')

        # A profile or proxy needs a session to attach to; make one unless
        # the caller already pointed at an existing session.
        needs_new_session = bool(profile_id or proxy_country) and not session_id
        if needs_new_session:
            created_session = cloud_session.create_session(
                profile_id=profile_id,
                proxy_country=proxy_country,
                keep_alive=params.get('keep_alive'),
            )
            session_id = created_session.id
            logger.info(f'Created cloud session: {session_id}')

        # Forward only what the user set; unset options stay None.
        task_kwargs = {
            'task': task,
            'llm': params.get('llm'),
            'session_id': session_id,
            'max_steps': params.get('max_steps'),
            'flash_mode': params.get('flash'),
            'thinking': params.get('thinking'),
            'vision': vision,
            'start_url': params.get('start_url'),
            'metadata': metadata,
            'secrets': secrets,
            'allowed_domains': params.get('allowed_domain'),
            'skill_ids': params.get('skill_id'),
            'structured_output': params.get('structured_output'),
            'judge': params.get('judge'),
            'judge_ground_truth': params.get('judge_ground_truth'),
        }
        created_task = cloud_task.create_task(**task_kwargs)

        task_id = created_task.id
        response_session_id = created_task.session_id

        if not task_id:
            return {
                'success': False,
                'error': 'Cloud API did not return a task ID',
                'task': task,
            }

        logger.info(f'Cloud task created: {task_id}')

        if params.get('wait'):
            # --wait: block until the task reaches a terminal state.
            logger.info('Waiting for task completion...')
            outcome = await cloud_task.poll_until_complete(task_id, stream=params.get('stream', False))
            return {
                'success': True,
                'task': task,
                'task_id': task_id,
                'session_id': response_session_id,
                'status': outcome.status,
                'output': outcome.output,
                'cost': outcome.cost,
                'done': outcome.status == 'finished',
            }

        # Fire-and-forget: hand back the identifiers right away.
        return {
            'success': True,
            'task_id': task_id,
            'session_id': response_session_id,
            'message': 'Task started. Use "browser-use task status <task_id>" to check progress.',
        }
    except Exception as exc:
        logger.exception(f'Cloud task failed: {exc}')
        return {
            'success': False,
            'error': str(exc),
            'task': task,
        }
def _parse_key_value_list(items: list[str] | None) -> dict[str, str | None] | None:
"""Parse a list of 'key=value' strings into a dict."""
if not items:
return None
result: dict[str, str | None] = {}
for item in items:
if '=' in item:
key, value = item.split('=', 1)
result[key] = value
return result if result else None
async def _handle_local_task(session: SessionInfo, params: dict[str, Any]) -> Any:
    """Run the task in-process with the local browser-use agent.

    Args:
        session: Current CLI session; its ``browser_session`` is handed to the agent.
        params: Parsed command parameters. ``task`` is required; ``max_steps``
            and ``llm`` (a model-name override) are optional.

    Returns:
        On success, a dict with ``success``, ``task``, ``steps``, ``result``
        and ``done``. If no LLM can be obtained, or any exception is raised
        while importing/creating/running the agent, a dict with
        ``success: False`` and an ``error`` message (the exception is logged).
    """
    task = params['task']
    max_steps = params.get('max_steps')
    model = params.get('llm')  # Optional model override

    try:
        # Imported lazily so the agent stack is only loaded when a local task runs.
        from browser_use.agent.service import Agent

        # get_llm() is a plain synchronous function that returns an LLM object
        # or None. It must NOT be awaited: awaiting a non-awaitable raises
        # TypeError, which previously made every local task fail here and
        # left the "no LLM" messages below unreachable.
        llm = get_llm(model=model)
        if llm is None:
            if model:
                return {
                    'success': False,
                    'error': f'Could not initialize model "{model}". '
                    f'Make sure the appropriate API key is set (OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY).',
                }
            return {
                'success': False,
                'error': 'No LLM configured. Set BROWSER_USE_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY',
            }

        # Create and run agent
        agent = Agent(
            task=task,
            llm=llm,
            browser_session=session.browser_session,
        )
        logger.info(f'Running local agent task: {task}')

        # Only pass max_steps when the user set it, so the agent's own
        # default applies otherwise.
        run_kwargs: dict[str, Any] = {}
        if max_steps is not None:
            run_kwargs['max_steps'] = max_steps
        result = await agent.run(**run_kwargs)

        # Extract result info; a falsy run result yields empty/zero values.
        final_result = result.final_result() if result else None
        return {
            'success': True,
            'task': task,
            'steps': len(result) if result else 0,
            'result': str(final_result) if final_result else None,
            'done': result.is_done() if result else False,
        }
    except Exception as e:
        logger.exception(f'Local agent task failed: {e}')
        return {
            'success': False,
            'error': str(e),
            'task': task,
        }
def _get_verified_models() -> dict[str, set[str]]:
    """Collect the known model names for each provider.

    Names are read from type definitions in the provider SDKs (imported
    lazily inside the function); the Browser-Use names are listed inline.

    Returns:
        Mapping of provider key (``'openai'``, ``'anthropic'``, ``'google'``,
        ``'browser-use'``) to its set of model names.
    """
    import typing

    from anthropic.types.model_param import ModelParam
    from openai.types.shared.chat_model import ChatModel

    from browser_use.llm.google.chat import VerifiedGeminiModels

    args_of = typing.get_args
    return {
        # ChatModel is expected to be a Literal of model names.
        'openai': set(args_of(ChatModel)),
        # ModelParam is expected to be Union[Literal[...], str]; the first
        # union member is the Literal carrying the names.
        'anthropic': set(args_of(args_of(ModelParam)[0])),
        # VerifiedGeminiModels is expected to be a Literal of model names.
        'google': set(args_of(VerifiedGeminiModels)),
        # Browser-Use cloud models.
        'browser-use': {'bu-latest', 'bu-1-0', 'bu-2-0'},
    }
# Filled on first lookup so the provider SDK imports are only paid once.
_VERIFIED_MODELS: dict[str, set[str]] | None = None


def _get_provider_for_model(model: str) -> str | None:
    """Return the provider key whose verified model set contains ``model``.

    The verified-model table is built on first use and kept in the
    module-level ``_VERIFIED_MODELS``.

    Args:
        model: Model name to look up (exact match).

    Returns:
        The first provider key listing the model, or ``None`` if none does.
    """
    global _VERIFIED_MODELS
    if _VERIFIED_MODELS is None:
        _VERIFIED_MODELS = _get_verified_models()

    matches = (name for name, known in _VERIFIED_MODELS.items() if model in known)
    return next(matches, None)
def get_llm(model: str | None = None) -> Any:
    """Build an LLM client for an explicit model or from available API keys.

    Args:
        model: Optional model name. When given, its provider is looked up in
            the verified model lists and that provider's client is created
            for the model. When omitted, the first API key found in the
            environment decides the provider and a default model.

    Returns:
        A chat client instance, or ``None`` when the model is not in any
        verified list (a warning is logged) or no API key is set.

    Supported providers: OpenAI, Anthropic, Google, Browser-Use.
    """
    from browser_use.llm import ChatAnthropic, ChatBrowserUse, ChatGoogle, ChatOpenAI

    if model:
        clients_by_provider = {
            'openai': ChatOpenAI,
            'anthropic': ChatAnthropic,
            'google': ChatGoogle,
            'browser-use': ChatBrowserUse,
        }
        client_cls = clients_by_provider.get(_get_provider_for_model(model))
        if client_cls is None:
            logger.warning(f'Unknown model: {model}. Not in any verified model list.')
            return None
        return client_cls(model=model)

    # No model requested: take the first provider with an API key set,
    # checked in this priority order.
    fallbacks = (
        ('BROWSER_USE_API_KEY', lambda: ChatBrowserUse()),
        ('OPENAI_API_KEY', lambda: ChatOpenAI(model='o3')),
        ('ANTHROPIC_API_KEY', lambda: ChatAnthropic(model='claude-sonnet-4-0')),
        ('GOOGLE_API_KEY', lambda: ChatGoogle(model='gemini-flash-latest')),
    )
    for env_var, build in fallbacks:
        if os.environ.get(env_var):
            return build()
    return None