Files
browser-use/browser_use/controller/registry/service.py
2025-06-19 23:45:41 +02:00

520 lines
20 KiB
Python

import asyncio
import functools
import inspect
import logging
import re
from collections.abc import Callable
from inspect import Parameter, iscoroutinefunction, signature
from types import UnionType
from typing import Any, Generic, Optional, TypeVar, Union, get_args, get_origin
from langchain_core.language_models.chat_models import BaseChatModel
from pydantic import BaseModel, Field, create_model
from browser_use.browser import BrowserSession
from browser_use.browser.types import Page
from browser_use.controller.registry.views import (
ActionModel,
ActionRegistry,
RegisteredAction,
SpecialActionParameters,
)
from browser_use.filesystem.file_system import FileSystem
from browser_use.telemetry.service import ProductTelemetry
from browser_use.telemetry.views import (
ControllerRegisteredFunctionsTelemetryEvent,
RegisteredFunction,
)
from browser_use.utils import match_url_with_domain_pattern, time_execution_async
# Type variable for the caller-supplied `context` object; Registry is generic over it
# and execute_action() forwards a value of this type to actions as the `context` special parameter.
Context = TypeVar('Context')
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class Registry(Generic[Context]):
    """Service for registering and managing actions.

    Actions are plain functions registered through the :meth:`action` decorator.
    Each one is wrapped so it can be invoked uniformly as
    ``await fn(params=<model>, **special_params)`` and is stored, together with a
    pydantic parameter model, in ``self.registry.actions`` keyed by function name.
    """

    # Special parameters that get the short "requires X but none provided" error message.
    _NAMED_REQUIRED_SPECIAL_PARAMS = ('browser_session', 'page_extraction_llm', 'file_system', 'page')

    def __init__(self, exclude_actions: list[str] | None = None):
        """Create an empty registry.

        Args:
            exclude_actions: Function names that the :meth:`action` decorator must
                skip (the decorated function is returned untouched and not registered).
        """
        self.registry = ActionRegistry()
        self.telemetry = ProductTelemetry()
        self.exclude_actions = exclude_actions if exclude_actions is not None else []

    def _get_special_param_types(self) -> dict[str, type | UnionType | None]:
        """Get the expected types for special parameters from SpecialActionParameters.

        Returns:
            Mapping of special parameter name -> expected type. ``None`` means the
            annotation of that parameter is not validated.
        """
        # Manually define the expected types to avoid issues with Optional handling.
        # we should try to reduce this list to 0 if possible, give as few standardized objects to all the actions
        # but each driver should decide what is relevant to expose the action methods,
        # e.g. playwright page, 2fa code getters, sensitive_data wrappers, other context, etc.
        return {
            'context': None,  # Context is a TypeVar, so we can't validate type
            'browser_session': BrowserSession,
            'browser': BrowserSession,  # legacy name
            'browser_context': BrowserSession,  # legacy name
            'page': Page,
            'page_extraction_llm': BaseChatModel,
            'available_file_paths': list,
            'has_sensitive_data': bool,
            'file_system': FileSystem,
        }

    @classmethod
    def _missing_special_param_error(cls, action_name: str, param_name: str) -> ValueError:
        """Build the error raised when a required special parameter is absent or None.

        The message wording is relied upon by execute_action(), which inspects the
        text of ValueErrors, so it must not be altered.
        """
        if param_name in cls._NAMED_REQUIRED_SPECIAL_PARAMS:
            return ValueError(f'Action {action_name} requires {param_name} but none provided.')
        return ValueError(f"{action_name}() missing required special parameter '{param_name}'")

    @staticmethod
    def _validate_special_param_type(action_name: str, param: Parameter, expected_type: Any) -> None:
        """Raise ValueError if a special parameter's annotation conflicts with the injected type.

        Unannotated parameters and special parameters without an expected type are
        accepted without any check.
        """
        if param.annotation is Parameter.empty or expected_type is None:
            return

        # Handle Optional types - normalize both sides.
        # `X | None` reports types.UnionType as its origin on Python 3.10-3.13 while
        # Optional[X] / Union[X, None] report typing.Union, so both must be unwrapped.
        param_type = param.annotation
        origin = get_origin(param_type)
        if origin is Union or origin is UnionType:
            # Find non-None type
            param_type = next((arg for arg in get_args(param_type) if arg is not type(None)), param_type)

        # Check if types are compatible (exact match, subclass, or generic list)
        types_compatible = (
            param_type == expected_type
            or (inspect.isclass(param_type) and inspect.isclass(expected_type) and issubclass(param_type, expected_type))
            or
            # Handle list[T] vs list comparison
            (expected_type is list and (param_type is list or get_origin(param_type) is list))
        )
        if types_compatible:
            return

        expected_type_name = getattr(expected_type, '__name__', str(expected_type))
        param_type_name = getattr(param_type, '__name__', str(param_type))
        raise ValueError(
            f"Action '{action_name}' parameter '{param.name}: {param_type_name}' "
            f"conflicts with special argument injected by controller: '{param.name}: {expected_type_name}'"
        )

    def _normalize_action_function_signature(
        self,
        func: Callable,
        description: str,
        param_model: type[BaseModel] | None = None,
    ) -> tuple[Callable, type[BaseModel]]:
        """
        Normalize action function to accept only kwargs.

        Two calling conventions are recognised:

        - Type 1: ``param_model`` is given and the function's first parameter is not
          a special parameter name; that first parameter receives the model instance.
        - Type 2: every non-special parameter is an individual action parameter; when
          no ``param_model`` is given one is generated from those parameters
          (unannotated parameters are typed as ``str``).

        Args:
            func: The sync or async action function to wrap.
            description: Action description (not used by the normalization itself).
            param_model: Optional explicit pydantic model describing the action params.

        Returns:
            - Normalized function that accepts (*_, params: ParamModel, **special_params)
            - The param model to use for registration

        Raises:
            ValueError: If ``func`` declares ``**kwargs`` or annotates a special
                parameter with a type that conflicts with the injected one.
        """
        sig = signature(func)
        parameters = list(sig.parameters.values())
        special_param_types = self._get_special_param_types()
        special_param_names = set(special_param_types.keys())

        # Step 1: Validate no **kwargs in original function signature
        # if it needs default values it must use a dedicated param_model: BaseModel instead
        for param in parameters:
            if param.kind == Parameter.VAR_KEYWORD:
                raise ValueError(
                    f"Action '{func.__name__}' has **{param.name} which is not allowed. "
                    f'Actions must have explicit positional parameters only.'
                )

        param_model_provided = param_model is not None
        # Type 1 pattern: an explicit model was supplied and the first parameter takes it.
        is_type1 = bool(param_model_provided and parameters and parameters[0].name not in special_param_names)

        # Step 2: Separate special and action parameters
        action_params = []
        special_params = []
        for i, param in enumerate(parameters):
            if i == 0 and is_type1:
                # This is Type 1 pattern - skip the params argument
                continue
            if param.name in special_param_names:
                self._validate_special_param_type(func.__name__, param, special_param_types.get(param.name))
                special_params.append(param)
            else:
                action_params.append(param)

        # Step 3: Create or validate param model
        if not param_model_provided:
            # Type 2: Generate param model from action params (an empty model if there are none)
            params_dict = {}
            for param in action_params:
                annotation = param.annotation if param.annotation is not Parameter.empty else str
                default = ... if param.default is Parameter.empty else param.default
                params_dict[param.name] = (annotation, default)
            param_model = create_model(f'{func.__name__}_Params', __base__=ActionModel, **params_dict)
        assert param_model is not None, f'param_model is None for {func.__name__}'

        missing_special = self._missing_special_param_error

        # Step 4: Create normalized wrapper function
        @functools.wraps(func)
        async def normalized_wrapper(*args, params: BaseModel | None = None, **kwargs):
            """Normalized action that only accepts kwargs"""
            # Validate no positional args
            if args:
                raise TypeError(f'{func.__name__}() does not accept positional arguments, only keyword arguments are allowed')

            if is_type1:
                # For Type 1 the params object itself is passed as the first argument
                if params is None:
                    raise ValueError(f"{func.__name__}() missing required 'params' argument")
            elif params is None and action_params:
                # Type 2 pattern - if params is None, try to create it from kwargs
                action_kwargs = {p.name: kwargs[p.name] for p in action_params if p.name in kwargs}
                if action_kwargs:
                    # Use the param_model which has the correct types defined
                    params = param_model(**action_kwargs)

            # Build call_args by iterating through original function parameters in order
            call_args = []
            params_dict = params.model_dump() if params is not None else {}
            for i, param in enumerate(parameters):
                if i == 0 and is_type1:
                    # First param for Type 1 pattern is the model itself
                    call_args.append(params)
                elif param.name in special_param_names:
                    # This is a special parameter
                    if param.name in kwargs:
                        value = kwargs[param.name]
                        # A required special param must not be None
                        if value is None and param.default is Parameter.empty:
                            raise missing_special(func.__name__, param.name)
                        call_args.append(value)
                    elif param.default is not Parameter.empty:
                        call_args.append(param.default)
                    else:
                        # Special param is required but not provided
                        raise missing_special(func.__name__, param.name)
                else:
                    # This is an action parameter
                    if param.name in params_dict:
                        call_args.append(params_dict[param.name])
                    elif param.default is not Parameter.empty:
                        call_args.append(param.default)
                    else:
                        raise ValueError(f"{func.__name__}() missing required parameter '{param.name}'")

            # Call original function with positional args; sync functions run in a worker thread
            if iscoroutinefunction(func):
                return await func(*call_args)
            return await asyncio.to_thread(func, *call_args)

        # Update wrapper signature to be kwargs-only
        new_params = [Parameter('params', Parameter.KEYWORD_ONLY, default=None, annotation=Optional[param_model])]
        # Add special params as keyword-only
        for sp in special_params:
            new_params.append(Parameter(sp.name, Parameter.KEYWORD_ONLY, default=sp.default, annotation=sp.annotation))
        # Add **kwargs to accept and ignore extra params
        new_params.append(Parameter('kwargs', Parameter.VAR_KEYWORD))
        normalized_wrapper.__signature__ = sig.replace(parameters=new_params)  # type: ignore[attr-defined]

        return normalized_wrapper, param_model

    # @time_execution_sync('--create_param_model')
    def _create_param_model(self, function: Callable) -> type[BaseModel]:
        """Creates a Pydantic model from function signature.

        Parameters whose names are fields of SpecialActionParameters are left out.
        Unannotated parameters are typed as ``str``, matching the fallback used by
        _normalize_action_function_signature().
        """
        sig = signature(function)
        special_param_names = set(SpecialActionParameters.model_fields.keys())
        params = {
            name: (
                param.annotation if param.annotation is not Parameter.empty else str,
                ... if param.default is Parameter.empty else param.default,
            )
            for name, param in sig.parameters.items()
            if name not in special_param_names
        }
        # TODO: make the types here work
        return create_model(
            f'{function.__name__}_parameters',
            __base__=ActionModel,
            **params,  # type: ignore
        )

    def action(
        self,
        description: str,
        param_model: type[BaseModel] | None = None,
        domains: list[str] | None = None,
        allowed_domains: list[str] | None = None,
        page_filter: Callable[[Any], bool] | None = None,
    ):
        """Decorator for registering actions.

        Args:
            description: Description stored with the registered action.
            param_model: Optional explicit pydantic model for the action parameters.
            domains: Domain patterns stored on the action to restrict where it is offered.
            allowed_domains: Alias of ``domains``; only one of the two may be given.
            page_filter: Optional predicate stored on the action to decide per page
                whether it is offered.

        Returns:
            A decorator. It returns the normalized (kwargs-only, async) wrapper, or
            the original function unchanged when its name is in ``exclude_actions``.

        Raises:
            ValueError: If both ``domains`` and ``allowed_domains`` are specified.
        """
        # Handle aliases: domains and allowed_domains are the same parameter
        if allowed_domains is not None and domains is not None:
            raise ValueError("Cannot specify both 'domains' and 'allowed_domains' - they are aliases for the same parameter")
        final_domains = allowed_domains if allowed_domains is not None else domains

        def decorator(func: Callable):
            # Skip registration if action is in exclude_actions
            if func.__name__ in self.exclude_actions:
                return func

            # Normalize the function signature
            normalized_func, actual_param_model = self._normalize_action_function_signature(func, description, param_model)

            action = RegisteredAction(
                name=func.__name__,
                description=description,
                function=normalized_func,
                param_model=actual_param_model,
                domains=final_domains,
                page_filter=page_filter,
            )
            self.registry.actions[func.__name__] = action

            # Return the normalized function so it can be called with kwargs
            return normalized_func

        return decorator

    @time_execution_async('--execute_action')
    async def execute_action(
        self,
        action_name: str,
        params: dict,
        browser_session: BrowserSession | None = None,
        page_extraction_llm: BaseChatModel | None = None,
        file_system: FileSystem | None = None,
        sensitive_data: dict[str, str | dict[str, str]] | None = None,
        available_file_paths: list[str] | None = None,
        #
        context: Context | None = None,
    ) -> Any:
        """Execute a registered action with simplified parameter handling.

        The raw ``params`` dict is validated against the action's param model,
        ``<secret>`` placeholders are substituted when ``sensitive_data`` is given,
        and the action is called with the special parameters injected as kwargs.
        A failed call is retried once with a freshly fetched page.

        Returns:
            Whatever the action function returns.

        Raises:
            ValueError: If ``action_name`` is not registered.
            RuntimeError: For every other failure (invalid params, failing action).
        """
        if action_name not in self.registry.actions:
            raise ValueError(f'Action {action_name} not found')

        action = self.registry.actions[action_name]
        try:
            # Create the validated Pydantic model
            try:
                validated_params = action.param_model(**params)
            except Exception as e:
                raise ValueError(f'Invalid parameters {params} for action {action_name}: {type(e)}: {e}') from e

            if sensitive_data:
                # Get current URL if browser_session is provided
                current_url = None
                if browser_session:
                    if browser_session.agent_current_page:
                        current_url = browser_session.agent_current_page.url
                    else:
                        current_page = await browser_session.get_current_page()
                        current_url = current_page.url if current_page else None
                validated_params = self._replace_sensitive_data(validated_params, sensitive_data, current_url)

            # Build special context dict
            special_context = {
                'context': context,
                'browser_session': browser_session,
                'browser': browser_session,  # legacy support
                'browser_context': browser_session,  # legacy support
                'page_extraction_llm': page_extraction_llm,
                'available_file_paths': available_file_paths,
                'has_sensitive_data': action_name == 'input_text' and bool(sensitive_data),
                'file_system': file_system,
            }

            # Handle async page parameter if needed
            if browser_session:
                # Check if function signature includes 'page' parameter
                sig = signature(action.function)
                if 'page' in sig.parameters:
                    special_context['page'] = await browser_session.get_current_page()

            # All functions are now normalized to accept kwargs only
            # Call with params and unpacked special context
            try:
                return await action.function(params=validated_params, **special_context)
            except Exception as e:
                # Retry once if it's a page error
                logger.warning(f'⚠️ Action {action_name}() failed: {type(e).__name__}: {e}, trying one more time...')
                special_context['page'] = browser_session and await browser_session.get_current_page()
                try:
                    return await action.function(params=validated_params, **special_context)
                except Exception as retry_error:
                    # Report the first failure; the retry failure is attached as the cause
                    raise RuntimeError(
                        f'Action {action_name}() failed: {type(e).__name__}: {e} (page may have closed or navigated away mid-action)'
                    ) from retry_error

        except ValueError as e:
            # Preserve ValueError messages from validation
            if 'requires browser_session but none provided' in str(e) or 'requires page_extraction_llm but none provided' in str(
                e
            ):
                raise RuntimeError(str(e)) from e
            else:
                raise RuntimeError(f'Error executing action {action_name}: {str(e)}') from e
        except Exception as e:
            raise RuntimeError(f'Error executing action {action_name}: {str(e)}') from e

    def _log_sensitive_data_usage(self, placeholders_used: set[str], current_url: str | None) -> None:
        """Log when sensitive data is being used on a page.

        Nothing is logged when ``placeholders_used`` is empty. The URL is appended
        only when it is set and is not ``about:blank``.
        """
        if placeholders_used:
            url_info = f' on {current_url}' if current_url and current_url != 'about:blank' else ''
            logger.info(f'🔒 Using sensitive data placeholders: {", ".join(sorted(placeholders_used))}{url_info}')

    def _replace_sensitive_data(
        self, params: BaseModel, sensitive_data: dict[str, Any], current_url: str | None = None
    ) -> BaseModel:
        """
        Replaces sensitive data placeholders in params with actual values.

        Args:
            params: The parameter object containing <secret>placeholder</secret> tags
            sensitive_data: Dictionary of sensitive data, either in old format {key: value}
                or new format {domain_pattern: {key: value}}
            current_url: Optional current URL for domain matching

        Returns:
            BaseModel: The parameter object with placeholders replaced by actual values.
            Placeholders with no applicable (non-empty) secret are left in place and
            reported in a warning.
        """
        secret_pattern = re.compile(r'<secret>(.*?)</secret>')

        # Set to track all missing placeholders across the full object
        all_missing_placeholders = set()
        # Set to track successfully replaced placeholders
        replaced_placeholders = set()

        # Process sensitive data based on format and current URL
        applicable_secrets = {}
        for domain_or_key, content in sensitive_data.items():
            if isinstance(content, dict):
                # New format: {domain_pattern: {key: value}}
                # Only include secrets for domains that match the current URL
                if current_url and current_url != 'about:blank':
                    # it's a real url, check it using our custom allowed_domains scheme://*.example.com glob matching
                    if match_url_with_domain_pattern(current_url, domain_or_key):
                        applicable_secrets.update(content)
            else:
                # Old format: {key: value}, expose to all domains (only allowed for legacy reasons)
                applicable_secrets[domain_or_key] = content

        # Filter out empty values
        applicable_secrets = {k: v for k, v in applicable_secrets.items() if v}

        def substitute(match: re.Match) -> str:
            placeholder = match.group(1)
            if placeholder in applicable_secrets:
                replaced_placeholders.add(placeholder)
                # str() so a non-string secret (e.g. a numeric PIN) does not raise TypeError
                return str(applicable_secrets[placeholder])
            # Keep track of missing placeholders
            all_missing_placeholders.add(placeholder)
            # Don't replace the tag, keep it as is
            return match.group(0)

        def recursively_replace_secrets(value: str | dict | list) -> str | dict | list:
            if isinstance(value, str):
                # Single pass: text coming from a secret value is never re-scanned, so a
                # secret that itself contains a <secret>..</secret> tag is not expanded again.
                return secret_pattern.sub(substitute, value)
            elif isinstance(value, dict):
                return {k: recursively_replace_secrets(v) for k, v in value.items()}
            elif isinstance(value, list):
                return [recursively_replace_secrets(v) for v in value]
            return value

        params_dump = params.model_dump()
        processed_params = recursively_replace_secrets(params_dump)

        # Log sensitive data usage
        self._log_sensitive_data_usage(replaced_placeholders, current_url)

        # Log a warning if any placeholders are missing (sorted for a stable message)
        if all_missing_placeholders:
            logger.warning(f'Missing or empty keys in sensitive_data dictionary: {", ".join(sorted(all_missing_placeholders))}')

        return type(params).model_validate(processed_params)

    # @time_execution_sync('--create_action_model')
    def create_action_model(self, include_actions: list[str] | None = None, page=None) -> type[ActionModel]:
        """Creates a Pydantic model from registered actions, used by LLM APIs that support tool calling & enforce a schema.

        Args:
            include_actions: If given, only actions whose names are listed are considered.
            page: If None, only actions with neither a page filter nor domains are
                included; otherwise only actions whose domains and page filter both
                match the page.

        Returns:
            A model subclassing ActionModel with one optional field per available
            action. A telemetry event listing those actions is captured as a side effect.
        """
        # Filter actions based on page if provided:
        #   if page is None, only include actions with no filters
        #   if page is provided, only include actions that match the page
        available_actions = {}
        for name, action in self.registry.actions.items():
            if include_actions is not None and name not in include_actions:
                continue

            # If no page provided, only include actions with no filters
            if page is None:
                if action.page_filter is None and action.domains is None:
                    available_actions[name] = action
                continue

            # Check page_filter if present
            domain_is_allowed = self.registry._match_domains(action.domains, page.url)
            page_is_allowed = self.registry._match_page_filter(action.page_filter, page)

            # Include action if both filters match (or if either is not present)
            if domain_is_allowed and page_is_allowed:
                available_actions[name] = action

        fields = {
            name: (
                Optional[action.param_model],
                Field(default=None, description=action.description),
            )
            for name, action in available_actions.items()
        }

        self.telemetry.capture(
            ControllerRegisteredFunctionsTelemetryEvent(
                registered_functions=[
                    RegisteredFunction(name=name, params=action.param_model.model_json_schema())
                    for name, action in available_actions.items()
                ]
            )
        )

        return create_model('ActionModel', __base__=ActionModel, **fields)  # type:ignore

    def get_prompt_description(self, page=None) -> str:
        """Get a description of all actions for the prompt

        If page is provided, only include actions that are available for that page
        based on their filter_func. The work is delegated to the underlying
        ActionRegistry.
        """
        return self.registry.get_prompt_description(page=page)