diff --git a/.env.example b/.env.example index a6724f8e0..dfd481610 100644 --- a/.env.example +++ b/.env.example @@ -1,2 +1,8 @@ OPENAI_API_KEY= -ANTHROPIC_API_KEY= \ No newline at end of file +ANTHROPIC_API_KEY= + +# Set to false to disable anonymized telemetry +ANONYMIZED_TELEMETRY=true + +# Set to true to enable verbose logging +BROWSER_USE_DEBUG_LOGGING=true \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index dc3f727ca..8b09300df 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,11 @@ { - "python.analysis.typeCheckingMode": "basic" + "python.analysis.typeCheckingMode": "basic", + "[python]": { + "editor.defaultFormatter": "charliermarsh.ruff", + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.fixAll.ruff": "explicit", + "source.organizeImports.ruff": "explicit" + } + } } diff --git a/README.md b/README.md index 4f19acb44..e8b515275 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,12 @@ print(history) For more examples see the [examples](examples) folder or join the [Discord](https://link.browser-use.com/discord) and show off your project. +## Telemetry + +We collect anonymous usage data to help us understand how the library is being used and to identify potential issues. Events are tagged with a randomly generated ID rather than with personal details, but they do include the task text and step error messages, so avoid putting sensitive information in task prompts while telemetry is enabled. We collect data with PostHog. + +You can opt out of telemetry by setting the `ANONYMIZED_TELEMETRY=false` environment variable. + # Contributing Contributions are welcome! Feel free to open issues for bugs or feature requests. 
diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index 77c868a55..ba9d979ba 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -4,6 +4,7 @@ import json import logging import os import time +import uuid from datetime import datetime from typing import Any, Optional, TypeVar @@ -22,12 +23,17 @@ from browser_use.agent.views import ( AgentHistory, AgentOutput, ModelPricingCatalog, - Pricing, TokenDetails, TokenUsage, ) from browser_use.browser.views import BrowserState from browser_use.controller.service import Controller +from browser_use.telemetry.service import ProductTelemetry +from browser_use.telemetry.views import ( + AgentEndTelemetryEvent, + AgentRunTelemetryEvent, + AgentStepErrorTelemetryEvent, +) from browser_use.utils import time_execution_async load_dotenv() @@ -47,6 +53,8 @@ class Agent: max_failures: int = 5, retry_delay: int = 10, ): + self.agent_id = str(uuid.uuid4()) # unique identifier for the agent + self.task = task self.use_vision = use_vision self.llm = llm @@ -56,6 +64,9 @@ class Agent: self.controller_injected = controller is not None self.controller = controller or Controller() + # Telemetry setup + self.telemetry = ProductTelemetry() + # Action and output models setup self._setup_action_models() @@ -93,7 +104,9 @@ class Agent: action_descriptions = self.controller.registry.get_prompt_description() system_prompt = AgentSystemPrompt( - self.task, action_description=action_descriptions, current_date=datetime.now() + self.task, + action_description=action_descriptions, + current_date=datetime.now(), ).get_system_message() first_message = HumanMessage(content=f'Your task is: {self.task}') @@ -115,6 +128,15 @@ class Agent: except Exception as e: result = self._handle_step_error(e, state) model_output = None + + if result.error: + self.telemetry.capture( + AgentStepErrorTelemetryEvent( + agent_id=self.agent_id, + error=result.error, + ) + ) + self._update_messages_with_result(result) 
self._make_history_item(model_output, state, result) @@ -356,6 +378,13 @@ class Agent: try: logger.info(f'🚀 Starting task: {self.task}') + self.telemetry.capture( + AgentRunTelemetryEvent( + agent_id=self.agent_id, + task=self.task, + ) + ) + for step in range(max_steps): if self._too_many_failures(): break @@ -371,6 +400,14 @@ class Agent: return self.history finally: + self.telemetry.capture( + AgentEndTelemetryEvent( + agent_id=self.agent_id, + task=self.task, + success=self._is_task_complete(), + steps=len(self.history), + ) + ) if not self.controller_injected: self.controller.browser.close() diff --git a/browser_use/controller/registry/service.py b/browser_use/controller/registry/service.py index fa23951a2..72de3dd07 100644 --- a/browser_use/controller/registry/service.py +++ b/browser_use/controller/registry/service.py @@ -9,6 +9,11 @@ from browser_use.controller.registry.views import ( ActionRegistry, RegisteredAction, ) +from browser_use.telemetry.service import ProductTelemetry +from browser_use.telemetry.views import ( + ControllerRegisteredFunctionsTelemetryEvent, + RegisteredFunction, +) class Registry: @@ -16,6 +21,7 @@ class Registry: def __init__(self): self.registry = ActionRegistry() + self.telemetry = ProductTelemetry() def _create_param_model(self, function: Callable) -> Type[BaseModel]: """Creates a Pydantic model from function signature""" @@ -97,6 +103,16 @@ class Registry: name: (Optional[action.param_model], None) for name, action in self.registry.actions.items() } + + self.telemetry.capture( + ControllerRegisteredFunctionsTelemetryEvent( + registered_functions=[ + RegisteredFunction(name=name, params=action.param_model.model_json_schema()) + for name, action in self.registry.actions.items() + ] + ) + ) + return create_model('ActionModel', __base__=ActionModel, **fields) # type:ignore def get_prompt_description(self) -> str: diff --git a/browser_use/logging_config.py b/browser_use/logging_config.py index 8a36cc66d..461703930 100644 --- 
a/browser_use/logging_config.py +++ b/browser_use/logging_config.py @@ -1,8 +1,11 @@ import logging +import os import sys def setup_logging(): + debug_logging = os.getenv('BROWSER_USE_DEBUG_LOGGING', 'false').lower() == 'true' + # Check if handlers are already set up if logging.getLogger().hasHandlers(): return @@ -23,7 +26,11 @@ def setup_logging(): # Configure root logger only root.addHandler(console) - root.setLevel(logging.INFO) + + if debug_logging: + root.setLevel(logging.DEBUG) + else: + root.setLevel(logging.INFO) # Configure browser_use logger to prevent propagation browser_use_logger = logging.getLogger('browser_use') diff --git a/browser_use/telemetry/service.py b/browser_use/telemetry/service.py new file mode 100644 index 000000000..bbb2b69fc --- /dev/null +++ b/browser_use/telemetry/service.py @@ -0,0 +1,101 @@ +import logging +import os +import uuid +from pathlib import Path + +from dotenv import load_dotenv +from posthog import Posthog + +from browser_use.telemetry.views import BaseTelemetryEvent +from browser_use.utils import singleton + +load_dotenv() + + +logger = logging.getLogger(__name__) + +POSTHOG_EVENT_SETTINGS = {'$process_person_profile': False} + + +@singleton +class ProductTelemetry: + """ + Service for capturing anonymized telemetry data. + + If the environment variable `ANONYMIZED_TELEMETRY=False`, anonymized telemetry will be disabled. + """ + + USER_ID_PATH = str(Path.home() / '.cache' / 'browser_use' / 'telemetry_user_id') + PROJECT_API_KEY = 'phc_F8JMNjW1i2KbGUTaW1unnDdLSPCoyc52SGRU0JecaUh' + HOST = 'https://eu.i.posthog.com' + UNKNOWN_USER_ID = 'UNKNOWN' + + _curr_user_id = None + + def __init__(self) -> None: + telemetry_disabled = os.getenv('ANONYMIZED_TELEMETRY', 'true').lower() == 'false' + self.debug_logging = os.getenv('BROWSER_USE_DEBUG_LOGGING', 'false').lower() == 'true' + + if telemetry_disabled: + self._posthog_client = None + else: + logger.info( + 'Anonymized telemetry enabled.
See https://github.com/gregpr07/browser-use for more information.' + ) + self._posthog_client = Posthog( + project_api_key=self.PROJECT_API_KEY, + host=self.HOST, + ) + + # Silence posthog's logging + if not self.debug_logging: + posthog_logger = logging.getLogger('posthog') + posthog_logger.disabled = True + + if self._posthog_client is None: + logger.debug('Telemetry disabled') + + def capture(self, event: BaseTelemetryEvent) -> None: + if self._posthog_client is None: + return + + if self.debug_logging: + logger.debug(f'Telemetry event: {event.name} {event.properties}') + self._direct_capture(event) + + def _direct_capture(self, event: BaseTelemetryEvent) -> None: + """ + Send the event to PostHog; failures are logged, never raised. Not expected to block the caller, since the PostHog client queues events and delivers them in the background. + """ + if self._posthog_client is None: + return + + try: + self._posthog_client.capture( + self.user_id, + event.name, + {**event.properties, **POSTHOG_EVENT_SETTINGS}, + ) + except Exception as e: + logger.error(f'Failed to send telemetry event {event.name}: {e}') + + @property + def user_id(self) -> str: + if self._curr_user_id: + return self._curr_user_id + + # File access may fail due to permissions or other reasons. We don't want to + # crash so we catch all exceptions.
+ try: + if not os.path.exists(self.USER_ID_PATH): + os.makedirs(os.path.dirname(self.USER_ID_PATH), exist_ok=True) + with open(self.USER_ID_PATH, 'w') as f: + new_user_id = str(uuid.uuid4()) + f.write(new_user_id) + self._curr_user_id = new_user_id + else: + with open(self.USER_ID_PATH, 'r') as f: + self._curr_user_id = f.read().strip() or self.UNKNOWN_USER_ID + except Exception: + self._curr_user_id = self.UNKNOWN_USER_ID + return self._curr_user_id diff --git a/browser_use/telemetry/views.py b/browser_use/telemetry/views.py new file mode 100644 index 000000000..6ce12c17e --- /dev/null +++ b/browser_use/telemetry/views.py @@ -0,0 +1,51 @@ +from abc import ABC, abstractmethod +from dataclasses import asdict, dataclass +from typing import Any, Dict, Optional + + +@dataclass +class BaseTelemetryEvent(ABC): + @property + @abstractmethod + def name(self) -> str: + pass + + @property + def properties(self) -> Dict[str, Any]: + return {k: v for k, v in asdict(self).items() if k != 'name'} + + +@dataclass +class RegisteredFunction: + name: str + params: dict[str, Any] + + +@dataclass +class ControllerRegisteredFunctionsTelemetryEvent(BaseTelemetryEvent): + registered_functions: list[RegisteredFunction] + name: str = 'controller_registered_functions' + + +@dataclass +class AgentRunTelemetryEvent(BaseTelemetryEvent): + agent_id: str + task: str + name: str = 'agent_run' + + +@dataclass +class AgentStepErrorTelemetryEvent(BaseTelemetryEvent): + agent_id: str + error: str + name: str = 'agent_step_error' + + +@dataclass +class AgentEndTelemetryEvent(BaseTelemetryEvent): + agent_id: str + task: str + steps: int + success: bool + error: Optional[str] = None + name: str = 'agent_end' diff --git a/browser_use/utils.py b/browser_use/utils.py index 4b8fea995..860b35a32 100644 --- a/browser_use/utils.py +++ b/browser_use/utils.py @@ -41,3 +41,14 @@ def time_execution_async( return wrapper return decorator + + +def singleton(cls): + instance = [None] + + def wrapper(*args, **kwargs): + if instance[0] is None: 
instance[0] = cls(*args, **kwargs) + return instance[0] + + return wrapper diff --git a/examples/try.py b/examples/try.py index 2e5b5ef56..12ebbba28 100644 --- a/examples/try.py +++ b/examples/try.py @@ -4,7 +4,6 @@ Simple try of the agent. @dev You need to add ANTHROPIC_API_KEY to your environment variables. """ -import logging import os import sys diff --git a/pyproject.toml b/pyproject.toml index 46c66d40e..b01c52abd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ description = "Make websites accessible for AI agents" authors = [ { name = "Gregor Zunic" } ] -version = "0.1.3" +version = "0.1.4" readme = "README.md" requires-python = ">=3.11" classifiers = [ @@ -24,14 +24,17 @@ dependencies = [ "python-dotenv>=1.0.1", "requests>=2.32.3", "selenium>=4.26.1", - "webdriver-manager>=4.0.2" + "webdriver-manager>=4.0.2", + "posthog>=3.7.0" ] [project.optional-dependencies] dev = [ "tokencost>=0.1.16", "hatch>=1.13.0", - "build>=1.2.2" + "build>=1.2.2", + "pytest>=8.3.3", + "pytest-asyncio>=0.24.0" ] [tool.ruff]