mirror of
https://github.com/browser-use/browser-use
synced 2026-05-13 17:56:35 +02:00
* Validator * Test mind2web * Cleaned up logger * Pytest logger * Cleaned up logger * Disable flag for human input * Multiple clicks per button * Multiple clicks per button * More structured system prompt * Fields with description * System prompt example * One logger * Cleaner logging * Log step in step function * Fix critical clicking error - wrong argument used * Improved thought process of agent * Improve system prompt * Remove human input message * Custome action registration * Pydantic model for custom actions * Pydantic model for custome output * Runs through, model outputs functions, but not called yet * Work in progress - description for custome actions * Description works, but schema not yet * Model can call the right action - but is not executed * Seperate is_controller_action and is_custom_action * Works! Model can call custom function * Use registry for action, but result is not feed back to model * Include result in messages * Works with custom function - but typing is not correct * Renamed registry * First test cases * Captcha tests * Pydantic for tests * Improve prompts for multy step * System prompt structure * Handle errors like validation error * Refactor error handling in agent * Refactor error handling in agent * Improved logging * Update view * Fix click parameter to index * Simplify dynamic actions * Use run instead of step * Rename history * Rename AgentService to Agent * Rename ControllerService to Controller * Pytest file * Rename get state * Rename BrowserService * reversed dom extraction recursion to while * Rename use_vision * Rename use_vision * reversed dom tree items and made browser less anoying * Renaming and fixing type errors * Renamed class names for agent * updated requirements * Update prompt * Action registration works for user and controller * Fix done call by returning ActionResult * Fix if result is none * Rename AgentOutput and ActionModel * Improved prompt Passes 6/8 tests from test_agent_actions * Calculate token cost * 
Improve display * Simplified logger * Test function calling * created super simple xpath extraction algo * Tests logging * tiny fixes to dom extraction * Remove test * Dont log number of clicks * Pytest file * merged per element js checks * Check if driver is still open * super fast processing * fixed agent planning and stuff * Fix example * Fix example * Improve error * Improved error correction * New line for step * small type error fixes * Test for pydantic * Fix line * Removed sample * fixed readme and examples --------- Co-authored-by: magmueller <mamagnus00@gmail.com>
108 lines
2.9 KiB
Python
108 lines
2.9 KiB
Python
from __future__ import annotations
|
|
|
|
from typing import Optional, Type
|
|
|
|
from openai import RateLimitError
|
|
from pydantic import BaseModel, ConfigDict, Field, ValidationError, create_model
|
|
|
|
from browser_use.browser.views import BrowserState
|
|
from browser_use.controller.registry.views import ActionModel
|
|
|
|
|
|
class TokenDetails(BaseModel):
    """Breakdown of a token count into audio, cache-read and reasoning buckets.

    Every bucket is optional and defaults to zero.
    """

    audio: int = 0
    cache_read: int = 0
    reasoning: int = 0
|
|
|
|
|
|
class TokenUsage(BaseModel):
    """Token usage record: three required counts plus optional detail breakdowns.

    ``input_tokens``, ``output_tokens`` and ``total_tokens`` must be supplied;
    the two ``*_token_details`` fields fall back to an all-zero
    ``TokenDetails`` when omitted.
    """

    # allow arbitrary types
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Required counts.
    input_tokens: int
    output_tokens: int
    total_tokens: int

    # Optional per-category breakdowns.
    input_token_details: TokenDetails = TokenDetails()
    output_token_details: TokenDetails = TokenDetails()
|
|
|
|
|
|
class Pricing(BaseModel):
    """Price points for a single model: uncached input, cached input and output."""

    uncached_input: float  # per 1M tokens
    cached_input: float  # presumably the same unit as uncached_input -- confirm
    output: float  # presumably the same unit as uncached_input -- confirm
|
|
|
|
|
|
class ModelPricingCatalog(BaseModel):
    """Default ``Pricing`` entries for the models known to this module.

    Each field can be overridden at construction time; when omitted, the
    hard-coded price points below are used.
    """

    gpt_4o: Pricing = Pricing(
        uncached_input=2.50,
        cached_input=1.25,
        output=10.00,
    )
    gpt_4o_mini: Pricing = Pricing(
        uncached_input=0.15,
        cached_input=0.075,
        output=0.60,
    )
    claude_3_5_sonnet: Pricing = Pricing(
        uncached_input=3.00,
        cached_input=1.50,
        output=15.00,
    )
|
|
|
|
|
|
class ActionResult(BaseModel):
    """Outcome of executing a single action.

    All fields are optional: ``is_done`` defaults to ``False``, while
    ``extracted_content`` and ``error`` default to ``None``.
    """

    is_done: Optional[bool] = False
    extracted_content: Optional[str] = None
    error: Optional[str] = None
|
|
|
|
|
|
class AgentBrain(BaseModel):
    """Snapshot of the agent's current state; all three text fields are required."""

    # NOTE(review): possibly meant to be "evaluation_previous_goal" -- confirm
    # before renaming, since the field name is part of the model's schema.
    valuation_previous_goal: str
    memory: str
    next_goal: str
|
|
|
|
|
|
class AgentOutput(BaseModel):
    """Structured agent output: the current ``AgentBrain`` state plus one action.

    The ``action`` field is annotated with the base ``ActionModel``, so static
    analysis only sees that base type. Call ``type_with_custom_actions`` to
    derive a variant of this model whose ``action`` field is typed with a
    specific ``ActionModel`` subclass.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    current_state: AgentBrain
    action: ActionModel

    @staticmethod
    def type_with_custom_actions(custom_actions: Type[ActionModel]) -> Type['AgentOutput']:
        """Build an ``AgentOutput`` subclass that uses *custom_actions* for ``action``.

        Args:
            custom_actions: The ``ActionModel`` subclass to use as the type of
                the ``action`` field.

        Returns:
            A dynamically created model named ``'AgentOutput'`` that inherits
            from ``AgentOutput`` and reports the same ``__module__``.
        """
        # Ellipsis marks the overriding field as required (no default).
        action_field = (custom_actions, Field(...))
        extended_model = create_model(
            'AgentOutput',
            __base__=AgentOutput,
            __module__=AgentOutput.__module__,
            action=action_field,
        )
        return extended_model
|
|
|
|
|
|
class AgentHistory(BaseModel):
    """One history entry: the model output (if any), the action result and the browser state."""

    # protected_namespaces=() clears pydantic's reserved prefixes; presumably
    # needed because the ``model_output`` field starts with ``model_`` -- confirm.
    model_config = ConfigDict(arbitrary_types_allowed=True, protected_namespaces=())

    # No default is given, so a value (possibly None) must always be passed.
    model_output: AgentOutput | None
    result: ActionResult
    state: BrowserState
|
|
|
|
|
|
class AgentError:
    """Namespace for agent error messages and error-to-text formatting."""

    VALIDATION_ERROR = 'Invalid model output format. Please follow the correct schema.'
    RATE_LIMIT_ERROR = 'Rate limit reached. Waiting before retry.'
    NO_VALID_ACTION = 'No valid action found'

    @staticmethod
    def format_error(error: Exception) -> str:
        """Turn *error* into a message string chosen by its exception type.

        Args:
            error: The exception to describe.

        Returns:
            The validation message followed by the error details for a
            ``ValidationError``, the fixed rate-limit message for a
            ``RateLimitError``, and a generic ``'Unexpected error: ...'``
            message for anything else.
        """
        if isinstance(error, ValidationError):
            message = f'{AgentError.VALIDATION_ERROR}\nDetails: {str(error)}'
        elif isinstance(error, RateLimitError):
            message = AgentError.RATE_LIMIT_ERROR
        else:
            message = f'Unexpected error: {str(error)}'
        return message
|