Files
browser-use/browser_use/agent/views.py
Gregor Žunič 89c63fdd63 Added custom actions registry and fixed extraction layer (#20)
* Validator

* Test mind2web

* Cleaned up logger

* Pytest logger

* Cleaned up logger

* Disable flag for human input

* Multiple clicks per button

* Multiple clicks per button

* More structured system prompt

* Fields with description

* System prompt example

* One logger

* Cleaner logging

* Log step in step function

* Fix critical clicking error - wrong argument used

* Improved thought process of agent

* Improve system prompt

* Remove human input message

* Custom action registration

* Pydantic model for custom actions

* Pydantic model for custom output

* Runs through, model outputs functions, but not called yet

* Work in progress - description for custom actions

* Description works, but schema not yet

* Model can call the right action - but is not executed

* Separate is_controller_action and is_custom_action

* Works! Model can call custom function

* Use registry for action, but result is not fed back to model

* Include result in messages

* Works with custom function - but typing is not correct

* Renamed registry

* First test cases

* Captcha tests

* Pydantic for tests

* Improve prompts for multi-step

* System prompt structure

* Handle errors like validation error

* Refactor error handling in agent

* Refactor error handling in agent

* Improved logging

* Update view

* Fix click parameter to index

* Simplify dynamic actions

* Use run instead of step

* Rename history

* Rename AgentService to Agent

* Rename ControllerService to Controller

* Pytest file

* Rename get state

* Rename BrowserService

* reversed dom extraction recursion to while

* Rename use_vision

* Rename use_vision

* reversed dom tree items and made browser less annoying

* Renaming and fixing type errors

* Renamed class names for agent

* updated requirements

* Update prompt

* Action registration works for user and controller

* Fix done call by returning ActionResult

* Fix if result is none

* Rename AgentOutput and ActionModel

* Improved prompt Passes 6/8 tests from test_agent_actions

* Calculate token cost

* Improve display

* Simplified logger

* Test function calling

* created super simple xpath extraction algo

* Tests logging

* tiny fixes to dom extraction

* Remove test

* Don't log number of clicks

* Pytest file

* merged per element js checks

* Check if driver is still open

* super fast processing

* fixed agent planning and stuff

* Fix example

* Fix example

* Improve error

* Improved error correction

* New line for step

* small type error fixes

* Test for pydantic

* Fix line

* Removed sample

* fixed readme and examples

---------

Co-authored-by: magmueller <mamagnus00@gmail.com>
2024-11-15 21:42:02 +01:00

108 lines
2.9 KiB
Python

from __future__ import annotations
from typing import Optional, Type
from openai import RateLimitError
from pydantic import BaseModel, ConfigDict, Field, ValidationError, create_model
from browser_use.browser.views import BrowserState
from browser_use.controller.registry.views import ActionModel
class TokenDetails(BaseModel):
audio: int = 0
cache_read: int = 0
reasoning: int = 0
class TokenUsage(BaseModel):
input_tokens: int
output_tokens: int
total_tokens: int
input_token_details: TokenDetails = Field(default=TokenDetails())
output_token_details: TokenDetails = Field(default=TokenDetails())
# allow arbitrary types
model_config = ConfigDict(arbitrary_types_allowed=True)
class Pricing(BaseModel):
uncached_input: float # per 1M tokens
cached_input: float
output: float
class ModelPricingCatalog(BaseModel):
gpt_4o: Pricing = Field(default=Pricing(uncached_input=2.50, cached_input=1.25, output=10.00))
gpt_4o_mini: Pricing = Field(
default=Pricing(uncached_input=0.15, cached_input=0.075, output=0.60)
)
claude_3_5_sonnet: Pricing = Field(
default=Pricing(uncached_input=3.00, cached_input=1.50, output=15.00)
)
class ActionResult(BaseModel):
"""Result of executing an action"""
is_done: Optional[bool] = False
extracted_content: Optional[str] = None
error: Optional[str] = None
class AgentBrain(BaseModel):
"""Current state of the agent"""
valuation_previous_goal: str
memory: str
next_goal: str
class AgentOutput(BaseModel):
"""Output model for agent
@dev note: this model is extended with custom actions in AgentService. You can also use some fields that are not in this model as provided by the linter, as long as they are registered in the DynamicActions model.
"""
model_config = ConfigDict(arbitrary_types_allowed=True)
current_state: AgentBrain
action: ActionModel
@staticmethod
def type_with_custom_actions(custom_actions: Type[ActionModel]) -> Type['AgentOutput']:
"""Extend actions with custom actions"""
return create_model(
'AgentOutput',
__base__=AgentOutput,
action=(custom_actions, Field(...)), # Properly annotated field with no default
__module__=AgentOutput.__module__,
)
class AgentHistory(BaseModel):
"""History item for agent actions"""
model_output: AgentOutput | None
result: ActionResult
state: BrowserState
model_config = ConfigDict(arbitrary_types_allowed=True, protected_namespaces=())
class AgentError:
"""Container for agent error handling"""
VALIDATION_ERROR = 'Invalid model output format. Please follow the correct schema.'
RATE_LIMIT_ERROR = 'Rate limit reached. Waiting before retry.'
NO_VALID_ACTION = 'No valid action found'
@staticmethod
def format_error(error: Exception) -> str:
"""Format error message based on error type"""
if isinstance(error, ValidationError):
return f'{AgentError.VALIDATION_ERROR}\nDetails: {str(error)}'
if isinstance(error, RateLimitError):
return AgentError.RATE_LIMIT_ERROR
return f'Unexpected error: {str(error)}'