# Mirror of https://github.com/browser-use/browser-use
# Synced 2026-05-06 17:52:15 +02:00 · 239 lines · 6.4 KiB · Python
"""
This implementation is based on the OpenAI types, while removing all the parts that are not needed for Browser Use.
"""
|
|
|
|
# region - Content parts
|
|
from typing import Literal, Union
|
|
|
|
from openai import BaseModel
|
|
|
|
|
|
def _truncate(text: str, max_length: int = 50) -> str:
|
|
"""Truncate text to max_length characters, adding ellipsis if truncated."""
|
|
if len(text) <= max_length:
|
|
return text
|
|
return text[: max_length - 3] + '...'
|
|
|
|
|
|
def _format_image_url(url: str, max_length: int = 50) -> str:
|
|
"""Format image URL for display, truncating if necessary."""
|
|
if url.startswith('data:'):
|
|
# Base64 image
|
|
media_type = url.split(';')[0].split(':')[1] if ';' in url else 'image'
|
|
return f'<base64 {media_type}>'
|
|
else:
|
|
# Regular URL
|
|
return _truncate(url, max_length)
|
|
|
|
|
|
class ContentPartTextParam(BaseModel):
    # A plain-text segment inside a message's list of content parts.

    text: str
    # Tag used to tell content parts apart; always 'text' for this class.
    type: Literal['text'] = 'text'

    def __str__(self) -> str:
        """Return a short human-readable preview of the text."""
        return f'Text: {_truncate(self.text)}'

    def __repr__(self) -> str:
        """Return a debug representation with the text quoted and truncated.

        The text is passed through ``repr()`` before truncation, matching
        ``ContentPartRefusalParam.__repr__`` and ``Function.__repr__``, so that
        quotes, newlines and other special characters are escaped instead of
        being interpolated raw into the output.
        """
        return f'ContentPartTextParam(text={_truncate(repr(self.text), 50)})'
|
|
|
|
|
|
class ContentPartRefusalParam(BaseModel):
    # A refusal segment inside a message's list of content parts.

    refusal: str
    # Tag used to tell content parts apart; always 'refusal' for this class.
    type: Literal['refusal'] = 'refusal'

    def __str__(self) -> str:
        """Return a short human-readable preview of the refusal text."""
        preview = _truncate(self.refusal)
        return f'Refusal: {preview}'

    def __repr__(self) -> str:
        """Return a debug representation with the refusal quoted and truncated."""
        quoted = repr(self.refusal)
        return f'ContentPartRefusalParam(refusal={_truncate(quoted, 50)})'
|
|
|
|
|
|
# Image MIME types accepted by the `media_type` field of `ImageURL`.
SupportedImageMediaType = Literal['image/jpeg', 'image/png', 'image/gif', 'image/webp']
|
|
|
|
|
|
class ImageURL(BaseModel):
    # Reference to an image, given as a URL or as inline base64 data, plus
    # the detail level and media type that accompany it.

    url: str
    """Either a URL of the image or the base64 encoded image data."""

    detail: Literal['auto', 'low', 'high'] = 'auto'
    """Specifies the detail level of the image.

    Learn more in the
    [Vision guide](https://platform.openai.com/docs/guides/vision#low-or-high-fidelity-image-understanding).
    """

    # needed for Anthropic
    media_type: SupportedImageMediaType = 'image/jpeg'

    def __str__(self) -> str:
        """Return a one-line summary: media type, detail level and a shortened URL."""
        shown = _format_image_url(self.url)
        return f'🖼️ Image[{self.media_type}, detail={self.detail}]: {shown}'

    def __repr__(self) -> str:
        """Return a debug representation; the URL is shortened to 30 characters."""
        short_url = _format_image_url(self.url, 30)
        return f'ImageURL(url={short_url!r}, detail={self.detail!r}, media_type={self.media_type!r})'
|
|
|
|
|
|
class ContentPartImageParam(BaseModel):
    # An image segment inside a message's list of content parts.

    image_url: ImageURL
    # Tag used to tell content parts apart; always 'image_url' for this class.
    type: Literal['image_url'] = 'image_url'

    def __str__(self) -> str:
        """Return the string form of the wrapped ``ImageURL``."""
        return f'{self.image_url!s}'

    def __repr__(self) -> str:
        """Return a debug representation built from the wrapped image's repr."""
        return f'ContentPartImageParam(image_url={self.image_url!r})'
|
|
|
|
|
|
class Function(BaseModel):
    # Name and raw argument string of a function the model asked to call.

    arguments: str
    """
    The arguments to call the function with, as generated by the model in JSON
    format. Note that the model does not always generate valid JSON, and may
    hallucinate parameters not defined by your function schema. Validate the
    arguments in your code before calling your function.
    """

    name: str
    """The name of the function to call."""

    def __str__(self) -> str:
        """Return ``name(arguments)`` with the arguments cut to 80 characters."""
        return f'{self.name}({_truncate(self.arguments, 80)})'

    def __repr__(self) -> str:
        """Return a debug representation; the quoted arguments are cut to 50 characters."""
        quoted_args = repr(self.arguments)
        return f'Function(name={self.name!r}, arguments={_truncate(quoted_args, 50)})'
|
|
|
|
|
|
class ToolCall(BaseModel):
    # One tool invocation requested by the model: an id plus the function to call.

    id: str
    """The ID of the tool call."""

    function: Function
    """The function that the model called."""

    type: Literal['function'] = 'function'
    """The type of the tool. Currently, only `function` is supported."""

    def __str__(self) -> str:
        """Return ``ToolCall[<id>]: <function>`` using the function's string form."""
        call_id, target = self.id, self.function
        return f'ToolCall[{call_id}]: {target}'

    def __repr__(self) -> str:
        """Return a debug representation showing the id and the function's repr."""
        return f'ToolCall(id={self.id!r}, function={self.function!r})'
|
|
|
|
|
|
# endregion
|
|
|
|
|
|
# region - Message types
|
|
class _MessageBase(BaseModel):
    """Base class for all message types"""

    # Author of the message. Each message subclass in this module narrows this
    # to a single literal value and gives it a default.
    role: Literal['user', 'system', 'assistant']

    cache: bool = False
    """Whether to cache this message. This is only applicable when using Anthropic models.
    """
|
|
|
|
|
|
class UserMessage(_MessageBase):
    # Message authored by the user; content is plain text or a list of
    # text/image content parts.

    role: Literal['user'] = 'user'
    """The role of the messages author, in this case `user`."""

    content: str | list[ContentPartTextParam | ContentPartImageParam]
    """The contents of the user message."""

    name: str | None = None
    """An optional name for the participant.

    Provides the model information to differentiate between participants of the same
    role.
    """

    @property
    def text(self) -> str:
        """
        Return the textual content of the message.

        A string content is returned as-is. For a list of content parts, the
        text of every part tagged 'text' is joined with newlines; other parts
        (such as images) are skipped. Any other content yields an empty string.
        """
        body = self.content
        if isinstance(body, str):
            return body
        if not isinstance(body, list):
            return ''
        return '\n'.join(part.text for part in body if part.type == 'text')

    def __str__(self) -> str:
        """Return ``UserMessage(content=...)`` with the raw text."""
        return f'UserMessage(content={self.text})'

    def __repr__(self) -> str:
        """Return ``UserMessage(content=...)`` with the text quoted via repr."""
        return f'UserMessage(content={self.text!r})'
|
|
|
|
|
|
class SystemMessage(_MessageBase):
    # System-authored message; content is plain text or a list of text parts.

    role: Literal['system'] = 'system'
    """The role of the messages author, in this case `system`."""

    content: str | list[ContentPartTextParam]
    """The contents of the system message."""

    name: str | None = None

    @property
    def text(self) -> str:
        """
        Return the textual content of the message.

        A list of content parts is flattened by joining the text of every part
        tagged 'text' with newlines; a string is returned unchanged; anything
        else yields an empty string.
        """
        raw = self.content
        if isinstance(raw, list):
            return '\n'.join(part.text for part in raw if part.type == 'text')
        return raw if isinstance(raw, str) else ''

    def __str__(self) -> str:
        """Return ``SystemMessage(content=...)`` with the raw text."""
        return f'SystemMessage(content={self.text})'

    def __repr__(self) -> str:
        """Return ``SystemMessage(content=...)`` with the text quoted via repr."""
        return f'SystemMessage(content={self.text!r})'
|
|
|
|
|
|
class AssistantMessage(_MessageBase):
    # Message authored by the assistant: optional content (plain text or a
    # list of text/refusal parts), an optional refusal, and any tool calls.

    role: Literal['assistant'] = 'assistant'
    """The role of the messages author, in this case `assistant`."""

    content: str | list[ContentPartTextParam | ContentPartRefusalParam] | None
    """The contents of the assistant message."""

    name: str | None = None

    refusal: str | None = None
    """The refusal message by the assistant."""

    tool_calls: list[ToolCall] = []
    """The tool calls generated by the model, such as function calls."""

    @property
    def text(self) -> str:
        """
        Return the textual content of the message.

        A string content is returned as-is. For a list of content parts, text
        parts contribute their text and refusal parts contribute
        ``'[Refusal] '`` followed by the refusal; the pieces are concatenated
        with no separator. ``None`` (or any other content) yields an empty string.
        """
        payload = self.content
        if isinstance(payload, str):
            return payload
        if not isinstance(payload, list):
            return ''

        fragments: list[str] = []
        for part in payload:
            if part.type == 'text':
                fragments.append(part.text)
            elif part.type == 'refusal':
                fragments.append(f'[Refusal] {part.refusal}')
        return ''.join(fragments)

    def __str__(self) -> str:
        """Return ``AssistantMessage(content=...)`` with the raw text."""
        return f'AssistantMessage(content={self.text})'

    def __repr__(self) -> str:
        """Return ``AssistantMessage(content=...)`` with the text quoted via repr."""
        return f'AssistantMessage(content={self.text!r})'
|
|
|
|
|
|
# Any one of the three concrete message types defined in this module.
BaseMessage = Union[UserMessage, SystemMessage, AssistantMessage]
|
|
|
|
# endregion
|