browser-use/browser_use/llm/messages.py

"""
This implementation is based on the OpenAI types, while removing all the parts that are not needed for Browser Use.
"""

# region - Content parts
from typing import Literal, Union

from openai import BaseModel


def _truncate(text: str, max_length: int = 50) -> str:
	"""Truncate text to max_length characters, adding ellipsis if truncated."""
	if len(text) <= max_length:
		return text
	return text[: max_length - 3] + '...'


def _format_image_url(url: str, max_length: int = 50) -> str:
	"""Format image URL for display, truncating if necessary."""
	if url.startswith('data:'):
		# Base64 image
		media_type = url.split(';')[0].split(':')[1] if ';' in url else 'image'
		return f'<base64 {media_type}>'
	else:
		# Regular URL
		return _truncate(url, max_length)


class ContentPartTextParam(BaseModel):
	text: str
	type: Literal['text'] = 'text'

	def __str__(self) -> str:
		return f'Text: {_truncate(self.text)}'

	def __repr__(self) -> str:
		return f'ContentPartTextParam(text={_truncate(self.text)})'


class ContentPartRefusalParam(BaseModel):
	refusal: str
	type: Literal['refusal'] = 'refusal'

	def __str__(self) -> str:
		return f'Refusal: {_truncate(self.refusal)}'

	def __repr__(self) -> str:
		return f'ContentPartRefusalParam(refusal={_truncate(repr(self.refusal), 50)})'


SupportedImageMediaType = Literal['image/jpeg', 'image/png', 'image/gif', 'image/webp']


class ImageURL(BaseModel):
	url: str
	"""Either a URL of the image or the base64 encoded image data."""
	detail: Literal['auto', 'low', 'high'] = 'auto'
	"""Specifies the detail level of the image.

    Learn more in the
    [Vision guide](https://platform.openai.com/docs/guides/vision#low-or-high-fidelity-image-understanding).
    """
	# needed for Anthropic
	media_type: SupportedImageMediaType = 'image/jpeg'

	def __str__(self) -> str:
		url_display = _format_image_url(self.url)
		return f'🖼️  Image[{self.media_type}, detail={self.detail}]: {url_display}'

	def __repr__(self) -> str:
		url_repr = _format_image_url(self.url, 30)
		return f'ImageURL(url={repr(url_repr)}, detail={repr(self.detail)}, media_type={repr(self.media_type)})'


class ContentPartImageParam(BaseModel):
	image_url: ImageURL
	type: Literal['image_url'] = 'image_url'

	def __str__(self) -> str:
		return str(self.image_url)

	def __repr__(self) -> str:
		return f'ContentPartImageParam(image_url={repr(self.image_url)})'


class Function(BaseModel):
	arguments: str
	"""
    The arguments to call the function with, as generated by the model in JSON
    format. Note that the model does not always generate valid JSON, and may
    hallucinate parameters not defined by your function schema. Validate the
    arguments in your code before calling your function.
    """
	name: str
	"""The name of the function to call."""

	def __str__(self) -> str:
		args_preview = _truncate(self.arguments, 80)
		return f'{self.name}({args_preview})'

	def __repr__(self) -> str:
		args_repr = _truncate(repr(self.arguments), 50)
		return f'Function(name={repr(self.name)}, arguments={args_repr})'


class ToolCall(BaseModel):
	id: str
	"""The ID of the tool call."""
	function: Function
	"""The function that the model called."""
	type: Literal['function'] = 'function'
	"""The type of the tool. Currently, only `function` is supported."""

	def __str__(self) -> str:
		return f'ToolCall[{self.id}]: {self.function}'

	def __repr__(self) -> str:
		return f'ToolCall(id={repr(self.id)}, function={repr(self.function)})'


# endregion


# region - Message types
class _MessageBase(BaseModel):
	"""Base class for all message types"""

	role: Literal['user', 'system', 'assistant']

	cache: bool = False
	"""Whether to cache this message. This is only applicable when using Anthropic models.
	"""


class UserMessage(_MessageBase):
	role: Literal['user'] = 'user'
	"""The role of the messages author, in this case `user`."""

	content: str | list[ContentPartTextParam | ContentPartImageParam]
	"""The contents of the user message."""

	name: str | None = None
	"""An optional name for the participant.

    Provides the model information to differentiate between participants of the same
    role.
    """

	@property
	def text(self) -> str:
		"""
		Automatically parse the text inside content, whether it's a string or a list of content parts.
		"""
		if isinstance(self.content, str):
			return self.content
		elif isinstance(self.content, list):
			return '\n'.join([part.text for part in self.content if part.type == 'text'])
		else:
			return ''

	def __str__(self) -> str:
		return f'UserMessage(content={self.text})'

	def __repr__(self) -> str:
		return f'UserMessage(content={repr(self.text)})'


class SystemMessage(_MessageBase):
	role: Literal['system'] = 'system'
	"""The role of the messages author, in this case `system`."""

	content: str | list[ContentPartTextParam]
	"""The contents of the system message."""

	name: str | None = None

	@property
	def text(self) -> str:
		"""
		Automatically parse the text inside content, whether it's a string or a list of content parts.
		"""
		if isinstance(self.content, str):
			return self.content
		elif isinstance(self.content, list):
			return '\n'.join([part.text for part in self.content if part.type == 'text'])
		else:
			return ''

	def __str__(self) -> str:
		return f'SystemMessage(content={self.text})'

	def __repr__(self) -> str:
		return f'SystemMessage(content={repr(self.text)})'


class AssistantMessage(_MessageBase):
	role: Literal['assistant'] = 'assistant'
	"""The role of the messages author, in this case `assistant`."""

	content: str | list[ContentPartTextParam | ContentPartRefusalParam] | None
	"""The contents of the assistant message."""

	name: str | None = None

	refusal: str | None = None
	"""The refusal message by the assistant."""

	tool_calls: list[ToolCall] = []
	"""The tool calls generated by the model, such as function calls."""

	@property
	def text(self) -> str:
		"""
		Automatically parse the text inside content, whether it's a string or a list of content parts.
		"""
		if isinstance(self.content, str):
			return self.content
		elif isinstance(self.content, list):
			text = ''
			for part in self.content:
				if part.type == 'text':
					text += part.text
				elif part.type == 'refusal':
					text += f'[Refusal] {part.refusal}'
			return text
		else:
			return ''

	def __str__(self) -> str:
		return f'AssistantMessage(content={self.text})'

	def __repr__(self) -> str:
		return f'AssistantMessage(content={repr(self.text)})'


BaseMessage = Union[UserMessage, SystemMessage, AssistantMessage]

# endregion