mirror of
https://github.com/browser-use/browser-use
synced 2026-04-22 17:45:09 +02:00
288 lines
8.8 KiB
Python
288 lines
8.8 KiB
Python
# @file purpose: Ultra-compact serializer optimized for code-use agents
|
|
# Focuses on minimal token usage while preserving essential interactive context
|
|
|
|
from browser_use.dom.utils import cap_text_length
|
|
from browser_use.dom.views import (
|
|
EnhancedDOMTreeNode,
|
|
NodeType,
|
|
SimplifiedNode,
|
|
)
|
|
|
|
# Minimal but sufficient attribute list for code agents
|
|
CODE_USE_KEY_ATTRIBUTES = [
|
|
'id', # Essential for element selection
|
|
'name', # For form inputs
|
|
'type', # For input types
|
|
'placeholder', # For empty inputs
|
|
'aria-label', # For buttons without text
|
|
'value', # Current values
|
|
'alt', # For images
|
|
'class', # Keep top 2 classes for common selectors
|
|
]
|
|
|
|
# Interactive elements agent can use
|
|
INTERACTIVE_ELEMENTS = {
|
|
'a',
|
|
'button',
|
|
'input',
|
|
'textarea',
|
|
'select',
|
|
'form',
|
|
}
|
|
|
|
# Semantic structure elements - expanded to include more content containers
|
|
SEMANTIC_STRUCTURE = {
|
|
'h1',
|
|
'h2',
|
|
'h3',
|
|
'h4',
|
|
'h5',
|
|
'h6',
|
|
'nav',
|
|
'main',
|
|
'header',
|
|
'footer',
|
|
'article',
|
|
'section',
|
|
'p', # Paragraphs often contain prices and product info
|
|
'span', # Spans often contain prices and labels
|
|
'div', # Divs with useful attributes (id/class) should be shown
|
|
'ul',
|
|
'ol',
|
|
'li',
|
|
'label',
|
|
'img',
|
|
}
|
|
|
|
|
|
class DOMCodeAgentSerializer:
|
|
"""Optimized DOM serializer for code-use agents - balances token efficiency with context."""
|
|
|
|
@staticmethod
|
|
def serialize_tree(node: SimplifiedNode | None, include_attributes: list[str], depth: int = 0) -> str:
|
|
"""
|
|
Serialize DOM tree with smart token optimization.
|
|
|
|
Strategy:
|
|
- Keep top 2 CSS classes for querySelector compatibility
|
|
- Show div/span/p elements with useful attributes or text
|
|
- Show all interactive + semantic elements
|
|
- Inline text up to 80 chars for better context
|
|
"""
|
|
if not node:
|
|
return ''
|
|
|
|
# Skip excluded/hidden nodes
|
|
if hasattr(node, 'excluded_by_parent') and node.excluded_by_parent:
|
|
return DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth)
|
|
|
|
if not node.should_display:
|
|
return DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth)
|
|
|
|
formatted_text = []
|
|
depth_str = ' ' * depth # Use 2 spaces instead of tabs for compactness
|
|
|
|
if node.original_node.node_type == NodeType.ELEMENT_NODE:
|
|
tag = node.original_node.tag_name.lower()
|
|
is_visible = node.original_node.snapshot_node and node.original_node.is_visible
|
|
|
|
# Skip invisible (except iframes)
|
|
if not is_visible and tag not in ['iframe', 'frame']:
|
|
return DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth)
|
|
|
|
# Special handling for iframes
|
|
if tag in ['iframe', 'frame']:
|
|
return DOMCodeAgentSerializer._serialize_iframe(node, include_attributes, depth)
|
|
|
|
# Build minimal attributes
|
|
attributes_str = DOMCodeAgentSerializer._build_minimal_attributes(node.original_node)
|
|
|
|
# Decide if element should be shown
|
|
is_interactive = tag in INTERACTIVE_ELEMENTS
|
|
is_semantic = tag in SEMANTIC_STRUCTURE
|
|
has_useful_attrs = bool(attributes_str)
|
|
has_text = DOMCodeAgentSerializer._has_direct_text(node)
|
|
|
|
# Skip non-semantic, non-interactive containers without attributes
|
|
if not is_interactive and not is_semantic and not has_useful_attrs and not has_text:
|
|
return DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth)
|
|
|
|
# Collapse pointless wrappers
|
|
if tag in {'div', 'span'} and not has_useful_attrs and not has_text and len(node.children) == 1:
|
|
return DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth)
|
|
|
|
# Build element
|
|
line = f'{depth_str}<{tag}'
|
|
|
|
if attributes_str:
|
|
line += f' {attributes_str}'
|
|
|
|
# Inline text
|
|
inline_text = DOMCodeAgentSerializer._get_inline_text(node)
|
|
if inline_text:
|
|
line += f'>{inline_text}'
|
|
else:
|
|
line += '>'
|
|
|
|
formatted_text.append(line)
|
|
|
|
# Children (only if no inline text)
|
|
if node.children and not inline_text:
|
|
children_text = DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth + 1)
|
|
if children_text:
|
|
formatted_text.append(children_text)
|
|
|
|
elif node.original_node.node_type == NodeType.TEXT_NODE:
|
|
# Handled inline with parent
|
|
pass
|
|
|
|
elif node.original_node.node_type == NodeType.DOCUMENT_FRAGMENT_NODE:
|
|
# Shadow DOM - minimal marker
|
|
if node.children:
|
|
formatted_text.append(f'{depth_str}#shadow')
|
|
children_text = DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth + 1)
|
|
if children_text:
|
|
formatted_text.append(children_text)
|
|
|
|
return '\n'.join(formatted_text)
|
|
|
|
@staticmethod
|
|
def _serialize_children(node: SimplifiedNode, include_attributes: list[str], depth: int) -> str:
|
|
"""Serialize children."""
|
|
children_output = []
|
|
for child in node.children:
|
|
child_text = DOMCodeAgentSerializer.serialize_tree(child, include_attributes, depth)
|
|
if child_text:
|
|
children_output.append(child_text)
|
|
return '\n'.join(children_output)
|
|
|
|
@staticmethod
|
|
def _build_minimal_attributes(node: EnhancedDOMTreeNode) -> str:
|
|
"""Build minimal but useful attributes - keep top 2 classes for selectors."""
|
|
attrs = []
|
|
|
|
if node.attributes:
|
|
for attr in CODE_USE_KEY_ATTRIBUTES:
|
|
if attr in node.attributes:
|
|
value = str(node.attributes[attr]).strip()
|
|
if value:
|
|
# Special handling for class - keep only first 2 classes
|
|
if attr == 'class':
|
|
classes = value.split()[:2]
|
|
value = ' '.join(classes)
|
|
# Cap at 25 chars
|
|
value = cap_text_length(value, 25)
|
|
attrs.append(f'{attr}="{value}"')
|
|
|
|
return ' '.join(attrs)
|
|
|
|
@staticmethod
|
|
def _has_direct_text(node: SimplifiedNode) -> bool:
|
|
"""Check if node has direct text children."""
|
|
for child in node.children:
|
|
if child.original_node.node_type == NodeType.TEXT_NODE:
|
|
text = child.original_node.node_value.strip() if child.original_node.node_value else ''
|
|
if len(text) > 1:
|
|
return True
|
|
return False
|
|
|
|
@staticmethod
|
|
def _get_inline_text(node: SimplifiedNode) -> str:
|
|
"""Get inline text (max 80 chars for better context)."""
|
|
text_parts = []
|
|
for child in node.children:
|
|
if child.original_node.node_type == NodeType.TEXT_NODE:
|
|
text = child.original_node.node_value.strip() if child.original_node.node_value else ''
|
|
if text and len(text) > 1:
|
|
text_parts.append(text)
|
|
|
|
if not text_parts:
|
|
return ''
|
|
|
|
combined = ' '.join(text_parts)
|
|
return cap_text_length(combined, 40)
|
|
|
|
@staticmethod
|
|
def _serialize_iframe(node: SimplifiedNode, include_attributes: list[str], depth: int) -> str:
|
|
"""Handle iframe minimally."""
|
|
formatted_text = []
|
|
depth_str = ' ' * depth
|
|
tag = node.original_node.tag_name.lower()
|
|
|
|
# Minimal iframe marker
|
|
attributes_str = DOMCodeAgentSerializer._build_minimal_attributes(node.original_node)
|
|
line = f'{depth_str}<{tag}'
|
|
if attributes_str:
|
|
line += f' {attributes_str}'
|
|
line += '>'
|
|
formatted_text.append(line)
|
|
|
|
# Iframe content
|
|
if node.original_node.content_document:
|
|
formatted_text.append(f'{depth_str} #iframe-content')
|
|
|
|
# Find and serialize body content only
|
|
for child_node in node.original_node.content_document.children_nodes or []:
|
|
if child_node.tag_name.lower() == 'html':
|
|
for html_child in child_node.children:
|
|
if html_child.tag_name.lower() == 'body':
|
|
for body_child in html_child.children:
|
|
DOMCodeAgentSerializer._serialize_document_node(
|
|
body_child, formatted_text, include_attributes, depth + 2
|
|
)
|
|
break
|
|
|
|
return '\n'.join(formatted_text)
|
|
|
|
@staticmethod
|
|
def _serialize_document_node(
|
|
dom_node: EnhancedDOMTreeNode, output: list[str], include_attributes: list[str], depth: int
|
|
) -> None:
|
|
"""Serialize document node without SimplifiedNode wrapper."""
|
|
depth_str = ' ' * depth
|
|
|
|
if dom_node.node_type == NodeType.ELEMENT_NODE:
|
|
tag = dom_node.tag_name.lower()
|
|
|
|
# Skip invisible
|
|
is_visible = dom_node.snapshot_node and dom_node.is_visible
|
|
if not is_visible:
|
|
return
|
|
|
|
# Check if worth showing
|
|
is_interactive = tag in INTERACTIVE_ELEMENTS
|
|
is_semantic = tag in SEMANTIC_STRUCTURE
|
|
attributes_str = DOMCodeAgentSerializer._build_minimal_attributes(dom_node)
|
|
|
|
if not is_interactive and not is_semantic and not attributes_str:
|
|
# Skip but process children
|
|
for child in dom_node.children:
|
|
DOMCodeAgentSerializer._serialize_document_node(child, output, include_attributes, depth)
|
|
return
|
|
|
|
# Build element
|
|
line = f'{depth_str}<{tag}'
|
|
if attributes_str:
|
|
line += f' {attributes_str}'
|
|
|
|
# Get text
|
|
text_parts = []
|
|
for child in dom_node.children:
|
|
if child.node_type == NodeType.TEXT_NODE and child.node_value:
|
|
text = child.node_value.strip()
|
|
if text and len(text) > 1:
|
|
text_parts.append(text)
|
|
|
|
if text_parts:
|
|
combined = ' '.join(text_parts)
|
|
line += f'>{cap_text_length(combined, 25)}'
|
|
else:
|
|
line += '>'
|
|
|
|
output.append(line)
|
|
|
|
# Process non-text children
|
|
for child in dom_node.children:
|
|
if child.node_type != NodeType.TEXT_NODE:
|
|
DOMCodeAgentSerializer._serialize_document_node(child, output, include_attributes, depth + 1)
|