browser-use/browser_use/code_use/formatting.py

"""Browser state formatting helpers for code-use agent."""

import logging
from typing import Any

from browser_use.browser.session import BrowserSession
from browser_use.browser.views import BrowserStateSummary

logger = logging.getLogger(__name__)


async def format_browser_state_for_llm(
	state: BrowserStateSummary,
	namespace: dict[str, Any],
	browser_session: BrowserSession,
) -> str:
	"""
	Format browser state summary for LLM consumption in code-use mode.

	Args:
		state: Browser state summary from browser_session.get_browser_state_summary()
		namespace: The code execution namespace (for showing available variables)
		browser_session: Browser session for additional checks (jQuery, etc.)

	Returns:
		Formatted browser state text for LLM
	"""
	assert state.dom_state is not None
	dom_state = state.dom_state

	# Use eval_representation (compact serializer for code agents)
	dom_html = dom_state.eval_representation()
	if dom_html == '':
		dom_html = 'Empty DOM tree (you might have to wait for the page to load)'

	# Format with URL and title header
	lines = ['## Browser State']
	lines.append(f'**URL:** {state.url}')
	lines.append(f'**Title:** {state.title}')
	lines.append('')

	# Add tabs info if multiple tabs exist
	if len(state.tabs) > 1:
		lines.append('**Tabs:**')
		current_target_candidates = []
		# Find tabs that match current URL and title
		for tab in state.tabs:
			if tab.url == state.url and tab.title == state.title:
				current_target_candidates.append(tab.target_id)
		current_target_id = current_target_candidates[0] if len(current_target_candidates) == 1 else None

		for tab in state.tabs:
			is_current = ' (current)' if tab.target_id == current_target_id else ''
			lines.append(f'  - Tab {tab.target_id[-4:]}: {tab.url} - {tab.title[:30]}{is_current}')
		lines.append('')

	# Add page scroll info if available
	if state.page_info:
		pi = state.page_info
		pages_above = pi.pixels_above / pi.viewport_height if pi.viewport_height > 0 else 0
		pages_below = pi.pixels_below / pi.viewport_height if pi.viewport_height > 0 else 0
		total_pages = pi.page_height / pi.viewport_height if pi.viewport_height > 0 else 0

		scroll_info = f'**Page:** {pages_above:.1f} pages above, {pages_below:.1f} pages below'
		if total_pages > 1.2:  # Only mention total if significantly > 1 page
			scroll_info += f', {total_pages:.1f} total pages'
		lines.append(scroll_info)
		lines.append('')

	# Add network loading info if there are pending requests
	if state.pending_network_requests:
		# Remove duplicates by URL (keep first occurrence with earliest duration)
		seen_urls = set()
		unique_requests = []
		for req in state.pending_network_requests:
			if req.url not in seen_urls:
				seen_urls.add(req.url)
				unique_requests.append(req)

		lines.append(f'**⏳ Loading:** {len(unique_requests)} network requests still loading')
		# Show up to 20 unique requests with truncated URLs (30 chars max)
		for req in unique_requests[:20]:
			duration_sec = req.loading_duration_ms / 1000
			url_display = req.url if len(req.url) <= 30 else req.url[:27] + '...'
			logger.info(f'  - [{duration_sec:.1f}s] {url_display}')
			lines.append(f'  - [{duration_sec:.1f}s] {url_display}')
		if len(unique_requests) > 20:
			lines.append(f'  - ... and {len(unique_requests) - 20} more')
		lines.append('**Tip:** Content may still be loading. Consider waiting with `await asyncio.sleep(1)` if data is missing.')
		lines.append('')

	# Add available variables and functions BEFORE DOM structure
	# Show useful utilities (json, asyncio, etc.) and user-defined vars, but hide system objects
	skip_vars = {
		'browser',
		'file_system',  # System objects
		'np',
		'pd',
		'plt',
		'numpy',
		'pandas',
		'matplotlib',
		'requests',
		'BeautifulSoup',
		'bs4',
		'pypdf',
		'PdfReader',
		'wait',
	}

	# Highlight code block variables separately from regular variables
	code_block_vars = []
	regular_vars = []
	tracked_code_blocks = namespace.get('_code_block_vars', set())
	for name in namespace.keys():
		# Skip private vars and system objects/actions
		if not name.startswith('_') and name not in skip_vars:
			if name in tracked_code_blocks:
				code_block_vars.append(name)
			else:
				regular_vars.append(name)

	# Sort for consistent display
	available_vars_sorted = sorted(regular_vars)
	code_block_vars_sorted = sorted(code_block_vars)

	# Build available line with code blocks and variables
	parts = []
	if code_block_vars_sorted:
		# Show detailed info for code block variables
		code_block_details = []
		for var_name in code_block_vars_sorted:
			value = namespace.get(var_name)
			if value is not None:
				type_name = type(value).__name__
				value_str = str(value) if not isinstance(value, str) else value

				# Check if it's a function (starts with "(function" or "(async function")
				is_function = value_str.strip().startswith('(function') or value_str.strip().startswith('(async function')

				if is_function:
					# For functions, only show name and type
					detail = f'{var_name}({type_name})'
				else:
					# For non-functions, show first and last 20 chars
					first_20 = value_str[:20].replace('\n', '\\n').replace('\t', '\\t')
					last_20 = value_str[-20:].replace('\n', '\\n').replace('\t', '\\t') if len(value_str) > 20 else ''

					if last_20 and first_20 != last_20:
						detail = f'{var_name}({type_name}): "{first_20}...{last_20}"'
					else:
						detail = f'{var_name}({type_name}): "{first_20}"'
				code_block_details.append(detail)

		parts.append(f'**Code block variables:** {" | ".join(code_block_details)}')
	if available_vars_sorted:
		parts.append(f'**Variables:** {", ".join(available_vars_sorted)}')

	lines.append(f'**Available:** {" | ".join(parts)}')
	lines.append('')

	# Add DOM structure
	lines.append('**DOM Structure:**')

	# Add scroll position hints for DOM
	if state.page_info:
		pi = state.page_info
		pages_above = pi.pixels_above / pi.viewport_height if pi.viewport_height > 0 else 0
		pages_below = pi.pixels_below / pi.viewport_height if pi.viewport_height > 0 else 0

		if pages_above > 0:
			dom_html = f'... {pages_above:.1f} pages above \n{dom_html}'
		else:
			dom_html = '[Start of page]\n' + dom_html

		if pages_below <= 0:
			dom_html += '\n[End of page]'

	# Truncate DOM if too long and notify LLM
	max_dom_length = 60000
	if len(dom_html) > max_dom_length:
		lines.append(dom_html[:max_dom_length])
		lines.append(
			f'\n[DOM truncated after {max_dom_length} characters. Full page contains {len(dom_html)} characters total. Use evaluate to explore more.]'
		)
	else:
		lines.append(dom_html)

	browser_state_text = '\n'.join(lines)
	return browser_state_text