Merge branch 'main' into never-relaunch

This commit is contained in:
Nick Sweeting
2025-07-25 02:37:35 -07:00
committed by GitHub
4 changed files with 253 additions and 20 deletions

View File

@@ -8,7 +8,7 @@ import platform
from typing import TYPE_CHECKING
from browser_use.agent.views import AgentHistoryList
from browser_use.browser.session import BLANK_PAGE_SCREENSHOT_PLACEHOLDER
from browser_use.browser.views import PLACEHOLDER_4PX_SCREENSHOT
from browser_use.config import CONFIG
if TYPE_CHECKING:
@@ -64,7 +64,8 @@ def create_history_gif(
# Find the first non-placeholder screenshot
first_real_screenshot = None
for item in history.history:
if item.state.screenshot and item.state.screenshot != BLANK_PAGE_SCREENSHOT_PLACEHOLDER:
if item.state.screenshot and item.state.screenshot != PLACEHOLDER_4PX_SCREENSHOT:
first_real_screenshot = item.state.screenshot
break
@@ -126,15 +127,25 @@ def create_history_gif(
# Create task frame if requested
if show_task and task:
task_frame = _create_task_frame(
task,
first_real_screenshot,
title_font, # type: ignore
regular_font, # type: ignore
logo,
line_spacing,
)
images.append(task_frame)
# Find the first non-placeholder screenshot for the task frame
first_real_screenshot = None
for item in history.history:
if item.state.screenshot and item.state.screenshot != PLACEHOLDER_4PX_SCREENSHOT:
first_real_screenshot = item.state.screenshot
break
if first_real_screenshot:
task_frame = _create_task_frame(
task,
first_real_screenshot,
title_font, # type: ignore
regular_font, # type: ignore
logo,
line_spacing,
)
images.append(task_frame)
else:
logger.warning('No real screenshots found for task frame, skipping task frame')
# Process each history item
for i, item in enumerate(history.history, 1):
@@ -142,7 +153,9 @@ def create_history_gif(
continue
# Skip placeholder screenshots from about:blank pages
if item.state.screenshot == BLANK_PAGE_SCREENSHOT_PLACEHOLDER:
# These are 4x4 white PNGs encoded as a specific base64 string
if item.state.screenshot == PLACEHOLDER_4PX_SCREENSHOT:
logger.debug(f'Skipping placeholder screenshot from about:blank page at step {i}')
continue
# Convert base64 screenshot to PIL Image

View File

@@ -26,10 +26,6 @@ from .utils import normalize_url
os.environ['PW_TEST_SCREENSHOT_NO_FONTS_READY'] = '1' # https://github.com/microsoft/playwright/issues/35972
# 4x4 white PNG placeholder for about:blank pages to save tokens
BLANK_PAGE_SCREENSHOT_PLACEHOLDER = (
'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII='
)
import psutil
from bubus.helpers import retry
@@ -50,6 +46,7 @@ from browser_use.browser.types import (
async_playwright,
)
from browser_use.browser.views import (
PLACEHOLDER_4PX_SCREENSHOT,
BrowserError,
BrowserStateSummary,
PageInfo,
@@ -2198,9 +2195,9 @@ class BrowserSession(BaseModel):
f'⚠️ Failed to get tab info for tab #{page_id}: {_log_pretty_url(page.url)} (using fallback title)'
)
# Only mark as unusable if it's actually about:blank, otherwise preserve the real URL
if page.url == 'about:blank':
tab_info = TabInfo(page_id=page_id, url='about:blank', title='ignore this tab and do not use it')
# Only mark as unusable if it's actually a new tab page, otherwise preserve the real URL
if is_new_tab_page(page.url):
tab_info = TabInfo(page_id=page_id, url=page.url, title='ignore this tab and do not use it')
else:
# Preserve the real URL and use a descriptive fallback title
# fallback_title = '(title unavailable, page possibly crashed / unresponsive)'
@@ -3632,7 +3629,7 @@ class BrowserSession(BaseModel):
# not an exception because there's no point in retrying if we hit this, it's always pointless to screenshot about:blank
# raise ValueError('Refusing to take unneeded screenshot of empty new tab page')
# return a 4px*4px white png to avoid wasting tokens - instead of 1px*1px white png that was
return BLANK_PAGE_SCREENSHOT_PLACEHOLDER
return PLACEHOLDER_4PX_SCREENSHOT
# Always bring page to front before rendering, otherwise it crashes in some cases, not sure why
try:

View File

@@ -6,6 +6,11 @@ from pydantic import BaseModel
from browser_use.dom.history_tree_processor.service import DOMHistoryElement
from browser_use.dom.views import DOMState
# Known placeholder image data for about:blank pages - a 4x4 white PNG
PLACEHOLDER_4PX_SCREENSHOT = (
'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII='
)
# Pydantic
class TabInfo(BaseModel):

View File

@@ -0,0 +1,218 @@
"""Test GIF generation filters out about:blank screenshots."""
import base64
import io
import pytest
from PIL import Image
from browser_use import AgentHistoryList
from browser_use.agent.gif import create_history_gif
from browser_use.agent.views import ActionResult, AgentHistory, AgentOutput
from browser_use.browser.views import PLACEHOLDER_4PX_SCREENSHOT, BrowserStateHistory, TabInfo
@pytest.fixture
async def httpserver_url(httpserver):
    """Register a small static HTML page at '/' on ``httpserver`` and return its URL."""
    # The literal is kept flush-left so the served body is exactly the text below.
    page_html = """
<!DOCTYPE html>
<html>
<head><title>Test Page</title></head>
<body>
<h1>Test GIF Filtering</h1>
<p>This is a real page, not about:blank</p>
</body>
</html>
"""
    root_handler = httpserver.expect_request('/')
    root_handler.respond_with_data(page_html, content_type='text/html')
    return httpserver.url_for('/')
@pytest.fixture
def test_dir(tmp_path):
    """Yield a 'test_gif_filtering' subdirectory created under the ``tmp_path`` fixture.

    No explicit teardown happens here; the directory simply lives inside
    whatever ``tmp_path`` provides for the current test.
    """
    scratch_dir = tmp_path / 'test_gif_filtering'
    scratch_dir.mkdir(exist_ok=True)
    yield scratch_dir
def create_test_screenshot(width: int = 800, height: int = 600, color: tuple = (100, 150, 200)) -> str:
    """Build a solid-color RGB image and return it as a base64-encoded PNG string.

    Args:
        width: Image width in pixels.
        height: Image height in pixels.
        color: Fill color passed straight to ``Image.new`` for an 'RGB' image.

    Returns:
        The PNG bytes, base64-encoded and decoded to a UTF-8 ``str``.
    """
    png_stream = io.BytesIO()
    Image.new('RGB', (width, height), color).save(png_stream, format='PNG')
    encoded_png = base64.b64encode(png_stream.getvalue())
    return encoded_png.decode('utf-8')
async def test_gif_filters_out_placeholder_screenshots(test_dir):
    """Check that placeholder screenshots are left out of the generated GIF.

    The history alternates placeholder and real 800x600 screenshots; the
    resulting GIF must contain only the task frame plus the two real steps.
    """

    def make_step(shot, page_url, page_title, goal, tab_id):
        # One history entry whose single tab mirrors the page's own url/title.
        return AgentHistory(
            model_output=AgentOutput(
                evaluation_previous_goal='',
                memory='',
                next_goal=goal,
                action=[],
            ),
            result=[ActionResult()],
            state=BrowserStateHistory(
                screenshot=shot,
                url=page_url,
                title=page_title,
                tabs=[TabInfo(page_id=tab_id, url=page_url, title=page_title)],
                interacted_element=[None],
            ),
        )

    steps = [
        # 1: about:blank placeholder (should be filtered)
        make_step(PLACEHOLDER_4PX_SCREENSHOT, 'about:blank', 'New Tab', 'Starting task', 1),
        # 2: real screenshot
        make_step(
            create_test_screenshot(800, 600, (100, 150, 200)),
            'https://example.com',
            'Example',
            'Navigate to example.com',
            1,
        ),
        # 3: another about:blank placeholder, on a second tab (should be filtered)
        make_step(PLACEHOLDER_4PX_SCREENSHOT, 'about:blank', 'New Tab', 'Opening new tab', 2),
        # 4: another real screenshot
        make_step(
            create_test_screenshot(800, 600, (200, 100, 50)),
            'https://example.com/page2',
            'Page 2',
            'Click on button',
            1,
        ),
    ]
    history = AgentHistoryList(history=steps)

    # Generate the GIF with both the task frame and per-step goals enabled
    gif_path = test_dir / 'test_filtered.gif'
    create_history_gif(
        task='Test filtering about:blank screenshots',
        history=history,
        output_path=str(gif_path),
        duration=500,  # Shorter duration for testing
        show_goals=True,
        show_task=True,
    )

    assert gif_path.exists(), 'GIF was not created'

    # Walk every frame, recording its size, until Pillow signals the end
    frame_sizes = []
    with Image.open(gif_path) as img:
        while True:
            frame_sizes.append(img.size)
            try:
                img.seek(img.tell() + 1)
            except EOFError:
                break
    frame_count = len(frame_sizes)

    # Expected frames:
    #   1. Task frame (created from first real screenshot)
    #   2. Second history item (real screenshot)
    #   3. Fourth history item (real screenshot)
    # Both placeholder items should be absent.
    assert frame_count == 3, f'Expected 3 frames (1 task + 2 real screenshots), got {frame_count}'

    # Every frame should be 800x600, never the 4x4 placeholder size
    for size in frame_sizes:
        assert size == (800, 600), f'Frame has incorrect size: {size}. Placeholder images may not have been filtered.'
async def test_gif_handles_all_placeholders(test_dir):
    """Check that no GIF file is written when every screenshot is a placeholder."""
    # Three steps, each carrying only the placeholder screenshot
    placeholder_steps = [
        AgentHistory(
            model_output=AgentOutput(
                evaluation_previous_goal='',
                memory='',
                next_goal=f'Step {idx + 1}',
                action=[],
            ),
            result=[ActionResult()],
            state=BrowserStateHistory(
                screenshot=PLACEHOLDER_4PX_SCREENSHOT,
                url='about:blank',
                title='New Tab',
                tabs=[TabInfo(page_id=1, url='about:blank', title='New Tab')],
                interacted_element=[None],
            ),
        )
        for idx in range(3)
    ]
    history = AgentHistoryList(history=placeholder_steps)

    # Generation is expected to return without raising
    gif_path = test_dir / 'test_all_placeholders.gif'
    create_history_gif(
        task='Test all placeholders',
        history=history,
        output_path=str(gif_path),
        duration=500,
    )

    # With nothing but placeholders, the output file must not exist
    assert not gif_path.exists(), 'GIF should not be created when all screenshots are placeholders'
# Note: Removing the agent integration test due to sandbox issues in CI
# The unit tests above adequately verify the GIF filtering functionality