fix: filter out about:blank placeholder screenshots from GIF generation

- Skip 4px placeholder screenshots when creating GIFs
- Find first real screenshot for task frame instead of using placeholder
- Add comprehensive tests for GIF filtering behavior
- Fixes issue where GIFs were generated at 4px dimensions

Co-authored-by: Nick Sweeting <pirate@users.noreply.github.com>
This commit is contained in:
claude[bot]
2025-07-21 06:48:26 +00:00
parent eae2296f49
commit fba94602fe
2 changed files with 256 additions and 9 deletions

View File

@@ -114,21 +114,44 @@ def create_history_gif(
# Create task frame if requested
if show_task and task:
task_frame = _create_task_frame(
task,
history.history[0].state.screenshot,
title_font, # type: ignore
regular_font, # type: ignore
logo,
line_spacing,
)
images.append(task_frame)
# Find the first non-placeholder screenshot for the task frame
first_real_screenshot = None
for item in history.history:
if (
item.state.screenshot
and item.state.screenshot
!= 'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII='
):
first_real_screenshot = item.state.screenshot
break
if first_real_screenshot:
task_frame = _create_task_frame(
task,
first_real_screenshot,
title_font, # type: ignore
regular_font, # type: ignore
logo,
line_spacing,
)
images.append(task_frame)
else:
logger.warning('No real screenshots found for task frame, skipping task frame')
# Process each history item
for i, item in enumerate(history.history, 1):
if not item.state.screenshot:
continue
# Skip placeholder screenshots from about:blank pages
# These are 4x4 white PNGs encoded as a specific base64 string
if (
item.state.screenshot
== 'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII='
):
logger.debug(f'Skipping placeholder screenshot from about:blank page at step {i}')
continue
# Convert base64 screenshot to PIL Image
img_data = base64.b64decode(item.state.screenshot)
image = Image.open(io.BytesIO(img_data))

View File

@@ -0,0 +1,224 @@
"""Test GIF generation filters out about:blank screenshots."""
import base64
import io
from pathlib import Path
import pytest
from PIL import Image
from browser_use import Agent, AgentHistoryList
from browser_use.agent.gif import create_history_gif
from browser_use.agent.views import AgentHistory, AgentOutput, ActionResult, StepMetadata
from browser_use.browser.views import BrowserStateHistory, TabInfo
from tests.ci.conftest import create_mock_llm
# Known placeholder image data for about:blank pages
PLACEHOLDER_4PX = 'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII='
@pytest.fixture
async def httpserver_url(httpserver):
"""Simple test page."""
httpserver.expect_request('/').respond_with_data(
"""
<!DOCTYPE html>
<html>
<head><title>Test Page</title></head>
<body>
<h1>Test GIF Filtering</h1>
<p>This is a real page, not about:blank</p>
</body>
</html>
""",
content_type='text/html',
)
return httpserver.url_for('/')
@pytest.fixture
def test_dir(tmp_path):
"""Create a test directory that gets cleaned up after each test."""
test_path = tmp_path / 'test_gif_filtering'
test_path.mkdir(exist_ok=True)
yield test_path
def create_test_screenshot(width: int = 800, height: int = 600, color: tuple = (100, 150, 200)) -> str:
"""Create a test screenshot as base64 string."""
img = Image.new('RGB', (width, height), color)
buffer = io.BytesIO()
img.save(buffer, format='PNG')
return base64.b64encode(buffer.getvalue()).decode('utf-8')
async def test_gif_filters_out_placeholder_screenshots(test_dir):
"""Test that 4px placeholder screenshots from about:blank pages are filtered out of GIFs."""
# Create a history with mixed screenshots: real and placeholder
history_items = []
# First item: about:blank placeholder (should be filtered)
history_items.append(
AgentHistory(
model_output=AgentOutput(
evaluation_previous_goal='',
memory='',
next_goal='Starting task',
action=[],
),
result=[ActionResult()],
state=BrowserStateHistory(
screenshot=PLACEHOLDER_4PX,
url='about:blank',
title='New Tab',
tabs=[TabInfo(page_id=1, url='about:blank', title='New Tab')],
interacted_element=[None],
),
)
)
# Second item: real screenshot
history_items.append(
AgentHistory(
model_output=AgentOutput(
evaluation_previous_goal='',
memory='',
next_goal='Navigate to example.com',
action=[],
),
result=[ActionResult()],
state=BrowserStateHistory(
screenshot=create_test_screenshot(800, 600, (100, 150, 200)),
url='https://example.com',
title='Example',
tabs=[TabInfo(page_id=1, url='https://example.com', title='Example')],
interacted_element=[None],
),
)
)
# Third item: another about:blank placeholder (should be filtered)
history_items.append(
AgentHistory(
model_output=AgentOutput(
evaluation_previous_goal='',
memory='',
next_goal='Opening new tab',
action=[],
),
result=[ActionResult()],
state=BrowserStateHistory(
screenshot=PLACEHOLDER_4PX,
url='about:blank',
title='New Tab',
tabs=[TabInfo(page_id=2, url='about:blank', title='New Tab')],
interacted_element=[None],
),
)
)
# Fourth item: another real screenshot
history_items.append(
AgentHistory(
model_output=AgentOutput(
evaluation_previous_goal='',
memory='',
next_goal='Click on button',
action=[],
),
result=[ActionResult()],
state=BrowserStateHistory(
screenshot=create_test_screenshot(800, 600, (200, 100, 50)),
url='https://example.com/page2',
title='Page 2',
tabs=[TabInfo(page_id=1, url='https://example.com/page2', title='Page 2')],
interacted_element=[None],
),
)
)
# Create history list
history = AgentHistoryList(history=history_items)
# Generate GIF
gif_path = test_dir / 'test_filtered.gif'
create_history_gif(
task='Test filtering about:blank screenshots',
history=history,
output_path=str(gif_path),
duration=500, # Shorter duration for testing
show_goals=True,
show_task=True,
)
# Verify GIF was created
assert gif_path.exists(), 'GIF was not created'
# Open the GIF and check the frames
with Image.open(gif_path) as img:
# Count frames
frame_count = 0
frame_sizes = []
try:
while True:
frame_sizes.append(img.size)
frame_count += 1
img.seek(img.tell() + 1)
except EOFError:
pass
# We should have 3 frames total:
# 1. Task frame (created from first real screenshot)
# 2. Second real screenshot
# 3. Fourth real screenshot
# The two placeholder screenshots should be filtered out
assert frame_count == 3, f'Expected 3 frames (1 task + 2 real screenshots), got {frame_count}'
# All frames should have the same size (800x600), not 4x4
for size in frame_sizes:
assert size == (800, 600), f'Frame has incorrect size: {size}. Placeholder images may not have been filtered.'
async def test_gif_handles_all_placeholders(test_dir):
"""Test that GIF generation handles case where all screenshots are placeholders."""
# Create a history with only placeholder screenshots
history_items = []
for i in range(3):
history_items.append(
AgentHistory(
model_output=AgentOutput(
evaluation_previous_goal='',
memory='',
next_goal=f'Step {i+1}',
action=[],
),
result=[ActionResult()],
state=BrowserStateHistory(
screenshot=PLACEHOLDER_4PX,
url='about:blank',
title='New Tab',
tabs=[TabInfo(page_id=1, url='about:blank', title='New Tab')],
interacted_element=[None],
),
)
)
history = AgentHistoryList(history=history_items)
# Generate GIF - should handle gracefully
gif_path = test_dir / 'test_all_placeholders.gif'
create_history_gif(
task='Test all placeholders',
history=history,
output_path=str(gif_path),
duration=500,
)
# With all placeholders filtered, no GIF should be created
assert not gif_path.exists(), 'GIF should not be created when all screenshots are placeholders'
# Note: Removing the agent integration test due to sandbox issues in CI
# The unit tests above adequately verify the GIF filtering functionality