diff --git a/browser_use/agent/gif.py b/browser_use/agent/gif.py index 99cc6ea19..392929144 100644 --- a/browser_use/agent/gif.py +++ b/browser_use/agent/gif.py @@ -114,21 +114,44 @@ def create_history_gif( # Create task frame if requested if show_task and task: - task_frame = _create_task_frame( - task, - history.history[0].state.screenshot, - title_font, # type: ignore - regular_font, # type: ignore - logo, - line_spacing, - ) - images.append(task_frame) + # Find the first non-placeholder screenshot for the task frame + first_real_screenshot = None + for item in history.history: + if ( + item.state.screenshot + and item.state.screenshot + != 'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII=' + ): + first_real_screenshot = item.state.screenshot + break + + if first_real_screenshot: + task_frame = _create_task_frame( + task, + first_real_screenshot, + title_font, # type: ignore + regular_font, # type: ignore + logo, + line_spacing, + ) + images.append(task_frame) + else: + logger.warning('No real screenshots found for task frame, skipping task frame') # Process each history item for i, item in enumerate(history.history, 1): if not item.state.screenshot: continue + # Skip placeholder screenshots from about:blank pages + # These are 4x4 white PNGs encoded as a specific base64 string + if ( + item.state.screenshot + == 'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII=' + ): + logger.debug(f'Skipping placeholder screenshot from about:blank page at step {i}') + continue + # Convert base64 screenshot to PIL Image img_data = base64.b64decode(item.state.screenshot) image = Image.open(io.BytesIO(img_data)) diff --git a/tests/ci/test_gif_filtering.py b/tests/ci/test_gif_filtering.py new file mode 100644 index 000000000..db3d35d31 --- /dev/null +++ b/tests/ci/test_gif_filtering.py @@ -0,0 +1,224 @@ +"""Test GIF generation filters out about:blank screenshots.""" + +import base64 +import io +from pathlib import Path + +import pytest +from PIL import Image + +from browser_use import Agent, AgentHistoryList +from browser_use.agent.gif import create_history_gif +from browser_use.agent.views import AgentHistory, AgentOutput, ActionResult, StepMetadata +from browser_use.browser.views import BrowserStateHistory, TabInfo +from tests.ci.conftest import create_mock_llm + + +# Known placeholder image data for about:blank pages +PLACEHOLDER_4PX = 'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII=' + + +@pytest.fixture +async def httpserver_url(httpserver): + """Simple test page.""" + httpserver.expect_request('/').respond_with_data( + """ + + +
This is a real page, not about:blank
+ + + """, + content_type='text/html', + ) + return httpserver.url_for('/') + + +@pytest.fixture +def test_dir(tmp_path): + """Create a test directory that gets cleaned up after each test.""" + test_path = tmp_path / 'test_gif_filtering' + test_path.mkdir(exist_ok=True) + yield test_path + + +def create_test_screenshot(width: int = 800, height: int = 600, color: tuple = (100, 150, 200)) -> str: + """Create a test screenshot as base64 string.""" + img = Image.new('RGB', (width, height), color) + buffer = io.BytesIO() + img.save(buffer, format='PNG') + return base64.b64encode(buffer.getvalue()).decode('utf-8') + + +async def test_gif_filters_out_placeholder_screenshots(test_dir): + """Test that 4px placeholder screenshots from about:blank pages are filtered out of GIFs.""" + # Create a history with mixed screenshots: real and placeholder + history_items = [] + + # First item: about:blank placeholder (should be filtered) + history_items.append( + AgentHistory( + model_output=AgentOutput( + evaluation_previous_goal='', + memory='', + next_goal='Starting task', + action=[], + ), + result=[ActionResult()], + state=BrowserStateHistory( + screenshot=PLACEHOLDER_4PX, + url='about:blank', + title='New Tab', + tabs=[TabInfo(page_id=1, url='about:blank', title='New Tab')], + interacted_element=[None], + ), + ) + ) + + # Second item: real screenshot + history_items.append( + AgentHistory( + model_output=AgentOutput( + evaluation_previous_goal='', + memory='', + next_goal='Navigate to example.com', + action=[], + ), + result=[ActionResult()], + state=BrowserStateHistory( + screenshot=create_test_screenshot(800, 600, (100, 150, 200)), + url='https://example.com', + title='Example', + tabs=[TabInfo(page_id=1, url='https://example.com', title='Example')], + interacted_element=[None], + ), + ) + ) + + # Third item: another about:blank placeholder (should be filtered) + history_items.append( + AgentHistory( + model_output=AgentOutput( + evaluation_previous_goal='', + memory='', + next_goal='Opening new tab', + action=[], + ), + result=[ActionResult()], + state=BrowserStateHistory( + screenshot=PLACEHOLDER_4PX, + url='about:blank', + title='New Tab', + tabs=[TabInfo(page_id=2, url='about:blank', title='New Tab')], + interacted_element=[None], + ), + ) + ) + + # Fourth item: another real screenshot + history_items.append( + AgentHistory( + model_output=AgentOutput( + evaluation_previous_goal='', + memory='', + next_goal='Click on button', + action=[], + ), + result=[ActionResult()], + state=BrowserStateHistory( + screenshot=create_test_screenshot(800, 600, (200, 100, 50)), + url='https://example.com/page2', + title='Page 2', + tabs=[TabInfo(page_id=1, url='https://example.com/page2', title='Page 2')], + interacted_element=[None], + ), + ) + ) + + # Create history list + history = AgentHistoryList(history=history_items) + + # Generate GIF + gif_path = test_dir / 'test_filtered.gif' + create_history_gif( + task='Test filtering about:blank screenshots', + history=history, + output_path=str(gif_path), + duration=500, # Shorter duration for testing + show_goals=True, + show_task=True, + ) + + # Verify GIF was created + assert gif_path.exists(), 'GIF was not created' + + # Open the GIF and check the frames + with Image.open(gif_path) as img: + # Count frames + frame_count = 0 + frame_sizes = [] + try: + while True: + frame_sizes.append(img.size) + frame_count += 1 + img.seek(img.tell() + 1) + except EOFError: + pass + + # We should have 3 frames total: + # 1. Task frame (created from first real screenshot) + # 2. Second real screenshot + # 3. Fourth real screenshot + # The two placeholder screenshots should be filtered out + assert frame_count == 3, f'Expected 3 frames (1 task + 2 real screenshots), got {frame_count}' + + # All frames should have the same size (800x600), not 4x4 + for size in frame_sizes: + assert size == (800, 600), f'Frame has incorrect size: {size}. Placeholder images may not have been filtered.' + + +async def test_gif_handles_all_placeholders(test_dir): + """Test that GIF generation handles case where all screenshots are placeholders.""" + # Create a history with only placeholder screenshots + history_items = [] + + for i in range(3): + history_items.append( + AgentHistory( + model_output=AgentOutput( + evaluation_previous_goal='', + memory='', + next_goal=f'Step {i+1}', + action=[], + ), + result=[ActionResult()], + state=BrowserStateHistory( + screenshot=PLACEHOLDER_4PX, + url='about:blank', + title='New Tab', + tabs=[TabInfo(page_id=1, url='about:blank', title='New Tab')], + interacted_element=[None], + ), + ) + ) + + history = AgentHistoryList(history=history_items) + + # Generate GIF - should handle gracefully + gif_path = test_dir / 'test_all_placeholders.gif' + create_history_gif( + task='Test all placeholders', + history=history, + output_path=str(gif_path), + duration=500, + ) + + # With all placeholders filtered, no GIF should be created + assert not gif_path.exists(), 'GIF should not be created when all screenshots are placeholders' + + +# Note: Removing the agent integration test due to sandbox issues in CI +# The unit tests above adequately verify the GIF filtering functionality \ No newline at end of file