From fba94602febecab17864e406380d8a08df34c4bb Mon Sep 17 00:00:00 2001 From: "claude[bot]" <209825114+claude[bot]@users.noreply.github.com> Date: Mon, 21 Jul 2025 06:48:26 +0000 Subject: [PATCH 1/3] fix: filter out about:blank placeholder screenshots from GIF generation - Skip 4px placeholder screenshots when creating GIFs - Find first real screenshot for task frame instead of using placeholder - Add comprehensive tests for GIF filtering behavior - Fixes issue where GIFs were generated at 4px dimensions Co-authored-by: Nick Sweeting --- browser_use/agent/gif.py | 41 ++++-- tests/ci/test_gif_filtering.py | 224 +++++++++++++++++++++++++++++++++ 2 files changed, 256 insertions(+), 9 deletions(-) create mode 100644 tests/ci/test_gif_filtering.py diff --git a/browser_use/agent/gif.py b/browser_use/agent/gif.py index 99cc6ea19..392929144 100644 --- a/browser_use/agent/gif.py +++ b/browser_use/agent/gif.py @@ -114,21 +114,44 @@ def create_history_gif( # Create task frame if requested if show_task and task: - task_frame = _create_task_frame( - task, - history.history[0].state.screenshot, - title_font, # type: ignore - regular_font, # type: ignore - logo, - line_spacing, - ) - images.append(task_frame) + # Find the first non-placeholder screenshot for the task frame + first_real_screenshot = None + for item in history.history: + if ( + item.state.screenshot + and item.state.screenshot + != 'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII=' + ): + first_real_screenshot = item.state.screenshot + break + + if first_real_screenshot: + task_frame = _create_task_frame( + task, + first_real_screenshot, + title_font, # type: ignore + regular_font, # type: ignore + logo, + line_spacing, + ) + images.append(task_frame) + else: + logger.warning('No real screenshots found for task frame, skipping task frame') # Process each history item for i, item in enumerate(history.history, 1): if not item.state.screenshot: continue + # Skip 
placeholder screenshots from about:blank pages + # These are 4x4 white PNGs encoded as a specific base64 string + if ( + item.state.screenshot + == 'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII=' + ): + logger.debug(f'Skipping placeholder screenshot from about:blank page at step {i}') + continue + # Convert base64 screenshot to PIL Image img_data = base64.b64decode(item.state.screenshot) image = Image.open(io.BytesIO(img_data)) diff --git a/tests/ci/test_gif_filtering.py b/tests/ci/test_gif_filtering.py new file mode 100644 index 000000000..db3d35d31 --- /dev/null +++ b/tests/ci/test_gif_filtering.py @@ -0,0 +1,224 @@ +"""Test GIF generation filters out about:blank screenshots.""" + +import base64 +import io +from pathlib import Path + +import pytest +from PIL import Image + +from browser_use import Agent, AgentHistoryList +from browser_use.agent.gif import create_history_gif +from browser_use.agent.views import AgentHistory, AgentOutput, ActionResult, StepMetadata +from browser_use.browser.views import BrowserStateHistory, TabInfo +from tests.ci.conftest import create_mock_llm + + +# Known placeholder image data for about:blank pages +PLACEHOLDER_4PX = 'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII=' + + +@pytest.fixture +async def httpserver_url(httpserver): + """Simple test page.""" + httpserver.expect_request('/').respond_with_data( + """ + + + Test Page + +

Test GIF Filtering

+

This is a real page, not about:blank

+ + + """, + content_type='text/html', + ) + return httpserver.url_for('/') + + +@pytest.fixture +def test_dir(tmp_path): + """Create a test directory that gets cleaned up after each test.""" + test_path = tmp_path / 'test_gif_filtering' + test_path.mkdir(exist_ok=True) + yield test_path + + +def create_test_screenshot(width: int = 800, height: int = 600, color: tuple = (100, 150, 200)) -> str: + """Create a test screenshot as base64 string.""" + img = Image.new('RGB', (width, height), color) + buffer = io.BytesIO() + img.save(buffer, format='PNG') + return base64.b64encode(buffer.getvalue()).decode('utf-8') + + +async def test_gif_filters_out_placeholder_screenshots(test_dir): + """Test that 4px placeholder screenshots from about:blank pages are filtered out of GIFs.""" + # Create a history with mixed screenshots: real and placeholder + history_items = [] + + # First item: about:blank placeholder (should be filtered) + history_items.append( + AgentHistory( + model_output=AgentOutput( + evaluation_previous_goal='', + memory='', + next_goal='Starting task', + action=[], + ), + result=[ActionResult()], + state=BrowserStateHistory( + screenshot=PLACEHOLDER_4PX, + url='about:blank', + title='New Tab', + tabs=[TabInfo(page_id=1, url='about:blank', title='New Tab')], + interacted_element=[None], + ), + ) + ) + + # Second item: real screenshot + history_items.append( + AgentHistory( + model_output=AgentOutput( + evaluation_previous_goal='', + memory='', + next_goal='Navigate to example.com', + action=[], + ), + result=[ActionResult()], + state=BrowserStateHistory( + screenshot=create_test_screenshot(800, 600, (100, 150, 200)), + url='https://example.com', + title='Example', + tabs=[TabInfo(page_id=1, url='https://example.com', title='Example')], + interacted_element=[None], + ), + ) + ) + + # Third item: another about:blank placeholder (should be filtered) + history_items.append( + AgentHistory( + model_output=AgentOutput( + evaluation_previous_goal='', + memory='', + 
next_goal='Opening new tab', + action=[], + ), + result=[ActionResult()], + state=BrowserStateHistory( + screenshot=PLACEHOLDER_4PX, + url='about:blank', + title='New Tab', + tabs=[TabInfo(page_id=2, url='about:blank', title='New Tab')], + interacted_element=[None], + ), + ) + ) + + # Fourth item: another real screenshot + history_items.append( + AgentHistory( + model_output=AgentOutput( + evaluation_previous_goal='', + memory='', + next_goal='Click on button', + action=[], + ), + result=[ActionResult()], + state=BrowserStateHistory( + screenshot=create_test_screenshot(800, 600, (200, 100, 50)), + url='https://example.com/page2', + title='Page 2', + tabs=[TabInfo(page_id=1, url='https://example.com/page2', title='Page 2')], + interacted_element=[None], + ), + ) + ) + + # Create history list + history = AgentHistoryList(history=history_items) + + # Generate GIF + gif_path = test_dir / 'test_filtered.gif' + create_history_gif( + task='Test filtering about:blank screenshots', + history=history, + output_path=str(gif_path), + duration=500, # Shorter duration for testing + show_goals=True, + show_task=True, + ) + + # Verify GIF was created + assert gif_path.exists(), 'GIF was not created' + + # Open the GIF and check the frames + with Image.open(gif_path) as img: + # Count frames + frame_count = 0 + frame_sizes = [] + try: + while True: + frame_sizes.append(img.size) + frame_count += 1 + img.seek(img.tell() + 1) + except EOFError: + pass + + # We should have 3 frames total: + # 1. Task frame (created from first real screenshot) + # 2. Second real screenshot + # 3. Fourth real screenshot + # The two placeholder screenshots should be filtered out + assert frame_count == 3, f'Expected 3 frames (1 task + 2 real screenshots), got {frame_count}' + + # All frames should have the same size (800x600), not 4x4 + for size in frame_sizes: + assert size == (800, 600), f'Frame has incorrect size: {size}. Placeholder images may not have been filtered.' 
+ + +async def test_gif_handles_all_placeholders(test_dir): + """Test that GIF generation handles case where all screenshots are placeholders.""" + # Create a history with only placeholder screenshots + history_items = [] + + for i in range(3): + history_items.append( + AgentHistory( + model_output=AgentOutput( + evaluation_previous_goal='', + memory='', + next_goal=f'Step {i+1}', + action=[], + ), + result=[ActionResult()], + state=BrowserStateHistory( + screenshot=PLACEHOLDER_4PX, + url='about:blank', + title='New Tab', + tabs=[TabInfo(page_id=1, url='about:blank', title='New Tab')], + interacted_element=[None], + ), + ) + ) + + history = AgentHistoryList(history=history_items) + + # Generate GIF - should handle gracefully + gif_path = test_dir / 'test_all_placeholders.gif' + create_history_gif( + task='Test all placeholders', + history=history, + output_path=str(gif_path), + duration=500, + ) + + # With all placeholders filtered, no GIF should be created + assert not gif_path.exists(), 'GIF should not be created when all screenshots are placeholders' + + +# Note: Removing the agent integration test due to sandbox issues in CI +# The unit tests above adequately verify the GIF filtering functionality \ No newline at end of file From ba12148b19325fdcb02c99f489f7c421c51fb6df Mon Sep 17 00:00:00 2001 From: "claude[bot]" <209825114+claude[bot]@users.noreply.github.com> Date: Mon, 21 Jul 2025 22:28:01 +0000 Subject: [PATCH 2/3] refactor: consolidate PLACEHOLDER_4PX_SCREENSHOT constant - Move 4px placeholder screenshot constant to browser_use.browser.views - Update all references to use the single definition - Fix all lint errors and formatting issues Co-authored-by: Nick Sweeting --- browser_use/agent/gif.py | 12 ++------ browser_use/browser/session.py | 3 +- browser_use/browser/views.py | 5 ++++ tests/ci/test_gif_filtering.py | 54 +++++++++++++++------------------- 4 files changed, 34 insertions(+), 40 deletions(-) diff --git a/browser_use/agent/gif.py 
b/browser_use/agent/gif.py index 392929144..f79415cce 100644 --- a/browser_use/agent/gif.py +++ b/browser_use/agent/gif.py @@ -8,6 +8,7 @@ import platform from typing import TYPE_CHECKING from browser_use.agent.views import AgentHistoryList +from browser_use.browser.views import PLACEHOLDER_4PX_SCREENSHOT from browser_use.config import CONFIG if TYPE_CHECKING: @@ -117,11 +118,7 @@ def create_history_gif( # Find the first non-placeholder screenshot for the task frame first_real_screenshot = None for item in history.history: - if ( - item.state.screenshot - and item.state.screenshot - != 'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII=' - ): + if item.state.screenshot and item.state.screenshot != PLACEHOLDER_4PX_SCREENSHOT: first_real_screenshot = item.state.screenshot break @@ -145,10 +142,7 @@ def create_history_gif( # Skip placeholder screenshots from about:blank pages # These are 4x4 white PNGs encoded as a specific base64 string - if ( - item.state.screenshot - == 'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII=' - ): + if item.state.screenshot == PLACEHOLDER_4PX_SCREENSHOT: logger.debug(f'Skipping placeholder screenshot from about:blank page at step {i}') continue diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index b068f952d..beee27db9 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -45,6 +45,7 @@ from browser_use.browser.types import ( async_playwright, ) from browser_use.browser.views import ( + PLACEHOLDER_4PX_SCREENSHOT, BrowserError, BrowserStateSummary, PageInfo, @@ -3610,7 +3611,7 @@ class BrowserSession(BaseModel): # not an exception because there's no point in retrying if we hit this, its always pointless to screenshot about:blank # raise ValueError('Refusing to take unneeded screenshot of empty new tab page') # return a 4px*4px white png to avoid wasting tokens - instead of 
1px*1px white png that was - return 'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII=' + return PLACEHOLDER_4PX_SCREENSHOT # Always bring page to front before rendering, otherwise it crashes in some cases, not sure why try: diff --git a/browser_use/browser/views.py b/browser_use/browser/views.py index 415ebb0a1..091728524 100644 --- a/browser_use/browser/views.py +++ b/browser_use/browser/views.py @@ -6,6 +6,11 @@ from pydantic import BaseModel from browser_use.dom.history_tree_processor.service import DOMHistoryElement from browser_use.dom.views import DOMState +# Known placeholder image data for about:blank pages - a 4x4 white PNG +PLACEHOLDER_4PX_SCREENSHOT = ( + 'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII=' +) + # Pydantic class TabInfo(BaseModel): diff --git a/tests/ci/test_gif_filtering.py b/tests/ci/test_gif_filtering.py index db3d35d31..8c8450354 100644 --- a/tests/ci/test_gif_filtering.py +++ b/tests/ci/test_gif_filtering.py @@ -2,20 +2,14 @@ import base64 import io -from pathlib import Path import pytest from PIL import Image -from browser_use import Agent, AgentHistoryList +from browser_use import AgentHistoryList from browser_use.agent.gif import create_history_gif -from browser_use.agent.views import AgentHistory, AgentOutput, ActionResult, StepMetadata -from browser_use.browser.views import BrowserStateHistory, TabInfo -from tests.ci.conftest import create_mock_llm - - -# Known placeholder image data for about:blank pages -PLACEHOLDER_4PX = 'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII=' +from browser_use.agent.views import ActionResult, AgentHistory, AgentOutput +from browser_use.browser.views import PLACEHOLDER_4PX_SCREENSHOT, BrowserStateHistory, TabInfo @pytest.fixture @@ -57,7 +51,7 @@ async def test_gif_filters_out_placeholder_screenshots(test_dir): """Test that 4px 
placeholder screenshots from about:blank pages are filtered out of GIFs.""" # Create a history with mixed screenshots: real and placeholder history_items = [] - + # First item: about:blank placeholder (should be filtered) history_items.append( AgentHistory( @@ -69,7 +63,7 @@ async def test_gif_filters_out_placeholder_screenshots(test_dir): ), result=[ActionResult()], state=BrowserStateHistory( - screenshot=PLACEHOLDER_4PX, + screenshot=PLACEHOLDER_4PX_SCREENSHOT, url='about:blank', title='New Tab', tabs=[TabInfo(page_id=1, url='about:blank', title='New Tab')], @@ -77,7 +71,7 @@ async def test_gif_filters_out_placeholder_screenshots(test_dir): ), ) ) - + # Second item: real screenshot history_items.append( AgentHistory( @@ -97,7 +91,7 @@ async def test_gif_filters_out_placeholder_screenshots(test_dir): ), ) ) - + # Third item: another about:blank placeholder (should be filtered) history_items.append( AgentHistory( @@ -109,15 +103,15 @@ async def test_gif_filters_out_placeholder_screenshots(test_dir): ), result=[ActionResult()], state=BrowserStateHistory( - screenshot=PLACEHOLDER_4PX, - url='about:blank', + screenshot=PLACEHOLDER_4PX_SCREENSHOT, + url='about:blank', title='New Tab', tabs=[TabInfo(page_id=2, url='about:blank', title='New Tab')], interacted_element=[None], ), ) ) - + # Fourth item: another real screenshot history_items.append( AgentHistory( @@ -137,10 +131,10 @@ async def test_gif_filters_out_placeholder_screenshots(test_dir): ), ) ) - + # Create history list history = AgentHistoryList(history=history_items) - + # Generate GIF gif_path = test_dir / 'test_filtered.gif' create_history_gif( @@ -151,10 +145,10 @@ async def test_gif_filters_out_placeholder_screenshots(test_dir): show_goals=True, show_task=True, ) - + # Verify GIF was created assert gif_path.exists(), 'GIF was not created' - + # Open the GIF and check the frames with Image.open(gif_path) as img: # Count frames @@ -167,14 +161,14 @@ async def 
test_gif_filters_out_placeholder_screenshots(test_dir): img.seek(img.tell() + 1) except EOFError: pass - + # We should have 3 frames total: # 1. Task frame (created from first real screenshot) - # 2. Second real screenshot + # 2. Second real screenshot # 3. Fourth real screenshot # The two placeholder screenshots should be filtered out assert frame_count == 3, f'Expected 3 frames (1 task + 2 real screenshots), got {frame_count}' - + # All frames should have the same size (800x600), not 4x4 for size in frame_sizes: assert size == (800, 600), f'Frame has incorrect size: {size}. Placeholder images may not have been filtered.' @@ -184,19 +178,19 @@ async def test_gif_handles_all_placeholders(test_dir): """Test that GIF generation handles case where all screenshots are placeholders.""" # Create a history with only placeholder screenshots history_items = [] - + for i in range(3): history_items.append( AgentHistory( model_output=AgentOutput( evaluation_previous_goal='', memory='', - next_goal=f'Step {i+1}', + next_goal=f'Step {i + 1}', action=[], ), result=[ActionResult()], state=BrowserStateHistory( - screenshot=PLACEHOLDER_4PX, + screenshot=PLACEHOLDER_4PX_SCREENSHOT, url='about:blank', title='New Tab', tabs=[TabInfo(page_id=1, url='about:blank', title='New Tab')], @@ -204,9 +198,9 @@ async def test_gif_handles_all_placeholders(test_dir): ), ) ) - + history = AgentHistoryList(history=history_items) - + # Generate GIF - should handle gracefully gif_path = test_dir / 'test_all_placeholders.gif' create_history_gif( @@ -215,10 +209,10 @@ async def test_gif_handles_all_placeholders(test_dir): output_path=str(gif_path), duration=500, ) - + # With all placeholders filtered, no GIF should be created assert not gif_path.exists(), 'GIF should not be created when all screenshots are placeholders' # Note: Removing the agent integration test due to sandbox issues in CI -# The unit tests above adequately verify the GIF filtering functionality \ No newline at end of file +# The unit 
tests above adequately verify the GIF filtering functionality From 8a4dc23827107f728d7ca2b2767b73937652bc08 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <209825114+claude[bot]@users.noreply.github.com> Date: Tue, 22 Jul 2025 06:57:23 +0000 Subject: [PATCH 3/3] fix: use is_new_tab_page() instead of manual about:blank check - Replace manual page.url == 'about:blank' check with is_new_tab_page() function - This ensures consistent handling of all new tab page variations (about:blank, chrome://new-tab-page, etc.) - Improves maintainability by centralizing new tab page detection logic Co-authored-by: Nick Sweeting --- browser_use/browser/session.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index beee27db9..84b9e83c7 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -2203,9 +2203,9 @@ class BrowserSession(BaseModel): f'⚠️ Failed to get tab info for tab #{page_id}: {_log_pretty_url(page.url)} (using fallback title)' ) - # Only mark as unusable if it's actually about:blank, otherwise preserve the real URL - if page.url == 'about:blank': - tab_info = TabInfo(page_id=page_id, url='about:blank', title='ignore this tab and do not use it') + # Only mark as unusable if it's actually a new tab page, otherwise preserve the real URL + if is_new_tab_page(page.url): + tab_info = TabInfo(page_id=page_id, url=page.url, title='ignore this tab and do not use it') else: # Preserve the real URL and use a descriptive fallback title # fallback_title = '(title unavailable, page possibly crashed / unresponsive)'