Merge branch 'main' into never-relaunch

2026-05-13 17:56:35 +02:00 · 2025-07-25 02:37:35 -07:00
parent 39dfa7f624 ef553dfa3a
commit 2e768bdc7c
4 changed files with 253 additions and 20 deletions
--- a/browser_use/agent/gif.py
+++ b/browser_use/agent/gif.py
@@ -8,7 +8,7 @@ import platform
 from typing import TYPE_CHECKING

 from browser_use.agent.views import AgentHistoryList
-from browser_use.browser.session import BLANK_PAGE_SCREENSHOT_PLACEHOLDER
+from browser_use.browser.views import PLACEHOLDER_4PX_SCREENSHOT
 from browser_use.config import CONFIG

 if TYPE_CHECKING:
@@ -64,7 +64,8 @@ def create_history_gif(
 	# Find the first non-placeholder screenshot
 	first_real_screenshot = None
 	for item in history.history:
-		if item.state.screenshot and item.state.screenshot != BLANK_PAGE_SCREENSHOT_PLACEHOLDER:
+		screenshot = item.state.screenshot
+		if screenshot and screenshot != PLACEHOLDER_4PX_SCREENSHOT:
 			first_real_screenshot = item.state.screenshot
 			break

@@ -126,15 +127,25 @@ def create_history_gif(

 	# Create task frame if requested
 	if show_task and task:
-		task_frame = _create_task_frame(
-			task,
-			first_real_screenshot,
-			title_font,  # type: ignore
-			regular_font,  # type: ignore
-			logo,
-			line_spacing,
-		)
-		images.append(task_frame)
+		# Find the first non-placeholder screenshot for the task frame
+		first_real_screenshot = None
+		for item in history.history:
+			if item.state.screenshot and item.state.screenshot != PLACEHOLDER_4PX_SCREENSHOT:
+				first_real_screenshot = item.state.screenshot
+				break
+
+		if first_real_screenshot:
+			task_frame = _create_task_frame(
+				task,
+				first_real_screenshot,
+				title_font,  # type: ignore
+				regular_font,  # type: ignore
+				logo,
+				line_spacing,
+			)
+			images.append(task_frame)
+		else:
+			logger.warning('No real screenshots found for task frame, skipping task frame')

 	# Process each history item
 	for i, item in enumerate(history.history, 1):
@@ -142,7 +153,9 @@ def create_history_gif(
 			continue

 		# Skip placeholder screenshots from about:blank pages
-		if item.state.screenshot == BLANK_PAGE_SCREENSHOT_PLACEHOLDER:
+		# These are 4x4 white PNGs encoded as a specific base64 string
+		if item.state.screenshot == PLACEHOLDER_4PX_SCREENSHOT:
+			logger.debug(f'Skipping placeholder screenshot from about:blank page at step {i}')
 			continue

 		# Convert base64 screenshot to PIL Image
--- a/browser_use/browser/session.py
+++ b/browser_use/browser/session.py
@@ -26,10 +26,6 @@ from .utils import normalize_url

 os.environ['PW_TEST_SCREENSHOT_NO_FONTS_READY'] = '1'  # https://github.com/microsoft/playwright/issues/35972

-# 4x4 white PNG placeholder for about:blank pages to save tokens
-BLANK_PAGE_SCREENSHOT_PLACEHOLDER = (
-	'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII='
-)

 import psutil
 from bubus.helpers import retry
@@ -50,6 +46,7 @@ from browser_use.browser.types import (
 	async_playwright,
 )
 from browser_use.browser.views import (
+	PLACEHOLDER_4PX_SCREENSHOT,
 	BrowserError,
 	BrowserStateSummary,
 	PageInfo,
@@ -2198,9 +2195,9 @@ class BrowserSession(BaseModel):
 					f'⚠️ Failed to get tab info for tab #{page_id}: {_log_pretty_url(page.url)} (using fallback title)'
 				)

-				# Only mark as unusable if it's actually about:blank, otherwise preserve the real URL
-				if page.url == 'about:blank':
-					tab_info = TabInfo(page_id=page_id, url='about:blank', title='ignore this tab and do not use it')
+				# Only mark as unusable if it's actually a new tab page, otherwise preserve the real URL
+				if is_new_tab_page(page.url):
+					tab_info = TabInfo(page_id=page_id, url=page.url, title='ignore this tab and do not use it')
 				else:
 					# Preserve the real URL and use a descriptive fallback title
 					# fallback_title = '(title unavailable, page possibly crashed / unresponsive)'
@@ -3632,7 +3629,7 @@ class BrowserSession(BaseModel):
 			# not an exception because there's no point in retrying if we hit this, its always pointless to screenshot about:blank
 			# raise ValueError('Refusing to take unneeded screenshot of empty new tab page')
 			# return a 4px*4px white png to avoid wasting tokens - instead of 1px*1px white png that was
-			return BLANK_PAGE_SCREENSHOT_PLACEHOLDER
+			return PLACEHOLDER_4PX_SCREENSHOT

 		# Always bring page to front before rendering, otherwise it crashes in some cases, not sure why
 		try:
--- a/browser_use/browser/views.py
+++ b/browser_use/browser/views.py
@@ -6,6 +6,11 @@ from pydantic import BaseModel
 from browser_use.dom.history_tree_processor.service import DOMHistoryElement
 from browser_use.dom.views import DOMState

+# Known placeholder image data for about:blank pages - a 4x4 white PNG
+PLACEHOLDER_4PX_SCREENSHOT = (
+	'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII='
+)
+

 # Pydantic
 class TabInfo(BaseModel):
--- a/tests/ci/test_gif_filtering.py
+++ b/tests/ci/test_gif_filtering.py
@@ -0,0 +1,218 @@
+"""Test GIF generation filters out about:blank screenshots."""
+
+import base64
+import io
+
+import pytest
+from PIL import Image
+
+from browser_use import AgentHistoryList
+from browser_use.agent.gif import create_history_gif
+from browser_use.agent.views import ActionResult, AgentHistory, AgentOutput
+from browser_use.browser.views import PLACEHOLDER_4PX_SCREENSHOT, BrowserStateHistory, TabInfo
+
+
+@pytest.fixture
+async def httpserver_url(httpserver):
+	"""Simple test page."""
+	httpserver.expect_request('/').respond_with_data(
+		"""
+		<!DOCTYPE html>
+		<html>
+		<head><title>Test Page</title></head>
+		<body>
+			<h1>Test GIF Filtering</h1>
+			<p>This is a real page, not about:blank</p>
+		</body>
+		</html>
+		""",
+		content_type='text/html',
+	)
+	return httpserver.url_for('/')
+
+
+@pytest.fixture
+def test_dir(tmp_path):
+	"""Create a test directory that gets cleaned up after each test."""
+	test_path = tmp_path / 'test_gif_filtering'
+	test_path.mkdir(exist_ok=True)
+	yield test_path
+
+
+def create_test_screenshot(width: int = 800, height: int = 600, color: tuple = (100, 150, 200)) -> str:
+	"""Create a test screenshot as base64 string."""
+	img = Image.new('RGB', (width, height), color)
+	buffer = io.BytesIO()
+	img.save(buffer, format='PNG')
+	return base64.b64encode(buffer.getvalue()).decode('utf-8')
+
+
+async def test_gif_filters_out_placeholder_screenshots(test_dir):
+	"""Test that 4px placeholder screenshots from about:blank pages are filtered out of GIFs."""
+	# Create a history with mixed screenshots: real and placeholder
+	history_items = []
+
+	# First item: about:blank placeholder (should be filtered)
+	history_items.append(
+		AgentHistory(
+			model_output=AgentOutput(
+				evaluation_previous_goal='',
+				memory='',
+				next_goal='Starting task',
+				action=[],
+			),
+			result=[ActionResult()],
+			state=BrowserStateHistory(
+				screenshot=PLACEHOLDER_4PX_SCREENSHOT,
+				url='about:blank',
+				title='New Tab',
+				tabs=[TabInfo(page_id=1, url='about:blank', title='New Tab')],
+				interacted_element=[None],
+			),
+		)
+	)
+
+	# Second item: real screenshot
+	history_items.append(
+		AgentHistory(
+			model_output=AgentOutput(
+				evaluation_previous_goal='',
+				memory='',
+				next_goal='Navigate to example.com',
+				action=[],
+			),
+			result=[ActionResult()],
+			state=BrowserStateHistory(
+				screenshot=create_test_screenshot(800, 600, (100, 150, 200)),
+				url='https://example.com',
+				title='Example',
+				tabs=[TabInfo(page_id=1, url='https://example.com', title='Example')],
+				interacted_element=[None],
+			),
+		)
+	)
+
+	# Third item: another about:blank placeholder (should be filtered)
+	history_items.append(
+		AgentHistory(
+			model_output=AgentOutput(
+				evaluation_previous_goal='',
+				memory='',
+				next_goal='Opening new tab',
+				action=[],
+			),
+			result=[ActionResult()],
+			state=BrowserStateHistory(
+				screenshot=PLACEHOLDER_4PX_SCREENSHOT,
+				url='about:blank',
+				title='New Tab',
+				tabs=[TabInfo(page_id=2, url='about:blank', title='New Tab')],
+				interacted_element=[None],
+			),
+		)
+	)
+
+	# Fourth item: another real screenshot
+	history_items.append(
+		AgentHistory(
+			model_output=AgentOutput(
+				evaluation_previous_goal='',
+				memory='',
+				next_goal='Click on button',
+				action=[],
+			),
+			result=[ActionResult()],
+			state=BrowserStateHistory(
+				screenshot=create_test_screenshot(800, 600, (200, 100, 50)),
+				url='https://example.com/page2',
+				title='Page 2',
+				tabs=[TabInfo(page_id=1, url='https://example.com/page2', title='Page 2')],
+				interacted_element=[None],
+			),
+		)
+	)
+
+	# Create history list
+	history = AgentHistoryList(history=history_items)
+
+	# Generate GIF
+	gif_path = test_dir / 'test_filtered.gif'
+	create_history_gif(
+		task='Test filtering about:blank screenshots',
+		history=history,
+		output_path=str(gif_path),
+		duration=500,  # Shorter duration for testing
+		show_goals=True,
+		show_task=True,
+	)
+
+	# Verify GIF was created
+	assert gif_path.exists(), 'GIF was not created'
+
+	# Open the GIF and check the frames
+	with Image.open(gif_path) as img:
+		# Count frames
+		frame_count = 0
+		frame_sizes = []
+		try:
+			while True:
+				frame_sizes.append(img.size)
+				frame_count += 1
+				img.seek(img.tell() + 1)
+		except EOFError:
+			pass
+
+		# We should have 3 frames total:
+		# 1. Task frame (created from first real screenshot)
+		# 2. Second history item (first real screenshot)
+		# 3. Fourth history item (second real screenshot)
+		# The two placeholder screenshots should be filtered out
+		assert frame_count == 3, f'Expected 3 frames (1 task + 2 real screenshots), got {frame_count}'
+
+		# All frames should have the same size (800x600), not 4x4
+		for size in frame_sizes:
+			assert size == (800, 600), f'Frame has incorrect size: {size}. Placeholder images may not have been filtered.'
+
+
+async def test_gif_handles_all_placeholders(test_dir):
+	"""Test that GIF generation handles case where all screenshots are placeholders."""
+	# Create a history with only placeholder screenshots
+	history_items = []
+
+	for i in range(3):
+		history_items.append(
+			AgentHistory(
+				model_output=AgentOutput(
+					evaluation_previous_goal='',
+					memory='',
+					next_goal=f'Step {i + 1}',
+					action=[],
+				),
+				result=[ActionResult()],
+				state=BrowserStateHistory(
+					screenshot=PLACEHOLDER_4PX_SCREENSHOT,
+					url='about:blank',
+					title='New Tab',
+					tabs=[TabInfo(page_id=1, url='about:blank', title='New Tab')],
+					interacted_element=[None],
+				),
+			)
+		)
+
+	history = AgentHistoryList(history=history_items)
+
+	# Generate GIF - should handle gracefully
+	gif_path = test_dir / 'test_all_placeholders.gif'
+	create_history_gif(
+		task='Test all placeholders',
+		history=history,
+		output_path=str(gif_path),
+		duration=500,
+	)
+
+	# With all placeholders filtered, no GIF should be created
+	assert not gif_path.exists(), 'GIF should not be created when all screenshots are placeholders'
+
+
+# Note: Removing the agent integration test due to sandbox issues in CI
+# The unit tests above adequately verify the GIF filtering functionality