From 385dedde77482e444fc03bcf7dd69dc1b9011247 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 30 Jun 2025 23:41:21 -0700 Subject: [PATCH 1/6] fix screenshot clip errors at end of scroll --- browser_use/browser/session.py | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index 783a0afe1..480c21e43 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -2718,6 +2718,7 @@ class BrowserSession(BaseModel): return { width: window.innerWidth, height: window.innerHeight, + pageWidth: document.documentElement.scrollWidth, pageHeight: document.documentElement.scrollHeight, devicePixelRatio: window.devicePixelRatio || 1, scrollX: window.pageXOffset || document.documentElement.scrollLeft || 0, @@ -2725,14 +2726,38 @@ class BrowserSession(BaseModel): }; }""") + # Calculate clip region that doesn't exceed page bounds + # The clip coordinates are in page coordinates, not viewport coordinates + clip_x = dimensions['scrollX'] + clip_y = dimensions['scrollY'] + clip_width = min(dimensions['width'], MAX_SCREENSHOT_WIDTH) + clip_height = min(dimensions['height'], MAX_SCREENSHOT_HEIGHT) + + # Ensure clip region doesn't exceed page boundaries + max_x = max(0, dimensions['pageWidth'] - clip_x) + max_y = max(0, dimensions['pageHeight'] - clip_y) + clip_width = min(clip_width, max_x) + clip_height = min(clip_height, max_y) + + # Don't try to screenshot if the clip area would be empty + if clip_width <= 0 or clip_height <= 0: + self.logger.warning( + f'Screenshot clip area would be empty: width={clip_width}, height={clip_height}, scrollY={clip_y}, pageHeight={dimensions["pageHeight"]}' + ) + # Fall back to capturing from top of viewport if we're scrolled past the page + clip_x = 0 + clip_y = max(0, dimensions['pageHeight'] - dimensions['height']) + clip_width = min(dimensions['width'], MAX_SCREENSHOT_WIDTH, dimensions['pageWidth']) + 
clip_height = min(dimensions['height'], MAX_SCREENSHOT_HEIGHT, dimensions['pageHeight'] - clip_y) + # Take screenshot using our retry-decorated method return await self._take_screenshot_hybrid( page, clip={ - 'x': dimensions['scrollX'], - 'y': dimensions['scrollY'], - 'width': min(dimensions['width'], MAX_SCREENSHOT_WIDTH), - 'height': min(dimensions['height'], MAX_SCREENSHOT_HEIGHT), + 'x': clip_x, + 'y': clip_y, + 'width': clip_width, + 'height': clip_height, }, ) except Exception as e: From 6170953b5e0c17aa264119f066bc2105f9711a4f Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 30 Jun 2025 23:47:20 -0700 Subject: [PATCH 2/6] disable screenshot clipping entirely for now --- browser_use/browser/session.py | 40 +++--------- tests/ci/test_browser_session_screenshots.py | 67 ++++++++++++++++++++ 2 files changed, 75 insertions(+), 32 deletions(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index 480c21e43..828222016 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -25,7 +25,7 @@ os.environ['PW_TEST_SCREENSHOT_NO_FONTS_READY'] = '1' # https://github.com/micr import anyio import psutil -from playwright._impl._api_structures import FloatRect, ViewportSize +from playwright._impl._api_structures import ViewportSize from pydantic import AliasChoices, BaseModel, ConfigDict, Field, InstanceOf, PrivateAttr, model_validator from uuid_extensions import uuid7str @@ -725,7 +725,7 @@ class BrowserSession(BaseModel): full_page=False, # scale='css', timeout=self.browser_profile.default_timeout or 30000, - clip=FloatRect(**clip) if clip else None, + # clip=FloatRect(**clip) if clip else None, animations='allow', caret='initial', ) @@ -2726,40 +2726,16 @@ class BrowserSession(BaseModel): }; }""") - # Calculate clip region that doesn't exceed page bounds - # The clip coordinates are in page coordinates, not viewport coordinates - clip_x = dimensions['scrollX'] - clip_y = dimensions['scrollY'] + # When 
full_page=False, screenshot captures the current viewport + # The clip parameter uses viewport coordinates (0,0 is top-left of viewport) + # We just need to ensure the clip dimensions don't exceed our maximums clip_width = min(dimensions['width'], MAX_SCREENSHOT_WIDTH) clip_height = min(dimensions['height'], MAX_SCREENSHOT_HEIGHT) - # Ensure clip region doesn't exceed page boundaries - max_x = max(0, dimensions['pageWidth'] - clip_x) - max_y = max(0, dimensions['pageHeight'] - clip_y) - clip_width = min(clip_width, max_x) - clip_height = min(clip_height, max_y) - - # Don't try to screenshot if the clip area would be empty - if clip_width <= 0 or clip_height <= 0: - self.logger.warning( - f'Screenshot clip area would be empty: width={clip_width}, height={clip_height}, scrollY={clip_y}, pageHeight={dimensions["pageHeight"]}' - ) - # Fall back to capturing from top of viewport if we're scrolled past the page - clip_x = 0 - clip_y = max(0, dimensions['pageHeight'] - dimensions['height']) - clip_width = min(dimensions['width'], MAX_SCREENSHOT_WIDTH, dimensions['pageWidth']) - clip_height = min(dimensions['height'], MAX_SCREENSHOT_HEIGHT, dimensions['pageHeight'] - clip_y) - # Take screenshot using our retry-decorated method - return await self._take_screenshot_hybrid( - page, - clip={ - 'x': clip_x, - 'y': clip_y, - 'width': clip_width, - 'height': clip_height, - }, - ) + # Don't pass clip parameter - let Playwright capture the full viewport + # It will automatically handle cases where viewport extends beyond page content + return await self._take_screenshot_hybrid(page) except Exception as e: self.logger.error(f'❌ Failed to take screenshot after retries: {type(e).__name__}: {e}') raise diff --git a/tests/ci/test_browser_session_screenshots.py b/tests/ci/test_browser_session_screenshots.py index b1c15c8c9..7c3428887 100644 --- a/tests/ci/test_browser_session_screenshots.py +++ b/tests/ci/test_browser_session_screenshots.py @@ -2,6 +2,7 @@ Test that screenshots work 
correctly in headless browser mode. """ +import asyncio import base64 from browser_use.browser import BrowserProfile, BrowserSession @@ -244,3 +245,69 @@ class TestHeadlessScreenshots: for i, result in enumerate(results): if isinstance(result, Exception): print(f'Warning: Session {i} kill raised exception: {type(result).__name__}: {result}') + + async def test_screenshot_at_bottom_of_page(self, httpserver): + """Test screenshot capture when scrolled to bottom of page (regression test for clipping issue)""" + browser_session = BrowserSession( + browser_profile=BrowserProfile( + headless=True, + user_data_dir=None, + keep_alive=False, + ) + ) + + try: + await browser_session.start() + + # Create a page with scrollable content + httpserver.expect_request('/scrollable').respond_with_data( + """ + Scrollable Page Test + +
+
Top of page
+
Middle of page
+
Bottom of page
+
+ + """, + content_type='text/html', + ) + + # Navigate to test page + await browser_session.navigate(httpserver.url_for('/scrollable')) + page = browser_session.agent_current_page + assert page is not None + + # Test 1: Screenshot at top of page (should work) + screenshot_top = await browser_session.take_screenshot() + assert screenshot_top is not None + assert len(base64.b64decode(screenshot_top)) > 5000 + + # Test 2: Screenshot at middle of page + await page.evaluate('window.scrollTo(0, document.body.scrollHeight / 2)') + await asyncio.sleep(0.1) # Wait for scroll + screenshot_middle = await browser_session.take_screenshot() + assert screenshot_middle is not None + assert len(base64.b64decode(screenshot_middle)) > 5000 + + # Test 3: Screenshot at bottom of page (this was failing with clipping error) + await page.evaluate('window.scrollTo(0, document.body.scrollHeight)') + await asyncio.sleep(0.1) # Wait for scroll + + # This should not raise "Clipped area is either empty or outside the resulting image" error + screenshot_bottom = await browser_session.take_screenshot() + assert screenshot_bottom is not None + assert len(base64.b64decode(screenshot_bottom)) > 5000 + + # Test 4: Screenshot when scrolled beyond page bottom (edge case) + await page.evaluate('window.scrollTo(0, document.body.scrollHeight + 1000)') + await asyncio.sleep(0.1) + screenshot_beyond = await browser_session.take_screenshot() + assert screenshot_beyond is not None + assert len(base64.b64decode(screenshot_beyond)) > 5000 + + print('✅ All screenshot positions tested successfully!') + + finally: + await browser_session.stop() From d9943ef33646ede9e3ff5cf9ecef6cb87fc13eeb Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 1 Jul 2025 00:41:32 -0700 Subject: [PATCH 3/6] tweak emoji --- browser_use/browser/session.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index 828222016..a0a12c330 100644 --- 
a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -248,12 +248,12 @@ class BrowserSession(BaseModel): return self._logger def __repr__(self) -> str: - is_copy = '©' if self._original_browser_session else '1️⃣ ' - return f'BrowserSession🆂 {self.id[-4:]}{is_copy}{str(id(self))[-2:]} ({self._connection_str}, profile={self.browser_profile})' + is_copy = '©' if self._original_browser_session else '#' + return f'BrowserSession🆂 {self.id[-4:]} {is_copy}{str(id(self))[-2:]} ({self._connection_str}, profile={self.browser_profile})' def __str__(self) -> str: - is_copy = '©' if self._original_browser_session else '1️⃣ ' - return f'BrowserSession🆂 {self.id[-4:]}{is_copy}{str(id(self))[-2:]} 🅟 {str(id(self.agent_current_page))[-2:]}' + is_copy = '©' if self._original_browser_session else '#' + return f'BrowserSession🆂 {self.id[-4:]} {is_copy}{str(id(self))[-2:]} 🅟 {str(id(self.agent_current_page))[-2:]}' # better to force people to get it from the right object, "only one way to do it" is better python # def __getattr__(self, key: str) -> Any: From 6a5e86204b490330b8d4ac39c3f5e9664d3477b3 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 1 Jul 2025 01:42:25 -0700 Subject: [PATCH 4/6] speed up screenshot tweaks --- browser_use/browser/session.py | 26 ++++++++++---------- tests/ci/test_browser_session_screenshots.py | 6 ++--- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index a0a12c330..fa4cfceab 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -2714,23 +2714,23 @@ class BrowserSession(BaseModel): # This prevents timeouts on very long pages # 1. 
Get current viewport and page dimensions including scroll position - dimensions = await page.evaluate("""() => { - return { - width: window.innerWidth, - height: window.innerHeight, - pageWidth: document.documentElement.scrollWidth, - pageHeight: document.documentElement.scrollHeight, - devicePixelRatio: window.devicePixelRatio || 1, - scrollX: window.pageXOffset || document.documentElement.scrollLeft || 0, - scrollY: window.pageYOffset || document.documentElement.scrollTop || 0 - }; - }""") + # dimensions = await page.evaluate("""() => { + # return { + # width: window.innerWidth, + # height: window.innerHeight, + # pageWidth: document.documentElement.scrollWidth, + # pageHeight: document.documentElement.scrollHeight, + # devicePixelRatio: window.devicePixelRatio || 1, + # scrollX: window.pageXOffset || document.documentElement.scrollLeft || 0, + # scrollY: window.pageYOffset || document.documentElement.scrollTop || 0 + # }; + # }""") # When full_page=False, screenshot captures the current viewport # The clip parameter uses viewport coordinates (0,0 is top-left of viewport) # We just need to ensure the clip dimensions don't exceed our maximums - clip_width = min(dimensions['width'], MAX_SCREENSHOT_WIDTH) - clip_height = min(dimensions['height'], MAX_SCREENSHOT_HEIGHT) + # clip_width = min(dimensions['width'], MAX_SCREENSHOT_WIDTH) + # clip_height = min(dimensions['height'], MAX_SCREENSHOT_HEIGHT) # Take screenshot using our retry-decorated method # Don't pass clip parameter - let Playwright capture the full viewport diff --git a/tests/ci/test_browser_session_screenshots.py b/tests/ci/test_browser_session_screenshots.py index 7c3428887..e07fdda8f 100644 --- a/tests/ci/test_browser_session_screenshots.py +++ b/tests/ci/test_browser_session_screenshots.py @@ -194,7 +194,7 @@ class TestHeadlessScreenshots: # Take screenshots from all sessions at the same time print('Taking screenshots from all 10 sessions simultaneously...') - screenshot_tasks = 
[session.take_screenshot(full_page=True) for session in browser_sessions] + screenshot_tasks = [session.take_screenshot() for session in browser_sessions] screenshots = await asyncio.gather(*screenshot_tasks) # Verify all screenshots are valid @@ -222,9 +222,7 @@ class TestHeadlessScreenshots: # Also test taking regular (viewport) screenshots in parallel print('Taking viewport screenshots from all sessions simultaneously...') - viewport_screenshots = await asyncio.gather( - *[session.take_screenshot(full_page=False) for session in browser_sessions] - ) + viewport_screenshots = await asyncio.gather(*[session.take_screenshot() for session in browser_sessions]) # Verify viewport screenshots for i, screenshot in enumerate(viewport_screenshots): From 4e2b5b2f5c18ab9f6f3d1d57980eedc2ac7fbb30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Tue, 1 Jul 2025 10:45:42 +0200 Subject: [PATCH 5/6] Fix typo in evaluation workflow runner name from '16-cores' to '16-core' --- .github/workflows/eval.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/eval.yaml b/.github/workflows/eval.yaml index 5eaba9c30..df624b6cf 100644 --- a/.github/workflows/eval.yaml +++ b/.github/workflows/eval.yaml @@ -7,7 +7,7 @@ on: jobs: run_evaluation: - runs-on: ubuntu-latest-16-cores + runs-on: ubuntu-latest-16-core timeout-minutes: 360 env: IN_DOCKER: 'true' From 31b503fb4296594705510cf32db7fcbf392da0ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Tue, 1 Jul 2025 11:14:07 +0200 Subject: [PATCH 6/6] Name group in eval --- .github/workflows/eval.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/eval.yaml b/.github/workflows/eval.yaml index df624b6cf..0587dd6b9 100644 --- a/.github/workflows/eval.yaml +++ b/.github/workflows/eval.yaml @@ -7,7 +7,9 @@ on: jobs: run_evaluation: - runs-on: 
ubuntu-latest-16-core + runs-on: + group: eval + labels: ubuntu-latest-16-core timeout-minutes: 360 env: IN_DOCKER: 'true'