diff --git a/.github/workflows/eval.yaml b/.github/workflows/eval.yaml index 5eaba9c30..0587dd6b9 100644 --- a/.github/workflows/eval.yaml +++ b/.github/workflows/eval.yaml @@ -7,7 +7,9 @@ on: jobs: run_evaluation: - runs-on: ubuntu-latest-16-cores + runs-on: + group: eval + labels: ubuntu-latest-16-core timeout-minutes: 360 env: IN_DOCKER: 'true' diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index 783a0afe1..fa4cfceab 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -25,7 +25,7 @@ os.environ['PW_TEST_SCREENSHOT_NO_FONTS_READY'] = '1' # https://github.com/micr import anyio import psutil -from playwright._impl._api_structures import FloatRect, ViewportSize +from playwright._impl._api_structures import ViewportSize from pydantic import AliasChoices, BaseModel, ConfigDict, Field, InstanceOf, PrivateAttr, model_validator from uuid_extensions import uuid7str @@ -248,12 +248,12 @@ class BrowserSession(BaseModel): return self._logger def __repr__(self) -> str: - is_copy = '©' if self._original_browser_session else '1️⃣ ' - return f'BrowserSession🆂 {self.id[-4:]}{is_copy}{str(id(self))[-2:]} ({self._connection_str}, profile={self.browser_profile})' + is_copy = '©' if self._original_browser_session else '#' + return f'BrowserSession🆂 {self.id[-4:]} {is_copy}{str(id(self))[-2:]} ({self._connection_str}, profile={self.browser_profile})' def __str__(self) -> str: - is_copy = '©' if self._original_browser_session else '1️⃣ ' - return f'BrowserSession🆂 {self.id[-4:]}{is_copy}{str(id(self))[-2:]} 🅟 {str(id(self.agent_current_page))[-2:]}' + is_copy = '©' if self._original_browser_session else '#' + return f'BrowserSession🆂 {self.id[-4:]} {is_copy}{str(id(self))[-2:]} 🅟 {str(id(self.agent_current_page))[-2:]}' # better to force people to get it from the right object, "only one way to do it" is better python # def __getattr__(self, key: str) -> Any: @@ -725,7 +725,7 @@ class BrowserSession(BaseModel): full_page=False, # scale='css', timeout=self.browser_profile.default_timeout or 30000, - clip=FloatRect(**clip) if clip else None, + # clip=FloatRect(**clip) if clip else None, animations='allow', caret='initial', ) @@ -2714,27 +2714,28 @@ class BrowserSession(BaseModel): # This prevents timeouts on very long pages # 1. Get current viewport and page dimensions including scroll position - dimensions = await page.evaluate("""() => { - return { - width: window.innerWidth, - height: window.innerHeight, - pageHeight: document.documentElement.scrollHeight, - devicePixelRatio: window.devicePixelRatio || 1, - scrollX: window.pageXOffset || document.documentElement.scrollLeft || 0, - scrollY: window.pageYOffset || document.documentElement.scrollTop || 0 - }; - }""") + # dimensions = await page.evaluate("""() => { + # return { + # width: window.innerWidth, + # height: window.innerHeight, + # pageWidth: document.documentElement.scrollWidth, + # pageHeight: document.documentElement.scrollHeight, + # devicePixelRatio: window.devicePixelRatio || 1, + # scrollX: window.pageXOffset || document.documentElement.scrollLeft || 0, + # scrollY: window.pageYOffset || document.documentElement.scrollTop || 0 + # }; + # }""") + + # When full_page=False, screenshot captures the current viewport + # The clip parameter uses viewport coordinates (0,0 is top-left of viewport) + # We just need to ensure the clip dimensions don't exceed our maximums + # clip_width = min(dimensions['width'], MAX_SCREENSHOT_WIDTH) + # clip_height = min(dimensions['height'], MAX_SCREENSHOT_HEIGHT) # Take screenshot using our retry-decorated method - return await self._take_screenshot_hybrid( - page, - clip={ - 'x': dimensions['scrollX'], - 'y': dimensions['scrollY'], - 'width': min(dimensions['width'], MAX_SCREENSHOT_WIDTH), - 'height': min(dimensions['height'], MAX_SCREENSHOT_HEIGHT), - }, - ) + # Don't pass clip parameter - let Playwright capture the full viewport + # It will automatically handle cases where viewport extends beyond page content + return await self._take_screenshot_hybrid(page) except Exception as e: self.logger.error(f'❌ Failed to take screenshot after retries: {type(e).__name__}: {e}') raise diff --git a/tests/ci/test_browser_session_screenshots.py b/tests/ci/test_browser_session_screenshots.py index b1c15c8c9..e07fdda8f 100644 --- a/tests/ci/test_browser_session_screenshots.py +++ b/tests/ci/test_browser_session_screenshots.py @@ -2,6 +2,7 @@ Test that screenshots work correctly in headless browser mode. """ +import asyncio import base64 from browser_use.browser import BrowserProfile, BrowserSession @@ -193,7 +194,7 @@ class TestHeadlessScreenshots: # Take screenshots from all sessions at the same time print('Taking screenshots from all 10 sessions simultaneously...') - screenshot_tasks = [session.take_screenshot(full_page=True) for session in browser_sessions] + screenshot_tasks = [session.take_screenshot() for session in browser_sessions] screenshots = await asyncio.gather(*screenshot_tasks) # Verify all screenshots are valid @@ -221,9 +222,7 @@ class TestHeadlessScreenshots: # Also test taking regular (viewport) screenshots in parallel print('Taking viewport screenshots from all sessions simultaneously...') - viewport_screenshots = await asyncio.gather( - *[session.take_screenshot(full_page=False) for session in browser_sessions] - ) + viewport_screenshots = await asyncio.gather(*[session.take_screenshot() for session in browser_sessions]) # Verify viewport screenshots for i, screenshot in enumerate(viewport_screenshots): @@ -244,3 +243,69 @@ class TestHeadlessScreenshots: for i, result in enumerate(results): if isinstance(result, Exception): print(f'Warning: Session {i} kill raised exception: {type(result).__name__}: {result}') + + async def test_screenshot_at_bottom_of_page(self, httpserver): + """Test screenshot capture when scrolled to bottom of page (regression test for clipping issue)""" + browser_session = BrowserSession( + browser_profile=BrowserProfile( + headless=True, + user_data_dir=None, + keep_alive=False, + ) + ) + + try: + await browser_session.start() + + # Create a page with scrollable content + httpserver.expect_request('/scrollable').respond_with_data( + """ + Scrollable Page Test + +
+
Top of page
+
Middle of page
+
Bottom of page
+
+ + """, + content_type='text/html', + ) + + # Navigate to test page + await browser_session.navigate(httpserver.url_for('/scrollable')) + page = browser_session.agent_current_page + assert page is not None + + # Test 1: Screenshot at top of page (should work) + screenshot_top = await browser_session.take_screenshot() + assert screenshot_top is not None + assert len(base64.b64decode(screenshot_top)) > 5000 + + # Test 2: Screenshot at middle of page + await page.evaluate('window.scrollTo(0, document.body.scrollHeight / 2)') + await asyncio.sleep(0.1) # Wait for scroll + screenshot_middle = await browser_session.take_screenshot() + assert screenshot_middle is not None + assert len(base64.b64decode(screenshot_middle)) > 5000 + + # Test 3: Screenshot at bottom of page (this was failing with clipping error) + await page.evaluate('window.scrollTo(0, document.body.scrollHeight)') + await asyncio.sleep(0.1) # Wait for scroll + + # This should not raise "Clipped area is either empty or outside the resulting image" error + screenshot_bottom = await browser_session.take_screenshot() + assert screenshot_bottom is not None + assert len(base64.b64decode(screenshot_bottom)) > 5000 + + # Test 4: Screenshot when scrolled beyond page bottom (edge case) + await page.evaluate('window.scrollTo(0, document.body.scrollHeight + 1000)') + await asyncio.sleep(0.1) + screenshot_beyond = await browser_session.take_screenshot() + assert screenshot_beyond is not None + assert len(base64.b64decode(screenshot_beyond)) > 5000 + + print('✅ All screenshot positions tested successfully!') + + finally: + await browser_session.stop()