Tweak Chrome launch args in Docker to fix screenshots

This commit is contained in:
Nick Sweeting
2025-06-26 18:47:56 -07:00
parent b2059653e9
commit 8dfad7e557
3 changed files with 18 additions and 4 deletions

View File

@@ -76,12 +76,14 @@ CHROME_DOCKER_ARGS = [
'--disable-dev-shm-usage',
'--no-xshm',
'--no-zygote',
'--single-process',
# '--single-process', # causes "Target page, context or browser has been closed" errors during CDP page.captureScreenshot https://stackoverflow.com/questions/51629151/puppeteer-protocol-error-page-navigate-target-closed
'--disable-site-isolation-trials', # TODO: this might fix screenshots too but could lead to easier bot blocking
]
CHROME_DISABLE_SECURITY_ARGS = [
'--disable-web-security',
'--disable-site-isolation-trials',
'--disable-web-security',
'--disable-features=IsolateOrigins,site-per-process',
'--allow-running-insecure-content',
'--ignore-certificate-errors',

View File

@@ -2610,7 +2610,7 @@ class BrowserSession(BaseModel):
raise
# region - Browser Actions
@retry(timeout=30, retries=2, semaphore_limit=1, semaphore_scope='self')
@retry(timeout=30, retries=2, semaphore_limit=1, semaphore_scope='global')
async def _take_screenshot_cdp(
self, page: Page, width: int = 1920, height: int = 2000, x: int = 0, y: int = 0, scale: int = 1
) -> str:
@@ -2632,9 +2632,16 @@ class BrowserSession(BaseModel):
cdp_session = await page.context.new_cdp_session(page) # type: ignore
# Use Page.captureScreenshot for direct screenshot without Playwright overhead
cdp_params = {'format': 'png', 'clip': {'x': x, 'y': y, 'width': width, 'height': height, 'scale': scale}}
cdp_params = {
'format': 'png',
'clip': {'x': x, 'y': y, 'width': width, 'height': height, 'scale': scale},
'optimizeForSpeed': True,
'captureBeyondViewport': True,
'fromSurface': True,
}
# Take the screenshot using CDP
# https://chromedevtools.github.io/devtools-protocol/tot/Page/#method-captureScreenshot
result = await cdp_session.send('Page.captureScreenshot', cdp_params)
# The result already contains base64 encoded data

View File

@@ -99,6 +99,11 @@ class TestAgentEventLifecycle:
async def test_agent_with_gif_generation(self, mock_llm, browser_session, cloud_sync, event_collector, httpserver):
"""Test that GIF generation triggers CreateAgentOutputFileEvent"""
# Setup cloud sync endpoint
httpserver.expect_request('/api/v1/events', method='POST').respond_with_json(
{'processed': 1, 'failed': 0, 'results': [{'success': True}]}
)
# Setup a test page
httpserver.expect_request('/').respond_with_data('<html><body><h1>GIF Test</h1></body></html>', content_type='text/html')
await browser_session.navigate(httpserver.url_for('/'))