add video recording

This commit is contained in:
EnzoFanAccount
2025-08-22 12:13:46 -03:00
parent aa60affe87
commit 49ee1a7a7b
6 changed files with 299 additions and 0 deletions

View File

@@ -662,6 +662,17 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
default=None, description='File to save cookies to. DEPRECATED, use `storage_state` instead.'
)
# --- Recording Options ---
record_video_dir: Path | None = Field(
default=None,
description='Directory to save video recordings. If set, a video of the session will be recorded.',
validation_alias=AliasChoices('save_recording_path', 'record_video_dir'),
)
record_video_size: ViewportSize | None = Field(
default=None, description='Video frame size. If not set, it will use the viewport size.'
)
record_video_framerate: int = Field(default=30, description='The framerate to use for the video recording.')
# TODO: finish implementing extension support in extensions.py
# extension_ids_to_preinstall: list[str] = Field(
# default_factory=list, description='List of Chrome extension IDs to preinstall.'

View File

@@ -0,0 +1,126 @@
"""Recording Watchdog for Browser Use Sessions."""
import asyncio
from pathlib import Path
from typing import ClassVar, Optional
from bubus import BaseEvent
from cdp_use.cdp.page.events import ScreencastFrameEvent
from uuid_extensions import uuid7str
from browser_use.browser.events import BrowserConnectedEvent, BrowserStopEvent
from browser_use.browser.profile import ViewportSize
from browser_use.browser.video_recorder import VideoRecorderService
from browser_use.browser.watchdog_base import BaseWatchdog
class RecordingWatchdog(BaseWatchdog):
    """Manage video recording of a browser session using CDP screencasting.

    Recording starts on ``BrowserConnectedEvent`` when the browser profile sets
    ``record_video_dir`` and is finalized on ``BrowserStopEvent``. Frames are
    delivered by the ``Page.screencastFrame`` CDP event and handed to a
    ``VideoRecorderService``.
    """

    LISTENS_TO: ClassVar[list[type[BaseEvent]]] = [BrowserConnectedEvent, BrowserStopEvent]
    EMITS: ClassVar[list[type[BaseEvent]]] = []

    # Active recorder, or None when recording is disabled, failed to start, or was stopped.
    _recorder: Optional[VideoRecorderService] = None
    # Strong references to in-flight frame-ack tasks (created lazily). The event loop
    # only keeps weak references to tasks, so an unreferenced task may be collected
    # before it finishes.
    _ack_tasks: Optional[set] = None

    async def on_BrowserConnectedEvent(self, event: BrowserConnectedEvent) -> None:
        """Start video recording if it is configured in the browser profile.

        Does nothing when ``record_video_dir`` is unset, when no frame size can
        be determined, or when the recorder fails to start. If the CDP
        screencast cannot be started, the recorder is closed again.
        """
        profile = self.browser_session.browser_profile
        if not profile.record_video_dir:
            return

        # Prefer the explicitly configured size; otherwise ask the browser.
        size = profile.record_video_size
        if not size:
            self.logger.debug('record_video_size not specified, detecting viewport size...')
            size = await self._get_current_viewport_size()
        if not size:
            self.logger.warning('Cannot start video recording: viewport size could not be determined.')
            return

        # The profile may not define `record_video_format`; fall back to mp4 when the
        # attribute is missing, None or empty instead of raising or producing 'name.'.
        video_format = (getattr(profile, 'record_video_format', None) or 'mp4').strip('.')
        output_path = Path(profile.record_video_dir) / f'{uuid7str()}.{video_format}'

        self.logger.debug(f'Initializing video recorder for format: {video_format}')
        recorder = VideoRecorderService(output_path=output_path, size=size, framerate=profile.record_video_framerate)
        recorder.start()
        if not recorder._is_active:
            # start() already logged why the writer could not be created.
            self._recorder = None
            return
        self._recorder = recorder

        self.browser_session.cdp_client.register.Page.screencastFrame(self.on_screencastFrame)

        try:
            cdp_session = await self.browser_session.get_or_create_cdp_session()
            await cdp_session.cdp_client.send.Page.startScreencast(
                params={
                    'format': 'png',
                    'quality': 90,
                    'maxWidth': size['width'],
                    'maxHeight': size['height'],
                    'everyNthFrame': 1,
                },
                session_id=cdp_session.session_id,
            )
            self.logger.info(f'📹 Started video recording to {output_path}')
        except Exception as e:
            self.logger.error(f'Failed to start screencast via CDP: {e}')
            # Only clean up if a stop event has not already taken ownership of the recorder.
            if self._recorder is recorder:
                # Detach first so late frames are ignored, then close the writer off
                # the event loop because closing is a blocking call.
                self._recorder = None
                await asyncio.get_running_loop().run_in_executor(None, recorder.stop_and_save)

    async def _get_current_viewport_size(self) -> Optional[ViewportSize]:
        """Return the current viewport size reported by the browser via CDP.

        Returns:
            A ``ViewportSize`` built from ``cssVisualViewport`` of
            ``Page.getLayoutMetrics``, or ``None`` if the call fails or the
            metrics contain no usable width/height.
        """
        try:
            cdp_session = await self.browser_session.get_or_create_cdp_session()
            metrics = await cdp_session.cdp_client.send.Page.getLayoutMetrics(session_id=cdp_session.session_id)

            # Use cssVisualViewport for the most accurate representation of the visible area
            viewport = metrics.get('cssVisualViewport', {})
            width = viewport.get('clientWidth')
            height = viewport.get('clientHeight')

            if width and height:
                self.logger.debug(f'Detected viewport size: {width}x{height}')
                return ViewportSize(width=int(width), height=int(height))
        except Exception as e:
            self.logger.warning(f'Failed to get viewport size from browser: {e}')

        return None

    def on_screencastFrame(self, event: ScreencastFrameEvent, session_id: Optional[str]) -> None:
        """Synchronous handler for incoming screencast frames.

        Passes the frame to the recorder and schedules the CDP acknowledgement.
        Frames that arrive while no recorder is active are ignored and not
        acknowledged.
        """
        recorder = self._recorder
        if not recorder:
            return

        recorder.add_frame(event['data'])

        # Keep a strong reference until the ack finishes; a fire-and-forget task
        # can otherwise be garbage-collected mid-execution.
        ack_task = asyncio.create_task(self._ack_screencast_frame(event, session_id))
        if self._ack_tasks is None:
            self._ack_tasks = set()
        self._ack_tasks.add(ack_task)
        ack_task.add_done_callback(self._ack_tasks.discard)

    async def _ack_screencast_frame(self, event: ScreencastFrameEvent, session_id: Optional[str]) -> None:
        """Acknowledge a screencast frame via ``Page.screencastFrameAck``.

        Failures are logged at debug level and otherwise ignored.
        """
        try:
            await self.browser_session.cdp_client.send.Page.screencastFrameAck(
                params={'sessionId': event['sessionId']}, session_id=session_id
            )
        except Exception as e:
            self.logger.debug(f'Failed to acknowledge screencast frame: {e}')

    async def on_BrowserStopEvent(self, event: BrowserStopEvent) -> None:
        """Stop the video recording and finalize the video file."""
        recorder = self._recorder
        if not recorder:
            return

        # Detach before closing so frames arriving during shutdown are dropped.
        self._recorder = None
        self.logger.debug('Stopping video recording and saving file...')
        # Closing the writer blocks, so run it in the default executor.
        # get_running_loop() is the supported way to obtain the loop inside a coroutine.
        await asyncio.get_running_loop().run_in_executor(None, recorder.stop_and_save)

View File

@@ -229,6 +229,7 @@ class BrowserSession(BaseModel):
_dom_watchdog: Any | None = PrivateAttr(default=None)
_screenshot_watchdog: Any | None = PrivateAttr(default=None)
_permissions_watchdog: Any | None = PrivateAttr(default=None)
_recording_watchdog: Any | None = PrivateAttr(default=None)
_logger: Any = PrivateAttr(default=None)
@@ -281,6 +282,7 @@ class BrowserSession(BaseModel):
self._dom_watchdog = None
self._screenshot_watchdog = None
self._permissions_watchdog = None
self._recording_watchdog = None
def model_post_init(self, __context) -> None:
"""Register event handlers after model initialization."""
@@ -804,6 +806,7 @@ class BrowserSession(BaseModel):
from browser_use.browser.local_browser_watchdog import LocalBrowserWatchdog
from browser_use.browser.permissions_watchdog import PermissionsWatchdog
from browser_use.browser.popups_watchdog import PopupsWatchdog
from browser_use.browser.recording_watchdog import RecordingWatchdog
from browser_use.browser.screenshot_watchdog import ScreenshotWatchdog
from browser_use.browser.security_watchdog import SecurityWatchdog
# from browser_use.browser.storage_state_watchdog import StorageStateWatchdog
@@ -903,6 +906,11 @@ class BrowserSession(BaseModel):
# self.event_bus.on(BrowserStateRequestEvent, self._dom_watchdog.on_BrowserStateRequestEvent)
self._dom_watchdog.attach_to_session()
# Initialize RecordingWatchdog (handles video recording)
RecordingWatchdog.model_rebuild()
self._recording_watchdog = RecordingWatchdog(event_bus=self.event_bus, browser_session=self)
self._recording_watchdog.attach_to_session()
# Mark watchdogs as attached to prevent duplicate attachment
self._watchdogs_attached = True

View File

@@ -0,0 +1,125 @@
"""Video Recording Service for Browser Use Sessions."""
import base64
import logging
from pathlib import Path
from typing import Optional
from browser_use.browser.profile import ViewportSize
try:
import imageio.v2 as iio
from imageio.core.format import Format
IMAGEIO_AVAILABLE = True
except ImportError:
IMAGEIO_AVAILABLE = False
logger = logging.getLogger(__name__)
class VideoRecorderService:
    """Handle the video encoding process for a browser session using imageio.

    The service receives individual base64-encoded frames from the CDP
    screencast, decodes them, and appends them to a video file through
    imageio's ffmpeg writer. Frames whose dimensions differ from the
    configured video size are skipped; they are NOT resized.
    """

    def __init__(self, output_path: Path, size: ViewportSize, framerate: int):
        """Initialize the video recorder.

        Args:
            output_path: The full path where the video will be saved.
            size: A ViewportSize object specifying the width and height of the video.
            framerate: The desired framerate for the output video.
        """
        self.output_path = output_path
        self.size = size
        self.framerate = framerate
        self._writer: Optional['Format.Writer'] = None
        self._is_active = False
        # (width, height) of the most recently skipped mismatched frame, used to
        # avoid emitting the same warning for every single frame.
        self._last_mismatch: Optional[tuple] = None

    def start(self) -> None:
        """Create the output directory and open the video writer.

        If the optional imageio dependency is not installed, or the writer
        cannot be created, an error is logged and the recorder stays inactive.
        """
        if not IMAGEIO_AVAILABLE:
            logger.error(
                'MP4 recording requires optional dependencies. Please install them with: pip install "browser-use[video]"'
            )
            return

        try:
            self.output_path.parent.mkdir(parents=True, exist_ok=True)
            self._writer = iio.get_writer(
                str(self.output_path),
                fps=self.framerate,
                codec='libx264',
                quality=8,  # A good balance of quality and file size (1-10 scale)
                pixelformat='yuv420p',  # Ensures compatibility with most players
                macro_block_size=16,  # Recommended for h264
            )
            self._is_active = True
            logger.debug(f'Video recorder started. Output will be saved to {self.output_path}')
        except Exception as e:
            logger.error(f'Failed to initialize video writer: {e}')
            self._is_active = False

    def add_frame(self, frame_data_b64: str) -> None:
        """Decode a base64-encoded frame and append it to the video.

        The work is done synchronously in the caller's thread. Frames that
        cannot be decoded or written are logged and dropped rather than raised.

        Args:
            frame_data_b64: A base64-encoded string of the encoded frame image.
        """
        if not self._is_active or not self._writer:
            return

        try:
            frame_bytes = base64.b64decode(frame_data_b64)

            # imageio reads bytes directly and converts to a numpy array
            # The format is auto-detected from the bytes.
            img_array = iio.imread(frame_bytes)

            # Only the first two axes are height and width; a 2-D (grayscale)
            # array has no channel axis, so do not unpack three values.
            h, w = img_array.shape[:2]
            if w != self.size['width'] or h != self.size['height']:
                # This can happen if the viewport changes mid-recording.
                # A more robust solution could involve resizing, but that is non-trivial.
                # For now, the video size must be the same as the viewport
                if self._last_mismatch != (w, h):
                    # Warn once per distinct mismatching size instead of once per frame.
                    self._last_mismatch = (w, h)
                    logger.warning(
                        f'Frame size ({w}x{h}) does not match video size '
                        f'({self.size["width"]}x{self.size["height"]}). Skipping frame.'
                    )
                return

            self._last_mismatch = None
            self._writer.append_data(img_array)
        except Exception as e:
            logger.warning(f'Could not process and add video frame: {e}')

    def stop_and_save(self) -> None:
        """Finalize the video file by closing the writer.

        Safe to call when the recorder never started or was already stopped;
        in that case it does nothing. Errors while closing are logged, and the
        recorder is always left inactive afterwards.
        """
        if not self._is_active or not self._writer:
            return

        try:
            self._writer.close()
            logger.info(f'📹 Video recording saved successfully to: {self.output_path}')
        except Exception as e:
            logger.error(f'Failed to finalize and save video: {e}')
        finally:
            self._is_active = False
            self._writer = None

View File

@@ -0,0 +1,26 @@
import asyncio
from pathlib import Path
from browser_use import Agent, BrowserProfile, BrowserSession, ChatOpenAI
async def main():
    """Run a short agent task with video recording enabled on the browser profile."""
    # Setting record_video_dir on the profile is what switches recording on.
    recordings_dir = Path('./tmp/recordings')
    profile = BrowserProfile(headless=False, record_video_dir=recordings_dir)
    session = BrowserSession(browser_profile=profile)

    recording_agent = Agent(
        task='Go to github.com/trending then navigate to the first trending repository.',
        llm=ChatOpenAI(model='gpt-4.1-mini'),
        browser_session=session,
    )
    await recording_agent.run(max_steps=5)

    # The recording is finalized automatically once the agent finishes and the session closes.
    print('Agent run finished. Check the ./tmp/recordings directory for the video.')


if __name__ == '__main__':
    asyncio.run(main())

View File

@@ -61,6 +61,9 @@ cli = [
aws = [
"boto3>=1.38.45"
]
video = [
"imageio[ffmpeg]>=2.37.0"
]
examples = [
# botocore: only needed for Bedrock Claude boto3 examples/models/bedrock_claude.py
"botocore>=1.37.23",