add video recording

2026-05-06 17:52:15 +02:00 · 2025-08-22 12:13:46 -03:00
parent aa60affe87
commit 49ee1a7a7b
6 changed files with 299 additions and 0 deletions
--- a/browser_use/browser/profile.py
+++ b/browser_use/browser/profile.py
@@ -662,6 +662,17 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
 		default=None, description='File to save cookies to. DEPRECATED, use `storage_state` instead.'
 	)

+	# --- Recording Options ---
+	record_video_dir: Path | None = Field(
+		default=None,
+		description='Directory to save video recordings. If set, a video of the session will be recorded.',
+		validation_alias=AliasChoices('save_recording_path', 'record_video_dir'),
+	)
+	record_video_size: ViewportSize | None = Field(
+		default=None, description='Video frame size. If not set, it will use the viewport size.'
+	)
+	record_video_framerate: int = Field(default=30, description='The framerate to use for the video recording.')
+
 	# TODO: finish implementing extension support in extensions.py
 	# extension_ids_to_preinstall: list[str] = Field(
 	# 	default_factory=list, description='List of Chrome extension IDs to preinstall.'
--- a/browser_use/browser/recording_watchdog.py
+++ b/browser_use/browser/recording_watchdog.py
@@ -0,0 +1,126 @@
+"""Recording Watchdog for Browser Use Sessions."""
+
+import asyncio
+from pathlib import Path
+from typing import ClassVar, Optional
+
+from bubus import BaseEvent
+from cdp_use.cdp.page.events import ScreencastFrameEvent
+from uuid_extensions import uuid7str
+
+from browser_use.browser.events import BrowserConnectedEvent, BrowserStopEvent
+from browser_use.browser.profile import ViewportSize
+from browser_use.browser.video_recorder import VideoRecorderService
+from browser_use.browser.watchdog_base import BaseWatchdog
+
+
+class RecordingWatchdog(BaseWatchdog):
+	"""
+	Manages video recording of a browser session using CDP screencasting.
+
+	Recording starts on BrowserConnectedEvent, but only when the profile sets
+	record_video_dir, and the video file is finalized on BrowserStopEvent.
+	"""
+
+	LISTENS_TO: ClassVar[list[type[BaseEvent]]] = [BrowserConnectedEvent, BrowserStopEvent]
+	EMITS: ClassVar[list[type[BaseEvent]]] = []
+
+	# Recorder for the session in progress; None while nothing is being recorded.
+	_recorder: Optional[VideoRecorderService] = None
+
+	async def on_BrowserConnectedEvent(self, event: BrowserConnectedEvent) -> None:
+		"""Start video recording if it is configured in the browser profile."""
+		profile = self.browser_session.browser_profile
+		if not profile.record_video_dir:
+			return
+
+		# Prefer the configured size; otherwise fall back to the live viewport.
+		size = profile.record_video_size
+		if not size:
+			self.logger.debug('record_video_size not specified, detecting viewport size...')
+			size = await self._get_current_viewport_size()
+
+		if not size:
+			self.logger.warning('Cannot start video recording: viewport size could not be determined.')
+			return
+
+		# 'mp4' is used unless the profile exposes a record_video_format attribute.
+		video_format = getattr(profile, 'record_video_format', 'mp4').strip('.')
+		output_path = Path(profile.record_video_dir) / f'{uuid7str()}.{video_format}'
+
+		self.logger.debug(f'Initializing video recorder for format: {video_format}')
+		self._recorder = VideoRecorderService(output_path=output_path, size=size, framerate=profile.record_video_framerate)
+		self._recorder.start()
+
+		# start() logs and leaves the recorder inactive when the writer cannot be created.
+		if not self._recorder._is_active:
+			self._recorder = None
+			return
+
+		self.browser_session.cdp_client.register.Page.screencastFrame(self.on_screencastFrame)
+
+		try:
+			cdp_session = await self.browser_session.get_or_create_cdp_session()
+			await cdp_session.cdp_client.send.Page.startScreencast(
+				params={
+					'format': 'png',
+					'quality': 90,
+					'maxWidth': size['width'],
+					'maxHeight': size['height'],
+					'everyNthFrame': 1,
+				},
+				session_id=cdp_session.session_id,
+			)
+			self.logger.info(f'📹 Started video recording to {output_path}')
+		except Exception as e:
+			self.logger.error(f'Failed to start screencast via CDP: {e}')
+			if self._recorder:
+				self._recorder.stop_and_save()
+				self._recorder = None
+
+	async def _get_current_viewport_size(self) -> Optional[ViewportSize]:
+		"""Return the current viewport size from CDP layout metrics, or None if unavailable."""
+		try:
+			cdp_session = await self.browser_session.get_or_create_cdp_session()
+			metrics = await cdp_session.cdp_client.send.Page.getLayoutMetrics(session_id=cdp_session.session_id)
+
+			# Use cssVisualViewport for the most accurate representation of the visible area
+			viewport = metrics.get('cssVisualViewport', {})
+			width = viewport.get('clientWidth')
+			height = viewport.get('clientHeight')
+
+			if width and height:
+				self.logger.debug(f'Detected viewport size: {width}x{height}')
+				return ViewportSize(width=int(width), height=int(height))
+		except Exception as e:
+			self.logger.warning(f'Failed to get viewport size from browser: {e}')
+
+		return None
+
+	def on_screencastFrame(self, event: ScreencastFrameEvent, session_id: Optional[str]) -> None:
+		"""Synchronous handler: pass the frame to the recorder, then schedule its acknowledgement."""
+		if not self._recorder:
+			return
+		self._recorder.add_frame(event['data'])
+		# NOTE(review): the task reference is not retained; asyncio only keeps weak references to tasks.
+		asyncio.create_task(self._ack_screencast_frame(event, session_id))
+
+	async def _ack_screencast_frame(self, event: ScreencastFrameEvent, session_id: Optional[str]) -> None:
+		"""Acknowledge a screencast frame; failures are logged at debug level, not raised."""
+		try:
+			await self.browser_session.cdp_client.send.Page.screencastFrameAck(
+				params={'sessionId': event['sessionId']}, session_id=session_id
+			)
+		except Exception as e:
+			self.logger.debug(f'Failed to acknowledge screencast frame: {e}')
+
+	async def on_BrowserStopEvent(self, event: BrowserStopEvent) -> None:
+		"""Stop the video recording and finalize the video file."""
+		if self._recorder:
+			# Detach first so frames arriving during shutdown are ignored.
+			recorder = self._recorder
+			self._recorder = None
+
+			self.logger.debug('Stopping video recording and saving file...')
+			# get_running_loop() is preferred over get_event_loop() inside coroutines.
+			await asyncio.get_running_loop().run_in_executor(None, recorder.stop_and_save)
--- a/browser_use/browser/session.py
+++ b/browser_use/browser/session.py
@@ -229,6 +229,7 @@ class BrowserSession(BaseModel):
 	_dom_watchdog: Any | None = PrivateAttr(default=None)
 	_screenshot_watchdog: Any | None = PrivateAttr(default=None)
 	_permissions_watchdog: Any | None = PrivateAttr(default=None)
+	_recording_watchdog: Any | None = PrivateAttr(default=None)

 	_logger: Any = PrivateAttr(default=None)

@@ -281,6 +282,7 @@ class BrowserSession(BaseModel):
 		self._dom_watchdog = None
 		self._screenshot_watchdog = None
 		self._permissions_watchdog = None
+		self._recording_watchdog = None

 	def model_post_init(self, __context) -> None:
 		"""Register event handlers after model initialization."""
@@ -804,6 +806,7 @@ class BrowserSession(BaseModel):
 		from browser_use.browser.local_browser_watchdog import LocalBrowserWatchdog
 		from browser_use.browser.permissions_watchdog import PermissionsWatchdog
 		from browser_use.browser.popups_watchdog import PopupsWatchdog
+		from browser_use.browser.recording_watchdog import RecordingWatchdog
 		from browser_use.browser.screenshot_watchdog import ScreenshotWatchdog
 		from browser_use.browser.security_watchdog import SecurityWatchdog
 		# from browser_use.browser.storage_state_watchdog import StorageStateWatchdog
@@ -903,6 +906,11 @@ class BrowserSession(BaseModel):
 		# self.event_bus.on(BrowserStateRequestEvent, self._dom_watchdog.on_BrowserStateRequestEvent)
 		self._dom_watchdog.attach_to_session()

+		# Initialize RecordingWatchdog (handles video recording)
+		RecordingWatchdog.model_rebuild()
+		self._recording_watchdog = RecordingWatchdog(event_bus=self.event_bus, browser_session=self)
+		self._recording_watchdog.attach_to_session()
+
 		# Mark watchdogs as attached to prevent duplicate attachment
 		self._watchdogs_attached = True

--- a/browser_use/browser/video_recorder.py
+++ b/browser_use/browser/video_recorder.py
@@ -0,0 +1,125 @@
+"""Video Recording Service for Browser Use Sessions."""
+
+import base64
+import logging
+from pathlib import Path
+from typing import Optional
+
+from browser_use.browser.profile import ViewportSize
+
+try:
+	import imageio.v2 as iio
+	from imageio.core.format import Format
+
+	IMAGEIO_AVAILABLE = True
+except ImportError:
+	IMAGEIO_AVAILABLE = False
+
+logger = logging.getLogger(__name__)
+
+
+class VideoRecorderService:
+	"""
+	Handles the video encoding process for a browser session using imageio.
+
+	This service captures individual frames from the CDP screencast, decodes them,
+	and appends them to a video file using a pip-installable ffmpeg backend.
+	Frames whose dimensions differ from the target video size are skipped.
+	"""
+
+	def __init__(self, output_path: Path, size: ViewportSize, framerate: int):
+		"""
+		Initializes the video recorder.
+
+		Args:
+		    output_path: The full path where the video will be saved.
+		    size: A ViewportSize object specifying the width and height of the video.
+		    framerate: The desired framerate for the output video.
+		"""
+		self.output_path = output_path
+		self.size = size
+		self.framerate = framerate
+		self._writer: Optional['Format.Writer'] = None
+		self._is_active = False
+
+	def start(self) -> None:
+		"""
+		Prepares and starts the video writer.
+
+		If the required optional dependencies are not installed, this method will
+		log an error and do nothing.
+		"""
+		if not IMAGEIO_AVAILABLE:
+			logger.error(
+				'MP4 recording requires optional dependencies. Please install them with: pip install "browser-use[video]"'
+			)
+			return
+
+		try:
+			self.output_path.parent.mkdir(parents=True, exist_ok=True)
+			self._writer = iio.get_writer(
+				str(self.output_path),
+				fps=self.framerate,
+				codec='libx264',
+				quality=8,  # A good balance of quality and file size (1-10 scale)
+				pixelformat='yuv420p',  # Ensures compatibility with most players
+				macro_block_size=16,  # Recommended for h264
+			)
+			self._is_active = True
+			logger.debug(f'Video recorder started. Output will be saved to {self.output_path}')
+		except Exception as e:
+			logger.error(f'Failed to initialize video writer: {e}')
+			self._is_active = False
+
+	def add_frame(self, frame_data_b64: str) -> None:
+		"""
+		Decodes a base64-encoded PNG frame and appends it to the video.
+
+		Decoding and encoding run synchronously in the caller. Frames that cannot
+		be decoded or written are logged and dropped instead of raising.
+
+		Args:
+		    frame_data_b64: A base64-encoded string of the PNG frame data.
+		"""
+		if not self._is_active or not self._writer:
+			return
+
+		try:
+			frame_bytes = base64.b64decode(frame_data_b64)
+			# imageio reads bytes directly and converts to a numpy array
+			# The format is auto-detected from the bytes.
+			img_array = iio.imread(frame_bytes)
+
+			# Ensure frame dimensions match video dimensions; shape[:2] also covers 2-D (grayscale) arrays
+			h, w = img_array.shape[:2]
+			if w != self.size['width'] or h != self.size['height']:
+				# This can happen if the viewport changes mid-recording.
+				# A more robust solution could involve resizing, but that is non-trivial.
+				# For now, the video size must be the same as the viewport
+				logger.warning(
+					f'Frame size ({w}x{h}) does not match video size '
+					f'({self.size["width"]}x{self.size["height"]}). Skipping frame.'
+				)
+				return
+
+			self._writer.append_data(img_array)
+		except Exception as e:
+			logger.warning(f'Could not process and add video frame: {e}')
+
+	def stop_and_save(self) -> None:
+		"""
+		Finalizes the video file by closing the writer.
+
+		This method should be called when the recording session is complete.
+		"""
+		if not self._is_active or not self._writer:
+			return
+
+		try:
+			self._writer.close()
+			logger.info(f'📹 Video recording saved successfully to: {self.output_path}')
+		except Exception as e:
+			logger.error(f'Failed to finalize and save video: {e}')
+		finally:
+			self._is_active = False
+			self._writer = None
--- a/examples/features/video_recording.py
+++ b/examples/features/video_recording.py
@@ -0,0 +1,26 @@
+import asyncio
+from pathlib import Path
+
+from browser_use import Agent, BrowserProfile, BrowserSession, ChatOpenAI
+
+
+async def main():
+	"""Run a short agent task with session video recording switched on."""
+	# Setting record_video_dir on the profile is what enables recording.
+	recordings_dir = Path('./tmp/recordings')
+	session = BrowserSession(
+		browser_profile=BrowserProfile(headless=False, record_video_dir=recordings_dir),
+	)
+	task = 'Go to github.com/trending then navigate to the first trending repository.'
+	model = ChatOpenAI(model='gpt-4.1-mini')
+
+	recording_agent = Agent(task=task, llm=model, browser_session=session)
+	await recording_agent.run(max_steps=5)
+
+	# The recording is finalized when the browser session stops
+	# (RecordingWatchdog.on_BrowserStopEvent), not by this script.
+	print('Agent run finished. Check the ./tmp/recordings directory for the video.')
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -61,6 +61,9 @@ cli = [
 aws = [
    "boto3>=1.38.45"
 ]
+video = [
+    "imageio[ffmpeg]>=2.37.0"
+]
 examples = [
    # botocore: only needed for Bedrock Claude boto3 examples/models/bedrock_claude.py
    "botocore>=1.37.23",