diff --git a/browser_use/browser/profile.py b/browser_use/browser/profile.py
index 06b135497..4ec5b50ec 100644
--- a/browser_use/browser/profile.py
+++ b/browser_use/browser/profile.py
@@ -662,6 +662,17 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
 		default=None, description='File to save cookies to. DEPRECATED, use `storage_state` instead.'
 	)
 
+	# --- Recording Options ---
+	record_video_dir: Path | None = Field(
+		default=None,
+		description='Directory to save video recordings. If set, a video of the session will be recorded.',
+		validation_alias=AliasChoices('save_recording_path', 'record_video_dir'),
+	)
+	record_video_size: ViewportSize | None = Field(
+		default=None, description='Video frame size. If not set, it will use the viewport size.'
+	)
+	record_video_framerate: int = Field(default=30, description='The framerate to use for the video recording.')
+
 	# TODO: finish implementing extension support in extensions.py
 	# extension_ids_to_preinstall: list[str] = Field(
 	# 	default_factory=list, description='List of Chrome extension IDs to preinstall.'
diff --git a/browser_use/browser/recording_watchdog.py b/browser_use/browser/recording_watchdog.py
new file mode 100644
index 000000000..9b67c3e7d
--- /dev/null
+++ b/browser_use/browser/recording_watchdog.py
@@ -0,0 +1,131 @@
+"""Recording Watchdog for Browser Use Sessions."""
+
+import asyncio
+from pathlib import Path
+from typing import ClassVar, Optional
+
+from bubus import BaseEvent
+from cdp_use.cdp.page.events import ScreencastFrameEvent
+from uuid_extensions import uuid7str
+
+from browser_use.browser.events import BrowserConnectedEvent, BrowserStopEvent
+from browser_use.browser.profile import ViewportSize
+from browser_use.browser.video_recorder import VideoRecorderService
+from browser_use.browser.watchdog_base import BaseWatchdog
+
+
+class RecordingWatchdog(BaseWatchdog):
+	"""
+	Manages video recording of a browser session using CDP screencasting.
+
+	Recording starts on BrowserConnectedEvent when the profile sets record_video_dir,
+	and the video file is finalized on BrowserStopEvent.
+	"""
+
+	LISTENS_TO: ClassVar[list[type[BaseEvent]]] = [BrowserConnectedEvent, BrowserStopEvent]
+	EMITS: ClassVar[list[type[BaseEvent]]] = []
+
+	_recorder: Optional[VideoRecorderService] = None
+
+	async def on_BrowserConnectedEvent(self, event: BrowserConnectedEvent) -> None:
+		"""
+		Starts video recording if it is configured in the browser profile.
+		"""
+		profile = self.browser_session.browser_profile
+		if not profile.record_video_dir:
+			return
+
+		# Dynamically determine video size
+		size = profile.record_video_size
+		if not size:
+			self.logger.debug('record_video_size not specified, detecting viewport size...')
+			size = await self._get_current_viewport_size()
+
+		if not size:
+			self.logger.warning('Cannot start video recording: viewport size could not be determined.')
+			return
+
+		video_format = getattr(profile, 'record_video_format', 'mp4').strip('.')
+		output_path = Path(profile.record_video_dir) / f'{uuid7str()}.{video_format}'
+
+		self.logger.debug(f'Initializing video recorder for format: {video_format}')
+		self._recorder = VideoRecorderService(output_path=output_path, size=size, framerate=profile.record_video_framerate)
+		self._recorder.start()
+
+		# start() logs the reason and leaves the recorder inactive if the writer could not be opened
+		if not self._recorder.is_active:
+			self._recorder = None
+			return
+
+		self.browser_session.cdp_client.register.Page.screencastFrame(self.on_screencastFrame)
+
+		try:
+			cdp_session = await self.browser_session.get_or_create_cdp_session()
+			await cdp_session.cdp_client.send.Page.startScreencast(
+				params={
+					'format': 'png',
+					'quality': 90,
+					'maxWidth': size['width'],
+					'maxHeight': size['height'],
+					'everyNthFrame': 1,
+				},
+				session_id=cdp_session.session_id,
+			)
+			self.logger.info(f'📹 Started video recording to {output_path}')
+		except Exception as e:
+			self.logger.error(f'Failed to start screencast via CDP: {e}')
+			if self._recorder:
+				self._recorder.stop_and_save()
+				self._recorder = None
+
+	async def _get_current_viewport_size(self) -> Optional[ViewportSize]:
+		"""Gets the current viewport size directly from the browser via CDP."""
+		try:
+			cdp_session = await self.browser_session.get_or_create_cdp_session()
+			metrics = await cdp_session.cdp_client.send.Page.getLayoutMetrics(session_id=cdp_session.session_id)
+
+			# Use cssVisualViewport for the most accurate representation of the visible area
+			viewport = metrics.get('cssVisualViewport', {})
+			width = viewport.get('clientWidth')
+			height = viewport.get('clientHeight')
+
+			if width and height:
+				self.logger.debug(f'Detected viewport size: {width}x{height}')
+				return ViewportSize(width=int(width), height=int(height))
+		except Exception as e:
+			self.logger.warning(f'Failed to get viewport size from browser: {e}')
+
+		return None
+
+	def on_screencastFrame(self, event: ScreencastFrameEvent, session_id: Optional[str]) -> None:
+		"""
+		Synchronous handler for incoming screencast frames.
+		"""
+		if not self._recorder:
+			return
+		self._recorder.add_frame(event['data'])
+		asyncio.create_task(self._ack_screencast_frame(event, session_id))
+
+	async def _ack_screencast_frame(self, event: ScreencastFrameEvent, session_id: Optional[str]) -> None:
+		"""
+		Asynchronously acknowledges a screencast frame.
+		"""
+		try:
+			await self.browser_session.cdp_client.send.Page.screencastFrameAck(
+				params={'sessionId': event['sessionId']}, session_id=session_id
+			)
+		except Exception as e:
+			self.logger.debug(f'Failed to acknowledge screencast frame: {e}')
+
+	async def on_BrowserStopEvent(self, event: BrowserStopEvent) -> None:
+		"""
+		Stops the video recording and finalizes the video file.
+		"""
+		if self._recorder:
+			recorder = self._recorder
+			self._recorder = None
+
+			self.logger.debug('Stopping video recording and saving file...')
+			# stop_and_save() is synchronous; run it in the default executor so the event loop is not blocked
+			loop = asyncio.get_running_loop()
+			await loop.run_in_executor(None, recorder.stop_and_save)
diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py
index e4d65092e..16d238878 100644
--- a/browser_use/browser/session.py
+++ b/browser_use/browser/session.py
@@ -229,6 +229,7 @@ class BrowserSession(BaseModel):
 	_dom_watchdog: Any | None = PrivateAttr(default=None)
 	_screenshot_watchdog: Any | None = PrivateAttr(default=None)
 	_permissions_watchdog: Any | None = PrivateAttr(default=None)
+	_recording_watchdog: Any | None = PrivateAttr(default=None)
 
 	_logger: Any = PrivateAttr(default=None)
 
@@ -281,6 +282,7 @@ class BrowserSession(BaseModel):
 		self._dom_watchdog = None
 		self._screenshot_watchdog = None
 		self._permissions_watchdog = None
+		self._recording_watchdog = None
 
 	def model_post_init(self, __context) -> None:
 		"""Register event handlers after model initialization."""
@@ -804,6 +806,7 @@ class BrowserSession(BaseModel):
 		from browser_use.browser.local_browser_watchdog import LocalBrowserWatchdog
 		from browser_use.browser.permissions_watchdog import PermissionsWatchdog
 		from browser_use.browser.popups_watchdog import PopupsWatchdog
+		from browser_use.browser.recording_watchdog import RecordingWatchdog
 		from browser_use.browser.screenshot_watchdog import ScreenshotWatchdog
 		from browser_use.browser.security_watchdog import SecurityWatchdog
 		# from browser_use.browser.storage_state_watchdog import StorageStateWatchdog
@@ -903,6 +906,11 @@ class BrowserSession(BaseModel):
 		# self.event_bus.on(BrowserStateRequestEvent, self._dom_watchdog.on_BrowserStateRequestEvent)
 		self._dom_watchdog.attach_to_session()
 
+		# Initialize RecordingWatchdog (handles video recording)
+		RecordingWatchdog.model_rebuild()
+		self._recording_watchdog = RecordingWatchdog(event_bus=self.event_bus, browser_session=self)
+		self._recording_watchdog.attach_to_session()
+
 		# Mark watchdogs as attached to prevent duplicate attachment
 		self._watchdogs_attached = True
 
diff --git a/browser_use/browser/video_recorder.py b/browser_use/browser/video_recorder.py
new file mode 100644
index 000000000..5d260fe66
--- /dev/null
+++ b/browser_use/browser/video_recorder.py
@@ -0,0 +1,131 @@
+"""Video Recording Service for Browser Use Sessions."""
+
+import base64
+import logging
+from pathlib import Path
+from typing import Optional
+
+from browser_use.browser.profile import ViewportSize
+
+try:
+	import imageio.v2 as iio
+	from imageio.core.format import Format
+
+	IMAGEIO_AVAILABLE = True
+except ImportError:
+	IMAGEIO_AVAILABLE = False
+
+logger = logging.getLogger(__name__)
+
+
+class VideoRecorderService:
+	"""
+	Handles the video encoding process for a browser session using imageio.
+
+	This service captures individual frames from the CDP screencast, decodes them,
+	and appends them to a video file using a pip-installable ffmpeg backend.
+	Frames whose size differs from the target video dimensions are skipped, not resized.
+	"""
+
+	def __init__(self, output_path: Path, size: ViewportSize, framerate: int):
+		"""
+		Initializes the video recorder.
+
+		Args:
+			output_path: The full path where the video will be saved.
+			size: A ViewportSize object specifying the width and height of the video.
+			framerate: The desired framerate for the output video.
+		"""
+		self.output_path = output_path
+		self.size = size
+		self.framerate = framerate
+		self._writer: Optional['Format.Writer'] = None
+		self._is_active = False
+
+	@property
+	def is_active(self) -> bool:
+		"""Whether start() opened the writer successfully and stop_and_save() has not closed it yet."""
+		return self._is_active
+
+	def start(self) -> None:
+		"""
+		Prepares and starts the video writer.
+
+		If the required optional dependencies are not installed, this method will
+		log an error and do nothing.
+		"""
+		if not IMAGEIO_AVAILABLE:
+			logger.error(
+				'MP4 recording requires optional dependencies. Please install them with: pip install "browser-use[video]"'
+			)
+			return
+
+		try:
+			self.output_path.parent.mkdir(parents=True, exist_ok=True)
+			self._writer = iio.get_writer(
+				str(self.output_path),
+				fps=self.framerate,
+				codec='libx264',
+				quality=8,  # A good balance of quality and file size (1-10 scale)
+				pixelformat='yuv420p',  # Ensures compatibility with most players
+				macro_block_size=16,  # Recommended for h264
+			)
+			self._is_active = True
+			logger.debug(f'Video recorder started. Output will be saved to {self.output_path}')
+		except Exception as e:
+			logger.error(f'Failed to initialize video writer: {e}')
+			self._is_active = False
+
+	def add_frame(self, frame_data_b64: str) -> None:
+		"""
+		Decodes a base64-encoded PNG frame and appends it to the video.
+
+		Decoding and appending run synchronously in the calling thread. Frames that
+		cannot be decoded or appended are logged and dropped.
+
+		Args:
+			frame_data_b64: A base64-encoded string of the PNG frame data.
+		"""
+		if not self._is_active or not self._writer:
+			return
+
+		try:
+			frame_bytes = base64.b64decode(frame_data_b64)
+			# imageio reads bytes directly and converts to a numpy array
+			# The format is auto-detected from the bytes.
+			img_array = iio.imread(frame_bytes)
+
+			# Ensure frame dimensions match video dimensions.
+			# shape[:2] also works for 2-D grayscale arrays, which have no channel axis.
+			h, w = img_array.shape[:2]
+			if w != self.size['width'] or h != self.size['height']:
+				# This can happen if the viewport changes mid-recording.
+				# A more robust solution could involve resizing, but that is non-trivial.
+				# For now, the video size must be the same as the viewport
+				logger.warning(
+					f'Frame size ({w}x{h}) does not match video size '
+					f'({self.size["width"]}x{self.size["height"]}). Skipping frame.'
+				)
+				return
+
+			self._writer.append_data(img_array)
+		except Exception as e:
+			logger.warning(f'Could not process and add video frame: {e}')
+
+	def stop_and_save(self) -> None:
+		"""
+		Finalizes the video file by closing the writer.
+
+		This method should be called when the recording session is complete.
+		"""
+		if not self._is_active or not self._writer:
+			return
+
+		try:
+			self._writer.close()
+			logger.info(f'📹 Video recording saved successfully to: {self.output_path}')
+		except Exception as e:
+			logger.error(f'Failed to finalize and save video: {e}')
+		finally:
+			self._is_active = False
+			self._writer = None
diff --git a/examples/features/video_recording.py b/examples/features/video_recording.py
new file mode 100644
index 000000000..0a3f90783
--- /dev/null
+++ b/examples/features/video_recording.py
@@ -0,0 +1,26 @@
+import asyncio
+from pathlib import Path
+
+from browser_use import Agent, BrowserProfile, BrowserSession, ChatOpenAI
+
+
+async def main():
+	# Define a profile that enables video recording
+	video_profile = BrowserProfile(headless=False, record_video_dir=Path('./tmp/recordings'))
+
+	browser_session = BrowserSession(browser_profile=video_profile)
+
+	agent = Agent(
+		task='Go to github.com/trending then navigate to the first trending repository.',
+		llm=ChatOpenAI(model='gpt-4.1-mini'),
+		browser_session=browser_session,
+	)
+
+	await agent.run(max_steps=5)
+
+	# The video will be saved automatically when the agent finishes and the session closes.
+	print('Agent run finished. Check the ./tmp/recordings directory for the video.')
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
diff --git a/pyproject.toml b/pyproject.toml
index 3069190e1..7200c0e1a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -61,6 +61,9 @@ cli = [
 aws = [
     "boto3>=1.38.45"
 ]
+video = [
+    "imageio[ffmpeg]>=2.37.0"
+]
 examples = [
     # botocore: only needed for Bedrock Claude boto3 examples/models/bedrock_claude.py
     "botocore>=1.37.23",