Merge commit 'ad68c577c27cafb22270b811fbe9a0b9f4fcd521' into fix-disable-security-bug

This commit is contained in:
Magnus Müller
2025-09-06 15:19:59 -07:00
83 changed files with 2359 additions and 2144 deletions

View File

@@ -33,6 +33,12 @@ ANONYMIZED_TELEMETRY=true
# Default LLM model to use
# OPENAI_API_KEY=your_openai_api_key_here
# ANTHROPIC_API_KEY=your_anthropic_api_key_here
# AZURE_OPENAI_API_KEY=
# AZURE_OPENAI_ENDPOINT=
# GOOGLE_API_KEY=
# DEEPSEEK_API_KEY=
# GROK_API_KEY=
# NOVITA_API_KEY=
# Browser Configuration
# Path to Chrome/Chromium executable (optional)

View File

@@ -31,7 +31,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest, self-hosted]
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.11", "3.13"]
env:
ANONYMIZED_TELEMETRY: 'false'

View File

@@ -21,16 +21,25 @@ on:
jobs:
find_tests:
runs-on: ubuntu-latest
timeout-minutes: 5 # Prevent hanging
outputs:
TEST_FILENAMES: ${{ steps.lsgrep.outputs.TEST_FILENAMES }}
# ["test_browser", "test_tools", "test_browser_session", "test_tab_management", ...]
steps:
- uses: actions/checkout@v4
with:
# Force fresh checkout to avoid any caching issues
fetch-depth: 1
- id: lsgrep
run: |
echo "🔍 Discovering test files at $(date)"
echo "Git commit: $(git rev-parse HEAD)"
echo "Git branch: $(git branch --show-current)"
echo ""
TEST_FILENAMES="$(ls tests/ci/test_*.py | sed 's|^tests/ci/||' | sed 's|\.py$||' | jq -R -s -c 'split("\n")[:-1]')"
echo "TEST_FILENAMES=${TEST_FILENAMES}" >> "$GITHUB_OUTPUT"
echo "$TEST_FILENAMES"
echo "📋 Test matrix: $TEST_FILENAMES"
# https://code.dblock.org/2021/09/03/generating-task-matrix-by-looping-over-repo-files-with-github-actions.html
- name: Check that at least one test file is found
run: |
@@ -42,6 +51,7 @@ jobs:
tests:
needs: find_tests
runs-on: ubuntu-latest
timeout-minutes: 10 # Prevent individual tests from hanging
env:
IN_DOCKER: 'True'
ANONYMIZED_TELEMETRY: 'false'
@@ -96,7 +106,20 @@ jobs:
restore-keys: |
${{ runner.os }}-browseruse-extensions-
- run: pytest tests/ci/${{ matrix.test_filename }}.py
- name: Check if test file exists and run it
run: |
TEST_FILE="tests/ci/${{ matrix.test_filename }}.py"
if [ -f "$TEST_FILE" ]; then
echo "✅ Running test file: $TEST_FILE"
pytest "$TEST_FILE"
else
echo "❌ Test file not found: $TEST_FILE"
echo "This file may have been renamed or removed. Current test files:"
ls -1 tests/ci/test_*.py | sed 's|tests/ci/||' | sed 's|\.py$||' | sort
echo ""
echo "Skipping this test job since the file no longer exists."
exit 0 # Exit successfully to not fail the entire workflow
fi
evaluate-tasks:
runs-on: ubuntu-latest

View File

@@ -14,9 +14,34 @@
[![Twitter Follow](https://img.shields.io/twitter/follow/Magnus?style=social)](https://x.com/intent/user?screen_name=mamagnus00)
[![Weave Badge](https://img.shields.io/endpoint?url=https%3A%2F%2Fapp.workweave.ai%2Fapi%2Frepository%2Fbadge%2Forg_T5Pvn3UBswTHIsN1dWS3voPg%2F881458615&labelColor=#EC6341)](https://app.workweave.ai/reports/repository/org_T5Pvn3UBswTHIsN1dWS3voPg/881458615)
<!-- Keep these links. Translations will automatically update with the README. -->
[Deutsch](https://www.readme-i18n.com/browser-use/browser-use?lang=de) |
[Español](https://www.readme-i18n.com/browser-use/browser-use?lang=es) |
[français](https://www.readme-i18n.com/browser-use/browser-use?lang=fr) |
[日本語](https://www.readme-i18n.com/browser-use/browser-use?lang=ja) |
[한국어](https://www.readme-i18n.com/browser-use/browser-use?lang=ko) |
[Português](https://www.readme-i18n.com/browser-use/browser-use?lang=pt) |
[Русский](https://www.readme-i18n.com/browser-use/browser-use?lang=ru) |
[中文](https://www.readme-i18n.com/browser-use/browser-use?lang=zh)
🌤️ Want to skip the setup? Use our <b>[cloud](https://cloud.browser-use.com)</b> for faster, scalable, stealth-enabled browser automation!
# Quick start
## 🎉 OSS Twitter Hackathon
We just hit **69,000 GitHub ⭐**!
To celebrate, we're launching **#nicehack69** — a Twitter-first hackathon with a **$6,900 prize pool**. Dream big and show us the future of browser-use agents that go beyond demos!
**Deadline: September 6, 2025**
**[🚀 Join the hackathon →](https://github.com/browser-use/nicehack69)**
<div align="center">
<a href="https://github.com/browser-use/nicehack69">
<img src="./static/NiceHack69.png" alt="NiceHack69 Hackathon" width="600"/>
</a>
</div>
# Quickstart
With pip (Python>=3.11):

View File

@@ -51,6 +51,7 @@ if TYPE_CHECKING:
from browser_use.browser import BrowserProfile, BrowserSession
from browser_use.browser import BrowserSession as Browser
from browser_use.dom.service import DomService
from browser_use.llm import models
from browser_use.llm.anthropic.chat import ChatAnthropic
from browser_use.llm.azure.chat import ChatAzureOpenAI
from browser_use.llm.google.chat import ChatGoogle
@@ -85,6 +86,8 @@ _LAZY_IMPORTS = {
'ChatGroq': ('browser_use.llm.groq.chat', 'ChatGroq'),
'ChatAzureOpenAI': ('browser_use.llm.azure.chat', 'ChatAzureOpenAI'),
'ChatOllama': ('browser_use.llm.ollama.chat', 'ChatOllama'),
# LLM models module
'models': ('browser_use.llm.models', None),
}
@@ -96,7 +99,11 @@ def __getattr__(name: str):
from importlib import import_module
module = import_module(module_path)
attr = getattr(module, attr_name)
if attr_name is None:
# For modules like 'models', return the module itself
attr = module
else:
attr = getattr(module, attr_name)
# Cache the imported attribute in the module's globals
globals()[name] = attr
return attr
@@ -126,4 +133,6 @@ __all__ = [
'ChatOllama',
'Tools',
'Controller',
# LLM models module
'models',
]

View File

@@ -87,6 +87,8 @@ def create_history_gif(
# Try different font options in order of preference
# ArialUni is a font that comes with Office and can render most non-alphabet characters
font_options = [
'PingFang',
'STHeiti Medium',
'Microsoft YaHei', # 微软雅黑
'SimHei', # 黑体
'SimSun', # 宋体

View File

@@ -17,6 +17,7 @@ from browser_use.browser.views import BrowserStateSummary
from browser_use.filesystem.file_system import FileSystem
from browser_use.llm.messages import (
BaseMessage,
ContentPartImageParam,
ContentPartTextParam,
SystemMessage,
)
@@ -108,6 +109,7 @@ class MessageManager:
vision_detail_level: Literal['auto', 'low', 'high'] = 'auto',
include_tool_call_examples: bool = False,
include_recent_events: bool = False,
sample_images: list[ContentPartTextParam | ContentPartImageParam] | None = None,
):
self.task = task
self.state = state
@@ -119,6 +121,7 @@ class MessageManager:
self.vision_detail_level = vision_detail_level
self.include_tool_call_examples = include_tool_call_examples
self.include_recent_events = include_recent_events
self.sample_images = sample_images
assert max_history_items is None or max_history_items > 5, 'max_history_items must be None or greater than 5'
@@ -190,10 +193,10 @@ class MessageManager:
logger.debug(f'Added extracted_content to read_state_description: {action_result.extracted_content}')
if action_result.long_term_memory:
action_results += f'Action {idx + 1}/{result_len}: {action_result.long_term_memory}\n'
action_results += f'{action_result.long_term_memory}\n'
logger.debug(f'Added long_term_memory to action_results: {action_result.long_term_memory}')
elif action_result.extracted_content and not action_result.include_extracted_content_only_once:
action_results += f'Action {idx + 1}/{result_len}: {action_result.extracted_content}\n'
action_results += f'{action_result.extracted_content}\n'
logger.debug(f'Added extracted_content to action_results: {action_result.extracted_content}')
if action_result.error:
@@ -201,13 +204,13 @@ class MessageManager:
error_text = action_result.error[:100] + '......' + action_result.error[-100:]
else:
error_text = action_result.error
action_results += f'Action {idx + 1}/{result_len}: {error_text}\n'
action_results += f'{error_text}\n'
logger.debug(f'Added error to action_results: {error_text}')
self.state.read_state_description = self.state.read_state_description.strip('\n')
if action_results:
action_results = f'Action Results:\n{action_results}'
action_results = f'Result:\n{action_results}'
action_results = action_results.strip('\n') if action_results else None
# Build the history item
@@ -306,6 +309,7 @@ class MessageManager:
screenshots=screenshots,
vision_detail_level=self.vision_detail_level,
include_recent_events=self.include_recent_events,
sample_images=self.sample_images,
).get_user_message(use_vision)
# Set the state message with caching enabled

View File

@@ -32,30 +32,28 @@ class HistoryItem(BaseModel):
def to_string(self) -> str:
"""Get string representation of the history item"""
step_str = f'step_{self.step_number}' if self.step_number is not None else 'step_unknown'
step_str = 'step' if self.step_number is not None else 'step_unknown'
if self.error:
return f"""<{step_str}>
{self.error}
</{step_str}>"""
elif self.system_message:
return f"""<sys>
{self.system_message}
</sys>"""
return '' # empty string
else:
content_parts = []
# Only include evaluation_previous_goal if it's not None/empty
if self.evaluation_previous_goal:
content_parts.append(f'Evaluation of Previous Step: {self.evaluation_previous_goal}')
content_parts.append(f'{self.evaluation_previous_goal}')
# Always include memory
if self.memory:
content_parts.append(f'Memory: {self.memory}')
content_parts.append(f'{self.memory}')
# Only include next_goal if it's not None/empty
if self.next_goal:
content_parts.append(f'Next Goal: {self.next_goal}')
content_parts.append(f'{self.next_goal}')
if self.action_results:
content_parts.append(self.action_results)

View File

@@ -93,6 +93,7 @@ class AgentMessagePrompt:
screenshots: list[str] | None = None,
vision_detail_level: Literal['auto', 'low', 'high'] = 'auto',
include_recent_events: bool = False,
sample_images: list[ContentPartTextParam | ContentPartImageParam] | None = None,
):
self.browser_state: 'BrowserStateSummary' = browser_state_summary
self.file_system: 'FileSystem | None' = file_system
@@ -108,6 +109,7 @@ class AgentMessagePrompt:
self.screenshots = screenshots or []
self.vision_detail_level = vision_detail_level
self.include_recent_events = include_recent_events
self.sample_images = sample_images or []
assert self.browser_state
@observe_debug(ignore_input=True, ignore_output=True, name='_get_browser_state_description')
@@ -132,8 +134,13 @@ class AgentMessagePrompt:
pages_below = pi.pixels_below / pi.viewport_height if pi.viewport_height > 0 else 0
total_pages = pi.page_height / pi.viewport_height if pi.viewport_height > 0 else 0
current_page_position = pi.scroll_y / max(pi.page_height - pi.viewport_height, 1)
page_info_text = f'Page info: {pi.viewport_width}x{pi.viewport_height}px viewport, {pi.page_width}x{pi.page_height}px total page size, {pages_above:.1f} pages above, {pages_below:.1f} pages below, {total_pages:.1f} total pages, at {current_page_position:.0%} of page'
page_info_text = '<page_info>'
page_info_text += f'Viewport size: {pi.viewport_width}x{pi.viewport_height}px, Total page size: {pi.page_width}x{pi.page_height}px, '
page_info_text += f'{pages_above:.1f} pages above, '
page_info_text += f'{pages_below:.1f} pages below, '
page_info_text += f'{total_pages:.1f} total pages'
page_info_text += '</page_info>\n'
# , at {current_page_position:.0%} of page
if elements_text != '':
if has_content_above:
if self.browser_state.page_info:
@@ -187,19 +194,23 @@ class AgentMessagePrompt:
Available tabs:
{tabs_text}
{page_info_text}
{recent_events_text}{pdf_message}Interactive elements from top layer of the current page inside the viewport{truncated_text}:
{recent_events_text}{pdf_message}Elements you can interact with inside the viewport{truncated_text}:
{elements_text}
"""
return browser_state
def _get_agent_state_description(self) -> str:
if self.step_info:
step_info_description = f'Step {self.step_info.step_number + 1} of {self.step_info.max_steps} max possible steps\n'
step_info_description = f'Step {self.step_info.step_number + 1}. Maximum steps: {self.step_info.max_steps}\n'
else:
step_info_description = ''
time_str = datetime.now().strftime('%Y-%m-%d %H:%M')
step_info_description += f'Current date and time: {time_str}'
time_str = datetime.now().strftime('%Y-%m-%d')
step_info_description += f'Current date: {time_str}'
_todo_contents = self.file_system.get_todo_contents() if self.file_system else ''
if not len(_todo_contents):
_todo_contents = '[Current todo.md is empty, fill it with your plan when applicable]'
@@ -240,7 +251,7 @@ Available tabs:
state_description = (
'<agent_history>\n'
+ (self.agent_history_description.strip('\n') if self.agent_history_description else '')
+ '\n</agent_history>\n'
+ '\n</agent_history>\n\n'
)
state_description += '<agent_state>\n' + self._get_agent_state_description().strip('\n') + '\n</agent_state>\n'
state_description += '<browser_state>\n' + self._get_browser_state_description().strip('\n') + '\n</browser_state>\n'
@@ -258,6 +269,9 @@ Available tabs:
# Start with text description
content_parts: list[ContentPartTextParam | ContentPartImageParam] = [ContentPartTextParam(text=state_description)]
# Add sample images
content_parts.extend(self.sample_images)
# Add screenshots with labels
for i, screenshot in enumerate(self.screenshots):
if i == len(self.screenshots) - 1:

View File

@@ -4,7 +4,6 @@ import inspect
import json
import logging
import re
import sys
import tempfile
import time
from collections.abc import Awaitable, Callable
@@ -24,14 +23,14 @@ from browser_use.agent.cloud_events import (
)
from browser_use.agent.message_manager.utils import save_conversation
from browser_use.llm.base import BaseChatModel
from browser_use.llm.messages import BaseMessage, UserMessage
from browser_use.llm.messages import BaseMessage, ContentPartImageParam, ContentPartTextParam, UserMessage
from browser_use.llm.openai.chat import ChatOpenAI
from browser_use.tokens.service import TokenCost
load_dotenv()
from bubus import EventBus
from pydantic import ValidationError
from pydantic import BaseModel, ValidationError
from uuid_extensions import uuid7str
from browser_use import Browser, BrowserProfile, BrowserSession
@@ -67,6 +66,7 @@ from browser_use.telemetry.views import AgentTelemetryEvent
from browser_use.tools.registry.views import ActionModel
from browser_use.tools.service import Tools
from browser_use.utils import (
URL_PATTERN,
_log_pretty_path,
get_browser_use_version,
get_git_info,
@@ -128,7 +128,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
def __init__(
self,
task: str,
llm: BaseChatModel = ChatOpenAI(model='gpt-4.1-mini'),
llm: BaseChatModel | None = None,
# Optional parameters
browser_profile: BrowserProfile | None = None,
browser_session: BrowserSession | None = None,
@@ -179,8 +179,28 @@ class Agent(Generic[Context, AgentStructuredOutput]):
step_timeout: int = 120,
directly_open_url: bool = True,
include_recent_events: bool = False,
sample_images: list[ContentPartTextParam | ContentPartImageParam] | None = None,
final_response_after_failure: bool = True,
_url_shortening_limit: int = 25,
**kwargs,
):
if llm is None:
default_llm_name = CONFIG.DEFAULT_LLM
if default_llm_name:
try:
from browser_use.llm.models import get_llm_by_name
llm = get_llm_by_name(default_llm_name)
except (ImportError, ValueError) as e:
# Use the logger that's already imported at the top of the module
logger.warning(
f'Failed to create default LLM "{default_llm_name}": {e}. Falling back to ChatOpenAI(model="gpt-4.1-mini")'
)
llm = ChatOpenAI(model='gpt-4.1-mini')
else:
# No default LLM specified, use the original default
llm = ChatOpenAI(model='gpt-4.1-mini')
if page_extraction_llm is None:
page_extraction_llm = llm
if available_file_paths is None:
@@ -210,6 +230,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
self.llm = llm
self.directly_open_url = directly_open_url
self.include_recent_events = include_recent_events
self._url_shortening_limit = _url_shortening_limit
if tools is not None:
self.tools = tools
elif controller is not None:
@@ -224,6 +245,8 @@ class Agent(Generic[Context, AgentStructuredOutput]):
self.sensitive_data = sensitive_data
self.sample_images = sample_images
self.settings = AgentSettings(
use_vision=use_vision,
vision_detail_level=vision_detail_level,
@@ -243,6 +266,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
include_tool_call_examples=include_tool_call_examples,
llm_timeout=llm_timeout,
step_timeout=step_timeout,
final_response_after_failure=final_response_after_failure,
)
# Token cost service
@@ -297,7 +321,6 @@ class Agent(Generic[Context, AgentStructuredOutput]):
self.logger.warning('⚠️ XAI models do not support use_vision=True yet. Setting use_vision=False for now...')
self.settings.use_vision = False
self.logger.info(f'🧠 Starting a browser-use version {self.version} with model={self.llm.model}')
logger.debug(
f'{" +vision" if self.settings.use_vision else ""}'
f' extraction_model={self.settings.page_extraction_llm.model if self.settings.page_extraction_llm else "Unknown"}'
@@ -330,6 +353,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
vision_detail_level=self.settings.vision_detail_level,
include_tool_call_examples=self.settings.include_tool_call_examples,
include_recent_events=self.include_recent_events,
sample_images=self.sample_images,
)
if self.sensitive_data:
@@ -339,23 +363,9 @@ class Agent(Generic[Context, AgentStructuredOutput]):
# If no allowed_domains are configured, show a security warning
if not self.browser_profile.allowed_domains:
self.logger.error(
'⚠️⚠️⚠️ Agent(sensitive_data=••••••••) was provided but BrowserSession(allowed_domains=[...]) is not locked down! ⚠️⚠️⚠️\n'
'⚠️ Agent(sensitive_data=••••••••) was provided but Browser(allowed_domains=[...]) is not locked down! ⚠️\n'
' ☠️ If the agent visits a malicious website and encounters a prompt-injection attack, your sensitive_data may be exposed!\n\n'
' https://docs.browser-use.com/customize/browser-settings#restrict-urls\n'
'Waiting 10 seconds before continuing... Press [Ctrl+C] to abort.'
)
if sys.stdin.isatty():
try:
time.sleep(10)
except KeyboardInterrupt:
print(
'\n\n 🛑 Exiting now... set BrowserSession(allowed_domains=["example.com", "example.org"]) to only domains you trust to see your sensitive_data.'
)
sys.exit(0)
else:
pass # no point waiting if we're not in an interactive shell
self.logger.warning(
'‼️ Continuing with insecure settings for now... but this will become a hard error in the future!'
' \n'
)
# If we're using domain-specific credentials, validate domain patterns
@@ -426,6 +436,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
self._last_known_downloads: list[str] = []
self.logger.debug('📁 Initialized download tracking for agent')
# Event-based pause control (kept out of AgentState for serialization)
self._external_pause_event = asyncio.Event()
self._external_pause_event.set()
@@ -606,8 +617,10 @@ class Agent(Generic[Context, AgentStructuredOutput]):
if await self.register_external_agent_status_raise_error_callback():
raise InterruptedError
if self.state.stopped or self.state.paused:
# self.logger.debug('Agent paused after getting state')
if self.state.stopped:
raise InterruptedError
if self.state.paused:
raise InterruptedError
@observe(name='agent.step', ignore_output=True, ignore_input=True)
@@ -615,6 +628,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
async def step(self, step_info: AgentStepInfo | None = None) -> None:
"""Execute one step of the task"""
# Initialize timing first, before any exceptions can occur
self.step_start_time = time.time()
browser_state_summary = None
@@ -682,7 +696,8 @@ class Agent(Generic[Context, AgentStructuredOutput]):
available_file_paths=self.available_file_paths, # Always pass current available_file_paths
)
await self._handle_final_step(step_info)
await self._force_done_after_last_step(step_info)
await self._force_done_after_failure()
return browser_state_summary
@observe_debug(ignore_input=True, name='get_next_action')
@@ -768,7 +783,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
# Handle all other exceptions
include_trace = self.logger.isEnabledFor(logging.DEBUG)
error_msg = AgentError.format_error(error, include_trace=include_trace)
prefix = f'❌ Result failed {self.state.consecutive_failures + 1}/{self.settings.max_failures} times:\n '
prefix = f'❌ Result failed {self.state.consecutive_failures + 1}/{self.settings.max_failures + int(self.settings.final_response_after_failure)} times:\n '
self.state.consecutive_failures += 1
# Handle InterruptedError specially
@@ -833,7 +848,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
# Increment step counter after step is fully completed
self.state.n_steps += 1
async def _handle_final_step(self, step_info: AgentStepInfo | None = None) -> None:
async def _force_done_after_last_step(self, step_info: AgentStepInfo | None = None) -> None:
"""Handle special processing for the last step"""
if step_info and step_info.is_last_step():
# Add last step warning if needed
@@ -845,6 +860,19 @@ class Agent(Generic[Context, AgentStructuredOutput]):
self._message_manager._add_context_message(UserMessage(content=msg))
self.AgentOutput = self.DoneAgentOutput
async def _force_done_after_failure(self) -> None:
	"""Force done after failure"""
	# Only intervene once the failure budget is spent and the
	# final-response-after-failure feature is enabled; otherwise no-op.
	if not self.settings.final_response_after_failure:
		return
	if self.state.consecutive_failures < self.settings.max_failures:
		return
	recovery_msg = (
		f'You have failed {self.settings.max_failures} consecutive times. This is your final step to complete the task or provide what you found. '
		'Use only the "done" action now. No other actions - so here your action sequence must have length 1.'
		'\nIf the task could not be completed due to the failures, set success in "done" to false!'
		'\nInclude everything you found out for the task in the done text.'
	)
	self.logger.debug('Force done action, because we reached max_failures.')
	self._message_manager._add_context_message(UserMessage(content=recovery_msg))
	# Swap the output schema so the model can only emit a `done` action.
	self.AgentOutput = self.DoneAgentOutput
async def _get_model_output_with_retry(self, input_messages: list[BaseMessage]) -> AgentOutput:
"""Get model output with retry logic for empty actions"""
model_output = await self.get_model_output(input_messages)
@@ -965,15 +993,172 @@ class Agent(Generic[Context, AgentStructuredOutput]):
text = re.sub(STRAY_CLOSE_TAG, '', text)
return text.strip()
# region - URL replacement
def _replace_urls_in_text(self, text: str) -> tuple[str, dict[str, str]]:
	"""Shorten long URLs in a text string.

	URLs whose query/fragment portion exceeds ``self._url_shortening_limit``
	characters are truncated and suffixed with a 7-char md5 hash of the full
	query/fragment, so distinct URLs remain distinguishable after shortening.

	Returns:
		(new_text, replaced_urls) where replaced_urls maps
		{shortened_url: original_url} so the substitution can later be undone.
	"""
	# Hoisted out of the per-match closure: the original imported hashlib
	# on every single regex match.
	import hashlib

	replaced_urls: dict[str, str] = {}

	def replace_url(match: re.Match) -> str:
		"""Url can only have 1 query and 1 fragment"""
		original_url = match.group(0)

		# Find where the query/fragment starts (earliest of '?' and '#').
		query_start = original_url.find('?')
		fragment_start = original_url.find('#')
		after_path_start = len(original_url)  # Default: no query/fragment
		if query_start != -1:
			after_path_start = min(after_path_start, query_start)
		if fragment_start != -1:
			after_path_start = min(after_path_start, fragment_start)

		# Split URL into base (up to path) and after_path (query + fragment)
		base_url = original_url[:after_path_start]
		after_path = original_url[after_path_start:]

		# If after_path is within the limit, don't shorten
		if len(after_path) <= self._url_shortening_limit:
			return original_url

		# Truncate and append a short hash of the full after_path content
		truncated_after_path = after_path[: self._url_shortening_limit]
		short_hash = hashlib.md5(after_path.encode('utf-8')).hexdigest()[:7]
		shortened = f'{base_url}{truncated_after_path}...{short_hash}'

		# Only use shortened URL if it's actually shorter than the original
		if len(shortened) < len(original_url):
			replaced_urls[shortened] = original_url
			return shortened
		return original_url

	return URL_PATTERN.sub(replace_url, text), replaced_urls
def _process_messsages_and_replace_long_urls_shorter_ones(self, input_messages: list[BaseMessage]) -> dict[str, str]:
	"""Replace long URLs with shorter ones in all user/assistant messages.

	NOTE: edits ``input_messages`` in place; SystemMessages are skipped since
	we control their content anyway.

	Returns:
		dict mapping {shortened_url: original_url} for every URL replaced,
		so the substitution can later be reversed in the LLM response.
	"""
	from browser_use.llm.messages import AssistantMessage, UserMessage

	urls_replaced: dict[str, str] = {}

	# Process each message, in place
	for message in input_messages:
		# no need to process SystemMessage, we have control over that anyway
		if isinstance(message, (UserMessage, AssistantMessage)):
			if isinstance(message.content, str):
				# Simple string content
				message.content, replaced_urls = self._replace_urls_in_text(message.content)
				urls_replaced.update(replaced_urls)
			elif isinstance(message.content, list):
				# List of content parts — only text parts can contain URLs;
				# image parts are left untouched
				for part in message.content:
					if isinstance(part, ContentPartTextParam):
						part.text, replaced_urls = self._replace_urls_in_text(part.text)
						urls_replaced.update(replaced_urls)
	return urls_replaced
@staticmethod
def _recursive_process_all_strings_inside_pydantic_model(model: BaseModel, url_replacements: dict[str, str]) -> None:
	"""Recursively process all strings inside a Pydantic model, replacing shortened URLs with originals in place.

	Strings and list/tuple fields are written back via setattr (tuples are
	immutable, so a new container may be produced); nested models and dicts
	are mutated in place by the recursive helpers.
	"""
	for field_name, field_value in model.__dict__.items():
		if isinstance(field_value, str):
			# Replace shortened URLs with original URLs in string
			processed_string = Agent._replace_shortened_urls_in_string(field_value, url_replacements)
			setattr(model, field_name, processed_string)
		elif isinstance(field_value, BaseModel):
			# Recursively process nested Pydantic models
			Agent._recursive_process_all_strings_inside_pydantic_model(field_value, url_replacements)
		elif isinstance(field_value, dict):
			# Process dictionary values in place
			Agent._recursive_process_dict(field_value, url_replacements)
		elif isinstance(field_value, (list, tuple)):
			# Helper returns the (possibly new) container — reassign the field
			processed_value = Agent._recursive_process_list_or_tuple(field_value, url_replacements)
			setattr(model, field_name, processed_value)
@staticmethod
def _recursive_process_dict(dictionary: dict, url_replacements: dict[str, str]) -> None:
	"""Restore original URLs inside every value of the dict, mutating it in place."""
	for key in dictionary:
		value = dictionary[key]
		if isinstance(value, str):
			dictionary[key] = Agent._replace_shortened_urls_in_string(value, url_replacements)
		elif isinstance(value, BaseModel):
			Agent._recursive_process_all_strings_inside_pydantic_model(value, url_replacements)
		elif isinstance(value, dict):
			Agent._recursive_process_dict(value, url_replacements)
		elif isinstance(value, (list, tuple)):
			dictionary[key] = Agent._recursive_process_list_or_tuple(value, url_replacements)
@staticmethod
def _recursive_process_list_or_tuple(container: list | tuple, url_replacements: dict[str, str]) -> list | tuple:
	"""Helper method to process lists and tuples.

	Lists are mutated in place and returned; tuples (immutable) are rebuilt
	as a new tuple. Nested models/dicts are always mutated in place.
	"""
	if isinstance(container, tuple):
		# For tuples, create a new tuple with processed items
		processed_items = []
		for item in container:
			if isinstance(item, str):
				processed_items.append(Agent._replace_shortened_urls_in_string(item, url_replacements))
			elif isinstance(item, BaseModel):
				# Mutated in place, then re-appended unchanged by identity
				Agent._recursive_process_all_strings_inside_pydantic_model(item, url_replacements)
				processed_items.append(item)
			elif isinstance(item, dict):
				Agent._recursive_process_dict(item, url_replacements)
				processed_items.append(item)
			elif isinstance(item, (list, tuple)):
				processed_items.append(Agent._recursive_process_list_or_tuple(item, url_replacements))
			else:
				# Non-string leaf (int, None, ...): passed through untouched
				processed_items.append(item)
		return tuple(processed_items)
	else:
		# For lists, modify in place
		for i, item in enumerate(container):
			if isinstance(item, str):
				container[i] = Agent._replace_shortened_urls_in_string(item, url_replacements)
			elif isinstance(item, BaseModel):
				Agent._recursive_process_all_strings_inside_pydantic_model(item, url_replacements)
			elif isinstance(item, dict):
				Agent._recursive_process_dict(item, url_replacements)
			elif isinstance(item, (list, tuple)):
				container[i] = Agent._recursive_process_list_or_tuple(item, url_replacements)
		return container
@staticmethod
def _replace_shortened_urls_in_string(text: str, url_replacements: dict[str, str]) -> str:
	"""Undo URL shortening: swap every shortened URL in the text back to its original form."""
	for shortened_url, original_url in url_replacements.items():
		text = text.replace(shortened_url, original_url)
	return text
# endregion - URL replacement
@time_execution_async('--get_next_action')
@observe_debug(ignore_input=True, ignore_output=True, name='get_model_output')
async def get_model_output(self, input_messages: list[BaseMessage]) -> AgentOutput:
"""Get next action from LLM based on current state"""
urls_replaced = self._process_messsages_and_replace_long_urls_shorter_ones(input_messages)
try:
response = await self.llm.ainvoke(input_messages, output_format=self.AgentOutput)
parsed = response.completion
# Replace any shortened URLs in the LLM response back to original URLs
if urls_replaced:
self._recursive_process_all_strings_inside_pydantic_model(parsed, urls_replaced)
# cut the number of actions to max_actions_per_step if needed
if len(parsed.action) > self.settings.max_actions_per_step:
parsed.action = parsed.action[: self.settings.max_actions_per_step]
@@ -994,6 +1179,11 @@ class Agent(Generic[Context, AgentStructuredOutput]):
self.logger.debug(f'🤖 Browser-Use Library Version {self.version} ({self.source})')
def _log_first_step_startup(self) -> None:
"""Log startup message only on the first step"""
if len(self.history.history) == 0:
self.logger.info(f'🧠 Starting a browser-use version {self.version} with model={self.llm.model}')
def _log_step_context(self, browser_state_summary: BrowserStateSummary) -> None:
"""Log step context information"""
url = browser_state_summary.url if browser_state_summary else ''
@@ -1122,6 +1312,11 @@ class Agent(Generic[Context, AgentStructuredOutput]):
Returns:
Tuple[bool, bool]: (is_done, is_valid)
"""
if len(self.history.history) == 0:
# First step
self._log_first_step_startup()
await self._execute_initial_actions()
await self.step(step_info)
if self.history.is_done():
@@ -1250,17 +1445,21 @@ class Agent(Generic[Context, AgentStructuredOutput]):
self.logger.warning('⚠️ No browser focus established, may cause navigation issues')
await self._execute_initial_actions()
# Log startup message on first step (only if we haven't already done steps)
self._log_first_step_startup()
self.logger.debug(f'🔄 Starting main execution loop with max {max_steps} steps...')
for step in range(max_steps):
# Replace the polling with clean pause-wait
# Use the consolidated pause state management
if self.state.paused:
self.logger.debug(f'⏸️ Step {step}: Agent paused, waiting to resume...')
await self.wait_until_resumed()
await self._external_pause_event.wait()
signal_handler.reset()
# Check if we should stop due to too many failures
if self.state.consecutive_failures >= self.settings.max_failures:
# Check if we should stop due to too many failures, if final_response_after_failure is True, we try one last time
if (self.state.consecutive_failures) >= self.settings.max_failures + int(
self.settings.final_response_after_failure
):
self.logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures')
agent_run_error = f'Stopped due to {self.settings.max_failures} consecutive failures'
break
@@ -1271,12 +1470,6 @@ class Agent(Generic[Context, AgentStructuredOutput]):
agent_run_error = 'Agent stopped programmatically'
break
while self.state.paused:
await asyncio.sleep(0.5) # Small delay to prevent CPU spinning
if self.state.stopped: # Allow stopping while paused
agent_run_error = 'Agent stopped programmatically while paused'
break
if on_step_start is not None:
await on_step_start(self)
@@ -1476,7 +1669,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
if orig_target_hash != new_target_hash:
# Get names of remaining actions that won't be executed
remaining_actions_str = get_remaining_actions_str(actions, i)
msg = f'Page changed after action {i} / {total_actions}: actions {remaining_actions_str} were not executed'
msg = f'Page changed after action: actions {remaining_actions_str} are not yet executed'
logger.info(msg)
results.append(
ActionResult(
@@ -1716,39 +1909,28 @@ class Agent(Generic[Context, AgentStructuredOutput]):
file_path = 'AgentHistory.json'
self.history.save_to_file(file_path)
async def wait_until_resumed(self):
	"""Block until the external pause event is set.

	The event is set by resume() and also by stop(), so a coroutine waiting
	here wakes up either to continue execution or to observe the stopped flag.
	"""
	await self._external_pause_event.wait()
def pause(self) -> None:
	"""Pause the agent before the next step.

	Flips the paused flag and clears the external pause event so the main
	run loop blocks in wait_until_resumed() before executing another step.
	The browser is intentionally left open so the session can be resumed.
	"""
	# NOTE: merge residue previously printed two banners here (an older
	# Ctrl+C-specific one and this generic one); keep only the current message.
	print('\n\n⏸️ Paused the agent and left the browser open.\n\tPress [Enter] to resume or [Ctrl+C] again to quit.')
	self.state.paused = True
	self._external_pause_event.clear()
	# The signal handler owns the asyncio pause logic; nothing more to do here.
def resume(self) -> None:
	"""Resume a paused agent.

	Clears the paused flag and sets the external pause event so the main
	run loop, blocked in wait_until_resumed(), continues where it left off.
	"""
	# TODO: Locally the browser got closed
	print('----------------------------------------------------------------------')
	# NOTE: merge residue previously printed two banners here (an older
	# "Got Enter" one and this generic one); keep only the current message.
	print('▶️ Resuming agent execution where it left off...\n')
	self.state.paused = False
	self._external_pause_event.set()
	# The signal handler already reset its own flags via reset() inside run().
def stop(self) -> None:
	"""Request that the agent stop before its next step.

	Only flips the stopped flag and wakes anything blocked on the pause
	event; the run loop performs the actual shutdown when it observes the flag.
	"""
	self.logger.info('⏹️ Agent stopping')
	# Set the flag BEFORE signalling the event so any waiter that wakes up
	# immediately sees state.stopped == True.
	self.state.stopped = True
	# Signal pause event to unblock any waiting code (e.g. wait_until_resumed)
	# so it can re-check the stopped state instead of sleeping forever.
	self._external_pause_event.set()
def _convert_initial_actions(self, actions: list[dict[str, dict[str, Any]]]) -> list[ActionModel]:

View File

@@ -61,7 +61,7 @@ Examples:
Note that:
- Only elements with numeric indexes in [] are interactive
- (stacked) indentation (with \t) is important and means that the element is a (html) child of the element above (with a lower index)
- Elements tagged with `*[` are the new clickable elements that appeared on the website since the last step - if url has not changed.
- Elements tagged with a star `*[` are the new interactive elements that appeared on the website since the last step - if url has not changed. Your previous actions caused that change. Think if you need to interact with them, e.g. after input_text you might need to select the right option from the list.
- Pure text elements without [] are not interactive.
</browser_state>

View File

@@ -59,7 +59,7 @@ Examples:
Note that:
- Only elements with numeric indexes in [] are interactive
- (stacked) indentation (with \t) is important and means that the element is a (html) child of the element above (with a lower index)
- Elements tagged with `*[` are the new clickable elements that appeared on the website since the last step - if url has not changed.
- Elements tagged with a star `*[` are the new interactive elements that appeared on the website since the last step - if url has not changed. Your previous actions caused that change. Think if you need to interact with them, e.g. after input_text you might need to select the right option from the list.
- Pure text elements without [] are not interactive.
</browser_state>

View File

@@ -61,7 +61,7 @@ Examples:
Note that:
- Only elements with numeric indexes in [] are interactive
- (stacked) indentation (with \t) is important and means that the element is a (html) child of the element above (with a lower index)
- Elements tagged with `*[` are the new clickable elements that appeared on the website since the last step - if url has not changed.
- Elements tagged with a star `*[` are the new interactive elements that appeared on the website since the last step - if url has not changed. Your previous actions caused that change. Think if you need to interact with them, e.g. after input_text you might need to select the right option from the list.
- Pure text elements without [] are not interactive.
</browser_state>

View File

@@ -39,7 +39,7 @@ class AgentSettings(BaseModel):
override_system_message: str | None = None
extend_system_message: str | None = None
include_attributes: list[str] | None = DEFAULT_INCLUDE_ATTRIBUTES
max_actions_per_step: int = 10
max_actions_per_step: int = 4
use_thinking: bool = True
flash_mode: bool = False # If enabled, disables evaluation_previous_goal and next_goal, and sets use_thinking = False
max_history_items: int | None = None
@@ -49,17 +49,22 @@ class AgentSettings(BaseModel):
include_tool_call_examples: bool = False
llm_timeout: int = 60 # Timeout in seconds for LLM calls
step_timeout: int = 180 # Timeout in seconds for each step
final_response_after_failure: bool = True # If True, attempt one final recovery call after max_failures
class AgentState(BaseModel):
"""Holds all state information for an Agent"""
model_config = ConfigDict(arbitrary_types_allowed=True)
agent_id: str = Field(default_factory=uuid7str)
n_steps: int = 1
consecutive_failures: int = 0
last_result: list[ActionResult] | None = None
last_plan: str | None = None
last_model_output: AgentOutput | None = None
# Pause/resume state (kept serialisable for checkpointing)
paused: bool = False
stopped: bool = False
session_initialized: bool = False # Track if session events have been dispatched
@@ -68,9 +73,6 @@ class AgentState(BaseModel):
message_manager_state: MessageManagerState = Field(default_factory=MessageManagerState)
file_system_state: FileSystemState | None = None
# class Config:
# arbitrary_types_allowed = True
@dataclass
class AgentStepInfo:

View File

@@ -1,6 +1,7 @@
"""Event definitions for browser communication."""
import inspect
import os
from typing import Any, Literal
from bubus import BaseEvent
@@ -11,6 +12,37 @@ from pydantic import BaseModel, Field, field_validator
from browser_use.browser.views import BrowserStateSummary
from browser_use.dom.views import EnhancedDOMTreeNode
def _get_timeout(env_var: str, default: float) -> float | None:
"""
Safely parse environment variable timeout values with robust error handling.
Args:
env_var: Environment variable name (e.g. 'TIMEOUT_NavigateToUrlEvent')
default: Default timeout value as float (e.g. 15.0)
Returns:
Parsed float value or the default if parsing fails
Raises:
ValueError: Only if both env_var and default are invalid (should not happen with valid defaults)
"""
# Try environment variable first
env_value = os.getenv(env_var)
if env_value:
try:
parsed = float(env_value)
if parsed < 0:
print(f'Warning: {env_var}={env_value} is negative, using default {default}')
return default
return parsed
except (ValueError, TypeError):
print(f'Warning: {env_var}={env_value} is not a valid number, using default {default}')
# Fall back to default
return default
# ============================================================================
# Agent/Tools -> BrowserSession Events (High-level browser actions)
# ============================================================================
@@ -88,7 +120,7 @@ class NavigateToUrlEvent(BaseEvent[None]):
# existing_tab: PageHandle | None = None # TODO
# time limits enforced by bubus, not exposed to LLM:
event_timeout: float | None = 15.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_NavigateToUrlEvent', 15.0) # seconds
class ClickElementEvent(ElementSelectedEvent[dict[str, Any] | None]):
@@ -103,7 +135,7 @@ class ClickElementEvent(ElementSelectedEvent[dict[str, Any] | None]):
# click_count: int = 1 # TODO
# expect_download: bool = False # moved to downloads_watchdog.py
event_timeout: float | None = 15.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_ClickElementEvent', 15.0) # seconds
class TypeTextEvent(ElementSelectedEvent[dict | None]):
@@ -113,7 +145,7 @@ class TypeTextEvent(ElementSelectedEvent[dict | None]):
text: str
clear_existing: bool = True
event_timeout: float | None = 15.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_TypeTextEvent', 15.0) # seconds
class ScrollEvent(ElementSelectedEvent[None]):
@@ -123,7 +155,7 @@ class ScrollEvent(ElementSelectedEvent[None]):
amount: int # pixels
node: 'EnhancedDOMTreeNode | None' = None # None means scroll page
event_timeout: float | None = 8.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_ScrollEvent', 8.0) # seconds
class SwitchTabEvent(BaseEvent[TargetID]):
@@ -131,7 +163,7 @@ class SwitchTabEvent(BaseEvent[TargetID]):
target_id: TargetID | None = Field(default=None, description='None means switch to the most recently opened tab')
event_timeout: float | None = 10.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_SwitchTabEvent', 10.0) # seconds
class CloseTabEvent(BaseEvent[None]):
@@ -139,7 +171,7 @@ class CloseTabEvent(BaseEvent[None]):
target_id: TargetID
event_timeout: float | None = 10.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_CloseTabEvent', 10.0) # seconds
class ScreenshotEvent(BaseEvent[str]):
@@ -148,7 +180,7 @@ class ScreenshotEvent(BaseEvent[str]):
full_page: bool = False
clip: dict[str, float] | None = None # {x, y, width, height}
event_timeout: float | None = 8.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_ScreenshotEvent', 8.0) # seconds
class BrowserStateRequestEvent(BaseEvent[BrowserStateSummary]):
@@ -159,7 +191,7 @@ class BrowserStateRequestEvent(BaseEvent[BrowserStateSummary]):
cache_clickable_elements_hashes: bool = True
include_recent_events: bool = False
event_timeout: float | None = 30.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_BrowserStateRequestEvent', 30.0) # seconds
# class WaitForConditionEvent(BaseEvent):
@@ -174,19 +206,19 @@ class BrowserStateRequestEvent(BaseEvent[BrowserStateSummary]):
class GoBackEvent(BaseEvent[None]):
"""Navigate back in browser history."""
event_timeout: float | None = 15.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_GoBackEvent', 15.0) # seconds
class GoForwardEvent(BaseEvent[None]):
"""Navigate forward in browser history."""
event_timeout: float | None = 15.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_GoForwardEvent', 15.0) # seconds
class RefreshEvent(BaseEvent[None]):
"""Refresh/reload the current page."""
event_timeout: float | None = 15.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_RefreshEvent', 15.0) # seconds
class WaitEvent(BaseEvent[None]):
@@ -195,7 +227,7 @@ class WaitEvent(BaseEvent[None]):
seconds: float = 3.0
max_seconds: float = 10.0 # Safety cap
event_timeout: float | None = 60.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_WaitEvent', 60.0) # seconds
class SendKeysEvent(BaseEvent[None]):
@@ -203,7 +235,7 @@ class SendKeysEvent(BaseEvent[None]):
keys: str # e.g., "ctrl+a", "cmd+c", "Enter"
event_timeout: float | None = 15.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_SendKeysEvent', 15.0) # seconds
class UploadFileEvent(ElementSelectedEvent[None]):
@@ -212,7 +244,7 @@ class UploadFileEvent(ElementSelectedEvent[None]):
node: 'EnhancedDOMTreeNode'
file_path: str
event_timeout: float | None = 30.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_UploadFileEvent', 30.0) # seconds
class GetDropdownOptionsEvent(ElementSelectedEvent[dict[str, str]]):
@@ -222,9 +254,10 @@ class GetDropdownOptionsEvent(ElementSelectedEvent[dict[str, str]]):
node: 'EnhancedDOMTreeNode'
event_timeout: float | None = (
15.0 # some dropdowns lazy-load the list of options on first interaction, so we need to wait for them to load (e.g. table filter lists can have thousands of options)
)
event_timeout: float | None = _get_timeout(
'TIMEOUT_GetDropdownOptionsEvent',
15.0,
) # some dropdowns lazy-load the list of options on first interaction, so we need to wait for them to load (e.g. table filter lists can have thousands of options)
class SelectDropdownOptionEvent(ElementSelectedEvent[dict[str, str]]):
@@ -235,7 +268,7 @@ class SelectDropdownOptionEvent(ElementSelectedEvent[dict[str, str]]):
node: 'EnhancedDOMTreeNode'
text: str # The option text to select
event_timeout: float | None = 8.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_SelectDropdownOptionEvent', 8.0) # seconds
class ScrollToTextEvent(BaseEvent[None]):
@@ -244,7 +277,7 @@ class ScrollToTextEvent(BaseEvent[None]):
text: str
direction: Literal['up', 'down'] = 'down'
event_timeout: float | None = 15.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_ScrollToTextEvent', 15.0) # seconds
# ============================================================================
@@ -256,7 +289,7 @@ class BrowserStartEvent(BaseEvent):
cdp_url: str | None = None
launch_options: dict[str, Any] = Field(default_factory=dict)
event_timeout: float | None = 30.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_BrowserStartEvent', 30.0) # seconds
class BrowserStopEvent(BaseEvent):
@@ -264,7 +297,7 @@ class BrowserStopEvent(BaseEvent):
force: bool = False
event_timeout: float | None = 45.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_BrowserStopEvent', 45.0) # seconds
class BrowserLaunchResult(BaseModel):
@@ -279,13 +312,13 @@ class BrowserLaunchEvent(BaseEvent[BrowserLaunchResult]):
# TODO: add executable_path, proxy settings, preferences, extra launch args, etc.
event_timeout: float | None = 30.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_BrowserLaunchEvent', 30.0) # seconds
class BrowserKillEvent(BaseEvent):
"""Kill local browser subprocess."""
event_timeout: float | None = 30.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_BrowserKillEvent', 30.0) # seconds
# TODO: replace all Runtime.evaluate() calls with this event
@@ -338,7 +371,7 @@ class BrowserConnectedEvent(BaseEvent):
cdp_url: str
event_timeout: float | None = 30.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_BrowserConnectedEvent', 30.0) # seconds
class BrowserStoppedEvent(BaseEvent):
@@ -346,7 +379,7 @@ class BrowserStoppedEvent(BaseEvent):
reason: str | None = None
event_timeout: float | None = 30.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_BrowserStoppedEvent', 30.0) # seconds
class TabCreatedEvent(BaseEvent):
@@ -355,7 +388,7 @@ class TabCreatedEvent(BaseEvent):
target_id: TargetID
url: str
event_timeout: float | None = 30.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_TabCreatedEvent', 30.0) # seconds
class TabClosedEvent(BaseEvent):
@@ -367,7 +400,7 @@ class TabClosedEvent(BaseEvent):
# new_focus_target_id: int | None = None
# new_focus_url: str | None = None
event_timeout: float | None = 10.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_TabClosedEvent', 10.0) # seconds
# TODO: emit this when DOM changes significantly, inner frame navigates, form submits, history.pushState(), etc.
@@ -384,7 +417,7 @@ class AgentFocusChangedEvent(BaseEvent):
target_id: TargetID
url: str
event_timeout: float | None = 10.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_AgentFocusChangedEvent', 10.0) # seconds
class TargetCrashedEvent(BaseEvent):
@@ -393,7 +426,7 @@ class TargetCrashedEvent(BaseEvent):
target_id: TargetID
error: str
event_timeout: float | None = 10.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_TargetCrashedEvent', 10.0) # seconds
class NavigationStartedEvent(BaseEvent):
@@ -402,7 +435,7 @@ class NavigationStartedEvent(BaseEvent):
target_id: TargetID
url: str
event_timeout: float | None = 30.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_NavigationStartedEvent', 30.0) # seconds
class NavigationCompleteEvent(BaseEvent):
@@ -414,7 +447,7 @@ class NavigationCompleteEvent(BaseEvent):
error_message: str | None = None # Error/timeout message if navigation had issues
loading_status: str | None = None # Detailed loading status (e.g., network timeout info)
event_timeout: float | None = 30.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_NavigationCompleteEvent', 30.0) # seconds
# ============================================================================
@@ -429,7 +462,7 @@ class BrowserErrorEvent(BaseEvent):
message: str
details: dict[str, Any] = Field(default_factory=dict)
event_timeout: float | None = 30.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_BrowserErrorEvent', 30.0) # seconds
# ============================================================================
@@ -442,7 +475,7 @@ class SaveStorageStateEvent(BaseEvent):
path: str | None = None # Optional path, uses profile default if not provided
event_timeout: float | None = 45.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_SaveStorageStateEvent', 45.0) # seconds
class StorageStateSavedEvent(BaseEvent):
@@ -452,7 +485,7 @@ class StorageStateSavedEvent(BaseEvent):
cookies_count: int
origins_count: int
event_timeout: float | None = 30.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_StorageStateSavedEvent', 30.0) # seconds
class LoadStorageStateEvent(BaseEvent):
@@ -460,7 +493,7 @@ class LoadStorageStateEvent(BaseEvent):
path: str | None = None # Optional path, uses profile default if not provided
event_timeout: float | None = 45.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_LoadStorageStateEvent', 45.0) # seconds
# TODO: refactor this to:
@@ -474,7 +507,7 @@ class StorageStateLoadedEvent(BaseEvent):
cookies_count: int
origins_count: int
event_timeout: float | None = 30.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_StorageStateLoadedEvent', 30.0) # seconds
# ============================================================================
@@ -494,7 +527,7 @@ class FileDownloadedEvent(BaseEvent):
from_cache: bool = False
auto_download: bool = False # Whether this was an automatic download (e.g., PDF auto-download)
event_timeout: float | None = 30.0 # seconds
event_timeout: float | None = _get_timeout('TIMEOUT_FileDownloadedEvent', 30.0) # seconds
class AboutBlankDVDScreensaverShownEvent(BaseEvent):
@@ -510,7 +543,7 @@ class DialogOpenedEvent(BaseEvent):
dialog_type: str # 'alert', 'confirm', 'prompt', or 'beforeunload'
message: str
url: str
frame_id: str
frame_id: str | None = None # Can be None when frameId is not provided by CDP
# target_id: TargetID # TODO: add this to avoid needing target_id_from_frame() later

View File

@@ -10,7 +10,6 @@ from urllib.parse import urlparse
from pydantic import AfterValidator, AliasChoices, BaseModel, ConfigDict, Field, field_validator, model_validator
from browser_use.config import CONFIG
from browser_use.observability import observe_debug
from browser_use.utils import _log_pretty_path, logger
CHROME_DEBUG_PORT = 9242 # use a non-default port to avoid conflicts with other tools / devs using 9222
@@ -616,6 +615,18 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
# save_har_path: alias of record_har_path
# trace_path: alias of traces_dir
# these shadow the old playwright args on BrowserContextArgs, but it's ok
# because we handle them ourselves in a watchdog and we no longer use playwright, so they should live in the scope for our own config in BrowserProfile long-term
record_video_dir: Path | None = Field(
default=None,
description='Directory to save video recordings. If set, a video of the session will be recorded.',
validation_alias=AliasChoices('save_recording_path', 'record_video_dir'),
)
record_video_size: ViewportSize | None = Field(
default=None, description='Video frame size. If not set, it will use the viewport size.'
)
record_video_framerate: int = Field(default=30, description='The framerate to use for the video recording.')
# TODO: finish implementing extension support in extensions.py
# extension_ids_to_preinstall: list[str] = Field(
# default_factory=list, description='List of Chrome extension IDs to preinstall.'
@@ -747,6 +758,10 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
if proxy_bypass:
pre_conversion_args.append(f'--proxy-bypass-list={proxy_bypass}')
# User agent flag
if self.user_agent:
pre_conversion_args.append(f'--user-agent={self.user_agent}')
# Special handling for --disable-features to merge values instead of overwriting
# This prevents disable_security=True from breaking extensions by ensuring
# both default features (including extension-related) and security features are preserved
@@ -776,6 +791,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
# convert to dict and back to dedupe and merge other duplicate args
final_args_list = BrowserLaunchArgs.args_as_list(BrowserLaunchArgs.args_as_dict(non_disable_features_args))
return final_args_list
def _get_extension_args(self) -> list[str]:
@@ -1016,7 +1032,6 @@ async function initialize(checkInitialized, magic) {{
os.unlink(temp_zip.name)
@observe_debug(ignore_input=True, ignore_output=True, name='detect_display_configuration')
def detect_display_configuration(self) -> None:
"""
Detect the system display size and initialize the display-related config defaults:
@@ -1031,36 +1046,43 @@ async function initialize(checkInitialized, magic) {{
if self.headless is None:
self.headless = not has_screen_available
# set up window size and position if headful
# Determine viewport behavior based on mode and user preferences
user_provided_viewport = self.viewport is not None
if self.headless:
# headless mode: no window available, use viewport instead to constrain content size
# Headless mode: always use viewport for content size control
self.viewport = self.viewport or self.window_size or self.screen
self.window_position = None # no windows to position in headless mode
self.window_position = None
self.window_size = None
self.no_viewport = False # viewport is always enabled in headless mode
self.no_viewport = False
else:
# headful mode: use window, disable viewport by default, content fits to size of window
# Headful mode: respect user's viewport preference
self.window_size = self.window_size or self.screen
self.no_viewport = True if self.no_viewport is None else self.no_viewport
self.viewport = None if self.no_viewport else self.viewport
# automatically setup viewport if any config requires it
use_viewport = self.headless or self.viewport or self.device_scale_factor
self.no_viewport = not use_viewport if self.no_viewport is None else self.no_viewport
use_viewport = not self.no_viewport
if user_provided_viewport:
# User explicitly set viewport - enable viewport mode
self.no_viewport = False
else:
# Default headful: content fits to window (no viewport)
self.no_viewport = True if self.no_viewport is None else self.no_viewport
if use_viewport:
# if we are using viewport, make device_scale_factor and screen are set to real values to avoid easy fingerprinting
# Handle special requirements (device_scale_factor forces viewport mode)
if self.device_scale_factor and self.no_viewport is None:
self.no_viewport = False
# Finalize configuration
if self.no_viewport:
# No viewport mode: content adapts to window
self.viewport = None
self.device_scale_factor = None
self.screen = None
assert self.viewport is None
assert self.no_viewport is True
else:
# Viewport mode: ensure viewport is set
self.viewport = self.viewport or self.screen
self.device_scale_factor = self.device_scale_factor or 1.0
assert self.viewport is not None
assert self.no_viewport is False
else:
# device_scale_factor and screen are not supported non-viewport mode, the system monitor determines these
self.viewport = None
self.device_scale_factor = None # only supported in viewport mode
self.screen = None # only supported in viewport mode
assert self.viewport is None
assert self.no_viewport is True
assert not (self.headless and self.no_viewport), 'headless=True and no_viewport=True cannot both be set at the same time'

View File

@@ -18,6 +18,57 @@ from browser_use.utils import time_execution_async
logger = logging.getLogger(__name__)
# Font cache to prevent repeated font loading and reduce memory usage
_FONT_CACHE: dict[tuple[str, int], ImageFont.FreeTypeFont | None] = {}
# Cross-platform font paths
_FONT_PATHS = [
'/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', # Linux (Debian/Ubuntu)
'/usr/share/fonts/TTF/DejaVuSans-Bold.ttf', # Linux (Arch/Fedora)
'/System/Library/Fonts/Arial.ttf', # macOS
'C:\\Windows\\Fonts\\arial.ttf', # Windows
'arial.ttf', # Windows (system path)
'Arial Bold.ttf', # macOS alternative
'/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf', # Linux alternative
]
def get_cross_platform_font(font_size: int) -> ImageFont.FreeTypeFont | None:
	"""Load a system font at *font_size*, caching the result per size.

	Tries each candidate path in _FONT_PATHS until one loads. The outcome —
	including a failed lookup, stored as None — is cached so repeated calls
	never re-scan the filesystem.

	Args:
		font_size: Point size of the font to load.

	Returns:
		A loaded FreeType font, or None when no candidate path is usable.
	"""
	key = ('system_font', font_size)
	try:
		# Fast path: size already resolved (possibly to None) on a prior call.
		return _FONT_CACHE[key]
	except KeyError:
		pass

	loaded = None
	for candidate in _FONT_PATHS:
		try:
			loaded = ImageFont.truetype(candidate, font_size)
		except OSError:
			continue
		break

	# Cache even a miss (None) to avoid repeated filesystem probing.
	_FONT_CACHE[key] = loaded
	return loaded
def cleanup_font_cache() -> None:
	"""Drop every cached font so long-running applications can reclaim memory."""
	# clear() mutates the shared dict in place, so no `global` rebinding is needed.
	_FONT_CACHE.clear()
# Color scheme for different element types
ELEMENT_COLORS = {
'button': '#FF6B6B', # Red for buttons
@@ -102,18 +153,10 @@ def draw_enhanced_bounding_box_with_text(
css_width = img_width # / device_pixel_ratio
# Much smaller scaling - 1% of CSS viewport width, max 16px to prevent huge highlights
base_font_size = max(10, min(20, int(css_width * 0.01)))
big_font = None
try:
big_font = ImageFont.truetype('/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', base_font_size)
except OSError:
try:
big_font = ImageFont.truetype('arial.ttf', base_font_size)
except OSError:
# Try system fonts on different platforms
try:
big_font = ImageFont.truetype('Arial Bold.ttf', base_font_size)
except OSError:
big_font = font # Fallback to original font
# Use shared font loading function with caching
big_font = get_cross_platform_font(base_font_size)
if big_font is None:
big_font = font # Fallback to original font if no system fonts found
# Get text size with bigger font
if big_font:
@@ -391,15 +434,9 @@ async def create_highlighted_screenshot(
# Create drawing context
draw = ImageDraw.Draw(image)
# Try to load a font, fall back to default if not available
font = None
try:
font = ImageFont.truetype('/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', 12)
except OSError:
try:
font = ImageFont.truetype('arial.ttf', 12)
except OSError:
font = None # Use default font
# Load font using shared function with caching
font = get_cross_platform_font(12)
# If no system fonts found, font remains None and will use default font
# Process elements sequentially to avoid ImageDraw thread safety issues
# PIL ImageDraw is not thread-safe, so we process elements one by one
@@ -408,16 +445,24 @@ async def create_highlighted_screenshot(
# Convert back to base64
output_buffer = io.BytesIO()
image.save(output_buffer, format='PNG')
output_buffer.seek(0)
try:
image.save(output_buffer, format='PNG')
output_buffer.seek(0)
highlighted_b64 = base64.b64encode(output_buffer.getvalue()).decode('utf-8')
highlighted_b64 = base64.b64encode(output_buffer.getvalue()).decode('utf-8')
logger.debug(f'Successfully created highlighted screenshot with {len(selector_map)} elements')
return highlighted_b64
logger.debug(f'Successfully created highlighted screenshot with {len(selector_map)} elements')
return highlighted_b64
finally:
# Explicit cleanup to prevent memory leaks
output_buffer.close()
if 'image' in locals():
image.close()
except Exception as e:
logger.error(f'Failed to create highlighted screenshot: {e}')
# Clean up on error as well
if 'image' in locals():
image.close()
# Return original screenshot on error
return screenshot_b64
@@ -463,6 +508,7 @@ async def create_highlighted_screenshot_async(
screenshot_b64: Base64 encoded screenshot
selector_map: Map of interactive elements
cdp_session: CDP session for getting viewport info
filter_highlight_ids: Whether to filter element IDs based on meaningful text
Returns:
Base64 encoded highlighted screenshot
@@ -496,3 +542,7 @@ async def create_highlighted_screenshot_async(
await asyncio.to_thread(_write_screenshot)
return final_screenshot
# Export the cleanup function for external use in long-running applications
__all__ = ['create_highlighted_screenshot', 'create_highlighted_screenshot_async', 'cleanup_font_cache']

View File

@@ -44,9 +44,6 @@ from browser_use.utils import _log_pretty_url, is_new_tab_page
DEFAULT_BROWSER_PROFILE = BrowserProfile()
MAX_SCREENSHOT_HEIGHT = 2000
MAX_SCREENSHOT_WIDTH = 1920
_LOGGED_UNIQUE_SESSION_IDS = set() # track unique session IDs that have been logged to make sure we always assign a unique enough id to new sessions and avoid ambiguity in logs
red = '\033[91m'
reset = '\033[0m'
@@ -247,6 +244,8 @@ class BrowserSession(BaseModel):
record_har_mode: str | None = None,
record_har_path: str | Path | None = None,
record_video_dir: str | Path | None = None,
record_video_framerate: int | None = None,
record_video_size: dict | None = None,
# From BrowserLaunchPersistentContextArgs
user_data_dir: str | Path | None = None,
# From BrowserNewContextArgs
@@ -338,6 +337,7 @@ class BrowserSession(BaseModel):
_dom_watchdog: Any | None = PrivateAttr(default=None)
_screenshot_watchdog: Any | None = PrivateAttr(default=None)
_permissions_watchdog: Any | None = PrivateAttr(default=None)
_recording_watchdog: Any | None = PrivateAttr(default=None)
_logger: Any = PrivateAttr(default=None)
@@ -404,6 +404,7 @@ class BrowserSession(BaseModel):
self._dom_watchdog = None
self._screenshot_watchdog = None
self._permissions_watchdog = None
self._recording_watchdog = None
def model_post_init(self, __context) -> None:
"""Register event handlers after model initialization."""
@@ -425,6 +426,7 @@ class BrowserSession(BaseModel):
BaseWatchdog.attach_handler_to_session(self, BrowserStopEvent, self.on_BrowserStopEvent)
BaseWatchdog.attach_handler_to_session(self, NavigateToUrlEvent, self.on_NavigateToUrlEvent)
BaseWatchdog.attach_handler_to_session(self, SwitchTabEvent, self.on_SwitchTabEvent)
BaseWatchdog.attach_handler_to_session(self, TabCreatedEvent, self.on_TabCreatedEvent)
BaseWatchdog.attach_handler_to_session(self, TabClosedEvent, self.on_TabClosedEvent)
BaseWatchdog.attach_handler_to_session(self, AgentFocusChangedEvent, self.on_AgentFocusChangedEvent)
BaseWatchdog.attach_handler_to_session(self, FileDownloadedEvent, self.on_FileDownloadedEvent)
@@ -707,6 +709,22 @@ class BrowserSession(BaseModel):
await self.event_bus.dispatch(TabClosedEvent(target_id=event.target_id))
await cdp_session.cdp_client.send.Target.closeTarget(params={'targetId': event.target_id})
async def on_TabCreatedEvent(self, event: TabCreatedEvent) -> None:
	"""Handle tab creation - apply viewport settings to new tab."""
	profile = self.browser_profile
	if not profile.viewport or profile.no_viewport:
		# No viewport configured (or viewport mode disabled): nothing to apply.
		return
	try:
		width = profile.viewport.width
		height = profile.viewport.height
		scale = profile.device_scale_factor or 1.0
		# Use the helper method with the new tab's target_id
		await self._cdp_set_viewport(width, height, scale, target_id=event.target_id)
		self.logger.debug(f'Applied viewport {width}x{height} to tab {event.target_id[-8:]}')
	except Exception as e:
		self.logger.warning(f'Failed to set viewport for new tab {event.target_id[-8:]}: {e}')
async def on_TabClosedEvent(self, event: TabClosedEvent) -> None:
"""Handle tab closure - update focus if needed."""
if not self.agent_focus:
@@ -955,9 +973,10 @@ class BrowserSession(BaseModel):
from browser_use.browser.watchdogs.local_browser_watchdog import LocalBrowserWatchdog
from browser_use.browser.watchdogs.permissions_watchdog import PermissionsWatchdog
from browser_use.browser.watchdogs.popups_watchdog import PopupsWatchdog
from browser_use.browser.watchdogs.recording_watchdog import RecordingWatchdog
from browser_use.browser.watchdogs.screenshot_watchdog import ScreenshotWatchdog
from browser_use.browser.watchdogs.security_watchdog import SecurityWatchdog
# from browser_use.browser.storage_state_watchdog import StorageStateWatchdog
from browser_use.browser.watchdogs.storage_state_watchdog import StorageStateWatchdog
# Initialize CrashWatchdog
# CrashWatchdog.model_rebuild()
@@ -978,14 +997,27 @@ class BrowserSession(BaseModel):
if self.browser_profile.auto_download_pdfs:
self.logger.debug('📄 PDF auto-download enabled for this session')
# # Initialize StorageStateWatchdog
# StorageStateWatchdog.model_rebuild()
# self._storage_state_watchdog = StorageStateWatchdog(event_bus=self.event_bus, browser_session=self)
# # self.event_bus.on(BrowserConnectedEvent, self._storage_state_watchdog.on_BrowserConnectedEvent)
# # self.event_bus.on(BrowserStopEvent, self._storage_state_watchdog.on_BrowserStopEvent)
# # self.event_bus.on(SaveStorageStateEvent, self._storage_state_watchdog.on_SaveStorageStateEvent)
# # self.event_bus.on(LoadStorageStateEvent, self._storage_state_watchdog.on_LoadStorageStateEvent)
# self._storage_state_watchdog.attach_to_session()
# Initialize StorageStateWatchdog conditionally
# Enable when user provides either storage_state or user_data_dir (indicating they want persistence)
should_enable_storage_state = (
self.browser_profile.storage_state is not None or self.browser_profile.user_data_dir is not None
)
if should_enable_storage_state:
StorageStateWatchdog.model_rebuild()
self._storage_state_watchdog = StorageStateWatchdog(
event_bus=self.event_bus,
browser_session=self,
# More conservative defaults when auto-enabled
auto_save_interval=60.0, # 1 minute instead of 30 seconds
save_on_change=False, # Only save on shutdown by default
)
self._storage_state_watchdog.attach_to_session()
self.logger.debug(
f'🍪 StorageStateWatchdog enabled (storage_state: {bool(self.browser_profile.storage_state)}, user_data_dir: {bool(self.browser_profile.user_data_dir)})'
)
else:
self.logger.debug('🍪 StorageStateWatchdog disabled (no storage_state or user_data_dir configured)')
# Initialize LocalBrowserWatchdog
LocalBrowserWatchdog.model_rebuild()
@@ -1054,6 +1086,11 @@ class BrowserSession(BaseModel):
# self.event_bus.on(BrowserStateRequestEvent, self._dom_watchdog.on_BrowserStateRequestEvent)
self._dom_watchdog.attach_to_session()
# Initialize RecordingWatchdog (handles video recording)
RecordingWatchdog.model_rebuild()
self._recording_watchdog = RecordingWatchdog(event_bus=self.event_bus, browser_session=self)
self._recording_watchdog.attach_to_session()
# Mark watchdogs as attached to prevent duplicate attachment
self._watchdogs_attached = True
@@ -1631,7 +1668,7 @@ class BrowserSession(BaseModel):
"""Get list of files downloaded during this browser session.
Returns:
list[str]: List of absolute file paths to downloaded files in this session
list[str]: List of absolute file paths to downloaded files in this session
"""
return self._downloaded_files.copy()
@@ -1758,22 +1795,119 @@ class BrowserSession(BaseModel):
params={'identifier': identifier}, session_id=cdp_session.session_id
)
async def _cdp_set_viewport(
    self, width: int, height: int, device_scale_factor: float = 1.0, mobile: bool = False, target_id: str | None = None
) -> None:
    """Set viewport using CDP Emulation.setDeviceMetricsOverride.

    Note: the merge left the old (pre-target_id) definition of this method
    immediately above the new one; only this definition is kept.

    Args:
        width: Viewport width in CSS pixels.
        height: Viewport height in CSS pixels.
        device_scale_factor: Device scale factor (default 1.0).
        mobile: Whether to emulate a mobile device (default False).
        target_id: Optional target ID to set viewport for. If not provided, uses agent_focus.
    """
    if target_id:
        # Set viewport for a specific target; focus=False so background tabs stay in background
        cdp_session = await self.get_or_create_cdp_session(target_id, focus=False, new_socket=False)
    elif self.agent_focus:
        # Fall back to the currently focused target's session
        cdp_session = self.agent_focus
    else:
        # Nothing sensible to target — warn and bail rather than raise
        self.logger.warning('Cannot set viewport: no target_id provided and agent_focus not initialized')
        return

    await cdp_session.cdp_client.send.Emulation.setDeviceMetricsOverride(
        params={'width': width, 'height': height, 'deviceScaleFactor': device_scale_factor, 'mobile': mobile},
        session_id=cdp_session.session_id,
    )
async def _cdp_get_origins(self) -> list[dict[str, Any]]:
    """Get origins with localStorage and sessionStorage using CDP.

    Walks the frame tree of the current session's target to collect unique
    security origins, then queries DOMStorage for each one. Origins with no
    stored items are omitted from the result.

    Returns:
        List of dicts of the form
        {'origin': str, 'localStorage': [...], 'sessionStorage': [...]},
        where each storage key is present only when it has at least one item.
    """
    origins: list[dict[str, Any]] = []
    cdp_session = await self.get_or_create_cdp_session(target_id=None, new_socket=False)

    try:
        # Enable DOMStorage domain to track storage
        await cdp_session.cdp_client.send.DOMStorage.enable(session_id=cdp_session.session_id)
        try:
            # Get all frames to find unique origins
            frames_result = await cdp_session.cdp_client.send.Page.getFrameTree(session_id=cdp_session.session_id)

            # Extract unique origins from frames
            unique_origins = set()

            def _extract_origins(frame_tree):
                """Recursively extract origins from frame tree."""
                frame = frame_tree.get('frame', {})
                origin = frame.get('securityOrigin')
                # 'null' is the literal string reported for opaque origins — skip those
                if origin and origin != 'null':
                    unique_origins.add(origin)

                # Process child frames
                for child in frame_tree.get('childFrames', []):
                    _extract_origins(child)

            async def _get_storage_items(origin: str, is_local_storage: bool) -> list[dict[str, str]] | None:
                """Helper to get storage items for an origin.

                Returns None (rather than []) when the origin has no items or
                the CDP call fails, so callers can use a simple truthiness check.
                """
                storage_type = 'localStorage' if is_local_storage else 'sessionStorage'
                try:
                    result = await cdp_session.cdp_client.send.DOMStorage.getDOMStorageItems(
                        params={'storageId': {'securityOrigin': origin, 'isLocalStorage': is_local_storage}},
                        session_id=cdp_session.session_id,
                    )
                    items = []
                    for item in result.get('entries', []):
                        if len(item) == 2:  # Each item is [key, value]
                            items.append({'name': item[0], 'value': item[1]})
                    return items if items else None
                except Exception as e:
                    # Per-origin failures are expected (e.g. sandboxed frames) — log at debug only
                    self.logger.debug(f'Failed to get {storage_type} for {origin}: {e}')
                    return None

            _extract_origins(frames_result.get('frameTree', {}))

            # For each unique origin, get localStorage and sessionStorage
            for origin in unique_origins:
                origin_data = {'origin': origin}

                # Get localStorage
                local_storage = await _get_storage_items(origin, is_local_storage=True)
                if local_storage:
                    origin_data['localStorage'] = local_storage

                # Get sessionStorage
                session_storage = await _get_storage_items(origin, is_local_storage=False)
                if session_storage:
                    origin_data['sessionStorage'] = session_storage

                # Only add origin if it has storage data
                if 'localStorage' in origin_data or 'sessionStorage' in origin_data:
                    origins.append(origin_data)
        finally:
            # Always disable DOMStorage tracking when done
            await cdp_session.cdp_client.send.DOMStorage.disable(session_id=cdp_session.session_id)
    except Exception as e:
        # Storage extraction is best-effort; return whatever was collected so far
        self.logger.warning(f'Failed to get origins: {e}')

    return origins
async def _cdp_get_storage_state(self) -> dict:
    """Get storage state (cookies, localStorage, sessionStorage) using CDP.

    Returns:
        dict with 'cookies' (list of cookie dicts from CDP) and 'origins'
        (per-origin localStorage/sessionStorage entries), matching the
        Playwright storage_state shape.
    """
    # Use the _cdp_get_cookies helper which handles session attachment
    cookies = await self._cdp_get_cookies()

    # Get origins with localStorage/sessionStorage
    origins = await self._cdp_get_origins()

    # Merge residue previously left a duplicate 'origins' key here
    # ([] from the old side, `origins` from the new side) — keep only the real value.
    return {
        'cookies': cookies,
        'origins': origins,
    }
async def _cdp_navigate(self, url: str, target_id: TargetID | None = None) -> None:

View File

@@ -0,0 +1,162 @@
"""Video Recording Service for Browser Use Sessions."""
import base64
import logging
import math
import subprocess
from pathlib import Path
from typing import Optional
from browser_use.browser.profile import ViewportSize
try:
import imageio.v2 as iio
import imageio_ffmpeg
import numpy as np
from imageio.core.format import Format
IMAGEIO_AVAILABLE = True
except ImportError:
IMAGEIO_AVAILABLE = False
logger = logging.getLogger(__name__)
def _get_padded_size(size: ViewportSize, macro_block_size: int = 16) -> ViewportSize:
    """Round each dimension of *size* up to a whole multiple of ``macro_block_size``.

    Video codecs such as libx264 encode in macro blocks, so frame dimensions
    that are not multiples of the block size must be padded before encoding.
    """

    def _round_up(value: int) -> int:
        # Ceiling of value / macro_block_size, scaled back up to pixels
        return int(math.ceil(value / macro_block_size)) * macro_block_size

    return ViewportSize(width=_round_up(size['width']), height=_round_up(size['height']))
class VideoRecorderService:
    """
    Handles the video encoding process for a browser session using imageio.

    This service captures individual frames from the CDP screencast, decodes them,
    and appends them to a video file using a pip-installable ffmpeg backend.
    It automatically resizes frames to match the target video dimensions.
    """

    def __init__(self, output_path: Path, size: ViewportSize, framerate: int):
        """
        Initializes the video recorder.

        Args:
            output_path: The full path where the video will be saved.
            size: A ViewportSize object specifying the width and height of the video.
            framerate: The desired framerate for the output video.
        """
        self.output_path = output_path
        self.size = size
        self.framerate = framerate
        # imageio writer, created lazily in start(); None until then and after stop
        self._writer: Optional['Format.Writer'] = None
        # True only while a writer is open and accepting frames
        self._is_active = False
        # Frames are padded (not stretched) up to the codec's macro-block multiple
        self.padded_size = _get_padded_size(self.size)

    def start(self) -> None:
        """
        Prepares and starts the video writer.

        If the required optional dependencies are not installed, this method will
        log an error and do nothing.
        """
        if not IMAGEIO_AVAILABLE:
            logger.error(
                'MP4 recording requires optional dependencies. Please install them with: pip install "browser-use[video]"'
            )
            return

        try:
            self.output_path.parent.mkdir(parents=True, exist_ok=True)
            # The macro_block_size is set to None because we handle padding ourselves
            # (see add_frame's ffmpeg pad filter), so imageio must not resize again.
            self._writer = iio.get_writer(
                str(self.output_path),
                fps=self.framerate,
                codec='libx264',
                quality=8,  # A good balance of quality and file size (1-10 scale)
                pixelformat='yuv420p',  # Ensures compatibility with most players
                macro_block_size=None,
            )
            self._is_active = True
            logger.debug(f'Video recorder started. Output will be saved to {self.output_path}')
        except Exception as e:
            logger.error(f'Failed to initialize video writer: {e}')
            self._is_active = False

    def add_frame(self, frame_data_b64: str) -> None:
        """
        Decodes a base64-encoded PNG frame, resizes it, pads it to be codec-compatible,
        and appends it to the video.

        NOTE(review): this spawns one ffmpeg subprocess per frame; acceptable for
        screencast rates but worth batching if framerates grow — confirm before reuse.

        Args:
            frame_data_b64: A base64-encoded string of the PNG frame data.
        """
        if not self._is_active or not self._writer:
            return

        try:
            frame_bytes = base64.b64decode(frame_data_b64)

            # Build a filter chain for ffmpeg:
            # 1. scale: Resizes the frame to the user-specified dimensions.
            # 2. pad: Adds black bars to meet codec's macro-block requirements,
            #    centering the original content.
            vf_chain = (
                f'scale={self.size["width"]}:{self.size["height"]},'
                f'pad={self.padded_size["width"]}:{self.padded_size["height"]}:(ow-iw)/2:(oh-ih)/2:color=black'
            )
            output_pix_fmt = 'rgb24'

            command = [
                imageio_ffmpeg.get_ffmpeg_exe(),
                '-f',
                'image2pipe',  # Input format from a pipe
                '-c:v',
                'png',  # Specify input codec is PNG
                '-i',
                '-',  # Input from stdin
                '-vf',
                vf_chain,  # Video filter for resizing and padding
                '-f',
                'rawvideo',  # Output format is raw video
                '-pix_fmt',
                output_pix_fmt,  # Output pixel format
                '-',  # Output to stdout
            ]

            # Execute ffmpeg as a subprocess, feeding the PNG via stdin and
            # reading raw RGB bytes from stdout
            proc = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out, err = proc.communicate(input=frame_bytes)

            if proc.returncode != 0:
                err_msg = err.decode(errors='ignore').strip()
                # The deprecated-pixel-format message is a known harmless warning, not a failure
                if 'deprecated pixel format used' not in err_msg.lower():
                    raise OSError(f'ffmpeg error during resizing/padding: {err_msg}')
                else:
                    logger.debug(f'ffmpeg warning during resizing/padding: {err_msg}')

            # Convert the raw output bytes to a numpy array with the padded dimensions
            img_array = np.frombuffer(out, dtype=np.uint8).reshape((self.padded_size['height'], self.padded_size['width'], 3))

            self._writer.append_data(img_array)
        except Exception as e:
            # Dropping a single frame is preferable to aborting the whole recording
            logger.warning(f'Could not process and add video frame: {e}')

    def stop_and_save(self) -> None:
        """
        Finalizes the video file by closing the writer.

        This method should be called when the recording session is complete.
        """
        if not self._is_active or not self._writer:
            return

        try:
            self._writer.close()
            logger.info(f'📹 Video recording saved successfully to: {self.output_path}')
        except Exception as e:
            logger.error(f'Failed to finalize and save video: {e}')
        finally:
            # Always drop the writer so a second stop_and_save is a no-op
            self._is_active = False
            self._writer = None

View File

@@ -6,16 +6,16 @@ from typing import ClassVar
from bubus import BaseEvent
from pydantic import PrivateAttr
from browser_use.browser.events import DialogOpenedEvent, TabCreatedEvent
from browser_use.browser.events import TabCreatedEvent
from browser_use.browser.watchdog_base import BaseWatchdog
class PopupsWatchdog(BaseWatchdog):
"""Handles JavaScript dialogs (alert, confirm, prompt) by automatically accepting them."""
"""Handles JavaScript dialogs (alert, confirm, prompt) by automatically accepting them immediately."""
# Events this watchdog listens to and emits
LISTENS_TO: ClassVar[list[type[BaseEvent]]] = [TabCreatedEvent, DialogOpenedEvent]
EMITS: ClassVar[list[type[BaseEvent]]] = [DialogOpenedEvent]
LISTENS_TO: ClassVar[list[type[BaseEvent]]] = [TabCreatedEvent]
EMITS: ClassVar[list[type[BaseEvent]]] = []
# Track which targets have dialog handlers registered
_dialog_listeners_registered: set[str] = PrivateAttr(default_factory=set)
@@ -36,107 +36,77 @@ class PopupsWatchdog(BaseWatchdog):
self.logger.debug(f'📌 Starting dialog handler setup for target {target_id}')
try:
# Get all CDP sessions for this target and any child frames
cdp_session = await self.browser_session.get_or_create_cdp_session(
target_id, focus=False
) # don't auto-focus new tabs! sometimes we need to open tabs in background
# Set up async handler for JavaScript dialogs - now we can handle them immediately!
# Also register for the root CDP client to catch dialogs from any frame
if self.browser_session._cdp_client_root:
self.logger.debug('📌 Also registering handler on root CDP client')
# Set up async handler for JavaScript dialogs - accept immediately without event dispatch
async def handle_dialog(event_data, session_id: str | None = None):
"""Handle JavaScript dialog events - accept immediately and dispatch event."""
self.logger.debug(f'🚨 DIALOG EVENT RECEIVED: {event_data}, session_id={session_id}')
dialog_type = event_data.get('type', 'alert')
message = event_data.get('message', '')
url = event_data.get('url')
frame_id = event_data.get('frameId')
self.logger.debug(f"🔔 JavaScript {dialog_type} dialog detected: '{message[:50]}...' - accepting immediately")
# Dispatch the event first so tests can observe it
event = self.browser_session.event_bus.dispatch(
DialogOpenedEvent(
frame_id=frame_id,
dialog_type=dialog_type,
message=message,
url=url,
)
)
await event.event_result(raise_if_none=False, raise_if_any=True, timeout=5.0)
# Accept the dialog immediately to unblock the browser
"""Handle JavaScript dialog events - accept immediately."""
try:
if self.browser_session._cdp_client_root and session_id:
self.logger.debug('🔄 Sending handleJavaScriptDialog command')
await self.browser_session._cdp_client_root.send.Page.handleJavaScriptDialog(
params={'accept': True},
session_id=session_id,
)
self.logger.info('✅ Dialog accepted successfully')
else:
self.logger.error('Cannot accept dialog - CDP client or session not available')
except Exception as e:
self.logger.error(f'Failed to accept dialog: {e}')
dialog_type = event_data.get('type', 'alert')
message = event_data.get('message', '')
self.logger.info(f"🔔 JavaScript {dialog_type} dialog: '{message[:100]}' - attempting to accept...")
self.logger.debug('Trying all approaches to accept dialog...')
# Approach 1: Use the session that detected the dialog
if self.browser_session._cdp_client_root and session_id:
try:
self.logger.debug(f'🔄 Approach 1: Using session {session_id}')
await asyncio.wait_for(
self.browser_session._cdp_client_root.send.Page.handleJavaScriptDialog(
params={'accept': True},
session_id=session_id,
),
timeout=0.25,
)
except (TimeoutError, Exception) as e:
pass
# Approach 2: Try with current agent focus session
if self.browser_session._cdp_client_root and self.browser_session.agent_focus:
try:
self.logger.debug(
f'🔄 Approach 2: Using agent focus session {self.browser_session.agent_focus.session_id}'
)
await asyncio.wait_for(
self.browser_session._cdp_client_root.send.Page.handleJavaScriptDialog(
params={'accept': True},
session_id=self.browser_session.agent_focus.session_id,
),
timeout=0.25,
)
except (TimeoutError, Exception) as e:
pass
except Exception as e:
self.logger.error(f'❌ Critical error in dialog handler: {type(e).__name__}: {e}')
# Register handler on the specific session
cdp_session.cdp_client.register.Page.javascriptDialogOpening(handle_dialog) # type: ignore[arg-type]
self.logger.debug(
f'Successfully registered Page.javascriptDialogOpening handler for session {cdp_session.session_id}'
)
# Also register on root CDP client to catch dialogs from any frame
if hasattr(self.browser_session._cdp_client_root, 'register'):
try:
self.browser_session._cdp_client_root.register.Page.javascriptDialogOpening(handle_dialog) # type: ignore[arg-type]
self.logger.debug('Successfully registered dialog handler on root CDP client for all frames')
except Exception as root_error:
self.logger.warning(f'Failed to register on root CDP client: {root_error}')
# Mark this target as having dialog handling set up
self._dialog_listeners_registered.add(target_id)
self.logger.debug(f'Set up JavaScript dialog handling for tab {target_id}')
except Exception as e:
self.logger.warning(f'Failed to set up dialog handling for tab {target_id}: {e}')
async def on_DialogOpenedEvent(self, event: DialogOpenedEvent) -> None:
"""Handle the async closing of JavaScript dialogs."""
self.logger.debug(
f'📋 on_DialogOpenedEvent called with frame_id={event.frame_id} url={event.url} message={event.message}'
)
assert self.browser_session.agent_focus is not None, 'Agent focus not set when handling DialogOpenedEvent'
current_focus_url = self.browser_session.agent_focus.url
current_focus_target_id = self.browser_session.agent_focus.target_id
cdp_session = await asyncio.wait_for(self.browser_session.cdp_client_for_frame(event.frame_id), timeout=5.0)
try:
# delay to look more human before auto-closing, some popular antibot fingerprint tests check for modals closing too fast
await asyncio.sleep(0.25)
assert self.browser_session._cdp_client_root
# self.browser_session._cdp_client_root.register.Page.javascriptDialogClosed(lambda *args: None)
await asyncio.wait_for(
self.browser_session._cdp_client_root.send.Page.handleJavaScriptDialog(
params={'accept': True},
session_id=cdp_session.session_id,
),
timeout=5.0,
)
# CRITICAL: you must re-focus (Target.activateTarget()) after handling the dialog, otherwise the browser will crash ~5 seconds later
await self.browser_session.get_or_create_cdp_session(target_id=current_focus_target_id, focus=True)
self.logger.info('✅ JS dialog popup handled successfully')
# graveyard of past attempts:
# # new_target = await self.browser_session._cdp_client_root.send.Target.createTarget(params={'url': current_focus_url})
# # self.browser_session.agent_focus = await self.browser_session.get_or_create_cdp_session(target_id=new_target.get('targetId'), new_socket=True, focus=True)
# # raise NotImplementedError('TODO: figure out why this requires a hard refresh and new socket to avoid crashing the entire browser on JS dialogs')
# await asyncio.sleep(0.2)
# await asyncio.wait_for(
# self.browser_session._cdp_client_root.send.Runtime.evaluate(
# params={'expression': '1'},
# session_id=cdp_session.session_id,
# ),
# timeout=5.0,
# )
# # self.browser_session.agent_focus = await self.browser_session.get_or_create_cdp_session(current_focus.target_id, focus=True, new_socket=True)
# # assert await self.browser_session.agent_focus.cdp_client.send.Page.getFrameTree(session_id=self.browser_session.agent_focus.session_id) is not None, "Agent focus not set after handling dialog"
except Exception as e:
self.logger.error(f'Failed to handle JavaScript dialog gracefully: {e}')
# raise
# finally:
# self.event_bus.dispatch(AgentFocusChangedEvent(
# target_id=current_focus_target_id,
# url=self.browser_session.agent_focus.url,
# ))
self.logger.warning(f'Failed to set up popup handling for tab {target_id}: {e}')

View File

@@ -0,0 +1,126 @@
"""Recording Watchdog for Browser Use Sessions."""
import asyncio
from pathlib import Path
from typing import ClassVar
from bubus import BaseEvent
from cdp_use.cdp.page.events import ScreencastFrameEvent
from uuid_extensions import uuid7str
from browser_use.browser.events import BrowserConnectedEvent, BrowserStopEvent
from browser_use.browser.profile import ViewportSize
from browser_use.browser.video_recorder import VideoRecorderService
from browser_use.browser.watchdog_base import BaseWatchdog
class RecordingWatchdog(BaseWatchdog):
    """
    Manages video recording of a browser session using CDP screencasting.

    Starts a Page.startScreencast when the browser connects (if the profile
    configures record_video_dir) and finalizes the video on BrowserStopEvent.
    """

    LISTENS_TO: ClassVar[list[type[BaseEvent]]] = [BrowserConnectedEvent, BrowserStopEvent]
    EMITS: ClassVar[list[type[BaseEvent]]] = []

    # Active recorder; None when recording is disabled, failed, or stopped
    _recorder: VideoRecorderService | None = None

    async def on_BrowserConnectedEvent(self, event: BrowserConnectedEvent) -> None:
        """
        Starts video recording if it is configured in the browser profile.
        """
        profile = self.browser_session.browser_profile
        if not profile.record_video_dir:
            return

        # Dynamically determine video size when not specified in the profile
        size = profile.record_video_size
        if not size:
            self.logger.debug('record_video_size not specified, detecting viewport size...')
            size = await self._get_current_viewport_size()
            if not size:
                self.logger.warning('Cannot start video recording: viewport size could not be determined.')
                return

        # getattr with default keeps older profiles (without record_video_format) working
        video_format = getattr(profile, 'record_video_format', 'mp4').strip('.')
        output_path = Path(profile.record_video_dir) / f'{uuid7str()}.{video_format}'

        self.logger.debug(f'Initializing video recorder for format: {video_format}')
        self._recorder = VideoRecorderService(output_path=output_path, size=size, framerate=profile.record_video_framerate)
        self._recorder.start()

        # start() leaves the recorder inactive if optional deps are missing or the writer failed
        if not self._recorder._is_active:
            self._recorder = None
            return

        # Register the frame handler before starting the screencast so no frame is missed
        self.browser_session.cdp_client.register.Page.screencastFrame(self.on_screencastFrame)

        try:
            cdp_session = await self.browser_session.get_or_create_cdp_session()
            await cdp_session.cdp_client.send.Page.startScreencast(
                params={
                    'format': 'png',
                    'quality': 90,
                    'maxWidth': size['width'],
                    'maxHeight': size['height'],
                    'everyNthFrame': 1,
                },
                session_id=cdp_session.session_id,
            )
            self.logger.info(f'📹 Started video recording to {output_path}')
        except Exception as e:
            # Roll back: finalize whatever was opened so no dangling writer remains
            self.logger.error(f'Failed to start screencast via CDP: {e}')
            if self._recorder:
                self._recorder.stop_and_save()
                self._recorder = None

    async def _get_current_viewport_size(self) -> ViewportSize | None:
        """Gets the current viewport size directly from the browser via CDP.

        Returns None when layout metrics are unavailable or incomplete.
        """
        try:
            cdp_session = await self.browser_session.get_or_create_cdp_session()
            metrics = await cdp_session.cdp_client.send.Page.getLayoutMetrics(session_id=cdp_session.session_id)
            # Use cssVisualViewport for the most accurate representation of the visible area
            viewport = metrics.get('cssVisualViewport', {})
            width = viewport.get('clientWidth')
            height = viewport.get('clientHeight')
            if width and height:
                self.logger.debug(f'Detected viewport size: {width}x{height}')
                return ViewportSize(width=int(width), height=int(height))
        except Exception as e:
            self.logger.warning(f'Failed to get viewport size from browser: {e}')
        return None

    def on_screencastFrame(self, event: ScreencastFrameEvent, session_id: str | None) -> None:
        """
        Synchronous handler for incoming screencast frames.

        Appends the frame to the recorder, then acknowledges it asynchronously
        (CDP stops sending frames until each one is acked).
        """
        if not self._recorder:
            return

        self._recorder.add_frame(event['data'])
        # Fire-and-forget the ack so this sync callback returns immediately
        asyncio.create_task(self._ack_screencast_frame(event, session_id))

    async def _ack_screencast_frame(self, event: ScreencastFrameEvent, session_id: str | None) -> None:
        """
        Asynchronously acknowledges a screencast frame.
        """
        try:
            await self.browser_session.cdp_client.send.Page.screencastFrameAck(
                params={'sessionId': event['sessionId']}, session_id=session_id
            )
        except Exception as e:
            # A missed ack only stalls the screencast briefly; debug-level is enough
            self.logger.debug(f'Failed to acknowledge screencast frame: {e}')

    async def on_BrowserStopEvent(self, event: BrowserStopEvent) -> None:
        """
        Stops the video recording and finalizes the video file.
        """
        if self._recorder:
            # Clear the reference first so late screencast frames are ignored
            recorder = self._recorder
            self._recorder = None
            self.logger.debug('Stopping video recording and saving file...')
            # stop_and_save does blocking encoder work; run it off the event loop
            loop = asyncio.get_event_loop()
            await loop.run_in_executor(None, recorder.stop_and_save)

View File

@@ -156,11 +156,14 @@ class SecurityWatchdog(BaseWatchdog):
return True
else:
# Use fnmatch for other glob patterns
if fnmatch.fnmatch(host, pattern):
if fnmatch.fnmatch(
full_url_pattern if '://' in pattern else host,
pattern,
):
return True
else:
# Exact match
if pattern.startswith(('http://', 'https://', 'chrome://', 'brave://', 'file://')):
if '://' in pattern:
# Full URL pattern
if url.startswith(pattern):
return True

View File

@@ -12,6 +12,7 @@ from pydantic import Field, PrivateAttr
from browser_use.browser.events import (
BrowserConnectedEvent,
BrowserStopEvent,
LoadStorageStateEvent,
SaveStorageStateEvent,
StorageStateLoadedEvent,
@@ -26,6 +27,7 @@ class StorageStateWatchdog(BaseWatchdog):
# Event contracts
LISTENS_TO: ClassVar[list[type[BaseEvent]]] = [
BrowserConnectedEvent,
BrowserStopEvent,
SaveStorageStateEvent,
LoadStorageStateEvent,
]
@@ -51,7 +53,12 @@ class StorageStateWatchdog(BaseWatchdog):
await self._start_monitoring()
# Automatically load storage state after browser start
self.event_bus.dispatch(LoadStorageStateEvent())
await self.event_bus.dispatch(LoadStorageStateEvent())
async def on_BrowserStopEvent(self, event: BrowserStopEvent) -> None:
    """Stop monitoring when browser stops."""
    # Shut down the periodic storage-state save loop before the browser goes away
    self.logger.debug('[StorageStateWatchdog] Stopping storage_state monitoring')
    await self._stop_monitoring()
async def on_SaveStorageStateEvent(self, event: SaveStorageStateEvent) -> None:
"""Handle storage state save request."""

View File

@@ -159,6 +159,10 @@ class OldConfig:
def SKIP_LLM_API_KEY_VERIFICATION(self) -> bool:
return os.getenv('SKIP_LLM_API_KEY_VERIFICATION', 'false').lower()[:1] in 'ty1'
@property
def DEFAULT_LLM(self) -> str:
return os.getenv('DEFAULT_LLM', '')
# Runtime hints
@property
def IN_DOCKER(self) -> bool:
@@ -203,6 +207,7 @@ class FlatEnvConfig(BaseSettings):
AZURE_OPENAI_ENDPOINT: str = Field(default='')
AZURE_OPENAI_KEY: str = Field(default='')
SKIP_LLM_API_KEY_VERIFICATION: bool = Field(default=False)
DEFAULT_LLM: str = Field(default='')
# Runtime hints
IN_DOCKER: bool | None = Field(default=None)

View File

@@ -16,32 +16,16 @@ from browser_use.dom.views import DOMRect, EnhancedSnapshotNode
# Only the ESSENTIAL computed styles for interactivity and visibility detection
REQUIRED_COMPUTED_STYLES = [
# Essential for visibility
'display',
'visibility',
'opacity',
'position',
'z-index',
'pointer-events',
'cursor',
'overflow',
'overflow-x',
'overflow-y',
'width',
'height',
'top',
'left',
'right',
'bottom',
'transform',
'clip',
'clip-path',
'user-select',
'background-color',
'color',
'border',
'margin',
'padding',
# Only styles actually accessed in the codebase (prevents Chrome crashes on heavy sites)
'display', # Used in service.py visibility detection
'visibility', # Used in service.py visibility detection
'opacity', # Used in service.py visibility detection
'overflow', # Used in views.py scrollability detection
'overflow-x', # Used in views.py scrollability detection
'overflow-y', # Used in views.py scrollability detection
'cursor', # Used in enhanced_snapshot.py cursor extraction
'pointer-events', # Used for clickability logic
'position', # Used for visibility logic
]
@@ -81,6 +65,14 @@ def build_snapshot_lookup(
for i, backend_node_id in enumerate(nodes['backendNodeId']):
backend_node_to_snapshot_index[backend_node_id] = i
# PERFORMANCE: Pre-build layout index map to eliminate O(n²) double lookups
# Preserve original behavior: use FIRST occurrence for duplicates
layout_index_map = {}
if layout and 'nodeIndex' in layout:
for layout_idx, node_index in enumerate(layout['nodeIndex']):
if node_index not in layout_index_map: # Only store first occurrence
layout_index_map[node_index] = layout_idx
# Build snapshot lookup for each backend node id
for backend_node_id, snapshot_index in backend_node_to_snapshot_index.items():
is_clickable = None
@@ -98,8 +90,9 @@ def build_snapshot_lookup(
client_rects = None
scroll_rects = None
stacking_contexts = None
for layout_idx, node_index in enumerate(layout.get('nodeIndex', [])):
if node_index == snapshot_index and layout_idx < len(layout.get('bounds', [])):
if snapshot_index in layout_index_map:
layout_idx = layout_index_map[snapshot_index]
if layout_idx < len(layout.get('bounds', [])):
# Parse bounding box
bounds = layout['bounds'][layout_idx]
if len(bounds) >= 4:
@@ -153,8 +146,6 @@ def build_snapshot_lookup(
if layout_idx < len(layout.get('stackingContexts', [])):
stacking_contexts = layout.get('stackingContexts', {}).get('index', [])[layout_idx]
break
snapshot_lookup[backend_node_id] = EnhancedSnapshotNode(
is_clickable=is_clickable,
cursor_style=cursor_style,

View File

@@ -6,8 +6,10 @@ from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import Any
from markdown_pdf import MarkdownPdf, Section
from pydantic import BaseModel, Field
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer
INVALID_FILENAME_ERROR_MESSAGE = 'Error: Invalid filename format. Must be alphanumeric with supported extension.'
DEFAULT_FILE_SYSTEM_PATH = 'browseruse_agent_data'
@@ -120,9 +122,32 @@ class PdfFile(BaseFile):
def sync_to_disk_sync(self, path: Path) -> None:
file_path = path / self.full_name
try:
md_pdf = MarkdownPdf()
md_pdf.add_section(Section(self.content))
md_pdf.save(file_path)
# Create PDF document
doc = SimpleDocTemplate(str(file_path), pagesize=letter)
styles = getSampleStyleSheet()
story = []
# Convert markdown content to simple text and add to PDF
# For basic implementation, we'll treat content as plain text
# This avoids the AGPL license issue while maintaining functionality
content_lines = self.content.split('\n')
for line in content_lines:
if line.strip():
# Handle basic markdown headers
if line.startswith('# '):
para = Paragraph(line[2:], styles['Title'])
elif line.startswith('## '):
para = Paragraph(line[3:], styles['Heading1'])
elif line.startswith('### '):
para = Paragraph(line[4:], styles['Heading2'])
else:
para = Paragraph(line, styles['Normal'])
story.append(para)
else:
story.append(Spacer(1, 6))
doc.build(story)
except Exception as e:
raise FileSystemError(f"Error: Could not write to file '{self.full_name}'. {str(e)}")

View File

@@ -37,6 +37,41 @@ if TYPE_CHECKING:
from browser_use.llm.openai.chat import ChatOpenAI
from browser_use.llm.openrouter.chat import ChatOpenRouter
# Type stubs for model instances - enables IDE autocomplete
openai_gpt_4o: ChatOpenAI
openai_gpt_4o_mini: ChatOpenAI
openai_gpt_4_1_mini: ChatOpenAI
openai_o1: ChatOpenAI
openai_o1_mini: ChatOpenAI
openai_o1_pro: ChatOpenAI
openai_o3: ChatOpenAI
openai_o3_mini: ChatOpenAI
openai_o3_pro: ChatOpenAI
openai_o4_mini: ChatOpenAI
openai_gpt_5: ChatOpenAI
openai_gpt_5_mini: ChatOpenAI
openai_gpt_5_nano: ChatOpenAI
azure_gpt_4o: ChatAzureOpenAI
azure_gpt_4o_mini: ChatAzureOpenAI
azure_gpt_4_1_mini: ChatAzureOpenAI
azure_o1: ChatAzureOpenAI
azure_o1_mini: ChatAzureOpenAI
azure_o1_pro: ChatAzureOpenAI
azure_o3: ChatAzureOpenAI
azure_o3_mini: ChatAzureOpenAI
azure_o3_pro: ChatAzureOpenAI
azure_gpt_5: ChatAzureOpenAI
azure_gpt_5_mini: ChatAzureOpenAI
google_gemini_2_0_flash: ChatGoogle
google_gemini_2_0_pro: ChatGoogle
google_gemini_2_5_pro: ChatGoogle
google_gemini_2_5_flash: ChatGoogle
google_gemini_2_5_flash_lite: ChatGoogle
# Models are imported on-demand via __getattr__
# Lazy imports mapping for heavy chat models
_LAZY_IMPORTS = {
'ChatAnthropic': ('browser_use.llm.anthropic.chat', 'ChatAnthropic'),
@@ -51,9 +86,12 @@ _LAZY_IMPORTS = {
'ChatOpenRouter': ('browser_use.llm.openrouter.chat', 'ChatOpenRouter'),
}
# Cache for model instances - only created when accessed
_model_cache: dict[str, 'BaseChatModel'] = {}
def __getattr__(name: str):
"""Lazy import mechanism for heavy chat model imports."""
"""Lazy import mechanism for heavy chat model imports and model instances."""
if name in _LAZY_IMPORTS:
module_path, attr_name = _LAZY_IMPORTS[name]
try:
@@ -61,12 +99,25 @@ def __getattr__(name: str):
module = import_module(module_path)
attr = getattr(module, attr_name)
# Cache the imported attribute in the module's globals
globals()[name] = attr
return attr
except ImportError as e:
raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e
# Check cache first for model instances
if name in _model_cache:
return _model_cache[name]
# Try to get model instances from models module on-demand
try:
from browser_use.llm.models import __getattr__ as models_getattr
attr = models_getattr(name)
# Cache in our clean cache dict
_model_cache[name] = attr
return attr
except (AttributeError, ImportError):
pass
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")

View File

@@ -73,10 +73,11 @@ class ChatGoogle(BaseChatModel):
# Model configuration
model: VerifiedGeminiModels | str
temperature: float | None = None
temperature: float | None = 0.2
top_p: float | None = None
seed: int | None = None
thinking_budget: int | None = None
max_output_tokens: int | None = 4096
config: types.GenerateContentConfigDict | None = None
# Client initialization parameters
@@ -193,6 +194,9 @@ class ChatGoogle(BaseChatModel):
thinking_config_dict: types.ThinkingConfigDict = {'thinking_budget': self.thinking_budget}
config['thinking_config'] = thinking_config_dict
if self.max_output_tokens is not None:
config['max_output_tokens'] = self.max_output_tokens
async def _make_api_call():
if output_format is None:
# Return string response
@@ -389,6 +393,10 @@ class ChatGoogle(BaseChatModel):
):
cleaned['properties'] = {'_placeholder': {'type': 'string'}}
# Also remove 'title' from the required list if it exists
if 'required' in cleaned and isinstance(cleaned.get('required'), list):
cleaned['required'] = [p for p in cleaned['required'] if p != 'title']
return cleaned
elif isinstance(obj, list):
return [clean_schema(item) for item in obj]

171
browser_use/llm/models.py Normal file
View File

@@ -0,0 +1,171 @@
"""
Convenient access to LLM models.
Usage:
from browser_use import llm
# Simple model access
model = llm.azure_gpt_4_1_mini
model = llm.openai_gpt_4o
model = llm.google_gemini_2_5_pro
"""
import os
from typing import TYPE_CHECKING
from browser_use.llm.azure.chat import ChatAzureOpenAI
from browser_use.llm.google.chat import ChatGoogle
from browser_use.llm.openai.chat import ChatOpenAI
if TYPE_CHECKING:
from browser_use.llm.base import BaseChatModel
# Type stubs for IDE autocomplete
openai_gpt_4o: 'BaseChatModel'
openai_gpt_4o_mini: 'BaseChatModel'
openai_gpt_4_1_mini: 'BaseChatModel'
openai_o1: 'BaseChatModel'
openai_o1_mini: 'BaseChatModel'
openai_o1_pro: 'BaseChatModel'
openai_o3: 'BaseChatModel'
openai_o3_mini: 'BaseChatModel'
openai_o3_pro: 'BaseChatModel'
openai_o4_mini: 'BaseChatModel'
openai_gpt_5: 'BaseChatModel'
openai_gpt_5_mini: 'BaseChatModel'
openai_gpt_5_nano: 'BaseChatModel'
azure_gpt_4o: 'BaseChatModel'
azure_gpt_4o_mini: 'BaseChatModel'
azure_gpt_4_1_mini: 'BaseChatModel'
azure_o1: 'BaseChatModel'
azure_o1_mini: 'BaseChatModel'
azure_o1_pro: 'BaseChatModel'
azure_o3: 'BaseChatModel'
azure_o3_mini: 'BaseChatModel'
azure_o3_pro: 'BaseChatModel'
azure_gpt_5: 'BaseChatModel'
azure_gpt_5_mini: 'BaseChatModel'
google_gemini_2_0_flash: 'BaseChatModel'
google_gemini_2_0_pro: 'BaseChatModel'
google_gemini_2_5_pro: 'BaseChatModel'
google_gemini_2_5_flash: 'BaseChatModel'
google_gemini_2_5_flash_lite: 'BaseChatModel'
def get_llm_by_name(model_name: str):
	"""
	Build an LLM instance from a snake_case model name, pulling API keys from the environment.

	Args:
		model_name: String name like 'azure_gpt_4_1_mini', 'openai_gpt_4o', etc.

	Returns:
		LLM instance with API keys from environment variables

	Raises:
		ValueError: If model_name is not recognized
	"""
	if not model_name:
		raise ValueError('Model name cannot be empty')

	# Split into provider prefix and the rest of the model name
	provider, sep, model_part = model_name.partition('_')
	if not sep:
		raise ValueError(f"Invalid model name format: '{model_name}'. Expected format: 'provider_model_name'")

	# Map snake_case tokens back to the dotted/dashed names providers expect.
	# The boolean marks whether remaining underscores should also become dashes.
	known_tokens = (
		('gpt_4_1_mini', 'gpt-4.1-mini', False),
		('gpt_4o_mini', 'gpt-4o-mini', False),
		('gpt_4o', 'gpt-4o', False),
		('gemini_2_0', 'gemini-2.0', True),
		('gemini_2_5', 'gemini-2.5', True),
	)
	for token, canonical, dash_rest in known_tokens:
		if token in model_part:
			model = model_part.replace(token, canonical)
			if dash_rest:
				model = model.replace('_', '-')
			break
	else:
		# Generic fallback: underscores become dashes (e.g. 'o3_mini' -> 'o3-mini')
		model = model_part.replace('_', '-')

	# OpenAI Models
	if provider == 'openai':
		return ChatOpenAI(model=model, api_key=os.getenv('OPENAI_API_KEY'))

	# Azure OpenAI Models (two env var spellings are accepted for the key)
	if provider == 'azure':
		return ChatAzureOpenAI(
			model=model,
			api_key=os.getenv('AZURE_OPENAI_KEY') or os.getenv('AZURE_OPENAI_API_KEY'),
			azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT'),
		)

	# Google Models
	if provider == 'google':
		return ChatGoogle(model=model, api_key=os.getenv('GOOGLE_API_KEY'))

	available_providers = ['openai', 'azure', 'google']
	raise ValueError(f"Unknown provider: '{provider}'. Available providers: {', '.join(available_providers)}")
# Pre-configured model instances (lazy loaded via __getattr__)
def __getattr__(name: str) -> 'BaseChatModel':
	"""Create model instances on demand with API keys from environment."""
	# Chat classes are resolved directly rather than via the factory.
	chat_classes = {
		'ChatOpenAI': ChatOpenAI,
		'ChatAzureOpenAI': ChatAzureOpenAI,
		'ChatGoogle': ChatGoogle,
	}
	if name in chat_classes:
		return chat_classes[name]  # type: ignore
	# Anything else is treated as a model instance name like 'openai_gpt_4o' - the main use case.
	try:
		return get_llm_by_name(name)
	except ValueError:
		raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
__all__ = [
'ChatOpenAI',
'ChatAzureOpenAI',
'ChatGoogle',
'get_llm_by_name',
# OpenAI instances - created on demand
'openai_gpt_4o',
'openai_gpt_4o_mini',
'openai_gpt_4_1_mini',
'openai_o1',
'openai_o1_mini',
'openai_o1_pro',
'openai_o3',
'openai_o3_mini',
'openai_o3_pro',
'openai_o4_mini',
'openai_gpt_5',
'openai_gpt_5_mini',
'openai_gpt_5_nano',
# Azure instances - created on demand
'azure_gpt_4o',
'azure_gpt_4o_mini',
'azure_gpt_4_1_mini',
'azure_o1',
'azure_o1_mini',
'azure_o1_pro',
'azure_o3',
'azure_o3_mini',
'azure_o3_pro',
'azure_gpt_5',
'azure_gpt_5_mini',
# Google instances - created on demand
'google_gemini_2_0_flash',
'google_gemini_2_0_pro',
'google_gemini_2_5_pro',
'google_gemini_2_5_flash',
'google_gemini_2_5_flash_lite',
]

View File

@@ -1,8 +1,10 @@
from collections.abc import Mapping
from dataclasses import dataclass
from typing import Any, TypeVar, overload
import httpx
from ollama import AsyncClient as OllamaAsyncClient
from ollama import Options
from pydantic import BaseModel
from browser_use.llm.base import BaseChatModel
@@ -30,6 +32,7 @@ class ChatOllama(BaseChatModel):
host: str | None = None
timeout: float | httpx.Timeout | None = None
client_params: dict[str, Any] | None = None
ollama_options: Mapping[str, Any] | Options | None = None
# Static
@property
@@ -70,6 +73,7 @@ class ChatOllama(BaseChatModel):
response = await self.get_client().chat(
model=self.model,
messages=ollama_messages,
options=self.ollama_options,
)
return ChatInvokeCompletion(completion=response.message.content or '', usage=None)
@@ -80,6 +84,7 @@ class ChatOllama(BaseChatModel):
model=self.model,
messages=ollama_messages,
format=schema,
options=self.ollama_options,
)
completion = response.message.content or ''

View File

@@ -3,7 +3,6 @@ import base64
import io
import random
from lmnr import Laminar
from PIL import Image, ImageDraw, ImageFont
from browser_use.llm.google.chat import ChatGoogle
@@ -17,8 +16,6 @@ from browser_use.llm.messages import (
UserMessage,
)
Laminar.initialize()
def create_random_text_image(text: str = 'hello world', width: int = 4000, height: int = 4000) -> str:
# Create image with random background color

View File

@@ -138,7 +138,7 @@ def setup_logging(stream=None, log_level=None, force_setup=False, debug_log_file
# Create debug log file handler
if debug_log_file:
debug_handler = logging.FileHandler(debug_log_file)
debug_handler = logging.FileHandler(debug_log_file, encoding='utf-8')
debug_handler.setLevel(logging.DEBUG)
debug_handler.setFormatter(BrowserUseFormatter('%(asctime)s - %(levelname)-8s [%(name)s] %(message)s', logging.DEBUG))
file_handlers.append(debug_handler)
@@ -146,7 +146,7 @@ def setup_logging(stream=None, log_level=None, force_setup=False, debug_log_file
# Create info log file handler
if info_log_file:
info_handler = logging.FileHandler(info_log_file)
info_handler = logging.FileHandler(info_log_file, encoding='utf-8')
info_handler.setLevel(logging.INFO)
info_handler.setFormatter(BrowserUseFormatter('%(asctime)s - %(levelname)-8s [%(name)s] %(message)s', logging.INFO))
file_handlers.append(info_handler)

View File

@@ -8,6 +8,7 @@ from inspect import Parameter, iscoroutinefunction, signature
from types import UnionType
from typing import Any, Generic, Optional, TypeVar, Union, get_args, get_origin
import pyotp
from pydantic import BaseModel, Field, RootModel, create_model
from browser_use.browser import BrowserSession
@@ -433,10 +434,17 @@ class Registry(Generic[Context]):
def recursively_replace_secrets(value: str | dict | list) -> str | dict | list:
if isinstance(value, str):
matches = secret_pattern.findall(value)
# check if the placeholder key, like x_password is in the output parameters of the LLM and replace it with the sensitive data
for placeholder in matches:
if placeholder in applicable_secrets:
value = value.replace(f'<secret>{placeholder}</secret>', applicable_secrets[placeholder])
# generate a totp code if secret is a 2fa secret
if 'bu_2fa_code' in placeholder:
totp = pyotp.TOTP(applicable_secrets[placeholder], digits=6)
replacement_value = totp.now()
else:
replacement_value = applicable_secrets[placeholder]
value = value.replace(f'<secret>{placeholder}</secret>', replacement_value)
replaced_placeholders.add(placeholder)
else:
# Keep track of missing placeholders

View File

@@ -236,17 +236,17 @@ class Tools(Generic[Context]):
return ActionResult(error=error_msg)
@self.registry.action(
'Wait for x seconds default 3 (max 10 seconds). This can be used to wait until the page is fully loaded.'
'Wait for x seconds (default 3) (max 30 seconds). This can be used to wait until the page is fully loaded.'
)
async def wait(seconds: int = 3):
# Cap wait time at maximum 10 seconds
# Cap wait time at maximum 30 seconds
# Reduce the wait time by 3 seconds to account for the llm call which takes at least 3 seconds
# So if the model decides to wait for 5 seconds, the llm call took at least 3 seconds, so we only need to wait for 2 seconds
# Note by Mert: the above doesnt make sense because we do the LLM call right after this or this could be followed by another action after which we would like to wait
# so I revert this.
actual_seconds = min(max(seconds, 0), 10)
memory = f'Waited for {actual_seconds} seconds'
logger.info(f'🕒 {memory}')
actual_seconds = min(max(seconds - 3, 0), 30)
memory = f'Waited for {seconds} seconds'
logger.info(f'🕒 waited for {actual_seconds} seconds + 3 seconds for LLM call')
await asyncio.sleep(actual_seconds)
return ActionResult(extracted_content=memory, long_term_memory=memory)
@@ -266,7 +266,7 @@ class Tools(Generic[Context]):
# Look up the node from the selector map
node = await browser_session.get_element_by_index(params.index)
if node is None:
raise ValueError(f'Element index {params.index} not found in DOM')
raise ValueError(f'Element index {params.index} not found in browser state')
event = browser_session.event_bus.dispatch(
ClickElementEvent(node=node, while_holding_ctrl=params.while_holding_ctrl or False)
@@ -315,7 +315,7 @@ class Tools(Generic[Context]):
# Look up the node from the selector map
node = await browser_session.get_element_by_index(params.index)
if node is None:
raise ValueError(f'Element index {params.index} not found in DOM')
raise ValueError(f'Element index {params.index} not found in browser state')
# Dispatch type text event with node
try:
@@ -325,7 +325,7 @@ class Tools(Generic[Context]):
await event
input_metadata = await event.event_result(raise_if_any=True, raise_if_none=False)
msg = f"Input '{params.text}' into element {params.index}."
logger.info(msg)
logger.debug(msg)
# Include input coordinates in metadata if available
return ActionResult(
@@ -669,7 +669,9 @@ You will be given a query and the markdown of a webpage that has been filtered t
raise RuntimeError(str(e))
@self.registry.action(
"""Scroll the page by specified number of pages (set down=True to scroll down, down=False to scroll up, num_pages=number of pages to scroll like 0.5 for half page, 10.0 for ten pages, etc.). Optional index parameter to scroll within a specific element or its scroll container (works well for dropdowns and custom UI components). If you want to scroll the entire page, don't use index.
"""Scroll the page by specified number of pages (set down=True to scroll down, down=False to scroll up, num_pages=number of pages to scroll like 0.5 for half page, 10.0 for ten pages, etc.).
Default behavior is to scroll the entire page. This is enough for most cases.
Optional if there are multiple scroll containers, use frame_element_index parameter with an element inside the container you want to scroll in. For that you must use indices that exist in your browser_state (works well for dropdowns and custom UI components).
Instead of scrolling step after step, use a high number of pages at once like 10 to get to the bottom of the page.
If you know where you want to scroll to, use scroll_to_text instead of this tool.
""",
@@ -681,18 +683,15 @@ You will be given a query and the markdown of a webpage that has been filtered t
# Special case: index 0 means scroll the whole page (root/body element)
node = None
if params.frame_element_index is not None and params.frame_element_index != 0:
try:
node = await browser_session.get_element_by_index(params.frame_element_index)
if node is None:
# Element not found - return error
raise ValueError(f'Element index {params.frame_element_index} not found in DOM')
except Exception as e:
# Error getting element - return error
raise ValueError(f'Failed to get element {params.frame_element_index}: {e}') from e
node = await browser_session.get_element_by_index(params.frame_element_index)
if node is None:
# Element does not exist
msg = f'Element index {params.frame_element_index} not found in browser state'
return ActionResult(error=msg)
# Dispatch scroll event with node - the complex logic is handled in the event handler
# Convert pages to pixels (assuming 800px per page as standard viewport height)
pixels = int(params.num_pages * 800)
# Convert pages to pixels (assuming 1000px per page as standard viewport height)
pixels = int(params.num_pages * 1000)
event = browser_session.event_bus.dispatch(
ScrollEvent(direction='down' if params.down else 'up', amount=pixels, node=node)
)
@@ -765,7 +764,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
# Dropdown Actions
@self.registry.action(
'Get list of option values exposed by a specific dropdown input field. Only works on dropdown-style form elements (<select>, Semantic UI/aria-labeled select, etc.).',
'Get list of values for a dropdown input field. Only works on dropdown-style form elements (<select>, Semantic UI/aria-labeled select, etc.). Do not use this tool for none dropdown elements.',
param_model=GetDropdownOptionsAction,
)
async def get_dropdown_options(params: GetDropdownOptionsAction, browser_session: BrowserSession):
@@ -773,7 +772,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
# Look up the node from the selector map
node = await browser_session.get_element_by_index(params.index)
if node is None:
raise ValueError(f'Element index {params.index} not found in DOM')
raise ValueError(f'Element index {params.index} not found in browser state')
# Dispatch GetDropdownOptionsEvent to the event handler
@@ -799,7 +798,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
# Look up the node from the selector map
node = await browser_session.get_element_by_index(params.index)
if node is None:
raise ValueError(f'Element index {params.index} not found in DOM')
raise ValueError(f'Element index {params.index} not found in browser state')
# Dispatch SelectDropdownOptionEvent to the event handler
from browser_use.browser.events import SelectDropdownOptionEvent

View File

@@ -2,6 +2,7 @@ import asyncio
import logging
import os
import platform
import re
import signal
import time
from collections.abc import Callable, Coroutine
@@ -16,6 +17,9 @@ from dotenv import load_dotenv
load_dotenv()
# Pre-compiled regex for URL detection - used in URL shortening
URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+|[^\s<>"\']+\.[a-z]{2,}(?:/[^\s<>"\']*)?', re.IGNORECASE)
logger = logging.getLogger(__name__)

View File

@@ -1,146 +0,0 @@
---
title: "Node.js"
description: "Get started with Browser Use Cloud API using Node.js"
icon: "node-js"
mode: "wide"
---
<img src="/images/cloud-banner-js.png" alt="Browser Use Node.js" width="full" />
> The repository is available on [GitHub](https://github.com/browser-use/browser-use-node)
<CodeGroup>
```sh npm
npm install browser-use-sdk
```
```sh pnpm
pnpm add browser-use-sdk
```
```sh yarn
yarn add browser-use-sdk
```
```sh bun
bun add browser-use-sdk
```
</CodeGroup>
☝️ Get your API Key at [Browser Use Cloud](https://cloud.browser-use.com/billing)
```ts
import BrowserUse from "browser-use-sdk";
const client = new BrowserUse({
apiKey: "bu_...",
});
const result = await client.tasks.run({
task: "Search for the top 10 Hacker News posts and return the title and url.",
});
console.log(result.doneOutput);
```
> The full API of this library can be found in [api.md](https://github.com/browser-use/browser-use-node/blob/main/api.md).
### Structured Output with Zod
```ts
import z from "zod";
const TaskOutput = z.object({
posts: z.array(
z.object({
title: z.string(),
url: z.string(),
})
),
});
const result = await client.tasks.run({
task: "Search for the top 10 Hacker News posts and return the title and url.",
schema: TaskOutput,
});
for (const post of result.parsedOutput.posts) {
console.log(`${post.title} - ${post.url}`);
}
```
### Streaming Agent Updates
```ts
const task = await browseruse.tasks.create({
task: "Search for the top 10 Hacker News posts and return the title and url.",
schema: TaskOutput,
});
const stream = browseruse.tasks.stream({
taskId: task.id,
schema: TaskOutput,
});
for await (const msg of stream) {
switch (msg.status) {
case "started":
console.log(`started: ${msg.data.session.liveUrl}`);
break;
case "paused":
case "stopped":
console.log(`running: ${msg}`);
break;
case "finished":
console.log(`done:`);
for (const post of msg.parsedOutput.posts) {
console.log(`${post.title} - ${post.url}`);
}
break;
}
}
```
## Webhook Verification
> We encourage you to use the SDK functions that verify and parse webhook events.
```ts
import {
verifyWebhookEventSignature,
type WebhookAgentTaskStatusUpdatePayload,
} from "browser-use-sdk/lib/webhooks";
export async function POST(req: Request) {
const signature = req.headers["x-browser-use-signature"] as string;
const timestamp = req.headers["x-browser-use-timestamp"] as string;
const event = await verifyWebhookEventSignature(
{
body,
signature,
timestamp,
},
{
secret: SECRET_KEY,
}
);
if (!event.ok) {
return;
}
switch (event.event.type) {
case "agent.task.status_update":
break;
case "test":
break;
default:
break;
}
}
```

View File

@@ -1,131 +0,0 @@
---
title: "Python"
description: "Get started with Browser Use Cloud API using Python"
icon: "python"
mode: "wide"
---
<img
src="/images/cloud-banner-python.png"
alt="Browser Use Python"
width="full"
/>
> The repository is available on [GitHub](https://github.com/browser-use/browser-use-python).
```sh
pip install browser-use-sdk
```
☝️ Get your API Key at [Browser Use Cloud](https://cloud.browser-use.com/billing)
```python
from browser_use_sdk import BrowserUse
client = BrowserUse(api_key="bu_...")
result = client.tasks.run(
task="Search for the top 10 Hacker News posts and return the title and url."
)
result.done_output
```
> The full API reference can be found in [api.md](https://github.com/browser-use/browser-use-python/blob/main/api.md).
## Async usage
Simply import `AsyncBrowserUse` instead of `BrowserUse` and use `await` with each API call:
```python
import os
import asyncio
from browser_use_sdk import AsyncBrowserUse
client = AsyncBrowserUse(
api_key=os.environ.get("BROWSER_USE_API_KEY"), # This is the default and can be omitted
)
async def main() -> None:
task = await client.tasks.run(
task="Search for the top 10 Hacker News posts and return the title and url.",
)
print(task.done_output)
asyncio.run(main())
```
Functionality between the synchronous and asynchronous clients is otherwise identical.
## Structured Output with Pydantic
Browser Use Python SDK provides first class support for Pydantic models.
```py
class HackerNewsPost(BaseModel):
title: str
url: str
class SearchResult(BaseModel):
posts: List[HackerNewsPost]
async def main() -> None:
structured_result = await client.tasks.run(
task="""
Find top 10 Hacker News articles and return the title and url.
""",
structured_output_json=SearchResult,
)
if structured_result.parsed_output is not None:
print("Top HackerNews Posts:")
for post in structured_result.parsed_output.posts:
print(f" - {post.title} - {post.url}")
asyncio.run(main())
```
## Streaming Updates with Async Iterators
```py
class HackerNewsPost(BaseModel):
title: str
url: str
class SearchResult(BaseModel):
posts: List[HackerNewsPost]
async def main() -> None:
structured_task = await client.tasks.create(
task="""
Find top 10 Hacker News articles and return the title and url.
""",
structured_output_json=SearchResult,
)
async for update in client.tasks.stream(structured_task.id, structured_output_json=SearchResult):
if len(update.steps) > 0:
last_step = update.steps[-1]
print(f"{update.status}: {last_step.url} ({last_step.next_goal})")
else:
print(f"{update.status}")
if update.status == "finished":
if update.parsed_output is None:
print("No output...")
else:
print("Top HackerNews Posts:")
for post in update.parsed_output.posts:
print(f" - {post.title} - {post.url}")
break
asyncio.run(main())
```
## Advanced
For more advanced usage of the SDK and contributions to the SDK, see [Github repository](https://github.com/browser-use/browser-use-python).

View File

@@ -1,79 +0,0 @@
---
title: "Quickstart"
description: "Skip the setup with Browser Use Cloud"
icon: "cloud"
mode: "wide"
---
<img
className="block dark:hidden rounded-2xl"
src="/images/cloud-banner.png"
alt="Browser Use Cloud Banner"
/>
<img
className="hidden dark:block rounded-2xl"
src="/images/cloud-banner-dark.png"
alt="Browser Use Cloud Banner"
/>
## Get Started
☝️ Get your API Key at [Browser Use Cloud](https://cloud.browser-use.com) then choose your language.
<CardGroup cols={2}>
<Card
title="Python SDK"
icon="python"
href="/cloud/v2/python-quickstart"
>
Browser Use NPC Mode SDK 🤖
</Card>
<Card
title="Node.js SDK"
icon="node-js"
href="/cloud/v2/node-quickstart"
>
Browser Use Wizard Mode SDK 🧙‍♂️
</Card>
</CardGroup>
{/* <br /> */}
> To play around with the API, you can use the [Browser Use Cloud Playground](https://cloud.browser-use.com/playground).
## Examples
Explore quick start examples to see how to use the SDKs.
<CardGroup cols={2}>
<Card
title="Python Examples"
icon="python"
href="https://github.com/browser-use/browser-use-examples/tree/main/python"
>
Explore quick start examples for Python.
</Card>
<Card
title="Typescript Examples"
icon="js"
href="https://github.com/browser-use/browser-use-examples/tree/main/typescript"
>
Explore quick start examples for Typescript.
</Card>
<Card
title="NextJS Examples"
icon={<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" width="24" height="24" fill="currentColor" className="remixicon text-basis h-8 w-8 text-primary"><path d="M12 22C6.47715 22 2 17.5228 2 12C2 6.47715 6.47715 2 12 2C17.5228 2 22 6.47715 22 12C22 17.5228 17.5228 22 12 22ZM15.9999 8H14.6499V12H15.9999V8ZM9.34609 9.70937L15.405 17.5379L16.4591 16.7293L9.68281 8H8V15.9969H9.34609V9.70937Z"></path></svg>}
href="https://github.com/browser-use/browser-use-examples/tree/main/typescript/scrapper"
>
Explore quick start examples for NextJS.
</Card>
</CardGroup>

View File

@@ -21,6 +21,7 @@ mode: "wide"
- `initial_actions`: List of actions to run before the main task without LLM. [Example](https://github.com/browser-use/browser-use/blob/main/examples/features/initial_actions.py)
- `max_actions_per_step` (default: `10`): Maximum actions per step, e.g. for form filling the agent can output 10 fields at once. We execute the actions until the page changes.
- `max_failures` (default: `3`): Maximum retries for steps with errors
- `final_response_after_failure` (default: `True`): If True, attempt to force one final model call with intermediate output after max_failures is reached
- `use_thinking` (default: `True`): Controls whether the agent uses its internal "thinking" field for explicit reasoning steps.
- `flash_mode` (default: `False`): Fast mode that skips evaluation, next goal and thinking and only uses memory. If `flash_mode` is enabled, it overrides `use_thinking` and disables the thinking process entirely. [Example](https://github.com/browser-use/browser-use/blob/main/examples/getting_started/05_fast_agent.py)

View File

@@ -73,7 +73,9 @@ mode: "wide"
- `screen`: Screen size information, same format as `window_size`
## Recording & Debugging
- `record_video_dir`: Directory to save video recordings as `.webm` files
- `record_video_dir`: Directory to save video recordings as `.mp4` files
- `record_video_size` (default: `ViewportSize`): The frame size (width, height) of the video recording.
- `record_video_framerate` (default: `30`): The framerate to use for the video recording.
- `record_har_path`: Path to save network trace files as `.har` format
- `traces_dir`: Directory to save complete trace files for debugging
- `record_har_content` (default: `'embed'`): HAR content mode (`'omit'`, `'embed'`, `'attach'`)

View File

@@ -0,0 +1,92 @@
---
title: "Prompting Guide"
description: "Tips and tricks"
icon: "lightbulb"
---
Prompting can drastically improve performance and work around existing limitations of the library.
### 1. Be Specific vs Open-Ended
**✅ Specific (Recommended)**
```python
task = """
1. Go to https://quotes.toscrape.com/
2. Use extract_structured_data action with the query "first 3 quotes with their authors"
3. Save results to quotes.csv using write_file action
4. Do a google search for the first quote and find when it was written
"""
```
**❌ Open-Ended**
```python
task = "Go to web and make money"
```
### 2. Name Actions Directly
When you know exactly what the agent should do, reference actions by name:
```python
task = """
1. Use search_google action to find "Python tutorials"
2. Use click_element_by_index to open first result in a new tab
3. Use scroll action to scroll down 2 pages
4. Use extract_structured_data to extract the names of the first 5 items
5. Wait for 2 seconds if the page is not loaded, refresh it and wait 10 sec
6. Use send_keys action with "Tab Tab ArrowDown Enter"
"""
```
See [Available Tools](/customize/tools/available) for the complete list of actions.
### 3. Handle interaction problems via keyboard navigation
Sometimes buttons can't be clicked (you found a bug in the library - open an issue).
Good news - often you can work around it with keyboard navigation!
```python
task = """
If the submit button cannot be clicked:
1. Use send_keys action with "Tab Tab Enter" to navigate and activate
2. Or use send_keys with "ArrowDown ArrowDown Enter" for form submission
"""
```
### 4. Custom Actions Integration
```python
# When you have custom actions
@controller.action("Get 2FA code from authenticator app")
async def get_2fa_code():
# Your implementation
pass
task = """
Login with 2FA:
1. Enter username/password
2. When prompted for 2FA, use get_2fa_code action
3. NEVER try to extract 2FA codes from the page manually
4. ALWAYS use the get_2fa_code action for authentication codes
"""
```
### 5. Error Recovery
```python
task = """
Robust data extraction:
1. Go to openai.com to find their CEO
2. If navigation fails due to anti-bot protection:
- Use google search to find the CEO
3. If page times out, use go_back and try alternative approach
"""
```
The key to effective prompting is being specific about actions.

View File

@@ -1,6 +1,6 @@
---
title: "Sensitive Data"
description: "Handle sensitive information securely and avoid sending PII & passwords to the LLM."
description: "Handle secret information securely and avoid sending PII & passwords to the LLM."
icon: "shield"
mode: "wide"
---
@@ -11,14 +11,24 @@ import os
from browser_use import Agent, Browser, ChatOpenAI
os.environ['ANONYMIZED_TELEMETRY'] = "false"
company_credentials = {'x_user': 'your-real-username@email.com', 'x_pass': 'your-real-password123'}
# Option 1: Secrets available for all websites
sensitive_data = company_credentials
# Option 2: Secrets per domain with regex
# sensitive_data = {
# 'https://*.example-staging.com': company_credentials,
# 'http*://test.example.com': company_credentials,
# 'https://example.com': company_credentials,
# 'https://google.com': {'g_email': 'user@gmail.com', 'g_pass': 'google_password'},
# }
agent = Agent(
task='Log into example.com with username x_user and password x_pass',
sensitive_data={
'https://example.com': {
'x_user': 'your-real-username@email.com',
'x_pass': 'your-real-password123',
},
},
sensitive_data=sensitive_data,
use_vision=False, # Disable vision to prevent LLM seeing sensitive data in screenshots
llm=ChatOpenAI(model='gpt-4.1-mini'),
)

View File

@@ -2,7 +2,6 @@
title: "Lifecycle Hooks"
description: "Customize agent behavior with lifecycle hooks"
icon: "Wrench"
author: "Carlos A. Planchón"
mode: "wide"
---
@@ -27,55 +26,63 @@ Each hook should be an `async` callable function that accepts the `agent` instan
### Basic Example
```python
import asyncio
from pathlib import Path

from browser_use import Agent, ChatOpenAI
from browser_use.browser.events import ScreenshotEvent


async def my_step_hook(agent: Agent):
	# inside a hook you can access all the state and methods under the Agent object:
	#   agent.settings, agent.state, agent.task
	#   agent.tools, agent.llm, agent.browser_session
	#   agent.pause(), agent.resume(), agent.add_new_task(...), etc.

	# You also have direct access to the browser state
	state = await agent.browser_session.get_browser_state_summary()

	current_url = state.url
	visit_log = agent.history.urls()
	previous_url = visit_log[-2] if len(visit_log) >= 2 else None
	print(f'Agent was last on URL: {previous_url} and is now on {current_url}')

	cdp_session = await agent.browser_session.get_or_create_cdp_session()

	# Example: Get page HTML content
	doc = await cdp_session.cdp_client.send.DOM.getDocument(session_id=cdp_session.session_id)
	html_result = await cdp_session.cdp_client.send.DOM.getOuterHTML(
		params={'nodeId': doc['root']['nodeId']}, session_id=cdp_session.session_id
	)
	page_html = html_result['outerHTML']

	# Example: Take a screenshot using the event system
	screenshot_event = agent.browser_session.event_bus.dispatch(ScreenshotEvent(full_page=False))
	await screenshot_event
	result = await screenshot_event.event_result(raise_if_any=True, raise_if_none=True)

	# Example: pause agent execution and resume it based on some custom code
	if '/finished' in current_url:
		agent.pause()
		Path('result.txt').write_text(page_html)
		input('Saved "finished" page content to result.txt, press [Enter] to resume...')
		agent.resume()


async def main():
	agent = Agent(
		task='Search for the latest news about AI',
		llm=ChatOpenAI(model='gpt-5-mini'),
	)

	await agent.run(
		on_step_start=my_step_hook,
		# on_step_end=...
		max_steps=10,
	)


if __name__ == '__main__':
	asyncio.run(main())
```
## Data Available in Hooks
@@ -96,287 +103,17 @@ When working with agent hooks, you have access to the entire `Agent` instance. H
- `agent.history.model_actions()`: Actions taken by the agent
- `agent.history.extracted_content()`: Content extracted from web pages
- `agent.history.urls()`: URLs visited by the agent
- `agent.browser_session` gives direct access to the `BrowserSession` and CDP interface
- `agent.browser_session.agent_focus`: Get the current CDP session the agent is focused on
- `agent.browser_session.get_or_create_cdp_session()`: Get the current CDP session for browser interaction
- `agent.browser_session.get_tabs()`: Get all tabs currently open
- `agent.browser_session.get_page_html()`: Current page HTML
- `agent.browser_session.take_screenshot()`: Screenshot of the current page
- `agent.browser_session.get_current_page_url()`: Get the URL of the current active tab
- `agent.browser_session.get_current_page_title()`: Get the title of the current active tab
## Tips for Using Hooks
- **Avoid blocking operations**: Since hooks run in the same execution thread as the agent, keep them efficient and avoid blocking operations.
- **Handle exceptions**: Make sure your hook functions handle exceptions gracefully to prevent interrupting the agent's main flow.
- **Use custom tools instead**: hooks are fairly advanced, most things can be implemented with [custom tools](/customize/tools/basics) instead
- **Increase step_timeout**: If your hook is doing something that takes a long time, you can increase the `step_timeout` parameter in the `Agent(...)` constructor.
---
## Complex Example: Agent Activity Recording System
This comprehensive example demonstrates a complete implementation for recording and saving Browser-Use agent activity, consisting of both server and client components.
### Setup Instructions
To use this example, you'll need to:
1. Set up the required dependencies:
```bash
pip install fastapi uvicorn prettyprinter pyobjtojson python-dotenv browser-use
```
2. Create two separate Python files:
- `api.py` - The FastAPI server component
- `client.py` - The Browser-Use agent with recording hook
3. Run both components:
- Start the API server first: `python api.py`
- Then run the client: `python client.py`
### Server Component (api.py)
The server component handles receiving and storing the agent's activity data:
```python
#!/usr/bin/env python3
#
# FastAPI API to record and save Browser-Use activity data.
# Save this code to api.py and run with `python api.py`
#
import base64
import json
from pathlib import Path

import prettyprinter
import uvicorn
from fastapi import FastAPI, Request

prettyprinter.install_extras()


# Utility function to save screenshots
def b64_to_png(b64_string: str, output_file):
	"""
	Convert a Base64-encoded string to a PNG file.

	:param b64_string: A string containing Base64-encoded data
	:param output_file: The path to the output PNG file
	"""
	with open(output_file, "wb") as f:
		f.write(base64.b64decode(b64_string))


# Initialize FastAPI app
app = FastAPI()


@app.post("/post_agent_history_step")
async def post_agent_history_step(request: Request):
	"""Receive one agent step as JSON and persist it to recordings/<n>.json."""
	data = await request.json()
	prettyprinter.cpprint(data)

	# Ensure the "recordings" folder exists using pathlib
	recordings_folder = Path("recordings")
	recordings_folder.mkdir(exist_ok=True)

	# Determine the next file number by examining existing .json files
	existing_numbers = []
	for item in recordings_folder.iterdir():
		if item.is_file() and item.suffix == ".json":
			try:
				existing_numbers.append(int(item.stem))
			except ValueError:
				# In case the file name isn't just a number
				pass

	# max(..., default=0) covers the empty-folder case without an if/else branch
	next_number = max(existing_numbers, default=0) + 1

	# Construct the file path
	file_path = recordings_folder / f"{next_number}.json"

	# Save the JSON data to the file
	with file_path.open("w") as f:
		json.dump(data, f, indent=2)

	# Optionally save screenshot if needed
	# if "website_screenshot" in data and data["website_screenshot"]:
	#     screenshot_folder = Path("screenshots")
	#     screenshot_folder.mkdir(exist_ok=True)
	#     b64_to_png(data["website_screenshot"], screenshot_folder / f"{next_number}.png")

	return {"status": "ok", "message": f"Saved to {file_path}"}


if __name__ == "__main__":
	print("Starting Browser-Use recording API on http://0.0.0.0:9000")
	uvicorn.run(app, host="0.0.0.0", port=9000)
```
### Client Component (client.py)
The client component runs the Browser-Use agent with a recording hook:
```python
#!/usr/bin/env python3
#
# Client to record and save Browser-Use activity.
# Save this code to client.py and run with `python client.py`
#
import asyncio

import requests
from dotenv import load_dotenv
from pyobjtojson import obj_to_json

from browser_use import Agent
from browser_use.llm import ChatOpenAI

# Load environment variables (for API keys)
load_dotenv()


def send_agent_history_step(data):
	"""Send the agent step data to the recording API and return its JSON reply."""
	url = "http://127.0.0.1:9000/post_agent_history_step"
	response = requests.post(url, json=data)
	return response.json()


async def record_activity(agent_obj):
	"""Hook function that captures and records agent activity at each step"""
	website_html = None
	website_screenshot = None
	urls_json_last_elem = None
	model_thoughts_last_elem = None
	model_outputs_json_last_elem = None
	model_actions_json_last_elem = None
	extracted_content_json_last_elem = None

	print("--- ON_STEP_START HOOK ---")

	# Capture current page state
	website_html = await agent_obj.browser_session.get_page_html()
	website_screenshot = await agent_obj.browser_session.take_screenshot()

	# Make sure we have state history before reading from it
	if hasattr(agent_obj, "state"):
		history = agent_obj.state.history
	else:
		print("Warning: Agent has no state history")
		return

	# Process model thoughts
	model_thoughts = obj_to_json(obj=history.model_thoughts(), check_circular=False)
	if len(model_thoughts) > 0:
		model_thoughts_last_elem = model_thoughts[-1]

	# Process model outputs
	model_outputs = agent_obj.state.history.model_outputs()
	model_outputs_json = obj_to_json(obj=model_outputs, check_circular=False)
	if len(model_outputs_json) > 0:
		model_outputs_json_last_elem = model_outputs_json[-1]

	# Process model actions
	model_actions = agent_obj.state.history.model_actions()
	model_actions_json = obj_to_json(obj=model_actions, check_circular=False)
	if len(model_actions_json) > 0:
		model_actions_json_last_elem = model_actions_json[-1]

	# Process extracted content
	extracted_content = agent_obj.state.history.extracted_content()
	extracted_content_json = obj_to_json(obj=extracted_content, check_circular=False)
	if len(extracted_content_json) > 0:
		extracted_content_json_last_elem = extracted_content_json[-1]

	# Process URLs
	urls = agent_obj.state.history.urls()
	urls_json = obj_to_json(obj=urls, check_circular=False)
	if len(urls_json) > 0:
		urls_json_last_elem = urls_json[-1]

	# Create a summary of all data for this step
	model_step_summary = {
		"website_html": website_html,
		"website_screenshot": website_screenshot,
		"url": urls_json_last_elem,
		"model_thoughts": model_thoughts_last_elem,
		"model_outputs": model_outputs_json_last_elem,
		"model_actions": model_actions_json_last_elem,
		"extracted_content": extracted_content_json_last_elem,
	}

	print("--- MODEL STEP SUMMARY ---")
	print(f"URL: {urls_json_last_elem}")

	# Send data to the API
	result = send_agent_history_step(data=model_step_summary)
	print(f"Recording API response: {result}")


async def run_agent():
	"""Run the Browser-Use agent with the recording hook"""
	agent = Agent(
		task="Compare the price of gpt-4o and DeepSeek-V3",
		llm=ChatOpenAI(model="gpt-4.1-mini"),
	)
	try:
		print("Starting Browser-Use agent with recording hook")
		await agent.run(on_step_start=record_activity, max_steps=30)
	except Exception as e:
		print(f"Error running agent: {e}")


if __name__ == "__main__":
	# Check if API is running; a bare `except:` here would also swallow
	# KeyboardInterrupt/SystemExit, so catch only network-related errors
	try:
		requests.get("http://127.0.0.1:9000")
		print("Recording API is available")
	except requests.exceptions.RequestException:
		print("Warning: Recording API may not be running. Start api.py first.")

	# Run the agent
	asyncio.run(run_agent())
```
Contribution by Carlos A. Planchón.
### Working with the Recorded Data
After running the agent, you'll find the recorded data in the `recordings` directory. Here's how you can use this data:
1. **View recorded sessions**: Each JSON file contains a snapshot of agent activity for one step
2. **Extract screenshots**: You can modify the API to save screenshots separately
3. **Analyze agent behavior**: Use the recorded data to study how the agent navigates websites
### Extending the Example
You can extend this recording system in several ways:
1. **Save screenshots separately**: Uncomment the screenshot saving code in the API
2. **Add a web dashboard**: Create a simple web interface to view recorded sessions
3. **Add session IDs**: Modify the API to group steps by agent session
4. **Add filtering**: Implement filters to record only specific types of actions

View File

@@ -1,252 +0,0 @@
---
title: "MCP Client"
description: "Connect external MCP servers to extend browser-use with additional tools and integrations"
icon: "plug"
mode: "wide"
---
The MCP (Model Context Protocol) client allows browser-use agents to connect to external MCP servers, automatically exposing their tools as actions.
<Note>
MCP is an open protocol for integrating LLMs with external data sources and tools. Learn more at [modelcontextprotocol.io](https://modelcontextprotocol.io).
</Note>
<Info>
Looking to expose browser-use as an MCP server instead? See [MCP Server](/customize/mcp-server).
</Info>
## Installation
```bash
uv pip install "browser-use[cli]"
```
## Quick Start
```python
import os
from browser_use import Agent, Tools
from browser_use.mcp.client import MCPClient
# Create tools
tools = Tools()
# Connect to MCP server
mcp_client = MCPClient(
server_name="filesystem",
command="npx",
args=["@modelcontextprotocol/server-filesystem", "/path/to/files"]
)
# Connect and register
await mcp_client.connect()
await mcp_client.register_to_tools(tools)
# Agent can now use filesystem tools
agent = Agent(
task="Read the README.md file",
tools=tools
)
await agent.run()
# Clean up
await mcp_client.disconnect()
```
## API Reference
### MCPClient
```python
class MCPClient:
def __init__(
self,
server_name: str,
command: str,
args: list[str] | None = None,
env: dict[str, str] | None = None,
) -> None
```
**Parameters:**
- `server_name`: Name of the MCP server (for logging)
- `command`: Command to start the server (e.g., `"npx"`)
- `args`: Arguments for the command
- `env`: Environment variables for the server
**Key Methods:**
```python
# Connect to server
await mcp_client.connect()
# Register the server's MCP tools into the Tools registry
await mcp_client.register_to_tools(
tools,
tool_filter=['read_file', 'write_file'], # Optional
prefix='fs_' # Optional prefix
)
# Disconnect
await mcp_client.disconnect()
```
### Context Manager Usage
```python
async with MCPClient(
server_name="github",
command="npx",
args=["@modelcontextprotocol/server-github"],
env={"GITHUB_TOKEN": os.getenv("GITHUB_TOKEN")}
) as client:
await client.register_to_tools(tools)
await agent.run()
# Automatically disconnected
```
## Common MCP Servers
### Filesystem
```python
MCPClient(
server_name="filesystem",
command="npx",
args=["@modelcontextprotocol/server-filesystem", "/path"]
)
```
### PostgreSQL
```python
MCPClient(
server_name="postgres",
command="npx",
args=["@modelcontextprotocol/server-postgres", "postgresql://localhost/db"]
)
```
### GitHub
```python
MCPClient(
server_name="github",
command="npx",
args=["@modelcontextprotocol/server-github"],
env={"GITHUB_TOKEN": os.getenv("GITHUB_TOKEN")}
)
```
## Multiple Servers
Connect multiple servers with prefixes to avoid conflicts:
```python
# Filesystem server
fs_client = MCPClient(
server_name="filesystem",
command="npx",
args=["@modelcontextprotocol/server-filesystem", "."]
)
await fs_client.connect()
await fs_client.register_to_tools(tools, prefix="fs_")
# GitHub server
gh_client = MCPClient(
server_name="github",
command="npx",
args=["@modelcontextprotocol/server-github"],
env={"GITHUB_TOKEN": os.getenv("GITHUB_TOKEN")}
)
await gh_client.connect()
await gh_client.register_to_tools(tools, prefix="gh_")
# Agent can use both
agent = Agent(
task="Read README.md and create a GitHub issue",
tools=tools
)
await agent.run()
# Clean up
await fs_client.disconnect()
await gh_client.disconnect()
```
## Tool Filtering
Register only specific tools:
```python
await mcp_client.register_to_tools(
tools,
tool_filter=['read_file', 'list_directory']
)
```
## Custom MCP Server
Create your own MCP server:
```python
# my_server.py
import mcp.server.stdio
import mcp.types as types
from mcp.server import Server

server = Server("custom-tools")


@server.list_tools()
async def handle_list_tools() -> list[types.Tool]:
	"""Advertise the tools this server provides."""
	return [
		types.Tool(
			name="calculate",
			description="Perform calculation",
			inputSchema={
				"type": "object",
				"properties": {
					"expression": {"type": "string"}
				},
				"required": ["expression"]
			}
		)
	]


@server.call_tool()
async def handle_call_tool(name: str, arguments: dict) -> list[types.TextContent]:
	"""Dispatch a tool call by name and return its text result."""
	if name == "calculate":
		# SECURITY: eval() executes arbitrary Python code. Never use it on
		# untrusted input in production — prefer ast.literal_eval or a real
		# expression parser.
		result = eval(arguments["expression"])
		return [types.TextContent(type="text", text=str(result))]
	return []


# Run server
async def main():
	async with mcp.server.stdio.stdio_server() as (read, write):
		await server.run(read, write, ...)


if __name__ == "__main__":
	import asyncio

	asyncio.run(main())
```
Connect custom server:
```python
custom_client = MCPClient(
server_name="custom",
command="python",
args=["my_server.py"]
)
```
## Best Practices
1. **Always disconnect** when done
2. **Use prefixes** when connecting multiple servers
3. **Filter tools** to limit capabilities
4. **Use context managers** for automatic cleanup
## See Also
- [MCP Server](/customize/mcp-server) - Expose browser-use as an MCP server
- [Custom Functions](/customize/custom-functions) - Write custom actions directly
- [Model Context Protocol](https://modelcontextprotocol.io) - MCP specification

View File

@@ -1,436 +0,0 @@
---
title: "MCP Server"
description: "Expose browser-use capabilities as an MCP server for AI assistants like Claude Desktop"
icon: "server"
mode: "wide"
---
The MCP server exposes browser-use's browser automation capabilities as tools that can be used by AI assistants like Claude Desktop. This allows external MCP clients to control browsers, navigate websites, extract content, and perform automated tasks.
<Note>
This is the opposite of the [MCP Client](/customize/mcp-client). The MCP client lets browser-use connect to external MCP servers, while this MCP server lets external AI assistants connect to browser-use.
</Note>
## Overview
The MCP server acts as a bridge between MCP-compatible AI assistants and browser-use:
```mermaid
graph LR
A[Claude Desktop] -->|MCP Protocol| B[Browser-use MCP Server]
B --> C[Browser]
B --> D[Tools]
B --> E[FileSystem]
C --> F[Playwright Browser]
style B fill:#f9f,stroke:#333,stroke-width:2px
```
## Installation
```bash
uv pip install "browser-use[cli]"
```
## Quick Start
### 1. Configure Claude Desktop
Add browser-use to your Claude Desktop configuration:
<Tabs>
<Tab title="macOS">
Edit `~/Library/Application Support/Claude/claude_desktop_config.json`:
```json
{
"mcpServers": {
"browser-use": {
"command": "uvx",
"args": ["browser-use[cli]", "--mcp"],
"env": {
"OPENAI_API_KEY": "sk-..." // Optional: for content extraction
}
}
}
}
```
</Tab>
<Tab title="Windows">
Edit `%APPDATA%\Claude\claude_desktop_config.json`:
```json
{
"mcpServers": {
"browser-use": {
"command": "uvx",
"args": ["browser-use[cli]", "--mcp"],
"env": {
"OPENAI_API_KEY": "sk-..." // Optional: for content extraction
}
}
}
}
```
</Tab>
</Tabs>
### 2. Restart Claude Desktop
The browser-use tools will appear in Claude's tools menu (🔌 icon).
### 3. Use Browser Automation
Ask Claude to perform browser tasks:
- "Navigate to example.com and describe what you see"
- "Search for 'browser automation' on Google"
- "Fill out the contact form on this website"
## API Reference
### Available Tools
The MCP server exposes the following tools to MCP clients:
#### Navigation Tools
##### `browser_navigate`
Navigate to a URL.
```typescript
browser_navigate(url: string, new_tab?: boolean): string
```
**Parameters:**
| Parameter | Type | Required | Description |
|-----------|------|----------|-------------|
| `url` | `string` | Yes | URL to navigate to |
| `new_tab` | `boolean` | No | Open in new tab (default: false) |
**Returns:** Success message with URL
##### `browser_go_back`
Navigate back in browser history.
```typescript
browser_go_back(): string
```
**Returns:** "Navigated back"
#### Interaction Tools
##### `browser_click`
Click an element by index.
```typescript
browser_click(index: number, new_tab?: boolean): string
```
**Parameters:**
| Parameter | Type | Required | Description |
|-----------|------|----------|-------------|
| `index` | `number` | Yes | Element index from browser state |
| `new_tab` | `boolean` | No | Open link in new tab (default: false) |
**Returns:** Success message indicating click action
**Note:** When `new_tab` is true:
- For links: Extracts href and opens in new tab
- For other elements: Uses Cmd/Ctrl+Click
##### `browser_type`
Type text into an input field.
```typescript
browser_type(index: number, text: string): string
```
**Parameters:**
| Parameter | Type | Required | Description |
|-----------|------|----------|-------------|
| `index` | `number` | Yes | Element index from browser state |
| `text` | `string` | Yes | Text to type |
**Returns:** Success message with typed text
##### `browser_scroll`
Scroll the page.
```typescript
browser_scroll(direction?: "up" | "down"): string
```
**Parameters:**
| Parameter | Type | Required | Description |
|-----------|------|----------|-------------|
| `direction` | `"up" \| "down"` | No | Scroll direction (default: "down") |
**Returns:** "Scrolled {direction}"
#### State & Content Tools
##### `browser_get_state`
Get current browser state with all interactive elements.
```typescript
browser_get_state(include_screenshot?: boolean): string
```
**Parameters:**
| Parameter | Type | Required | Description |
|-----------|------|----------|-------------|
| `include_screenshot` | `boolean` | No | Include base64 screenshot (default: false) |
**Returns:** JSON string containing:
```json
{
"url": "current page URL",
"title": "page title",
"tabs": [{"url": "...", "title": "..."}],
"interactive_elements": [
{
"index": 0,
"tag": "button",
"text": "element text (max 100 chars)",
"placeholder": "if present",
"href": "if link"
}
],
"screenshot": "base64 if requested"
}
```
The interactive elements include all clickable and interactive elements on the page, with their:
- `index`: Used to reference the element in other commands (click, type)
- `tag`: HTML tag name (button, input, a, etc.)
- `text`: Visible text content, truncated to 100 characters
- `placeholder`: For input fields (if present)
- `href`: For links (if present)
##### `browser_extract_content`
Extract structured content from the current page using AI.
```typescript
browser_extract_content(query: string, extract_links?: boolean): string
```
**Parameters:**
| Parameter | Type | Required | Description |
|-----------|------|----------|-------------|
| `query` | `string` | Yes | What to extract (e.g., "all product prices") |
| `extract_links` | `boolean` | No | Include links in extraction (default: false) |
**Returns:** Extracted content based on query
**Note:** Requires `OPENAI_API_KEY` environment variable for AI extraction.
#### Tab Management Tools
##### `browser_list_tabs`
List all open browser tabs.
```typescript
browser_list_tabs(): string
```
**Returns:** JSON array of tab information:
```json
[
{
"tab_id": 'AE21',
"url": "https://example.com",
"title": "Page Title"
}
]
```
##### `browser_switch_tab`
Switch to a specific tab.
```typescript
browser_switch_tab(tab_id: string): string
```
**Parameters:**
| Parameter | Type | Required | Description |
|-----------|------|----------|-------------|
| `tab_id` | `string` | Yes | ID of tab to switch to (last 4 characters of TargetID) |
**Returns:** Success message with tab URL
##### `browser_close_tab`
Close a specific tab.
```typescript
browser_close_tab(tab_id: string): string
```
**Parameters:**
| Parameter | Type | Required | Description |
|-----------|------|----------|-------------|
| `tab_id` | `string` | Yes | ID of the Tab to close (last 4 characters of TargetID) |
**Returns:** Success message with closed tab URL
### Tool Response Format
All tools return text content. Errors are returned as strings starting with "Error:".
## Configuration
### Environment Variables
Configure the MCP server behavior through environment variables in Claude Desktop config:
```json
{
"mcpServers": {
"browser-use": {
"command": "python",
"args": ["-m", "browser_use.mcp.server"],
"env": {
"OPENAI_API_KEY": "sk-..." // For AI content extraction
}
}
}
}
```
### Browser Profile Settings
The MCP server creates a browser session with these default settings:
- **Downloads Path**: `~/Downloads/browser-use-mcp/`
- **Wait Between Actions**: 0.5 seconds
- **Keep Alive**: True (browser stays open between commands)
- **Allowed Domains**: None by default (all domains allowed)
## Advanced Usage
### Running Standalone
Test the MCP server without Claude Desktop:
```bash
# Run server (reads from stdin, writes to stdout)
uvx 'browser-use[cli]' --mcp
# The server communicates via JSON-RPC on stdio
```
### Security Considerations
<Warning>
The MCP server provides full browser control to connected AI assistants. Consider these security measures:
</Warning>
1. **Domain Restrictions**: Currently not configurable via environment variables, but the server creates sessions with no domain restrictions by default
2. **File System Access**: The server creates a FileSystem instance at `~/.browser-use-mcp` for extraction operations
3. **Downloads**: Files download to `~/Downloads/browser-use-mcp/`
## Implementation Details
### Browser Session Management
- **Lazy Initialization**: Browser session is created on first browser tool use
- **Persistent Session**: Session remains active across multiple tool calls
- **Single Session**: Currently maintains one browser session per server instance
### Tool Categories
1. **Direct Browser Control**: Tools starting with `browser_` that directly interact with the browser
2. **Agent Tasks**: Currently commented out in implementation (`browser_use_run_task`)
### Error Handling
- All exceptions are caught and returned as text: `"Error: {message}"`
- Browser session initialization errors are returned to the client
- Missing dependencies (e.g., OPENAI_API_KEY) return descriptive error messages
## Troubleshooting
### Server Not Appearing in Claude
1. **Check configuration path:**
- macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`
- Windows: `%APPDATA%\Claude\claude_desktop_config.json`
2. **Verify Python installation:**
```bash
uvx 'browser-use[cli]' --version
uvx 'browser-use[cli]' --mcp --help
```
3. **Check Claude logs:**
- macOS: `~/Library/Logs/Claude/mcp.log`
- Windows: `%APPDATA%\Claude\logs\mcp.log`
### Browser Not Launching
```bash
# Install Playwright browsers
playwright install chromium
# Test browser launch
python -c "from browser_use import Browser; import asyncio; asyncio.run(Browser().start())"
```
### Connection Errors
If you see "MCP server connection failed":
1. Test the server directly:
```bash
uvx 'browser-use[cli]' --mcp
```
2. Check all dependencies:
```bash
uv pip install "browser-use[cli]"
```
### Content Extraction Not Working
If `browser_extract_content` returns errors:
1. Ensure `OPENAI_API_KEY` is set in the environment configuration
2. Verify the API key is valid
3. Check that you have credits/access to the OpenAI API
## Limitations
| Limitation | Description | Workaround |
|------------|-------------|------------|
| Single Browser Session | One browser instance per server | Restart server for new session |
| No Domain Restrictions Config | Cannot configure allowed domains via env vars | Modify server code if needed |
| No Agent Mode | `browser_use_run_task` is commented out | Use direct browser control tools |
| Text-Only Responses | All responses are text strings | Parse JSON responses client-side |
## Comparison with MCP Client
| Feature | MCP Server (this) | [MCP Client](/customize/mcp-client) |
|---------|-------------------|-------------------------------------|
| **Purpose** | Expose browser to AI | Connect agent to tools |
| **User** | Claude Desktop, etc. | Browser-use agents |
| **Direction** | External → Browser | Agent → External |
| **Configuration** | JSON config file | Python code |
| **Tools** | Fixed browser tools | Dynamic from server |
| **Use Case** | Interactive assistance | Automated workflows |
## Code Examples
- [Simple MCP client example](https://github.com/browser-use/browser-use/tree/main/examples/mcp/simple_server.py) - Basic MCP client connecting to browser-use server
- [Advanced MCP client example](https://github.com/browser-use/browser-use/tree/main/examples/mcp/advanced_server.py) - Multi-server orchestration and complex workflows
## See Also
- [MCP Client](/customize/mcp-client) - Connect browser-use to external MCP servers
- [Model Context Protocol](https://modelcontextprotocol.io) - MCP specification
- [Claude Desktop](https://claude.ai/download) - Primary MCP client

View File

@@ -1,72 +0,0 @@
---
title: "Contribution Guide"
description: "Learn how to contribute to Browser Use"
icon: "github"
mode: "wide"
---
# Join the Browser Use Community!
We're thrilled you're interested in contributing to Browser Use! This guide will help you get started with contributing to our project. Your contributions are what make the open-source community such an amazing place to learn, inspire, and create.
## Quick Setup
Get started with Browser Use development in minutes:
```bash
git clone https://github.com/browser-use/browser-use
cd browser-use
uv sync --all-extras --dev
# or pip install -U git+https://github.com/browser-use/browser-use.git@main
echo "BROWSER_USE_LOGGING_LEVEL=debug" >> .env
```
For more detailed setup instructions, see our [Local Setup Guide](/development/local-setup).
## How to Contribute
### Find Something to Work On
- Browse our [GitHub Issues](https://github.com/browser-use/browser-use/issues) for beginner-friendly issues labeled `good-first-issue`
- Check out our most active issues or ask in [Discord](https://discord.gg/zXJJHtJf3k) for ideas of what to work on
- Get inspiration and share what you build in the [`#showcase-your-work`](https://discord.com/channels/1303749220842340412/1305549200678850642) channel
- Explore or contribute to [`awesome-browser-use-prompts`](https://github.com/browser-use/awesome-prompts)!
### Making a Great Pull Request
When submitting a pull request, please:
- Include a clear description of what the PR does and why it's needed
- Add tests that cover your changes
- Include a demo screenshot/gif or an example script demonstrating your changes
- Make sure the PR passes all CI checks and tests
- Keep your PR focused on a single issue or feature to make it easier to review
Note: We appreciate quality over quantity. Instead of submitting small typo/style-only PRs, consider including those fixes as part of larger bugfix or feature PRs.
### Contribution Process
1. Fork the repository
2. Create a new branch for your feature or bugfix
3. Make your changes
4. Run tests to ensure everything works
5. Submit a pull request
6. Respond to any feedback from maintainers
7. Celebrate your contribution!
Feel free to bump your issues/PRs with comments periodically if you need faster feedback.
## Code of Conduct
We're committed to providing a welcoming and inclusive environment for all contributors. Please be respectful and constructive in all interactions.
## Getting Help
If you need help at any point:
- Join our [Discord community](https://link.browser-use.com/discord)
- Ask questions in the appropriate GitHub issue
- Check our [documentation](/introduction)
We're here to help you succeed in contributing to Browser Use!

View File

@@ -0,0 +1,11 @@
---
title: "Get Help"
description: "More than 20k developers help each other"
icon: "circle-question"
mode: "wide"
---
1. Check our [GitHub Issues](https://github.com/browser-use/browser-use/issues)
2. Ask in our [Discord community](https://link.browser-use.com/discord)
3. Get support for your enterprise with support@browser-use.com

View File

@@ -1,160 +0,0 @@
---
title: "Local Setup"
description: "Set up Browser Use development environment locally"
icon: "laptop-code"
mode: "wide"
---
# Welcome to Browser Use Development!
We're excited to have you join our community of contributors. This guide will help you set up your local development environment quickly and easily.
## Quick Setup
If you're familiar with Python development, here's the quick way to get started:
```bash
git clone https://github.com/browser-use/browser-use
cd browser-use
uv sync --all-extras --dev
# or pip install -U git+https://github.com/browser-use/browser-use.git@main
echo "BROWSER_USE_LOGGING_LEVEL=debug" >> .env
```
## Helper Scripts
We provide several convenient shell scripts in the `bin/` directory to help with common development tasks:
```bash
# Complete setup script - installs uv, creates a venv, and installs dependencies
./bin/setup.sh
# Run all pre-commit hooks (formatting, linting, type checking)
./bin/lint.sh
# Run the core test suite that's executed in CI
./bin/test.sh
```
## Prerequisites
Browser Use requires Python 3.11 or higher. We recommend using [uv](https://docs.astral.sh/uv/) for Python environment management.
## Detailed Setup Instructions
### Clone the Repository
First, clone the Browser Use repository:
```bash
git clone https://github.com/browser-use/browser-use
cd browser-use
```
### Environment Setup
1. Create and activate a virtual environment:
```bash
uv venv --python 3.11
source .venv/bin/activate
```
2. Install dependencies:
```bash
# Install the package in editable mode with all development dependencies
uv sync --all-extras
# Install the default browser
playwright install chromium --with-deps --no-shell
```
## Configuration
Set up your environment variables:
```bash
# Copy the example environment file
cp .env.example .env
```
Or manually create a `.env` file with the API key for the models you want to use set:
```bash .env
OPENAI_API_KEY=...
ANTHROPIC_API_KEY=
AZURE_ENDPOINT=
AZURE_OPENAI_API_KEY=
GOOGLE_API_KEY=
DEEPSEEK_API_KEY=
GROK_API_KEY=
NOVITA_API_KEY=
BROWSER_USE_LOGGING_LEVEL=debug # Helpful for development
```
<Note>
See [Supported Models](/customize/supported-models) for available LLM options
and their specific API key requirements.
</Note>
## Development
After setup, you can:
- Try demos in the example library with `uv run examples/simple.py`
- Run the linter/formatter with `uv run ruff format examples/some/file.py`
- Run tests with `uv run pytest`
- Build the package with `uv build`
### Linting
```bash
# Run the linter on the whole project (must pass for PR to be allowed to merge)
uv run pre-commit run --all-files
# or use our convenience script
./bin/lint.sh
# Install the linter & formatter pre-commit hooks to run automatically
pre-commit install --install-hooks
# Experimental: run the type checker
uv run type
```
### Tests
```bash
# Run all tests that run in CI
./bin/test.sh
# Run specific tests
uv run pytest # run everything
uv run pytest tests/test_tools.py # run a specific test file
uv run pytest tests/test_sensitive_data.py tests/test_tab_management.py # run two test files
uv run pytest tests/test_tab_management.py::TestTabManagement::test_user_changes_tab # run a single test
```
### Build
```bash
uv build
uv pip install dist/*.whl
# push build to PyPI (automatically run by Github Actions CI)
uv publish
```
## Getting Help
If you run into any issues:
1. Check our [GitHub Issues](https://github.com/browser-use/browser-use/issues)
2. Join our [Discord community](https://link.browser-use.com/discord) for support
<Note>
We welcome contributions! See our [Contribution
Guide](/development/contribution-guide) for guidelines on how to help improve
Browser Use.
</Note>

View File

@@ -31,7 +31,7 @@ import asyncio
from lmnr import Laminar, Instruments
# this line auto-instruments Browser Use and any browser you use (local or remote)
Laminar.initialize(project_api_key="...")
Laminar.initialize(project_api_key="...", disabled_instruments={Instruments.BROWSER_USE})
async def main():
agent = Agent(

View File

@@ -0,0 +1,31 @@
---
title: "Telemetry"
description: "Understanding Browser Use's telemetry"
icon: "chart-mixed"
mode: "wide"
---
## Overview
Browser Use is free under the MIT license. To help us continue improving the library, we collect anonymous usage data with [PostHog](https://posthog.com). This information helps us understand how the library is used, fix bugs more quickly, and prioritize new features.
## Opting Out
You can disable telemetry by setting the environment variable:
```bash .env
ANONYMIZED_TELEMETRY=false
```
Or in your Python code:
```python
import os
os.environ["ANONYMIZED_TELEMETRY"] = "false"
```
<Note>
Even when enabled, telemetry has zero impact on the library's performance. Code is available in [Telemetry
Service](https://github.com/browser-use/browser-use/tree/main/browser_use/telemetry).
</Note>

View File

@@ -0,0 +1,37 @@
---
title: "Contribution Guide"
description: ""
icon: "handshake"
mode: "wide"
---
## Mission
- Make developers happy
- Do more clicks than a human
- Tell your computer what to do, and it gets it done.
- Make agents faster and more reliable.
## What to work on?
- This space is moving fast. We have 10 ideas daily. Let's exchange some.
- Browse our [GitHub Issues](https://github.com/browser-use/browser-use/issues)
- Check out our most active issues on [Discord](https://discord.gg/zXJJHtJf3k)
- Get inspiration in [`#showcase-your-work`](https://discord.com/channels/1303749220842340412/1305549200678850642) channel
## What makes a great PR?
1. Why do we need this PR?
2. Include a demo screenshot/gif
3. Make sure the PR passes all CI tests
4. Keep your PR focused on a single feature
## How?
1. Fork the repository
2. Create a new branch for your feature
3. Submit a PR
We are overwhelmed with Issues. Feel free to bump your issues/PRs with comments periodically if you need faster feedback.

View File

@@ -0,0 +1,49 @@
---
title: "Local Setup"
description: "We're excited to have you join our community of contributors. "
icon: "laptop-code"
mode: "wide"
---
## Welcome to Browser Use Development!
```bash
git clone https://github.com/browser-use/browser-use
cd browser-use
uv sync --all-extras --dev
# or pip install -U git+https://github.com/browser-use/browser-use.git@main
```
## Configuration
Set up your environment variables:
```bash
# Copy the example environment file
cp .env.example .env
# set logging level
# BROWSER_USE_LOGGING_LEVEL=debug
```
## Helper Scripts
For common development tasks
```bash
# Complete setup script - installs uv, creates a venv, and installs dependencies
./bin/setup.sh
# Run all pre-commit hooks (formatting, linting, type checking)
./bin/lint.sh
# Run the core test suite that's executed in CI
./bin/test.sh
```
## Run examples
```bash
uv run examples/simple.py
```

View File

@@ -1,40 +0,0 @@
---
title: "Telemetry"
description: "Understanding Browser Use's telemetry and privacy settings"
icon: "chart-mixed"
mode: "wide"
---
## Overview
Browser Use collects anonymous usage data to help us understand how the library is being used and to improve the user experience. It also helps us fix bugs faster and prioritize feature development.
## Data Collection
We use [PostHog](https://posthog.com) for telemetry collection. The data is completely anonymized and contains no personally identifiable information.
<Note>
We never collect personal information, credentials, or specific content from
your browser automation tasks.
</Note>
## Opting Out
You can disable telemetry by setting an environment variable:
```bash .env
ANONYMIZED_TELEMETRY=false
```
Or in your Python code:
```python
import os
os.environ["ANONYMIZED_TELEMETRY"] = "false"
```
<Note>
Even when enabled, telemetry has zero impact on the library's performance or
functionality. Code is available in [Telemetry
Service](https://github.com/browser-use/browser-use/tree/main/browser_use/telemetry).
</Note>

View File

@@ -9,7 +9,10 @@
},
"favicon": "/favicon.ico",
"contextual": {
"options": ["copy", "view"]
"options": [
"copy",
"view"
]
},
"fonts": {
"family": "Geist"
@@ -42,11 +45,31 @@
},
{
"source": "/development/evaluations",
"destination": "/development/contribution-guide"
"destination": "/development/setup/contribution-guide"
},
{
"source": "/cli",
"destination": "/quickstart"
},
{
"source": "/development/local-setup",
"destination": "/development/setup/local-setup"
},
{
"source": "/development/contribution-guide",
"destination": "/development/setup/contribution-guide"
},
{
"source": "/development/telemetry",
"destination": "/development/monitoring/telemetry"
},
{
"source": "/development/observability",
"destination": "/development/monitoring/observability"
},
{
"source": "/development/hooks",
"destination": "/customize/hooks"
}
],
"navigation": {
@@ -56,7 +79,11 @@
"groups": [
{
"group": "Get Started",
"pages": ["introduction", "quickstart", "quickstart_llm"]
"pages": [
"introduction",
"quickstart",
"quickstart_llm"
]
},
{
"group": "Customize",
@@ -104,7 +131,8 @@
"customize/examples/parallel-browser",
"customize/examples/sensitive-data",
"customize/examples/secure",
"customize/examples/more-examples"
"customize/examples/more-examples",
"customize/examples/prompting-guide"
]
}
]
@@ -112,22 +140,40 @@
{
"group": "Development",
"pages": [
"development/contribution-guide",
"development/local-setup",
{
"group": "MCP",
"icon": "link",
"pages": ["customize/mcp-client", "customize/mcp-server"]
"group": "Contribution",
"icon": "github",
"isDefaultOpen": true,
"pages": [
"development/setup/local-setup",
"development/setup/contribution-guide"
]
},
"customize/hooks",
"development/telemetry",
"development/observability"
{
"group": "Advanced",
"icon": "gear",
"isDefaultOpen": false,
"pages": [
"customize/hooks"
]
},
{
"group": "Monitoring",
"icon": "chart-mixed",
"isDefaultOpen": false,
"pages": [
"development/monitoring/observability",
"development/monitoring/telemetry"
]
},
"development/get-help"
]
}
]
},
{
"tab": "Cloud",
"hidden": true,
"versions": [
{
"version": "v1",
@@ -155,27 +201,6 @@
"openapi": "https://api.browser-use.com/api/v1/openapi.json"
}
]
},
{
"version": "v2",
"groups": [
{
"group": "Get Started",
"pages": [
"cloud/v2/quickstart",
"cloud/v2/python-quickstart",
"cloud/v2/node-quickstart"
]
},
{
"group": "Platform",
"pages": [
"cloud/v1/pricing",
"cloud/v1/n8n-browser-use-integration",
"cloud/v1/search"
]
}
]
}
]
}
@@ -191,7 +216,11 @@
"display": "interactive"
},
"examples": {
"languages": ["javascript", "curl", "python"],
"languages": [
"javascript",
"curl",
"python"
],
"required": true
}
},

View File

@@ -20,9 +20,9 @@ icon: "book-open"
Open-source Python library.
</Card>
<Card
title="Cloud API"
title="Cloud Setup"
icon="cloud"
href="/cloud/v2/quickstart"
href="https://docs.cloud.browser-use.com"
color="#FE750E"
>
Scale up with our cloud.

View File

@@ -9,13 +9,13 @@ icon: "rocket"
<Tabs>
<Tab title="uv">
```bash create environment
```bash create environment
uv venv --python 3.12
```
</Tab>
<Tab title="pip">
```bash create environment
python -m venv .venv
```bash create environment with python >= 3.11
python3.12 -m venv .venv
```
</Tab>
</Tabs>
@@ -43,7 +43,7 @@ uvx playwright install chromium --with-deps
<Tab title="pip">
```bash install browser-use & chromium
pip install browser-use
playwright install chromium --with-deps
pip install playwright && playwright install chromium --with-deps
```
</Tab>
</Tabs>

View File

@@ -6,5 +6,5 @@ icon: "brain"
1. Copy all content [🔗 from here](https://docs.browser-use.com/llms-full.txt) (~40k tokens)
1. Copy all content [🔗 from here](https://docs.browser-use.com/llms-full.txt) (~32k tokens)
2. Paste it into your favorite coding agent (Cursor, Claude, ChatGPT ...).

View File

@@ -1,5 +1,3 @@
import asyncio
import logging
import os
import sys
@@ -9,58 +7,26 @@ from dotenv import load_dotenv
load_dotenv()
import pyotp # type: ignore
from browser_use import ActionResult, Agent, ChatOpenAI, Tools
from browser_use import Agent
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
secret_key = os.environ.get('OTP_SECRET_KEY')
if not secret_key:
# For this example copy the code from the website https://authenticationtest.com/totpChallenge/
# For real 2fa just copy the secret key when you setup 2fa, you can get this e.g. in 1Password
secret_key = 'JBSWY3DPEHPK3PXP'
tools = Tools()
sensitive_data = {'bu_2fa_code': secret_key}
@tools.registry.action('Get 2FA code from when OTP is required')
async def get_otp_2fa() -> ActionResult:
"""
Custom action to retrieve 2FA/MFA code from OTP secret key using pyotp.
The OTP secret key should be set in the environment variable OTP_SECRET_KEY.
"""
secret_key = os.environ.get('OTP_SECRET_KEY')
if not secret_key:
raise ValueError('OTP_SECRET_KEY environment variable is not set')
task = """
1. Go to https://authenticationtest.com/totpChallenge/ and try to log in.
2. If prompted for 2FA code:
Input the the secret bu_2fa_code.
totp = pyotp.TOTP(secret_key, digits=6)
code = totp.now()
return ActionResult(extracted_content=code)
When you input bu_2fa_code, the 6 digit code will be generated automatically.
"""
async def main():
# Example task using the 1Password 2FA action
task = """
Steps:
1. Go to https://authenticationtest.com/totpChallenge/ and try to log in.
2. If prompted for 2FA code:
2.1. Use the get_2fa_code action to retrieve the 2FA code.
2.2. Submit the code provided by the get_2fa_code action.
Considerations:
- ALWAYS use the get_2fa_code action to retrieve the 2FA code if needed.
- NEVER skip the 2FA step if the page requires it.
- NEVER extract the code from the page.
- NEVER use a code that is not generated by the get_2fa_code action.
- NEVER hallucinate the 2FA code, always use the get_2fa_code action to get it.
You are completely FORBIDDEN to use any other method to get the 2FA code.
"""
model = ChatOpenAI(model='gpt-4.1-mini')
agent = Agent(task=task, llm=model, tools=tools)
result = await agent.run()
print(f'Task completed with result: {result}')
if __name__ == '__main__':
asyncio.run(main())
Agent(task=task, sensitive_data=sensitive_data).run_sync() # type: ignore

View File

@@ -28,13 +28,6 @@ from browser_use import Agent, ChatOpenAI, Tools
from browser_use.agent.views import ActionResult
from browser_use.browser import BrowserSession
try:
from lmnr import Laminar
Laminar.initialize(project_api_key=os.getenv('LMNR_PROJECT_API_KEY'))
except ImportError:
pass
class OpenAICUAAction(BaseModel):
"""Parameters for OpenAI Computer Use Assistant action."""

View File

@@ -0,0 +1,113 @@
"""
Show how to use sample_images to add image context for your task
"""
import asyncio
import base64
from pathlib import Path
from typing import Any
from dotenv import load_dotenv
from browser_use import Agent
from browser_use.llm import ChatOpenAI
from browser_use.llm.messages import ContentPartImageParam, ContentPartTextParam, ImageURL
# Load environment variables
load_dotenv()
def image_to_base64(image_path: str) -> str:
    """Return the base64-encoded contents of an image file.

    Args:
        image_path: Path to the image file.

    Returns:
        Base64 encoded string of the image bytes.

    Raises:
        FileNotFoundError: If the image file does not exist.
        OSError: If the image file cannot be read.
    """
    source = Path(image_path)
    if not source.exists():
        raise FileNotFoundError(f'Image file not found: {image_path}')
    try:
        raw_bytes = source.read_bytes()
    except OSError as e:
        raise OSError(f'Failed to read image file: {e}')
    return base64.b64encode(raw_bytes).decode('utf-8')
def create_sample_images() -> list[ContentPartTextParam | ContentPartImageParam]:
    """Build the text + image content parts that give the agent visual context.

    Returns:
        list of content parts containing text and image data
    """
    # Image path - replace with your actual image path
    image_path = 'sample_image.png'

    # Describe what the annotated screenshot shows before attaching it.
    layout_description = (
        'The following image explains the google layout. '
        'The image highlights several buttons with red boxes, '
        'and next to them are corresponding labels in red text.\n'
        'Each label corresponds to a button as follows:\n'
        'Label 1 is the "image" button.'
    )
    encoded_image = image_to_base64(image_path)

    # Construct the content parts directly: one text part, one inline PNG.
    return [
        ContentPartTextParam(text=layout_description),
        ContentPartImageParam(
            image_url=ImageURL(
                url=f'data:image/png;base64,{encoded_image}',
                media_type='image/png',
            ),
        ),
    ]
async def main() -> None:
    """Run the browser agent with image context."""
    # Task configuration
    task_str = 'goto https://www.google.com/ and click image button'

    # Initialize the language model
    model = ChatOpenAI(model='gpt-4.1')

    # Image context is optional: continue without it if the sample file is missing.
    try:
        context_parts = create_sample_images()
    except (FileNotFoundError, OSError) as e:
        print(f'Error loading sample images: {e}')
        print('Continuing without sample images...')
        context_parts = []

    # Initialize and run the agent
    await Agent(task=task_str, llm=model, sample_images=context_parts).run()


if __name__ == '__main__':
    asyncio.run(main())

View File

@@ -1,6 +1,7 @@
import asyncio
import os
import sys
from pathlib import Path
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
@@ -16,7 +17,7 @@ browser_session = BrowserSession(
browser_profile=BrowserProfile(
keep_alive=True,
headless=False,
record_video_dir='./tmp/recordings',
record_video_dir=Path('./tmp/recordings'),
user_data_dir='~/.config/browseruse/profiles/default',
)
)

View File

@@ -69,11 +69,11 @@ browser_profile = BrowserProfile(allowed_domains=['*google.com', 'browser-use.co
# Sensitive data (optional) - {key: sensitive_information} - we filter out the sensitive_information from any input to the LLM, it will only work with placeholder.
# By default we pass screenshots to the LLM which can contain your information. Set use_vision=False to disable this.
# If you trust your LLM endpoint, you don't need to worry about this.
sensitive_data: dict[str, str | dict[str, str]] = {'company_name': 'browser-use'}
sensitive_data = {'company_name': 'browser-use'}
# Create Agent
agent = Agent(task=task, llm=llm, browser_profile=browser_profile, sensitive_data=sensitive_data)
agent = Agent(task=task, llm=llm, browser_profile=browser_profile, sensitive_data=sensitive_data) # type: ignore
async def main():

View File

@@ -25,13 +25,14 @@ company_credentials = {'company_username': 'user@example.com', 'company_password
# Map the same credentials to multiple domains for secure access control
# Type annotation to satisfy pyright
sensitive_data: dict[str, str | dict[str, str]] = {
sensitive_data = {
'https://example.com': company_credentials,
'https://admin.example.com': company_credentials,
'https://*.example-staging.com': company_credentials,
'http*://test.example.com': company_credentials,
# You can also add domain-specific credentials
'https://*.google.com': {'g_email': 'user@gmail.com', 'g_pass': 'google_password'},
# # You can also add domain-specific credentials
# 'https://google.com': {'g_email': 'user@gmail.com', 'g_pass': 'google_password'},
'this_email_works_on_all_domains': 'test@test.com',
}
# Update task to use one of the credentials above
task = 'Go to google.com and put the login information in the search bar.'

View File

@@ -0,0 +1,25 @@
import asyncio
from pathlib import Path
from browser_use import Agent, Browser, ChatOpenAI
# NOTE: To use this example, install imageio[ffmpeg], e.g. with uv pip install "browser-use[video]"
async def main():
    """Record a short agent run as a video under ./tmp/recordings."""
    # record_video_dir enables video capture; requires the imageio[ffmpeg] extra (see note above imports).
    browser_session = Browser(record_video_dir=Path('./tmp/recordings'))
    agent = Agent(
        task='Go to github.com/trending then navigate to the first trending repository and report how many commits it has.',
        llm=ChatOpenAI(model='gpt-4.1-mini'),
        browser_session=browser_session,
    )
    # Cap the run so the example (and the resulting video) stays short.
    await agent.run(max_steps=5)
    # The video will be saved automatically when the agent finishes and the session closes.
    print('Agent run finished. Check the ./tmp/recordings directory for the video.')


if __name__ == '__main__':
    asyncio.run(main())

View File

@@ -0,0 +1,42 @@
import asyncio
import os
import sys
from agentmail import AsyncAgentMail # type: ignore
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from browser_use import Agent, Browser, models
from examples.integrations.agentmail.email_tools import EmailTools
TASK = """
Go to reddit.com, create a new account (use the get_email_address), make up password and all other information, confirm the 2fa with get_latest_email, and like latest post on r/elon subreddit.
"""
async def main():
    """Create a fresh AgentMail inbox, then run the agent with email tools attached."""
    # Create email inbox
    # Get an API key from https://agentmail.to/
    email_client = AsyncAgentMail()
    inbox = await email_client.inboxes.create()
    print(f'Your email address is: {inbox.inbox_id}\n\n')

    # Initialize the tools for browser-use agent
    tools = EmailTools(email_client=email_client, inbox=inbox)

    # Initialize the LLM for browser-use agent
    llm = models.azure_gpt_4_1_mini

    # Set your local browser path
    browser = Browser(executable_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome')

    agent = Agent(task=TASK, tools=tools, llm=llm, browser=browser)
    await agent.run()


if __name__ == '__main__':
    asyncio.run(main())

View File

@@ -0,0 +1,187 @@
"""
Email management to enable 2fa.
"""
import asyncio
import logging
# run `pip install agentmail` to install the library
from agentmail import AsyncAgentMail, Message, MessageReceivedEvent, Subscribe # type: ignore
from agentmail.inboxes.types.inbox import Inbox # type: ignore
from agentmail.inboxes.types.inbox_id import InboxId # type: ignore
from browser_use import Tools
# Configure basic logging if not already configured
if not logging.getLogger().handlers:
logging.basicConfig(level=logging.INFO, format='%(levelname)s - %(name)s - %(message)s')
logger = logging.getLogger(__name__)
class EmailTools(Tools):
    """Tool registry that gives a browser-use agent access to an AgentMail inbox (e.g. to fetch 2FA codes)."""

    def __init__(
        self,
        email_client: AsyncAgentMail | None = None,
        email_timeout: int = 30,
        inbox: Inbox | None = None,
    ):
        super().__init__()
        # Use the caller-supplied client, or construct a default AsyncAgentMail client.
        self.email_client = email_client or AsyncAgentMail()
        # Seconds wait_for_message() waits for a new email event before timing out.
        self.email_timeout = email_timeout
        self.register_email_tools()
        # Inbox to operate on; created lazily by get_or_create_inbox_client() when None.
        self.inbox: Inbox | None = inbox

    def _serialize_message_for_llm(self, message: Message) -> str:
        """
        Serialize a message for the LLM
        """
        # Use text if available, otherwise convert HTML to simple text
        body_content = message.text
        if not body_content and message.html:
            body_content = self._html_to_text(message.html)
        msg = f'From: {message.from_}\nTo: {message.to}\nTimestamp: {message.timestamp.isoformat()}\nSubject: {message.subject}\nBody: {body_content}'
        return msg

    def _html_to_text(self, html: str) -> str:
        """
        Simple HTML to text conversion
        """
        import re

        # Remove script and style elements - handle spaces in closing tags
        html = re.sub(r'<script\b[^>]*>.*?</script\s*>', '', html, flags=re.DOTALL | re.IGNORECASE)
        html = re.sub(r'<style\b[^>]*>.*?</style\s*>', '', html, flags=re.DOTALL | re.IGNORECASE)
        # Remove HTML tags
        html = re.sub(r'<[^>]+>', '', html)
        # Decode HTML entities
        html = html.replace('&nbsp;', ' ')
        html = html.replace('&amp;', '&')
        html = html.replace('&lt;', '<')
        html = html.replace('&gt;', '>')
        html = html.replace('&quot;', '"')
        html = html.replace('&#39;', "'")
        # Clean up whitespace
        html = re.sub(r'\s+', ' ', html)
        html = html.strip()
        return html

    async def get_or_create_inbox_client(self) -> Inbox:
        """
        Create a default inbox profile for this API key (assume that agent is on free tier)
        If you are not on free tier it is recommended to create 1 inbox per agent.
        """
        # Reuse the cached inbox if one was supplied or already created.
        if self.inbox:
            return self.inbox
        return await self.create_inbox_client()

    async def create_inbox_client(self) -> Inbox:
        """
        Create a default inbox profile for this API key (assume that agent is on free tier)
        If you are not on free tier it is recommended to create 1 inbox per agent.
        """
        inbox = await self.email_client.inboxes.create()
        # Cache the inbox so subsequent tool calls reuse it.
        self.inbox = inbox
        return inbox

    async def wait_for_message(self, inbox_id: InboxId) -> Message:
        """
        Wait for a message to be received in the inbox

        Raises:
            TimeoutError: If no message event arrives within self.email_timeout seconds.
        """
        async with self.email_client.websockets.connect() as ws:
            # Subscribe to realtime events for this inbox only.
            await ws.send_subscribe(message=Subscribe(inbox_ids=[inbox_id]))
            try:
                while True:
                    # NOTE(review): the timeout applies per received event, so unrelated events
                    # reset the window and the total wait can exceed email_timeout — confirm intended.
                    data = await asyncio.wait_for(ws.recv(), timeout=self.email_timeout)
                    if isinstance(data, MessageReceivedEvent):
                        # Mark the message as read before returning it.
                        await self.email_client.inboxes.messages.update(
                            inbox_id=inbox_id, message_id=data.message.message_id, remove_labels=['unread']
                        )
                        msg = data.message
                        logger.info(f'Received new message from: {msg.from_} with subject: {msg.subject}')
                        return msg
                    # If not MessageReceived, continue waiting for the next event
            except TimeoutError:
                raise TimeoutError(f'No email received in the inbox in {self.email_timeout}s')

    def register_email_tools(self):
        """Register all email-related controller actions"""

        @self.action('Get email address for login. You can use this email to login to any service with email and password')
        async def get_email_address() -> str:
            """
            Get the email address of the inbox
            """
            inbox = await self.get_or_create_inbox_client()
            logger.info(f'Email address: {inbox.inbox_id}')
            # The inbox id doubles as the email address.
            return inbox.inbox_id

        @self.action(
            'Get the latest unread email from the inbox from the last max_age_minutes (default 5 minutes). Waits some seconds for new emails if none found. Use for 2FA codes.'
        )
        async def get_latest_email(max_age_minutes: int = 5) -> str:
            """
            1. Check for unread emails within the last max_age_minutes
            2. If no recent unread email, wait 30 seconds for new email via websocket
            """
            from datetime import datetime, timedelta, timezone

            inbox = await self.get_or_create_inbox_client()

            # Get unread emails
            emails = await self.email_client.inboxes.messages.list(inbox_id=inbox.inbox_id, labels=['unread'])

            # Filter unread emails by time window - use UTC timezone to match email timestamps
            time_cutoff = datetime.now(timezone.utc) - timedelta(minutes=max_age_minutes)
            logger.debug(f'Time cutoff: {time_cutoff}')
            logger.info(f'Found {len(emails.messages)} unread emails for inbox {inbox.inbox_id}')

            recent_unread_emails = []
            for i, email_summary in enumerate(emails.messages):
                # Get full email details to check timestamp
                full_email = await self.email_client.inboxes.messages.get(
                    inbox_id=inbox.inbox_id, message_id=email_summary.message_id
                )
                # Handle timezone comparison properly
                email_timestamp = full_email.timestamp
                if email_timestamp.tzinfo is None:
                    # If email timestamp is naive, assume UTC
                    email_timestamp = email_timestamp.replace(tzinfo=timezone.utc)
                if email_timestamp >= time_cutoff:
                    recent_unread_emails.append(full_email)

            # If we have recent unread emails, return the latest one
            if recent_unread_emails:
                # Sort by timestamp and get the most recent
                recent_unread_emails.sort(key=lambda x: x.timestamp, reverse=True)
                logger.info(f'Found {len(recent_unread_emails)} recent unread emails for inbox {inbox.inbox_id}')
                latest_email = recent_unread_emails[0]
                # Mark as read
                await self.email_client.inboxes.messages.update(
                    inbox_id=inbox.inbox_id, message_id=latest_email.message_id, remove_labels=['unread']
                )
                logger.info(f'Latest email from: {latest_email.from_} with subject: {latest_email.subject}')
                return self._serialize_message_for_llm(latest_email)
            else:
                logger.info('No recent unread emails, waiting for a new one')
                # No recent unread emails, wait for new one
                try:
                    latest_message = await self.wait_for_message(inbox_id=inbox.inbox_id)
                except TimeoutError:
                    return f'No email received in the inbox in {self.email_timeout}s'
                # logger.info(f'Latest message: {latest_message}')
                return self._serialize_message_for_llm(latest_message)

View File

@@ -67,12 +67,10 @@ class GmailGrantManager:
with open(self.credentials_file) as f:
creds = json.load(f)
required_fields = ['web']
web = creds['web']
if not web:
return False, "Invalid credentials format - missing 'web' section"
return True, 'Credentials file is valid'
# Accept if either 'web' or 'installed' section exists and is not empty
if creds.get('web') or creds.get('installed'):
return True, 'Credentials file is valid'
return False, "Invalid credentials format - neither 'web' nor 'installed' sections found"
except json.JSONDecodeError:
return False, 'Credentials file is not valid JSON'

View File

@@ -14,13 +14,9 @@ Requirements:
import asyncio
from lmnr import Laminar
from browser_use import Agent
from browser_use.llm import ChatAnthropicBedrock, ChatAWSBedrock
Laminar.initialize()
async def example_anthropic_bedrock():
"""Example using ChatAnthropicBedrock - convenience class for Claude models."""

View File

@@ -10,10 +10,8 @@ import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
from lmnr import Laminar
load_dotenv()
Laminar.initialize()
from browser_use import Agent
from browser_use.llm import ChatAnthropic

View File

@@ -5,15 +5,11 @@ import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
from lmnr import Laminar
load_dotenv()
Laminar.initialize()
from browser_use import Agent, ChatGoogle
load_dotenv()
api_key = os.getenv('GOOGLE_API_KEY')
if not api_key:
raise ValueError('GOOGLE_API_KEY is not set')

View File

@@ -7,15 +7,11 @@ Simple try of the agent.
import asyncio
from dotenv import load_dotenv
from lmnr import Laminar
from browser_use import Agent, ChatOpenAI
load_dotenv()
Laminar.initialize()
# All the models are type safe from OpenAI in case you need a list of supported models
llm = ChatOpenAI(model='gpt-4.1-mini')
agent = Agent(

View File

@@ -7,15 +7,11 @@ Simple try of the agent.
import asyncio
from dotenv import load_dotenv
from lmnr import Laminar
from browser_use import Agent, ChatOpenAI
load_dotenv()
Laminar.initialize()
# All the models are type safe from OpenAI in case you need a list of supported models
llm = ChatOpenAI(model='gpt-5-mini')
agent = Agent(

View File

@@ -12,13 +12,10 @@ This example demonstrates how to:
import asyncio
from langchain_openai import ChatOpenAI # pyright: ignore
from lmnr import Laminar
from browser_use import Agent
from examples.models.langchain.chat import ChatLangchain
Laminar.initialize()
async def main():
"""Basic example using ChatLangchain with OpenAI through LangChain."""

View File

@@ -0,0 +1,6 @@
# Minimal agent setup: use a preset model object instead of constructing a chat class.
from browser_use import Agent, models

# available providers for this import style: openai, azure, google
agent = Agent(task='Find founders of browser-use', llm=models.azure_gpt_4_1_mini)
agent.run_sync()

View File

@@ -5,14 +5,10 @@ import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from dotenv import load_dotenv
from lmnr import Laminar
load_dotenv()
Laminar.initialize()
from browser_use import Agent
from browser_use.llm import ChatGroq

View File

@@ -8,15 +8,11 @@ import asyncio
import os
from dotenv import load_dotenv
from lmnr import Laminar
from browser_use import Agent, ChatOpenAI
load_dotenv()
Laminar.initialize()
# All the models are type safe from OpenAI in case you need a list of supported models
llm = ChatOpenAI(
model='x-ai/grok-4',

View File

@@ -2,7 +2,7 @@
name = "browser-use"
description = "Make websites accessible for AI agents"
authors = [{ name = "Gregor Zunic" }]
version = "0.7.1"
version = "0.7.3"
readme = "README.md"
requires-python = ">=3.11,<4.0"
classifiers = [
@@ -28,9 +28,9 @@ dependencies = [
"typing-extensions>=4.12.2",
"uuid7>=0.1.0",
"authlib>=1.6.0",
"google-genai==1.29.0",
"openai==1.99.2",
"anthropic==0.58.2",
"google-genai>=1.29.0,<2.0.0",
"openai>=1.99.2,<2.0.0",
"anthropic>=0.58.2,<1.0.0",
"groq>=0.30.0",
"ollama>=0.5.1",
"google-api-python-client>=2.174.0",
@@ -38,9 +38,11 @@ dependencies = [
"google-auth-oauthlib>=1.2.2",
"mcp>=1.10.1",
"pypdf>=5.7.0",
"reportlab>=4.0.0",
"cdp-use>=1.4.0",
"markdown-pdf==1.5",
"pyotp>=2.9.0",
"html2text>=2025.4.15",
"pillow>=11.2.1",
]
# google-api-core: only used for Google LLM APIs
# pyperclip: only used for examples that use copy/paste
@@ -61,7 +63,12 @@ cli = [
aws = [
"boto3>=1.38.45"
]
video = [
"imageio[ffmpeg]>=2.37.0",
"numpy>=2.3.2",
]
examples = [
"agentmail>=0.0.53",
# botocore: only needed for Bedrock Claude boto3 examples/models/bedrock_claude.py
"botocore>=1.37.23",
"imgcat>=0.6.0",
@@ -70,9 +77,8 @@ examples = [
"langchain-openai>=0.3.26",
]
eval = [
"lmnr[all]==0.7.6",
"lmnr[all]==0.7.10",
"anyio>=4.9.0",
"Pillow>=11.2.1",
"psutil>=7.0.0",
"datamodel-code-generator>=0.26.0",
"hyperbrowser==0.47.0",
@@ -195,8 +201,8 @@ dev-dependencies = [
"pyright>=1.1.403",
"ty>=0.0.1a1",
"pytest-xdist>=3.7.0",
"pillow>=11.2.1",
"lmnr[all]==0.7.6",
"lmnr[all]==0.7.10",
# "pytest-playwright-asyncio>=0.7.0", # not actually needed I think
"pytest-timeout>=2.4.0",
"pydantic_settings>=2.10.1"
]

BIN
static/NiceHack69.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

View File

@@ -0,0 +1,10 @@
name: Google Maps 3d Screenshot
task: Go to google.com/maps and search for ETH Zurich Hauptgebäude. When found, close the side panel to see the map full screen. Then, if not already in Satellite View, switch to Satellite View. With Satellite View enabled, click the 3d icon to enable 3d view. Pan the map so that ETH Zurich Hauptgebäude and the Zurich Lake in the background are clearly visible. If able, take a screenshot.
judge_context:
- Agent must only use www.google.com/maps
- Agent should correctly search for ETH Zurich Hauptgebäude
- Agent should close the side panel
- After the agent performs the task, the map should be visible in Satellite View, if it was not already
- The Agent should click the correct 3d button to enable 3d mode
- The Agent should pan the map so that both ETH Zurich Hauptgebäude and the Zurich Lake in the background are visible
max_steps: 25

View File

@@ -51,7 +51,14 @@ class TestUrlAllowlistSecurity:
# Test more complex glob patterns
browser_profile = BrowserProfile(
allowed_domains=['*.google.com', 'https://wiki.org', 'https://good.com', 'chrome://version', 'brave://*'],
allowed_domains=[
'*.google.com',
'https://wiki.org',
'https://good.com',
'https://*.test.com',
'chrome://version',
'brave://*',
],
headless=True,
user_data_dir=None,
)
@@ -90,6 +97,10 @@ class TestUrlAllowlistSecurity:
assert watchdog._is_url_allowed('https://sub.example.com%20@malicious.org') is False
assert watchdog._is_url_allowed('https://anygoogle.com@evil.org') is False
# Test pattern matching
assert watchdog._is_url_allowed('https://www.test.com') is True
assert watchdog._is_url_allowed('https://www.testx.com') is False
def test_glob_pattern_edge_cases(self):
"""Test edge cases for glob pattern matching to ensure proper behavior."""
from bubus import EventBus

View File

@@ -10,8 +10,11 @@ The serialization shows radio buttons as:
Usage:
uv run pytest tests/ci/test_radio_buttons.py -v -s
Note: This test requires a real LLM API key and is skipped in CI environments.
"""
import os
from pathlib import Path
import pytest
@@ -64,6 +67,10 @@ async def browser_session():
await browser_session.kill()
@pytest.mark.skipif(
os.getenv('CI') == 'true' or os.getenv('GITHUB_ACTIONS') == 'true',
reason='Skipped in CI: requires real LLM API key which blocks other tests',
)
class TestRadioButtons:
"""Test cases for radio button interactions."""

View File

@@ -164,7 +164,7 @@ class TestToolsIntegration:
assert schema['properties']['seconds']['default'] == 3
# Create wait action for 1 second - fix to use a dictionary
wait_action = {'wait': {'seconds': 1}} # Corrected format
wait_action = {'wait': {'seconds': 3}} # Corrected format
class WaitActionModel(ActionModel):
wait: dict | None = None
@@ -184,7 +184,7 @@ class TestToolsIntegration:
assert 'Waited for' in result.extracted_content or 'Waiting for' in result.extracted_content
# Verify that approximately 1 second has passed (allowing some margin)
assert 0.8 <= end_time - start_time <= 1.5 # Allow some timing margin for 1 second wait
assert end_time - start_time <= 0.5 # We wait 3-3 seconds for LLM call
# longer wait
# Create wait action for 1 second - fix to use a dictionary
@@ -204,9 +204,7 @@ class TestToolsIntegration:
assert result.extracted_content is not None
assert 'Waited for' in result.extracted_content or 'Waiting for' in result.extracted_content
# Verify that approximately 5 seconds have passed (allowing some margin)
assert 4.5 <= end_time - start_time <= 6.0 # Allow some timing margin for 5 second wait
assert end_time - start_time >= 1.9 # Allow some timing margin
assert 1.5 <= end_time - start_time <= 2.5 # We wait 5-3 seconds for LLM call
async def test_go_back_action(self, tools, browser_session, base_url):
"""Test that go_back action navigates to the previous page."""

View File

@@ -0,0 +1,161 @@
"""
Simplified tests for URL shortening functionality in Agent service.
Three focused tests:
1. Input message processing with URL shortening
2. Output processing with custom actions and URL restoration
3. End-to-end pipeline test
"""
import json
import pytest
from browser_use.agent.service import Agent
from browser_use.agent.views import AgentOutput
from browser_use.llm.messages import AssistantMessage, BaseMessage, UserMessage
# Super long URL to reuse across tests - much longer than the 25 character limit
# Includes both query params (?...) and fragment params (#...)
SUPER_LONG_URL = 'https://documentation.example-company.com/api/v3/enterprise/user-management/endpoints/administration/create-new-user-account-with-permissions/advanced-settings?format=detailed-json&version=3.2.1&timestamp=1699123456789&session_id=abc123def456ghi789&authentication_token=very_long_authentication_token_string_here&include_metadata=true&expand_relationships=user_groups,permissions,roles&sort_by=created_at&order=desc&page_size=100&include_deprecated_fields=false&api_key=super_long_api_key_that_exceeds_normal_limits#section=user_management&tab=advanced&view=detailed&scroll_to=permissions_table&highlight=admin_settings&filter=active_users&expand_all=true&debug_mode=enabled'
@pytest.fixture
def agent():
	"""Provide an Agent wired to a mock LLM with a 25-character URL shortening limit."""
	from tests.ci.conftest import create_mock_llm

	mock_llm = create_mock_llm()
	return Agent(task='Test URL shortening', llm=mock_llm, url_shortening_limit=25)
class TestUrlShorteningInputProcessing:
	"""Input-side checks: long URLs in messages get shortened and mapped."""

	def test_process_input_messages_with_url_shortening(self, agent: Agent):
		"""A single UserMessage containing a long URL is rewritten in-place."""
		original_content = f'Please visit {SUPER_LONG_URL} and extract information'
		messages: list[BaseMessage] = [UserMessage(content=original_content)]

		# The agent mutates the messages in-place and returns {shortened -> original}.
		mapping = agent._process_messsages_and_replace_long_urls_shorter_ones(messages)

		rewritten = messages[0].content or ''
		# Content must have changed, shrunk, and still carry the URL's origin.
		assert rewritten != original_content
		assert len(rewritten) < len(original_content)
		assert 'https://documentation.example-company.com' in rewritten

		# Exactly one mapping entry, pointing back at the original URL.
		assert len(mapping) == 1
		short = next(iter(mapping))
		assert mapping[short] == SUPER_LONG_URL

	def test_process_user_and_assistant_messages_with_url_shortening(self, agent: Agent):
		"""The same long URL is shortened in both user and assistant messages."""
		user_text = f'I need to access {SUPER_LONG_URL} for the API documentation'
		assistant_text = f'I will help you navigate to {SUPER_LONG_URL} to retrieve the documentation'
		messages: list[BaseMessage] = [UserMessage(content=user_text), AssistantMessage(content=assistant_text)]

		mapping = agent._process_messsages_and_replace_long_urls_shorter_ones(messages)

		rewritten_user = messages[0].content or ''
		rewritten_assistant = messages[1].content or ''
		for before, after in ((user_text, rewritten_user), (assistant_text, rewritten_assistant)):
			assert after != before
			assert len(after) < len(before)
			assert 'https://documentation.example-company.com' in after

		# Both occurrences of the same long URL share a single shortened form.
		assert len(mapping) == 1
		short = next(iter(mapping))
		assert mapping[short] == SUPER_LONG_URL
class TestUrlShorteningOutputProcessing:
	"""Output-side checks: shortened URLs inside AgentOutput are expanded again."""

	def test_process_output_with_custom_actions_and_url_restoration(self, agent: Agent):
		"""Shortened URLs in thinking/memory/actions are restored to the originals."""
		# Simulate a prior shortening pass so we hold a known {short -> long} pair.
		short: str = agent._replace_urls_in_text(SUPER_LONG_URL)[0]
		mapping = {short: SUPER_LONG_URL}

		# Build an AgentOutput (with custom actions) that only knows the short URL.
		payload = {
			'thinking': f'I need to navigate to {short} for documentation',
			'evaluation_previous_goal': 'Successfully processed the request',
			'memory': f'Found useful info at {short}',
			'next_goal': 'Complete the documentation review',
			'action': [{'go_to_url': {'url': short, 'new_tab': False}}],
		}
		action_model = agent.tools.registry.create_action_model()
		output_cls = AgentOutput.type_with_custom_actions(action_model)
		output = output_cls.model_validate_json(json.dumps(payload))

		# Restoration mutates the pydantic model in-place.
		agent._recursive_process_all_strings_inside_pydantic_model(output, mapping)

		assert SUPER_LONG_URL in (output.thinking or '')
		assert SUPER_LONG_URL in (output.memory or '')
		dumped = output.action[0].model_dump()
		assert dumped['go_to_url']['url'] == SUPER_LONG_URL
class TestUrlShorteningEndToEnd:
	"""Full round-trip: shorten on input, carry the short form through an output, restore."""

	def test_complete_url_shortening_pipeline(self, agent: Agent):
		"""Input shortening -> simulated agent output -> output restoration."""
		# --- Step 1: input shortening (mutates the message list in-place) ---
		original_content = f'Navigate to {SUPER_LONG_URL} and extract the API documentation'
		messages: list[BaseMessage] = [UserMessage(content=original_content)]
		mapping = agent._process_messsages_and_replace_long_urls_shorter_ones(messages)

		assert len(mapping) == 1
		short = next(iter(mapping))
		assert mapping[short] == SUPER_LONG_URL
		assert short in (messages[0].content or '')

		# --- Step 2: fake agent output that references only the short URL ---
		payload = {
			'thinking': f'I will navigate to {short} to get the documentation',
			'evaluation_previous_goal': 'Starting documentation extraction',
			'memory': f'Target URL: {short}',
			'next_goal': 'Extract API documentation',
			'action': [{'go_to_url': {'url': short, 'new_tab': True}}],
		}
		action_model = agent.tools.registry.create_action_model()
		output_cls = AgentOutput.type_with_custom_actions(action_model)
		output = output_cls.model_validate_json(json.dumps(payload))

		# --- Step 3: restore the long URL in-place ---
		agent._recursive_process_all_strings_inside_pydantic_model(output, mapping)

		assert SUPER_LONG_URL in (output.thinking or '')
		assert SUPER_LONG_URL in (output.memory or '')
		dumped = output.action[0].model_dump()
		assert dumped['go_to_url']['url'] == SUPER_LONG_URL
		assert dumped['go_to_url']['new_tab'] is True
		# The short form must be fully gone after restoration.
		assert short not in (output.thinking or '')
		assert short not in (output.memory or '')