Merge branch 'main' into feat/custom-screenshot-quality

This commit is contained in:
Enzo Biondo
2025-07-21 13:27:07 -03:00
committed by GitHub
15 changed files with 634 additions and 218 deletions

View File

@@ -1,4 +1,5 @@
import os
from typing import TYPE_CHECKING
from browser_use.logging_config import setup_logging
@@ -13,21 +14,6 @@ else:
# Monkeypatch BaseSubprocessTransport.__del__ to handle closed event loops gracefully
from asyncio import base_subprocess
from browser_use.agent.prompts import SystemPrompt
from browser_use.agent.service import Agent
from browser_use.agent.views import ActionModel, ActionResult, AgentHistoryList
from browser_use.browser import Browser, BrowserConfig, BrowserContext, BrowserContextConfig, BrowserProfile, BrowserSession
from browser_use.controller.service import Controller
from browser_use.dom.service import DomService
from browser_use.llm import (
ChatAnthropic,
ChatAzureOpenAI,
ChatGoogle,
ChatGroq,
ChatOllama,
ChatOpenAI,
)
_original_del = base_subprocess.BaseSubprocessTransport.__del__
@@ -50,6 +36,71 @@ def _patched_del(self):
base_subprocess.BaseSubprocessTransport.__del__ = _patched_del
# Type stubs for lazy imports - fixes linter warnings
if TYPE_CHECKING:
from browser_use.agent.prompts import SystemPrompt
from browser_use.agent.service import Agent
from browser_use.agent.views import ActionModel, ActionResult, AgentHistoryList
from browser_use.browser import Browser, BrowserConfig, BrowserContext, BrowserContextConfig, BrowserProfile, BrowserSession
from browser_use.controller.service import Controller
from browser_use.dom.service import DomService
from browser_use.llm.anthropic.chat import ChatAnthropic
from browser_use.llm.azure.chat import ChatAzureOpenAI
from browser_use.llm.google.chat import ChatGoogle
from browser_use.llm.groq.chat import ChatGroq
from browser_use.llm.ollama.chat import ChatOllama
from browser_use.llm.openai.chat import ChatOpenAI
# Lazy imports mapping - only import when actually accessed
_LAZY_IMPORTS = {
# Agent service (heavy due to dependencies)
'Agent': ('browser_use.agent.service', 'Agent'),
# System prompt (moderate weight due to agent.views imports)
'SystemPrompt': ('browser_use.agent.prompts', 'SystemPrompt'),
# Agent views (very heavy - over 1 second!)
'ActionModel': ('browser_use.agent.views', 'ActionModel'),
'ActionResult': ('browser_use.agent.views', 'ActionResult'),
'AgentHistoryList': ('browser_use.agent.views', 'AgentHistoryList'),
# Browser components (heavy due to playwright/patchright)
'Browser': ('browser_use.browser', 'Browser'),
'BrowserConfig': ('browser_use.browser', 'BrowserConfig'),
'BrowserSession': ('browser_use.browser', 'BrowserSession'),
'BrowserProfile': ('browser_use.browser', 'BrowserProfile'),
'BrowserContext': ('browser_use.browser', 'BrowserContext'),
'BrowserContextConfig': ('browser_use.browser', 'BrowserContextConfig'),
# Controller (moderate weight)
'Controller': ('browser_use.controller.service', 'Controller'),
# DOM service (moderate weight)
'DomService': ('browser_use.dom.service', 'DomService'),
# Chat models (very heavy imports)
'ChatOpenAI': ('browser_use.llm.openai.chat', 'ChatOpenAI'),
'ChatGoogle': ('browser_use.llm.google.chat', 'ChatGoogle'),
'ChatAnthropic': ('browser_use.llm.anthropic.chat', 'ChatAnthropic'),
'ChatGroq': ('browser_use.llm.groq.chat', 'ChatGroq'),
'ChatAzureOpenAI': ('browser_use.llm.azure.chat', 'ChatAzureOpenAI'),
'ChatOllama': ('browser_use.llm.ollama.chat', 'ChatOllama'),
}
def __getattr__(name: str):
	"""Lazy import mechanism - only import modules when they're actually accessed."""
	try:
		module_path, attr_name = _LAZY_IMPORTS[name]
	except KeyError:
		raise AttributeError(f"module '{__name__}' has no attribute '{name}'") from None

	from importlib import import_module

	try:
		attr = getattr(import_module(module_path), attr_name)
	except ImportError as e:
		raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e

	# Cache the resolved attribute in module globals so __getattr__
	# is only hit once per name.
	globals()[name] = attr
	return attr
__all__ = [
'Agent',
'Browser',

View File

@@ -34,7 +34,8 @@ from bubus import EventBus
from pydantic import ValidationError
from uuid_extensions import uuid7str
from browser_use.agent.gif import create_history_gif
# Lazy import for gif to avoid heavy agent.views import at startup
# from browser_use.agent.gif import create_history_gif
from browser_use.agent.message_manager.service import (
MessageManager,
)
@@ -184,6 +185,8 @@ class Agent(Generic[Context, AgentStructuredOutput]):
display_files_in_done_text: bool = True,
include_tool_call_examples: bool = False,
vision_detail_level: Literal['auto', 'low', 'high'] = 'auto',
llm_timeout: int = 60,
step_timeout: int = 180,
**kwargs,
):
# Check for deprecated planner parameters
@@ -261,6 +264,8 @@ class Agent(Generic[Context, AgentStructuredOutput]):
extend_planner_system_message=None, # Always None now (deprecated)
calculate_cost=calculate_cost,
include_tool_call_examples=include_tool_call_examples,
llm_timeout=llm_timeout,
step_timeout=step_timeout,
)
# Token cost service
@@ -280,7 +285,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
self._set_browser_use_version_and_source(source)
self.initial_actions = self._convert_initial_actions(initial_actions) if initial_actions else None
# Verify we can connect to the LLM and setup the tool calling method
# Verify we can connect to the model
self._verify_and_setup_llm()
# TODO: move this logic to the LLMs
@@ -644,6 +649,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
self.task = new_task
self._message_manager.add_new_task(new_task)
@observe_debug(ignore_input=True, ignore_output=True, name='_raise_if_stopped_or_paused')
async def _raise_if_stopped_or_paused(self) -> None:
"""Utility function that raises an InterruptedError if the agent is stopped or paused."""
@@ -655,24 +661,6 @@ class Agent(Generic[Context, AgentStructuredOutput]):
# self.logger.debug('Agent paused after getting state')
raise InterruptedError
@observe_debug(ignore_input=True, ignore_output=True, name='get_browser_state_with_recovery')
async def _get_browser_state_with_recovery(self, cache_clickable_elements_hashes: bool = True) -> BrowserStateSummary:
	"""Get browser state with multiple fallback strategies for error recovery

	Args:
		cache_clickable_elements_hashes: Forwarded to get_state_summary(); controls
			whether clickable-element hashes are cached between calls.

	Returns:
		The full BrowserStateSummary when retrievable, otherwise the minimal
		summary from the fallback path.
	"""
	assert self.browser_session is not None, 'BrowserSession is not set up'
	# Try 1: Full state summary (current implementation) - like main branch
	try:
		return await self.browser_session.get_state_summary(cache_clickable_elements_hashes)
	except Exception as e:
		# Record the failure as an ActionResult so the LLM sees it on the next step
		if self.state.last_result is None:
			self.state.last_result = []
		self.state.last_result.append(ActionResult(error=str(e)))
		self.logger.warning(f'Full state retrieval failed: {type(e).__name__}: {e}')
		self.logger.warning('🔄 Falling back to minimal state summary')
		# Fallback: reduced summary, expected to succeed even on degraded pages
		return await self.browser_session.get_minimal_state_summary()
@observe(name='agent.step', ignore_output=True, ignore_input=True)
@time_execution_async('--step')
async def step(self, step_info: AgentStepInfo | None = None) -> None:
@@ -707,7 +695,9 @@ class Agent(Generic[Context, AgentStructuredOutput]):
assert self.browser_session is not None, 'BrowserSession is not set up'
self.logger.debug(f'🌐 Step {self.state.n_steps + 1}: Getting browser state...')
browser_state_summary = await self._get_browser_state_with_recovery(cache_clickable_elements_hashes=True)
browser_state_summary = await self.browser_session.get_browser_state_with_recovery(
cache_clickable_elements_hashes=True, include_screenshot=self.settings.use_vision
)
current_page = await self.browser_session.get_current_page()
# Check for new downloads after getting browser state (catches PDF auto-downloads and previous step downloads)
@@ -744,6 +734,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
await self._handle_final_step(step_info)
return browser_state_summary
@observe_debug(ignore_input=True, name='get_next_action')
async def _get_next_action(self, browser_state_summary: BrowserStateSummary) -> None:
"""Execute LLM interaction with retry logic and handle callbacks"""
input_messages = self._message_manager.get_messages()
@@ -751,7 +742,15 @@ class Agent(Generic[Context, AgentStructuredOutput]):
f'🤖 Step {self.state.n_steps + 1}: Calling LLM with {len(input_messages)} messages (model: {self.llm.model})...'
)
model_output = await self._get_model_output_with_retry(input_messages)
try:
model_output = await asyncio.wait_for(
self._get_model_output_with_retry(input_messages), timeout=self.settings.llm_timeout
)
except TimeoutError:
raise TimeoutError(
f'LLM call timed out after {self.settings.llm_timeout} seconds. Keep your thinking and output short.'
)
self.state.last_model_output = model_output
# Check again for paused/stopped state after getting model output
@@ -988,6 +987,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
return text.strip()
@time_execution_async('--get_next_action')
@observe_debug(ignore_input=True, ignore_output=True, name='get_model_output')
async def get_model_output(self, input_messages: list[BaseMessage]) -> AgentOutput:
"""Get next action from LLM based on current state"""
@@ -1249,15 +1249,15 @@ class Agent(Generic[Context, AgentStructuredOutput]):
try:
await asyncio.wait_for(
self.step(step_info),
timeout=300, # 5 minute step timeout - more generous for slow LLM calls
timeout=self.settings.step_timeout,
)
self.logger.debug(f'✅ Completed step {step + 1}/{max_steps}')
except TimeoutError:
# Handle step timeout gracefully
error_msg = f'Step {step + 1} timed out after 300 seconds'
error_msg = f'Step {step + 1} timed out after {self.settings.step_timeout} seconds'
self.logger.error(f'{error_msg}')
self.state.consecutive_failures += 1
self.state.last_result = [ActionResult(error=error_msg, include_in_memory=True)]
self.state.last_result = [ActionResult(error=error_msg)]
if on_step_end is not None:
await on_step_end(self)
@@ -1347,6 +1347,9 @@ class Agent(Generic[Context, AgentStructuredOutput]):
if isinstance(self.settings.generate_gif, str):
output_path = self.settings.generate_gif
# Lazy import gif module to avoid heavy startup cost
from browser_use.agent.gif import create_history_gif
create_history_gif(task=self.task, history=self.state.history, output_path=output_path)
# Emit output file generated event for GIF
@@ -1381,56 +1384,63 @@ class Agent(Generic[Context, AgentStructuredOutput]):
results: list[ActionResult] = []
assert self.browser_session is not None, 'BrowserSession is not set up'
cached_selector_map = await self.browser_session.get_selector_map()
cached_path_hashes = {e.hash.branch_path_hash for e in cached_selector_map.values()}
try:
await self.browser_session.remove_highlights()
except TimeoutError:
# we don't care if this times out
self.logger.debug('Timeout to remove highlights')
for i, action in enumerate(actions):
# DO NOT ALLOW TO CALL `done` AS A SINGLE ACTION
if i > 0 and action.model_dump(exclude_unset=True).get('done') is not None:
msg = f'Done action is allowed only as a single action - stopped after action {i} / {len(actions)}.'
logger.info(msg)
cached_selector_map = {}
cached_path_hashes = set()
# check all actions if any has index, if so, get the selector map
for action in actions:
if action.get_index() is not None:
cached_selector_map = await self.browser_session.get_selector_map()
cached_path_hashes = {e.hash.branch_path_hash for e in cached_selector_map.values()}
break
if action.get_index() is not None and i != 0:
new_browser_state_summary = await self.browser_session.get_state_summary(cache_clickable_elements_hashes=False)
new_selector_map = new_browser_state_summary.selector_map
# Detect index change after previous action
orig_target = cached_selector_map.get(action.get_index()) # type: ignore
orig_target_hash = orig_target.hash.branch_path_hash if orig_target else None
new_target = new_selector_map.get(action.get_index()) # type: ignore
new_target_hash = new_target.hash.branch_path_hash if new_target else None
if orig_target_hash != new_target_hash:
msg = f'Element index changed after action {i} / {len(actions)}, because page changed.'
# loop over actions and execute them
for i, action in enumerate(actions):
if i > 0:
# ONLY ALLOW TO CALL `done` IF IT IS A SINGLE ACTION
if action.model_dump(exclude_unset=True).get('done') is not None:
msg = f'Done action is allowed only as a single action - stopped after action {i} / {len(actions)}.'
logger.info(msg)
results.append(
ActionResult(
extracted_content=msg,
include_in_memory=True,
long_term_memory=msg,
)
)
break
new_path_hashes = {e.hash.branch_path_hash for e in new_selector_map.values()}
if check_for_new_elements and not new_path_hashes.issubset(cached_path_hashes):
# next action requires index but there are new elements on the page
msg = f'Something new appeared after action {i} / {len(actions)}, following actions are NOT executed and should be retried.'
logger.info(msg)
results.append(
ActionResult(
extracted_content=msg,
include_in_memory=True,
long_term_memory=msg,
)
if action.get_index() is not None:
new_browser_state_summary = await self.browser_session.get_browser_state_with_recovery(
cache_clickable_elements_hashes=False, include_screenshot=False
)
break
new_selector_map = new_browser_state_summary.selector_map
# Detect index change after previous action
orig_target = cached_selector_map.get(action.get_index()) # type: ignore
orig_target_hash = orig_target.hash.branch_path_hash if orig_target else None
new_target = new_selector_map.get(action.get_index()) # type: ignore
new_target_hash = new_target.hash.branch_path_hash if new_target else None
if orig_target_hash != new_target_hash:
msg = f'Element index changed after action {i} / {len(actions)}, because page changed.'
logger.info(msg)
results.append(
ActionResult(
extracted_content=msg,
include_in_memory=True,
long_term_memory=msg,
)
)
break
new_path_hashes = {e.hash.branch_path_hash for e in new_selector_map.values()}
if check_for_new_elements and not new_path_hashes.issubset(cached_path_hashes):
# next action requires index but there are new elements on the page
msg = f'Something new appeared after action {i} / {len(actions)}, following actions are NOT executed and should be retried.'
logger.info(msg)
results.append(
ActionResult(
extracted_content=msg,
include_in_memory=True,
long_term_memory=msg,
)
)
break
# wait between actions
await asyncio.sleep(self.browser_profile.wait_between_actions)
try:
await self._raise_if_stopped_or_paused()
@@ -1455,9 +1465,6 @@ class Agent(Generic[Context, AgentStructuredOutput]):
if results[-1].is_done or results[-1].error or i == len(actions) - 1:
break
await asyncio.sleep(self.browser_profile.wait_between_actions)
# hash all elements. if it is a subset of cached_state its fine - else break (new elements on page)
except Exception as e:
# Handle any exceptions during action execution
self.logger.error(f'Action {i + 1} failed: {type(e).__name__}: {e}')
@@ -1535,7 +1542,9 @@ class Agent(Generic[Context, AgentStructuredOutput]):
async def _execute_history_step(self, history_item: AgentHistory, delay: float) -> list[ActionResult]:
"""Execute a single step from history with element validation"""
assert self.browser_session is not None, 'BrowserSession is not set up'
state = await self.browser_session.get_state_summary(cache_clickable_elements_hashes=False)
state = await self.browser_session.get_browser_state_with_recovery(
cache_clickable_elements_hashes=False, include_screenshot=False
)
if not state or not history_item.model_output:
raise ValueError('Invalid state or model output')
updated_actions = []

View File

@@ -65,6 +65,8 @@ class AgentSettings(BaseModel):
extend_planner_system_message: str | None = None
calculate_cost: bool = False
include_tool_call_examples: bool = False
llm_timeout: int = 60 # Timeout in seconds for LLM calls
step_timeout: int = 180 # Timeout in seconds for each step
class AgentState(BaseModel):

View File

@@ -1,6 +1,41 @@
from .browser import Browser, BrowserConfig
from .context import BrowserContext, BrowserContextConfig
from .profile import BrowserProfile
from .session import BrowserSession
from typing import TYPE_CHECKING
# Type stubs for lazy imports
if TYPE_CHECKING:
from .browser import Browser, BrowserConfig
from .context import BrowserContext, BrowserContextConfig
from .profile import BrowserProfile
from .session import BrowserSession
# Lazy imports mapping for heavy browser components
_LAZY_IMPORTS = {
'Browser': ('.browser', 'Browser'),
'BrowserConfig': ('.browser', 'BrowserConfig'),
'BrowserContext': ('.context', 'BrowserContext'),
'BrowserContextConfig': ('.context', 'BrowserContextConfig'),
'BrowserProfile': ('.profile', 'BrowserProfile'),
'BrowserSession': ('.session', 'BrowserSession'),
}
def __getattr__(name: str):
	"""Lazy import mechanism for heavy browser components."""
	if name not in _LAZY_IMPORTS:
		raise AttributeError(f"module '{__name__}' has no attribute '{name}'")

	module_path, attr_name = _LAZY_IMPORTS[name]
	from importlib import import_module

	# Resolve the relative module path against this package explicitly
	full_module_path = f'browser_use.browser{module_path}'
	try:
		attr = getattr(import_module(full_module_path), attr_name)
	except ImportError as e:
		raise ImportError(f'Failed to import {name} from {full_module_path}: {e}') from e

	# Cache the resolved attribute so later lookups skip __getattr__ entirely
	globals()[name] = attr
	return attr
__all__ = ['Browser', 'BrowserConfig', 'BrowserContext', 'BrowserContextConfig', 'BrowserSession', 'BrowserProfile']

View File

@@ -169,6 +169,10 @@ CHROME_DEFAULT_ARGS = [
'--disable-desktop-notifications',
'--noerrdialogs',
'--silent-debugger-extension-api',
# Extension welcome tab suppression for automation
'--disable-extensions-http-throttling',
'--extensions-on-chrome-urls',
'--disable-default-apps',
f'--disable-features={",".join(CHROME_DISABLED_COMPONENTS)}',
]
@@ -558,6 +562,10 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
description='List of allowed domains for navigation e.g. ["*.google.com", "https://example.com", "chrome-extension://*"]',
)
keep_alive: bool | None = Field(default=None, description='Keep browser alive after agent run.')
enable_default_extensions: bool = Field(
default=True,
description="Enable automation-optimized extensions: ad blocking (uBlock Origin), cookie handling (I still don't care about cookies), and URL cleaning (ClearURLs). All extensions work automatically without manual intervention. Extensions are automatically downloaded and loaded when enabled.",
)
window_size: ViewportSize | None = Field(
default=None,
description='Browser window size to use when headless=False.',
@@ -620,6 +628,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
window_size['width'] = window_size['width'] or self.window_width or 1280
window_size['height'] = window_size['height'] or self.window_height or 1100
self.window_size = window_size
return self
@model_validator(mode='after')
@@ -699,12 +708,162 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
if self.window_position
else []
),
*(self._get_extension_args() if self.enable_default_extensions else []),
]
# convert to dict and back to dedupe and merge duplicate args
final_args_list = BrowserLaunchArgs.args_as_list(BrowserLaunchArgs.args_as_dict(pre_conversion_args))
return final_args_list
def _get_extension_args(self) -> list[str]:
	"""Get Chrome args for enabling default extensions (ad blocker and cookie handler)."""
	# Make sure the extensions are present on disk before building the flags
	paths = self._ensure_default_extensions_downloaded()

	flags = [
		'--enable-extensions',
		'--disable-extensions-file-access-check',
		'--disable-extensions-http-throttling',
		'--enable-extension-activity-logging',
	]
	if paths:
		# Chrome accepts a comma-separated list of unpacked extension dirs
		flags.append('--load-extension=' + ','.join(paths))
	return flags
def _ensure_default_extensions_downloaded(self) -> list[str]:
	"""
	Ensure default extensions are downloaded and cached locally.
	Returns list of paths to extension directories.
	"""
	from pathlib import Path

	# Extension definitions - optimized for automation and content extraction
	default_extensions = [
		{
			'name': 'uBlock Origin',
			'id': 'cjpalhdlnbpafiamejdnhcphjbkeiagm',
			'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dcjpalhdlnbpafiamejdnhcphjbkeiagm%26uc',
		},
		{
			'name': "I still don't care about cookies",
			'id': 'edibdbjcniadpccecjdfdjjppcpchdlm',
			'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dedibdbjcniadpccecjdfdjjppcpchdlm%26uc',
		},
		{
			'name': 'ClearURLs',
			'id': 'lckanjgmijmafbedllaakclkaicjfmnk',
			'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dlckanjgmijmafbedllaakclkaicjfmnk%26uc',
		},
	]

	# Shared on-disk cache so multiple sessions reuse the same downloads
	cache_dir = Path.home() / '.browser-use' / 'extensions'
	cache_dir.mkdir(parents=True, exist_ok=True)

	ready_paths: list[str] = []
	ready_names: list[str] = []

	for spec in default_extensions:
		unpacked_dir = cache_dir / spec['id']
		crx_path = cache_dir / f'{spec["id"]}.crx'

		# Cached and already unpacked with a manifest -> reuse as-is
		if unpacked_dir.exists() and (unpacked_dir / 'manifest.json').exists():
			ready_paths.append(str(unpacked_dir))
			ready_names.append(spec['name'])
			continue

		try:
			# Fetch the .crx only when it is not already cached
			if not crx_path.exists():
				logger.info(f'📦 Downloading {spec["name"]} extension...')
				self._download_extension(spec['url'], crx_path)

			# Unpack the archive so Chrome can load it with --load-extension
			if crx_path.exists():
				logger.info(f'📂 Extracting {spec["name"]} extension...')
				self._extract_extension(crx_path, unpacked_dir)
				ready_paths.append(str(unpacked_dir))
				ready_names.append(spec['name'])
		except Exception as e:
			# Best-effort: a single broken extension must not block the browser launch
			logger.warning(f'⚠️ Failed to setup {spec["name"]} extension: {e}')
			continue

	if ready_paths:
		logger.info(f'✅ Extensions ready: {len(ready_paths)} extensions loaded ({", ".join(ready_names)})')
	else:
		logger.warning('⚠️ No default extensions could be loaded')

	return ready_paths
def _download_extension(self, url: str, output_path: Path) -> None:
"""Download extension .crx file."""
import urllib.request
try:
with urllib.request.urlopen(url) as response:
with open(output_path, 'wb') as f:
f.write(response.read())
except Exception as e:
raise Exception(f'Failed to download extension: {e}')
def _extract_extension(self, crx_path: Path, extract_dir: Path) -> None:
"""Extract .crx file to directory."""
import os
import zipfile
# Remove existing directory
if extract_dir.exists():
import shutil
shutil.rmtree(extract_dir)
extract_dir.mkdir(parents=True, exist_ok=True)
try:
# CRX files are ZIP files with a header, try to extract as ZIP
with zipfile.ZipFile(crx_path, 'r') as zip_ref:
zip_ref.extractall(extract_dir)
# Verify manifest exists
if not (extract_dir / 'manifest.json').exists():
raise Exception('No manifest.json found in extension')
except zipfile.BadZipFile:
# CRX files have a header before the ZIP data
# Skip the CRX header and extract the ZIP part
with open(crx_path, 'rb') as f:
# Read CRX header to find ZIP start
magic = f.read(4)
if magic != b'Cr24':
raise Exception('Invalid CRX file format')
version = int.from_bytes(f.read(4), 'little')
if version == 2:
pubkey_len = int.from_bytes(f.read(4), 'little')
sig_len = int.from_bytes(f.read(4), 'little')
f.seek(16 + pubkey_len + sig_len) # Skip to ZIP data
elif version == 3:
header_len = int.from_bytes(f.read(4), 'little')
f.seek(12 + header_len) # Skip to ZIP data
# Extract ZIP data
zip_data = f.read()
# Write ZIP data to temp file and extract
import tempfile
with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as temp_zip:
temp_zip.write(zip_data)
temp_zip.flush()
with zipfile.ZipFile(temp_zip.name, 'r') as zip_ref:
zip_ref.extractall(extract_dir)
os.unlink(temp_zip.name)
def kwargs_for_launch_persistent_context(self) -> BrowserLaunchPersistentContextArgs:
	"""Return the kwargs for BrowserType.launch_persistent_context()."""
	return BrowserLaunchPersistentContextArgs(**self.model_dump(exclude={'args'}), args=self.get_args())
@@ -721,22 +880,6 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
"""Return the kwargs for BrowserType.connect_over_cdp()."""
return BrowserLaunchArgs(**self.model_dump(exclude={'args'}), args=self.get_args())
# def preinstall_extensions(self) -> None:
# """Preinstall the extensions."""
# # create the local unpacked extensions dir
# extensions_dir = self.user_data_dir / 'Extensions'
# extensions_dir.mkdir(parents=True, exist_ok=True)
# # download from the chrome web store using the chrome web store api
# for extension_id in self.extension_ids_to_preinstall:
# extension_path = extensions_dir / f'{extension_id}.crx'
# if extension_path.exists():
# logger.warning(f'⚠️ Extension {extension_id} is already installed, skipping preinstall.')
# else:
# logger.info(f'🔍 Preinstalling extension {extension_id}...')
# # TODO: copy this from ArchiveBox implementation
@observe_debug(ignore_input=True, ignore_output=True, name='detect_display_configuration')
def detect_display_configuration(self) -> None:
"""

View File

@@ -51,8 +51,10 @@ from browser_use.browser.views import (
TabInfo,
URLNotAllowedError,
)
from browser_use.dom.clickable_element_processor.service import ClickableElementProcessor
from browser_use.dom.service import DomService
# Lazy imports for heavy DOM services to improve startup time
# from browser_use.dom.clickable_element_processor.service import ClickableElementProcessor
# from browser_use.dom.service import DomService
from browser_use.dom.views import DOMElementNode, SelectorMap
from browser_use.utils import (
is_new_tab_page,
@@ -160,12 +162,14 @@ def require_healthy_browser(usable_page=True, reopen_page=True):
await self._recover_unresponsive_page(
func.__name__, timeout_ms=int(self.browser_profile.default_navigation_timeout or 5000) + 5_000
)
page_url = self.agent_current_page.url if self.agent_current_page else 'unknown page'
self.logger.debug(
f'🤕 Crashed page recovery finished, attempting to continue with {func.__name__}() on {_log_pretty_url(self.agent_current_page.url)}...'
f'🤕 Crashed page recovery finished, attempting to continue with {func.__name__}() on {_log_pretty_url(page_url)}...'
)
except Exception as e:
page_url = self.agent_current_page.url if self.agent_current_page else 'unknown page'
self.logger.warning(
f'❌ Crashed page recovery failed, could not run {func.__name__}(), page is stuck unresponsive on {_log_pretty_url(self.agent_current_page.url)}...'
f'❌ Crashed page recovery failed, could not run {func.__name__}(), page is stuck unresponsive on {_log_pretty_url(page_url)}...'
)
raise # Re-raise to let retry decorator / callsite handle it
@@ -384,10 +388,19 @@ class BrowserSession(BaseModel):
# Ensure we have a context
assert self.browser_context, f'Failed to create BrowserContext for browser={self.browser}'
# Configure browser
await self._setup_viewports()
await self._setup_current_page_change_listeners()
await self._start_context_tracing()
# Configure browser - run some setup tasks in parallel for speed
setup_results = await asyncio.gather(
self._setup_viewports(),
self._setup_current_page_change_listeners(),
self._start_context_tracing(),
return_exceptions=True,
)
# Check for exceptions in setup results
for i, result in enumerate(setup_results):
if isinstance(result, Exception):
setup_task_names = ['_setup_viewports', '_setup_current_page_change_listeners', '_start_context_tracing']
raise Exception(f'Browser setup failed in {setup_task_names[i]}: {result}') from result
self.initialized = True
return self
@@ -837,6 +850,7 @@ class BrowserSession(BaseModel):
atexit.register(shudown_playwright)
@observe_debug(ignore_input=True, ignore_output=True, name='setup_browser_via_passed_objects')
async def setup_browser_via_passed_objects(self) -> None:
"""Override to customize the set up of the connection to an existing browser"""
@@ -878,6 +892,7 @@ class BrowserSession(BaseModel):
self.logger.info(f'🎭 Connected to existing user-provided browser: {self.browser_context}')
self._set_browser_keep_alive(True) # we connected to an existing browser, dont kill it at the end
@observe_debug(ignore_input=True, ignore_output=True, name='setup_browser_via_browser_pid')
async def setup_browser_via_browser_pid(self) -> None:
"""if browser_pid is provided, calcuclate its CDP URL by looking for --remote-debugging-port=... in its CLI args, then connect to it"""
@@ -922,11 +937,10 @@ class BrowserSession(BaseModel):
# Wait for CDP port to become available (Chrome might still be starting)
import httpx
# Add initial delay to give Chrome time to start up before first check
await asyncio.sleep(2)
# No initial sleep needed - the polling loop below handles waiting if Chrome isn't ready yet
async with httpx.AsyncClient() as client:
for i in range(30): # 30 second timeout
for i in range(30): # timeout
# First check if the Chrome process has exited
try:
chrome_process = psutil.Process(pid=self.browser_pid)
@@ -988,7 +1002,7 @@ class BrowserSession(BaseModel):
except (httpx.ConnectError, httpx.TimeoutException):
if i == 0:
self.logger.debug(f'⏳ Waiting for Chrome CDP port {debug_port} to become available...')
await asyncio.sleep(1)
await asyncio.sleep(0.5)
else:
self.logger.error(f'❌ Chrome CDP port {debug_port} did not become available after 30 seconds')
self.browser_pid = None
@@ -1010,6 +1024,7 @@ class BrowserSession(BaseModel):
)
self._set_browser_keep_alive(True) # we connected to an existing browser, dont kill it at the end
@observe_debug(ignore_input=True, ignore_output=True, name='setup_browser_via_wss_url')
async def setup_browser_via_wss_url(self) -> None:
"""check for a passed wss_url, connect to a remote playwright browser server via WSS"""
@@ -1044,7 +1059,8 @@ class BrowserSession(BaseModel):
)
self._set_browser_keep_alive(True) # we connected to an existing browser, dont kill it at the end
@retry(wait=1, retries=2, timeout=45, semaphore_limit=1, semaphore_scope='self', semaphore_lax=False)
@observe_debug(ignore_input=True, ignore_output=True, name='setup_new_browser_context')
@retry(wait=0.1, retries=5, timeout=45, semaphore_limit=1, semaphore_scope='self', semaphore_lax=False)
async def setup_new_browser_context(self) -> None:
"""Launch a new browser and browser_context"""
# Double-check after semaphore acquisition to prevent duplicate browser launches
@@ -1059,6 +1075,7 @@ class BrowserSession(BaseModel):
pass
await self._unsafe_setup_new_browser_context()
@observe_debug(ignore_input=True, ignore_output=True, name='_unsafe_setup_new_browser_context')
async def _unsafe_setup_new_browser_context(self) -> None:
"""Unsafe browser context setup without retry protection."""
@@ -2015,7 +2032,6 @@ class BrowserSession(BaseModel):
await page.wait_for_selector(selector, state='visible', timeout=timeout)
@observe_debug(name='remove_highlights', ignore_output=True, ignore_input=True)
@require_healthy_browser(usable_page=True, reopen_page=True)
@time_execution_async('--remove_highlights')
@retry(timeout=2, retries=0)
async def remove_highlights(self):
@@ -2048,14 +2064,16 @@ class BrowserSession(BaseModel):
self.logger.debug(f'⚠️ Failed to remove highlights (this is usually ok): {type(e).__name__}: {e}')
# Don't raise the error since this is not critical functionality
@observe_debug(ignore_output=True, name='get_dom_element_by_index')
@require_healthy_browser(usable_page=True, reopen_page=True)
async def get_dom_element_by_index(self, index: int) -> DOMElementNode | None:
"""Get DOM element by index."""
selector_map = await self.get_selector_map()
return selector_map.get(index)
@require_healthy_browser(usable_page=True, reopen_page=True)
@time_execution_async('--click_element_node')
@observe_debug(ignore_input=True, name='click_element_node')
@require_healthy_browser(usable_page=True, reopen_page=True)
async def _click_element_node(self, element_node: DOMElementNode) -> str | None:
"""
Optimized method to click an element using xpath.
@@ -2069,7 +2087,8 @@ class BrowserSession(BaseModel):
element_handle = await self.get_locate_element(element_node)
if element_handle is None:
raise Exception(f'Element: {repr(element_node)} not found')
self.logger.debug(f'Element: {repr(element_node)} not found')
raise Exception('Element not found')
async def perform_click(click_func):
"""Performs the actual click, handling both download and navigation scenarios."""
@@ -2163,10 +2182,10 @@ class BrowserSession(BaseModel):
except URLNotAllowedError as e:
raise e
except Exception as e:
raise Exception(f'Failed to click element: {repr(element_node)}. Error: {str(e)}')
raise Exception(f'Failed to click element. Error: {str(e)}')
@time_execution_async('--get_tabs_info')
@retry(timeout=6, retries=1)
@retry(timeout=3, retries=1)
@require_healthy_browser(usable_page=False, reopen_page=False)
async def get_tabs_info(self) -> list[TabInfo]:
"""Get information about all tabs"""
@@ -2174,7 +2193,7 @@ class BrowserSession(BaseModel):
tabs_info = []
for page_id, page in enumerate(self.browser_context.pages):
try:
title = await asyncio.wait_for(page.title(), timeout=3.0)
title = await asyncio.wait_for(page.title(), timeout=2.0)
tab_info = TabInfo(page_id=page_id, url=page.url, title=title)
except Exception:
# page.title() can hang forever on tabs that are crashed/disappeared/about:blank
@@ -2255,8 +2274,14 @@ class BrowserSession(BaseModel):
# Check if URL is allowed
if not self._is_url_allowed(normalized_url):
raise BrowserError(f'⛔️ Navigation to non-allowed URL: {normalized_url}')
timeout_ms = min(3000, int(timeout_ms or self.browser_profile.default_navigation_timeout or 12000))
# If timeout_ms is not None, use it (even if 0); else try profile.default_navigation_timeout (even if 0); else 12000
if timeout_ms is not None:
user_timeout_ms = int(timeout_ms)
elif self.browser_profile.default_navigation_timeout is not None:
user_timeout_ms = int(self.browser_profile.default_navigation_timeout)
else:
user_timeout_ms = 12000
timeout_ms = min(3000, user_timeout_ms)
# Handle new tab creation
if new_tab:
@@ -2279,7 +2304,7 @@ class BrowserSession(BaseModel):
# Navigate to URL
try:
# Use asyncio.wait to prevent hanging on slow page loads
# Use asyncio.wait to prevent hanging on a slow page loads
# Don't cap the timeout - respect what was requested
self.logger.debug(f'🧭 Starting navigation to {_log_pretty_url(normalized_url)} with timeout {timeout_ms}ms')
nav_task = asyncio.create_task(page.goto(normalized_url, wait_until='load', timeout=timeout_ms))
@@ -2797,15 +2822,27 @@ class BrowserSession(BaseModel):
@observe_debug(ignore_input=True, ignore_output=True, name='wait_for_page_and_frames_load')
async def _wait_for_page_and_frames_load(self, timeout_overwrite: float | None = None):
"""
Ensures page is fully loaded before continuing.
Waits for either network to be idle or minimum WAIT_TIME, whichever is longer.
Ensures page is fully loaded and stable before continuing.
Waits for network idle, DOM stability, and minimum WAIT_TIME.
Also checks if the loaded URL is allowed.
Parameters:
-----------
timeout_overwrite: float | None
Override the minimum wait time
"""
# Start timing
start_time = time.time()
# Wait for page load
page = await self.get_current_page()
# Skip network waiting for new tab pages (about:blank, chrome://new-tab-page, etc.)
# These pages load instantly and don't need network idle time
if is_new_tab_page(page.url):
self.logger.debug(f'⚡ Skipping page load wait for new tab page: {page.url}')
return
try:
await self._wait_for_stable_network()
@@ -3052,7 +3089,9 @@ class BrowserSession(BaseModel):
@observe_debug(ignore_input=True, ignore_output=True)
@time_execution_async('--get_state_summary')
@require_healthy_browser(usable_page=True, reopen_page=True)
async def get_state_summary(self, cache_clickable_elements_hashes: bool) -> BrowserStateSummary:
async def get_state_summary(
self, cache_clickable_elements_hashes: bool, include_screenshot: bool = True
) -> BrowserStateSummary:
self.logger.debug('🔄 Starting get_state_summary...')
"""Get a summary of the current browser state
@@ -3065,13 +3104,19 @@ class BrowserSession(BaseModel):
If True, cache the clickable elements hashes for the current state.
This is used to calculate which elements are new to the LLM since the last message,
which helps reduce token usage.
include_screenshot: bool
If True, include screenshot in the state summary. Set to False to improve performance
when screenshots are not needed (e.g., in multi_act element validation).
"""
await self._wait_for_page_and_frames_load()
updated_state = await self._get_updated_state()
updated_state = await self._get_updated_state(include_screenshot=include_screenshot)
# Find out which elements are new
# Do this only if url has not changed
if cache_clickable_elements_hashes:
# Lazy import heavy DOM service
from browser_use.dom.clickable_element_processor.service import ClickableElementProcessor
# if we are on the same url as the last state, we can use the cached hashes
if self._cached_clickable_element_hashes and self._cached_clickable_element_hashes.url == updated_state.url:
# Pointers, feel free to edit in place
@@ -3142,20 +3187,12 @@ class BrowserSession(BaseModel):
)
@observe_debug(ignore_input=True, ignore_output=True, name='get_updated_state')
async def _get_updated_state(self, focus_element: int = -1) -> BrowserStateSummary:
async def _get_updated_state(self, focus_element: int = -1, include_screenshot: bool = True) -> BrowserStateSummary:
"""Update and return state."""
# Check if current page is still valid, if not switch to another available page
page = await self.get_current_page()
try:
# Test if page is still accessible
# NOTE: This also happens on invalid urls like www.sadfdsafdssdafd.com
await asyncio.wait_for(page.evaluate('1'), timeout=2.5)
except Exception as e:
self.logger.debug(f'👋 Current page is not accessible: {type(e).__name__}: {e}')
raise BrowserError('Page is not accessible')
try:
self.logger.debug('🧹 Removing highlights...')
try:
@@ -3172,6 +3209,8 @@ class BrowserSession(BaseModel):
self.logger.debug(f'PDF auto-download check failed: {type(e).__name__}: {e}')
self.logger.debug('🌳 Starting DOM processing...')
from browser_use.dom.service import DomService
dom_service = DomService(page, logger=self.logger)
try:
content = await asyncio.wait_for(
@@ -3228,13 +3267,16 @@ class BrowserSession(BaseModel):
# )
# )
try:
self.logger.debug('📸 Capturing screenshot...')
# Reasonable timeout for screenshot
screenshot_b64 = await self.take_screenshot()
# self.logger.debug('✅ Screenshot completed')
except Exception as e:
self.logger.warning(f'❌ Screenshot failed for {_log_pretty_url(page.url)}: {type(e).__name__} {e}')
if include_screenshot:
try:
self.logger.debug('📸 Capturing screenshot...')
# Reasonable timeout for screenshot
screenshot_b64 = await self.take_screenshot()
# self.logger.debug('✅ Screenshot completed')
except Exception as e:
self.logger.warning(f'❌ Screenshot failed for {_log_pretty_url(page.url)}: {type(e).__name__} {e}')
screenshot_b64 = None
else:
screenshot_b64 = None
# Get comprehensive page information
@@ -3475,6 +3517,7 @@ class BrowserSession(BaseModel):
'Browser is unable to load any new about:blank pages (something is very wrong or browser is extremely overloaded)'
)
@observe_debug(ignore_input=True, name='recover_unresponsive_page')
async def _recover_unresponsive_page(self, calling_method: str, timeout_ms: int | None = None) -> None:
"""Recover from an unresponsive page by closing and reopening it."""
self.logger.warning(f'⚠️ Page JS engine became unresponsive in {calling_method}(), attempting recovery...')
@@ -3828,6 +3871,7 @@ class BrowserSession(BaseModel):
@require_healthy_browser(usable_page=True, reopen_page=True)
@time_execution_async('--get_locate_element')
@observe_debug(ignore_input=True, name='get_locate_element')
async def get_locate_element(self, element: DOMElementNode) -> ElementHandle | None:
page = await self.get_current_page()
current_frame = page
@@ -3881,7 +3925,7 @@ class BrowserSession(BaseModel):
if element_handle:
is_visible = await self._is_visible(element_handle)
if is_visible:
await element_handle.scroll_into_view_if_needed()
await element_handle.scroll_into_view_if_needed(timeout=1_000)
return element_handle
return None
except Exception as e:
@@ -3897,7 +3941,7 @@ class BrowserSession(BaseModel):
if element_handle:
is_visible = await self._is_visible(element_handle)
if is_visible:
await element_handle.scroll_into_view_if_needed()
await element_handle.scroll_into_view_if_needed(timeout=1_000)
return element_handle
except Exception as xpath_e:
self.logger.error(
@@ -3924,7 +3968,7 @@ class BrowserSession(BaseModel):
if element_handle:
is_visible = await self._is_visible(element_handle)
if is_visible:
await element_handle.scroll_into_view_if_needed()
await element_handle.scroll_into_view_if_needed(timeout=1_000)
return element_handle
return None
except Exception as e:
@@ -3945,7 +3989,7 @@ class BrowserSession(BaseModel):
if element_handle:
is_visible = await self._is_visible(element_handle)
if is_visible:
await element_handle.scroll_into_view_if_needed()
await element_handle.scroll_into_view_if_needed(timeout=1_000)
return element_handle
return None
except Exception as e:
@@ -3989,7 +4033,7 @@ class BrowserSession(BaseModel):
is_visible = await self._is_visible(element_handle)
if is_visible:
await element_handle.scroll_into_view_if_needed()
await element_handle.scroll_into_view_if_needed(timeout=1_000)
return element_handle
except Exception as e:
self.logger.error(
@@ -3999,6 +4043,7 @@ class BrowserSession(BaseModel):
@require_healthy_browser(usable_page=True, reopen_page=True)
@time_execution_async('--input_text_element_node')
@observe_debug(ignore_input=True, name='input_text_element_node')
async def _input_text_element_node(self, element_node: DOMElementNode, text: str):
"""
Input text into an element with proper error handling and state management.
@@ -4022,7 +4067,7 @@ class BrowserSession(BaseModel):
# let's first try to click and type
try:
await element_handle.evaluate('el => {el.textContent = ""; el.value = "";}')
await element_handle.click()
await element_handle.click(timeout=2_000) # Add 2 second timeout
await asyncio.sleep(0.1) # Increased sleep time
page = await self.get_current_page()
await page.keyboard.type(text)
@@ -4044,9 +4089,9 @@ class BrowserSession(BaseModel):
try:
if (await is_contenteditable.json_value() or tag_name == 'input') and not (readonly or disabled):
await element_handle.evaluate('el => {el.textContent = ""; el.value = "";}')
await element_handle.type(text, delay=5)
await element_handle.type(text, delay=5, timeout=5_000) # Add 5 second timeout
else:
await element_handle.fill(text)
await element_handle.fill(text, timeout=3_000) # Add 3 second timeout
except Exception as e:
self.logger.error(f'Error during input text into element: {type(e).__name__}: {e}')
raise BrowserError(f'Failed to input text into element: {repr(element_node)}')
@@ -4471,32 +4516,29 @@ class BrowserSession(BaseModel):
except Exception as e:
self.logger.debug(f'❌ Failed to show 📀 DVD loading animation: {type(e).__name__}: {e}')
@observe_debug(ignore_input=True, ignore_output=True, name='get_state_summary_with_fallback')
@require_healthy_browser(usable_page=True, reopen_page=True)
@time_execution_async('--get_state_summary_with_fallback')
async def get_state_summary_with_fallback(self, cache_clickable_elements_hashes: bool = True) -> BrowserStateSummary:
"""Get browser state with fallback to minimal state on errors
This method first tries to get a full state summary. If that fails,
it falls back to a minimal state summary to allow basic navigation.
@observe_debug(ignore_input=True, ignore_output=True, name='get_browser_state_with_recovery')
async def get_browser_state_with_recovery(
self, cache_clickable_elements_hashes: bool = True, include_screenshot: bool = True
) -> BrowserStateSummary:
"""Get browser state with multiple fallback strategies for error recovery
Parameters:
-----------
cache_clickable_elements_hashes: bool
If True, cache the clickable elements hashes for the current state.
Returns:
--------
BrowserStateSummary: Either full state or minimal fallback state
include_screenshot: bool
If True, include screenshot in the state summary. Set to False to improve performance
when screenshots are not needed (e.g., in multi_act element validation).
"""
# Try 1: Full state summary (current implementation)
# Try 1: Full state summary (current implementation) - like main branch
try:
return await self.get_state_summary(cache_clickable_elements_hashes)
await self._wait_for_page_and_frames_load()
return await self.get_state_summary(cache_clickable_elements_hashes, include_screenshot=include_screenshot)
except Exception as e:
self.logger.warning(f'Full state retrieval failed: {type(e).__name__}: {e}')
self.logger.warning('🔄 Falling back to minimal state summary')
# Try 2: Minimal state summary as fallback
self.logger.warning('🔄 Falling back to minimal state summary')
return await self.get_minimal_state_summary()
async def _is_pdf_viewer(self, page: Page) -> bool:

View File

@@ -130,23 +130,20 @@ class Controller(Generic[Context]):
await browser_session.go_back()
msg = '🔙 Navigated back'
logger.info(msg)
return ActionResult(extracted_content=msg, include_in_memory=True, long_term_memory='Navigated back')
return ActionResult(extracted_content=msg)
# wait for x seconds
@self.registry.action('Wait for x seconds default 3 (max 10 seconds)')
@self.registry.action(
'Wait for x seconds default 3 (max 10 seconds). This can be used to wait until the page is fully loaded.'
)
async def wait(seconds: int = 3):
# Cap wait time at maximum 10 seconds
actual_seconds = min(max(seconds, 0), 10)
if actual_seconds != seconds:
msg = f'🕒 Waiting for {actual_seconds} seconds (capped from {seconds} seconds, max 10 seconds)'
else:
msg = f'🕒 Waiting for {actual_seconds} seconds'
# Reduce the wait time by 3 seconds to account for the llm call which takes at least 3 seconds
# So if the model decides to wait for 5 seconds, the llm call took at least 3 seconds, so we only need to wait for 2 seconds
actual_seconds = min(max(seconds - 3, 0), 10)
msg = f'🕒 Waiting for {actual_seconds + 3} seconds'
logger.info(msg)
await asyncio.sleep(actual_seconds)
return ActionResult(
extracted_content=msg, include_in_memory=True, long_term_memory=f'Waited for {actual_seconds} seconds'
)
return ActionResult(extracted_content=msg)
# Element Interaction Actions

View File

@@ -15,6 +15,7 @@ from browser_use.dom.views import (
SelectorMap,
ViewportInfo,
)
from browser_use.observability import observe_debug
from browser_use.utils import is_new_tab_page, time_execution_async
# @dataclass
@@ -34,6 +35,7 @@ class DomService:
self.js_code = resources.files('browser_use.dom.dom_tree').joinpath('index.js').read_text()
# region - Clickable elements
@observe_debug(ignore_input=True, ignore_output=True, name='get_clickable_elements')
@time_execution_async('--get_clickable_elements')
async def get_clickable_elements(
self,

View File

@@ -4,14 +4,10 @@ We have switched all of our code from langchain to openai.types.chat.chat_comple
For easier transition we have
"""
from browser_use.llm.anthropic.chat import ChatAnthropic
from browser_use.llm.aws.chat_anthropic import ChatAnthropicBedrock
from browser_use.llm.aws.chat_bedrock import ChatAWSBedrock
from browser_use.llm.azure.chat import ChatAzureOpenAI
from typing import TYPE_CHECKING
# Lightweight imports that are commonly used
from browser_use.llm.base import BaseChatModel
from browser_use.llm.deepseek.chat import ChatDeepSeek
from browser_use.llm.google.chat import ChatGoogle
from browser_use.llm.groq.chat import ChatGroq
from browser_use.llm.messages import (
AssistantMessage,
BaseMessage,
@@ -27,11 +23,52 @@ from browser_use.llm.messages import (
from browser_use.llm.messages import (
ContentPartTextParam as ContentText,
)
from browser_use.llm.ollama.chat import ChatOllama
from browser_use.llm.openai.chat import ChatOpenAI
from browser_use.llm.openrouter.chat import ChatOpenRouter
# Make better names for the message
# Type stubs for lazy imports
if TYPE_CHECKING:
from browser_use.llm.anthropic.chat import ChatAnthropic
from browser_use.llm.aws.chat_anthropic import ChatAnthropicBedrock
from browser_use.llm.aws.chat_bedrock import ChatAWSBedrock
from browser_use.llm.azure.chat import ChatAzureOpenAI
from browser_use.llm.deepseek.chat import ChatDeepSeek
from browser_use.llm.google.chat import ChatGoogle
from browser_use.llm.groq.chat import ChatGroq
from browser_use.llm.ollama.chat import ChatOllama
from browser_use.llm.openai.chat import ChatOpenAI
from browser_use.llm.openrouter.chat import ChatOpenRouter
# Lazy imports mapping for heavy chat models
_LAZY_IMPORTS = {
'ChatAnthropic': ('browser_use.llm.anthropic.chat', 'ChatAnthropic'),
'ChatAnthropicBedrock': ('browser_use.llm.aws.chat_anthropic', 'ChatAnthropicBedrock'),
'ChatAWSBedrock': ('browser_use.llm.aws.chat_bedrock', 'ChatAWSBedrock'),
'ChatAzureOpenAI': ('browser_use.llm.azure.chat', 'ChatAzureOpenAI'),
'ChatDeepSeek': ('browser_use.llm.deepseek.chat', 'ChatDeepSeek'),
'ChatGoogle': ('browser_use.llm.google.chat', 'ChatGoogle'),
'ChatGroq': ('browser_use.llm.groq.chat', 'ChatGroq'),
'ChatOllama': ('browser_use.llm.ollama.chat', 'ChatOllama'),
'ChatOpenAI': ('browser_use.llm.openai.chat', 'ChatOpenAI'),
'ChatOpenRouter': ('browser_use.llm.openrouter.chat', 'ChatOpenRouter'),
}
def __getattr__(name: str):
"""Lazy import mechanism for heavy chat model imports."""
if name in _LAZY_IMPORTS:
module_path, attr_name = _LAZY_IMPORTS[name]
try:
from importlib import import_module
module = import_module(module_path)
attr = getattr(module, attr_name)
# Cache the imported attribute in the module's globals
globals()[name] = attr
return attr
except ImportError as e:
raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
__all__ = [
# Message types -> for easier transition from langchain

View File

@@ -1,5 +1,34 @@
from browser_use.llm.aws.chat_anthropic import ChatAnthropicBedrock
from browser_use.llm.aws.chat_bedrock import ChatAWSBedrock
from typing import TYPE_CHECKING
# Type stubs for lazy imports
if TYPE_CHECKING:
from browser_use.llm.aws.chat_anthropic import ChatAnthropicBedrock
from browser_use.llm.aws.chat_bedrock import ChatAWSBedrock
# Lazy imports mapping for AWS chat models
_LAZY_IMPORTS = {
'ChatAnthropicBedrock': ('browser_use.llm.aws.chat_anthropic', 'ChatAnthropicBedrock'),
'ChatAWSBedrock': ('browser_use.llm.aws.chat_bedrock', 'ChatAWSBedrock'),
}
def __getattr__(name: str):
"""Lazy import mechanism for AWS chat models."""
if name in _LAZY_IMPORTS:
module_path, attr_name = _LAZY_IMPORTS[name]
try:
from importlib import import_module
module = import_module(module_path)
attr = getattr(module, attr_name)
# Cache the imported attribute in the module's globals
globals()[name] = attr
return attr
except ImportError as e:
raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
__all__ = [
'ChatAWSBedrock',

View File

@@ -35,7 +35,8 @@ class ChatOpenAI(BaseChatModel):
model: ChatModel | str
# Model params
temperature: float | None = None
temperature: float | None = 0.2
frequency_penalty: float | None = 0.05
reasoning_effort: ReasoningEffort = 'low'
# Client initialization parameters
@@ -50,6 +51,8 @@ class ChatOpenAI(BaseChatModel):
default_query: Mapping[str, object] | None = None
http_client: httpx.AsyncClient | None = None
_strict_response_validation: bool = False
max_completion_tokens: int | None = 8000
top_p: float | None = None
# Static
@property
@@ -144,12 +147,24 @@ class ChatOpenAI(BaseChatModel):
try:
model_params: dict[str, Any] = {}
if self.model in ReasoningModels:
model_params['reasoning_effort'] = self.reasoning_effort
if self.temperature is not None:
model_params['temperature'] = self.temperature
if self.frequency_penalty is not None:
model_params['frequency_penalty'] = self.frequency_penalty
if self.max_completion_tokens is not None:
model_params['max_completion_tokens'] = self.max_completion_tokens
if self.top_p is not None:
model_params['top_p'] = self.top_p
if self.model in ReasoningModels:
model_params['reasoning_effort'] = self.reasoning_effort
model_params['temperature'] = 1
model_params['frequency_penalty'] = 0
if output_format is None:
# Return string response
response = await self.get_client().chat.completions.create(

View File

@@ -659,7 +659,7 @@ class BrowserUseServer:
if not self.browser_session:
return 'Error: No browser session active'
state = await self.browser_session.get_state_summary(cache_clickable_elements_hashes=False)
state = await self.browser_session.get_browser_state_with_recovery(cache_clickable_elements_hashes=False)
result = {
'url': state.url,

View File

@@ -2,18 +2,50 @@
Telemetry for Browser Use.
"""
from browser_use.telemetry.service import ProductTelemetry
from browser_use.telemetry.views import (
BaseTelemetryEvent,
CLITelemetryEvent,
MCPClientTelemetryEvent,
MCPServerTelemetryEvent,
)
from typing import TYPE_CHECKING
# Type stubs for lazy imports
if TYPE_CHECKING:
from browser_use.telemetry.service import ProductTelemetry
from browser_use.telemetry.views import (
BaseTelemetryEvent,
CLITelemetryEvent,
MCPClientTelemetryEvent,
MCPServerTelemetryEvent,
)
# Lazy imports mapping
_LAZY_IMPORTS = {
'ProductTelemetry': ('browser_use.telemetry.service', 'ProductTelemetry'),
'BaseTelemetryEvent': ('browser_use.telemetry.views', 'BaseTelemetryEvent'),
'CLITelemetryEvent': ('browser_use.telemetry.views', 'CLITelemetryEvent'),
'MCPClientTelemetryEvent': ('browser_use.telemetry.views', 'MCPClientTelemetryEvent'),
'MCPServerTelemetryEvent': ('browser_use.telemetry.views', 'MCPServerTelemetryEvent'),
}
def __getattr__(name: str):
"""Lazy import mechanism for telemetry components."""
if name in _LAZY_IMPORTS:
module_path, attr_name = _LAZY_IMPORTS[name]
try:
from importlib import import_module
module = import_module(module_path)
attr = getattr(module, attr_name)
# Cache the imported attribute in the module's globals
globals()[name] = attr
return attr
except ImportError as e:
raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
__all__ = [
'BaseTelemetryEvent',
'ProductTelemetry',
'CLITelemetryEvent',
'MCPClientTelemetryEvent',
'MCPServerTelemetryEvent',
'CLITelemetryEvent',
]

View File

@@ -31,10 +31,10 @@ dependencies = [
"typing-extensions>=4.12.2",
"uuid7>=0.1.0",
"authlib>=1.6.0",
"google-genai>=1.21.1",
"openai>=1.81.0",
"anthropic>=0.54.0",
"groq>=0.28.0",
"google-genai>=1.26.0",
"openai>=1.97.0",
"anthropic>=0.58.2",
"groq>=0.30.0",
"ollama>=0.5.1",
"google-api-python-client>=2.174.0",
"google-auth>=2.40.3",

View File

@@ -328,8 +328,30 @@ class TestControllerIntegration:
assert result.extracted_content is not None
assert 'Waiting for' in result.extracted_content
# Verify that at least 1 second has passed
assert end_time - start_time >= 0.9 # Allow some timing margin
# Verify that less than 0.1 second has passed (because we deducted 3 seconds to account for the llm call)
assert end_time - start_time <= 0.1 # Allow some timing margin
# longer wait
# Create wait action for 1 second - fix to use a dictionary
wait_action = {'wait': {'seconds': 5}} # Corrected format
# Record start time
start_time = time.time()
# Execute wait action
result = await controller.act(WaitActionModel(**wait_action), browser_session)
# Record end time
end_time = time.time()
# Verify the result
assert isinstance(result, ActionResult)
assert result.extracted_content is not None
assert 'Waiting for' in result.extracted_content
# Verify that we took 2 sec (5s-3s (llm call)= 2s)
assert end_time - start_time <= 2.1 # Allow some timing margin
assert end_time - start_time >= 1.9 # Allow some timing margin
async def test_go_back_action(self, controller, browser_session, base_url):
"""Test that go_back action navigates to the previous page."""