mirror of
https://github.com/browser-use/browser-use
synced 2026-04-22 17:45:09 +02:00
- type: ignore on each param line in sessions.py (pyright per-line) - Remove ActionHandler assert in browser.py (breaks pre-existing tests) - Ruff format
280 lines
8.5 KiB
Python
280 lines
8.5 KiB
Python
"""Jupyter-like persistent Python execution for browser-use CLI."""
|
|
|
|
import asyncio
|
|
import io
|
|
import traceback
|
|
from contextlib import redirect_stderr, redirect_stdout
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING, Any, Literal
|
|
|
|
if TYPE_CHECKING:
|
|
from browser_use.browser.session import BrowserSession
|
|
from browser_use.skill_cli.actions import ActionHandler
|
|
|
|
|
|
@dataclass
|
|
class ExecutionResult:
|
|
"""Result of Python code execution."""
|
|
|
|
success: bool
|
|
output: str = ''
|
|
error: str | None = None
|
|
|
|
|
|
@dataclass
class PythonSession:
    """Jupyter-like persistent Python execution.

    Maintains a namespace across multiple code executions, allowing variables
    to persist between commands. Provides a `browser` object for browser control
    whenever `execute` is given both an event loop and an action handler.
    """

    # Globals dict shared by every executed snippet.
    namespace: dict[str, Any] = field(default_factory=dict)
    # Number of `execute` calls since creation or the last `reset`.
    execution_count: int = 0
    # (code, result) pairs in execution order.
    history: list[tuple[str, ExecutionResult]] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Seed the namespace with a `__main__` identity and common helpers."""
        self.namespace.update(
            {
                '__name__': '__main__',
                '__doc__': None,
                'json': __import__('json'),
                're': __import__('re'),
                'os': __import__('os'),
                'Path': Path,
                'asyncio': asyncio,
            }
        )

    def execute(
        self,
        code: str,
        browser_session: 'BrowserSession',
        loop: asyncio.AbstractEventLoop | None = None,
        actions: 'ActionHandler | None' = None,
    ) -> ExecutionResult:
        """Execute code in the persistent namespace.

        The code is first compiled as a single expression so that, REPL-style,
        a non-None value is echoed via `repr`. If it is not a valid expression
        it is compiled and run as statements instead. stdout and stderr are
        captured for the duration of the run.

        NOTE: this runs arbitrary Python via `eval`/`exec` with the privileges
        of the current process; only pass code from a trusted source.

        Args:
            code: Python code to execute.
            browser_session: The browser session for browser operations.
            loop: The event loop for async operations (required for browser access).
            actions: Optional ActionHandler for direct execution (no event bus).

        Returns:
            An ExecutionResult. On success `output` holds captured stdout followed
            by captured stderr. On failure `success` is False, `error` holds the
            formatted traceback, and `output` holds whatever was captured before
            the exception. The (code, result) pair is appended to `history`.
        """
        # The browser wrapper needs both the loop and the action handler; without
        # them any previously injected `browser` is left untouched.
        if loop is not None and actions is not None:
            self.namespace['browser'] = BrowserWrapper(browser_session, loop, actions)
        self.execution_count += 1

        stdout = io.StringIO()
        stderr = io.StringIO()

        try:
            with redirect_stdout(stdout), redirect_stderr(stderr):
                # Only the *compile* step decides expression vs. statements.
                # Guarding eval() as well would re-run the code whenever the
                # expression itself raised SyntaxError at runtime.
                try:
                    expression = compile(code, '<input>', 'eval')
                except SyntaxError:
                    expression = None

                if expression is not None:
                    value = eval(expression, self.namespace)
                    if value is not None:
                        print(repr(value))
                else:
                    exec(compile(code, '<input>', 'exec'), self.namespace)

            result = ExecutionResult(success=True, output=stdout.getvalue() + stderr.getvalue())
        except Exception:
            # Keep everything written before the failure, stderr included, so the
            # failure path reports the same streams as the success path.
            result = ExecutionResult(
                success=False,
                output=stdout.getvalue() + stderr.getvalue(),
                error=traceback.format_exc(),
            )

        self.history.append((code, result))
        return result

    def reset(self) -> None:
        """Clear namespace and history, then restore the default namespace entries."""
        self.namespace.clear()
        self.history.clear()
        self.execution_count = 0
        self.__post_init__()

    def get_variables(self) -> dict[str, str]:
        """Return user-defined variable names mapped to their type names.

        Names starting with an underscore and the entries pre-seeded by the
        session (including `browser`) are omitted.
        """
        skip = {'__name__', '__doc__', 'json', 're', 'os', 'Path', 'asyncio', 'browser'}
        return {k: type(v).__name__ for k, v in self.namespace.items() if not k.startswith('_') and k not in skip}
|
|
|
|
|
|
class BrowserWrapper:
|
|
"""Convenient browser access for Python code.
|
|
|
|
Provides synchronous methods that wrap async BrowserSession operations.
|
|
Runs coroutines on the server's event loop using run_coroutine_threadsafe.
|
|
"""
|
|
|
|
def __init__(self, session: 'BrowserSession', loop: asyncio.AbstractEventLoop, actions: 'ActionHandler') -> None:
|
|
self._session = session
|
|
self._loop = loop
|
|
self._actions = actions
|
|
|
|
def _run(self, coro: Any) -> Any:
|
|
"""Run coroutine on the server's event loop."""
|
|
future = asyncio.run_coroutine_threadsafe(coro, self._loop)
|
|
return future.result(timeout=60)
|
|
|
|
@property
|
|
def url(self) -> str:
|
|
"""Get current page URL."""
|
|
return self._run(self._get_url())
|
|
|
|
async def _get_url(self) -> str:
|
|
state = await self._session.get_browser_state_summary(include_screenshot=False)
|
|
return state.url if state else ''
|
|
|
|
@property
|
|
def title(self) -> str:
|
|
"""Get current page title."""
|
|
return self._run(self._get_title())
|
|
|
|
async def _get_title(self) -> str:
|
|
state = await self._session.get_browser_state_summary(include_screenshot=False)
|
|
return state.title if state else ''
|
|
|
|
def goto(self, url: str) -> None:
|
|
"""Navigate to URL."""
|
|
self._run(self._goto_async(url))
|
|
|
|
async def _goto_async(self, url: str) -> None:
|
|
await self._actions.navigate(url)
|
|
|
|
def click(self, index: int) -> None:
|
|
"""Click element by index."""
|
|
self._run(self._click_async(index))
|
|
|
|
async def _click_async(self, index: int) -> None:
|
|
node = await self._session.get_element_by_index(index)
|
|
if node is None:
|
|
raise ValueError(f'Element index {index} not found')
|
|
await self._actions.click_element(node)
|
|
|
|
def type(self, text: str) -> None:
|
|
"""Type text into focused element."""
|
|
self._run(self._type_async(text))
|
|
|
|
async def _type_async(self, text: str) -> None:
|
|
cdp_session = await self._session.get_or_create_cdp_session(target_id=None, focus=False)
|
|
if not cdp_session:
|
|
raise RuntimeError('No active browser session')
|
|
await cdp_session.cdp_client.send.Input.insertText(
|
|
params={'text': text},
|
|
session_id=cdp_session.session_id,
|
|
)
|
|
|
|
def input(self, index: int, text: str) -> None:
|
|
"""Click element and type text."""
|
|
self._run(self._input_async(index, text))
|
|
|
|
async def _input_async(self, index: int, text: str) -> None:
|
|
node = await self._session.get_element_by_index(index)
|
|
if node is None:
|
|
raise ValueError(f'Element index {index} not found')
|
|
await self._actions.click_element(node)
|
|
await self._actions.type_text(node, text)
|
|
|
|
def upload(self, index: int, path: str) -> None:
|
|
"""Upload a file to a file input element."""
|
|
self._run(self._upload_async(index, path))
|
|
|
|
async def _upload_async(self, index: int, path: str) -> None:
|
|
from pathlib import Path as P
|
|
|
|
file_path = str(P(path).expanduser().resolve())
|
|
p = P(file_path)
|
|
if not p.exists():
|
|
raise FileNotFoundError(f'File not found: {file_path}')
|
|
if not p.is_file():
|
|
raise ValueError(f'Not a file: {file_path}')
|
|
if p.stat().st_size == 0:
|
|
raise ValueError(f'File is empty (0 bytes): {file_path}')
|
|
|
|
node = await self._session.get_element_by_index(index)
|
|
if node is None:
|
|
raise ValueError(f'Element index {index} not found')
|
|
|
|
file_input_node = self._session.find_file_input_near_element(node)
|
|
if file_input_node is None:
|
|
raise ValueError(f'Element {index} is not a file input and no file input found nearby')
|
|
|
|
await self._actions.upload_file(file_input_node, file_path)
|
|
|
|
def scroll(self, direction: Literal['up', 'down', 'left', 'right'] = 'down', amount: int = 500) -> None:
|
|
"""Scroll the page."""
|
|
self._run(self._scroll_async(direction, amount))
|
|
|
|
async def _scroll_async(self, direction: Literal['up', 'down', 'left', 'right'], amount: int) -> None:
|
|
await self._actions.scroll(direction, amount)
|
|
|
|
def screenshot(self, path: str | None = None) -> bytes:
|
|
"""Take screenshot, optionally save to file."""
|
|
data = self._run(self._session.take_screenshot())
|
|
if path:
|
|
Path(path).write_bytes(data)
|
|
return data
|
|
|
|
@property
|
|
def html(self) -> str:
|
|
"""Get page HTML."""
|
|
return self._run(self._get_html())
|
|
|
|
async def _get_html(self) -> str:
|
|
cdp_session = await self._session.get_or_create_cdp_session(target_id=None, focus=False)
|
|
if not cdp_session:
|
|
return ''
|
|
# Get the document root
|
|
doc = await cdp_session.cdp_client.send.DOM.getDocument(
|
|
params={},
|
|
session_id=cdp_session.session_id,
|
|
)
|
|
if not doc or 'root' not in doc:
|
|
return ''
|
|
# Get outer HTML of the root node
|
|
result = await cdp_session.cdp_client.send.DOM.getOuterHTML(
|
|
params={'nodeId': doc['root']['nodeId']},
|
|
session_id=cdp_session.session_id,
|
|
)
|
|
return result.get('outerHTML', '') if result else ''
|
|
|
|
def keys(self, keys: str) -> None:
|
|
"""Send keyboard keys."""
|
|
self._run(self._keys_async(keys))
|
|
|
|
async def _keys_async(self, keys: str) -> None:
|
|
await self._actions.send_keys(keys)
|
|
|
|
def back(self) -> None:
|
|
"""Go back in history."""
|
|
self._run(self._back_async())
|
|
|
|
async def _back_async(self) -> None:
|
|
await self._actions.go_back()
|
|
|
|
def wait(self, seconds: float) -> None:
|
|
"""Wait for specified seconds."""
|
|
import time
|
|
|
|
time.sleep(seconds)
|
|
|
|
def extract(self, query: str) -> Any:
|
|
"""Extract data using LLM (requires API key)."""
|
|
# This would need LLM integration
|
|
raise NotImplementedError('extract() requires LLM integration - use agent.run() instead')
|