mirror of
https://github.com/browser-use/browser-use
synced 2026-05-06 17:52:15 +02:00
476 lines
13 KiB
Python
476 lines
13 KiB
Python
"""
|
|
Playwright browser on steroids.
|
|
"""
|
|
|
|
import asyncio
|
|
import base64
|
|
import logging
|
|
import time
|
|
from dataclasses import dataclass
|
|
|
|
from playwright.async_api import Browser as PlaywrightBrowser
|
|
from playwright.async_api import BrowserContext, ElementHandle, Page, Playwright, async_playwright
|
|
|
|
from browser_use.browser.views import BrowserError, BrowserState, TabInfo
|
|
from browser_use.dom.service import DomService
|
|
from browser_use.dom.views import SelectorMap
|
|
from browser_use.utils import time_execution_sync
|
|
|
|
# Module-level logger, named after this module so output can be filtered per module.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class BrowserSession:
    """Everything that makes up one live browser session.

    `Browser._initialize_session()` builds an instance on first use; afterwards
    `Browser` mutates `current_page` (tab switching / creation) and
    `cached_state` (every `get_state()` call) in place.
    """

    # Playwright driver handle obtained from `async_playwright().start()`.
    playwright: Playwright
    # Chromium instance launched by `Browser._setup_browser()`.
    browser: PlaywrightBrowser
    # Context created by `Browser._create_context()`; owns all tabs (pages).
    context: BrowserContext
    # The tab that navigation and element actions currently operate on.
    current_page: Page
    # Most recent state snapshot; its selector_map backs the index -> xpath lookups.
    cached_state: BrowserState
|
|
|
|
|
|
class Browser:
    """
    Lazily started Playwright Chromium browser with anti-detection tweaks.

    Nothing is launched in the constructor. The Playwright driver, the browser,
    its context and the first page are created on the first `get_session()`
    call, which every other method goes through.
    """

    # Minimum time (seconds) `wait_for_page_load` spends waiting, even if the page loads faster.
    MINIMUM_WAIT_TIME = 0.5
    # Maximum time (seconds) `wait_for_page_load` waits for the page's 'load' state.
    MAXIMUM_WAIT_TIME = 5

    def __init__(self, headless: bool = False, keep_open: bool = False):
        """
        Args:
            headless: Launch Chromium without a visible window.
            keep_open: Ask for confirmation (Enter on stdin) before the browser is closed.
        """
        self.headless = headless
        self.keep_open = keep_open

        # Created lazily by get_session(); reset to None again by close().
        self.session: BrowserSession | None = None
        # Last state built by _update_state(); declared here so the attribute always exists.
        self.current_state: BrowserState | None = None

    async def _initialize_session(self) -> BrowserSession:
        """
        Start Playwright, launch the browser, open a context and a first page.

        Returns:
            The newly created session, which is also stored on `self.session`.

        Raises:
            Exception: Whatever Playwright raised during start-up. Anything that
                was already started is shut down again before re-raising.
        """
        playwright = await async_playwright().start()
        browser = None
        try:
            browser = await self._setup_browser(playwright)
            context = await self._create_context(browser)
            page = await context.new_page()

            # Instead of calling _update_state(), create an empty initial state
            initial_state = BrowserState(
                items=[],
                selector_map={},
                url=page.url,
                title=await page.title(),
                screenshot=None,
                tabs=[],
            )
        except Exception:
            # Do not leak the driver (or a half-initialised browser) on a failed start-up.
            try:
                if browser is not None:
                    await browser.close()
            finally:
                await playwright.stop()
            raise

        self.session = BrowserSession(
            playwright=playwright,
            browser=browser,
            context=context,
            current_page=page,
            cached_state=initial_state,
        )
        return self.session

    async def get_session(self) -> BrowserSession:
        """Return the active session, creating it on first use."""
        if self.session is None:
            return await self._initialize_session()
        return self.session

    async def get_current_page(self) -> Page:
        """Return the page (tab) that actions currently operate on."""
        session = await self.get_session()
        return session.current_page

    async def _setup_browser(self, playwright: Playwright) -> PlaywrightBrowser:
        """
        Launch Chromium with flags intended to reduce automation detection.

        Raises:
            Exception: Re-raises whatever `chromium.launch` raised, after logging it.
        """
        try:
            browser = await playwright.chromium.launch(
                headless=self.headless,
                ignore_default_args=['--enable-automation'],  # Helps with anti-detection
                args=[
                    '--no-sandbox',
                    '--disable-blink-features=AutomationControlled',
                    '--disable-extensions',
                    '--disable-infobars',
                    '--disable-background-timer-throttling',
                    '--disable-popup-blocking',
                    '--disable-backgrounding-occluded-windows',
                    '--disable-renderer-backgrounding',
                    '--disable-window-activation',
                    '--disable-focus-on-load',  # Prevents focus on navigation
                    '--no-first-run',
                    '--no-default-browser-check',
                    '--no-startup-window',  # Prevents initial focus
                    '--window-position=0,0',
                ],
            )

            return browser
        except Exception as e:
            logger.error(f'Failed to initialize Playwright browser: {str(e)}')
            raise

    async def _create_context(self, browser: PlaywrightBrowser) -> BrowserContext:
        """
        Create a browser context with a fixed viewport and user agent, and install
        an init script that masks common automation fingerprints on every page.
        """
        context = await browser.new_context(
            viewport={'width': 1280, 'height': 1024},
            user_agent=(
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                '(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
            ),
            java_script_enabled=True,
        )

        # Expose anti-detection scripts
        await context.add_init_script(
            """
            // Webdriver property
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined
            });

            // Languages
            Object.defineProperty(navigator, 'languages', {
                get: () => ['en-US', 'en']
            });

            // Plugins
            Object.defineProperty(navigator, 'plugins', {
                get: () => [1, 2, 3, 4, 5]
            });

            // Chrome runtime
            window.chrome = { runtime: {} };

            // Permissions
            const originalQuery = window.navigator.permissions.query;
            window.navigator.permissions.query = (parameters) => (
                parameters.name === 'notifications' ?
                    Promise.resolve({ state: Notification.permission }) :
                    originalQuery(parameters)
            );
            """
        )

        return context

    async def wait_for_page_load(self, timeout_overwrite: float | None = None):
        """
        Wait until the current page has loaded, but never less than a minimum time.

        Waits up to MAXIMUM_WAIT_TIME seconds for the page's 'load' state, then
        sleeps for whatever is left of the minimum wait.

        Args:
            timeout_overwrite: Minimum total wait in seconds. `None` means
                MINIMUM_WAIT_TIME; an explicit `0` means no minimum at all.
        """
        page = await self.get_current_page()

        # Monotonic clock: an elapsed-time measurement must not be affected by wall-clock changes.
        start_time = time.monotonic()

        try:
            await page.wait_for_load_state('load', timeout=self.MAXIMUM_WAIT_TIME * 1000)
        except Exception as e:
            # Best effort: a slow or never-finishing page must not fail the caller.
            logger.debug(f'--Page did not reach load state: {e}')

        # `is None` rather than `or`, so that an explicit 0 is honoured.
        minimum_wait = self.MINIMUM_WAIT_TIME if timeout_overwrite is None else timeout_overwrite
        elapsed = time.monotonic() - start_time
        remaining = max(minimum_wait - elapsed, 0)

        logger.debug(
            f'--Page loaded in {elapsed:.2f} seconds, waiting for additional {remaining:.2f} seconds'
        )

        if remaining > 0:
            await asyncio.sleep(remaining)

    async def close(self, force: bool = False):
        """
        Close the browser and stop Playwright.

        Unless `force` is true and `keep_open` is false, the user is first asked
        to press Enter. Does nothing when no session was ever started.

        Args:
            force: Skip the confirmation prompt (only honoured when `keep_open` is false).
        """
        if self.session is None:
            # Nothing was launched; do not start a browser just to close it.
            return

        if not force or self.keep_open:
            # Note: input() is blocking - consider an async alternative if needed
            input('Press Enter to close Browser...')
            self.keep_open = False

        # Detach the session first so a failing close cannot be retried on dead handles
        # and a later get_session() starts a fresh browser.
        session = self.session
        self.session = None
        try:
            await session.browser.close()
        finally:
            await session.playwright.stop()

    def __del__(self):
        """Best-effort cleanup when the object is garbage collected."""
        # getattr: __del__ also runs when __init__ failed before `session` was set.
        if getattr(self, 'session', None) is None:
            return

        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            loop = None

        try:
            if loop is None:
                asyncio.run(self.close(force=True))
            else:
                # asyncio.run() refuses to run inside a running loop; schedule the close instead.
                loop.create_task(self.close(force=True))
        except Exception as e:
            logger.debug(f'Browser cleanup during garbage collection failed: {e}')

    async def navigate_to(self, url: str):
        """Navigate the current page to `url` and wait for it to load."""
        page = await self.get_current_page()
        await page.goto(url)
        await self.wait_for_page_load()

    async def refresh_page(self):
        """Reload the current page and wait for it to load."""
        page = await self.get_current_page()
        await page.reload()
        await self.wait_for_page_load()

    async def go_back(self):
        """Navigate back in history and wait for the page to load."""
        page = await self.get_current_page()
        await page.go_back()
        await self.wait_for_page_load()

    async def go_forward(self):
        """Navigate forward in history and wait for the page to load."""
        page = await self.get_current_page()
        await page.go_forward()
        await self.wait_for_page_load()

    async def close_current_tab(self):
        """Close the current tab and switch to the first remaining tab, if any."""
        session = await self.get_session()
        page = session.current_page
        await page.close()

        # Switch to the first available tab if any exist
        if session.context.pages:
            await self.switch_to_tab(0)

        # otherwise the browser will be closed

    async def get_page_html(self) -> str:
        """Return the HTML content of the current page."""
        page = await self.get_current_page()
        return await page.content()

    async def execute_javascript(self, script: str):
        """Evaluate `script` in the current page and return its result."""
        page = await self.get_current_page()
        return await page.evaluate(script)

    # NOTE(review): time_execution_sync wraps an async function here, so it presumably
    # only times coroutine creation, not the awaited work - confirm and switch to an
    # async-aware variant if one exists.
    @time_execution_sync('--get_state')  # This decorator might need to be updated to handle async
    async def get_state(self, use_vision: bool = False) -> BrowserState:
        """
        Recompute the browser state and cache it on the session.

        Args:
            use_vision: Also capture a screenshot with highlighted elements.
        """
        session = await self.get_session()
        session.cached_state = await self._update_state(use_vision=use_vision)
        return session.cached_state

    async def _update_state(self, use_vision: bool = False) -> BrowserState:
        """Build a fresh BrowserState from the current page and return it."""
        page = await self.get_current_page()
        dom_service = DomService(page)
        content = await dom_service.get_clickable_elements()  # Assuming this is async

        screenshot_b64 = None
        if use_vision:
            screenshot_b64 = await self.take_screenshot(selector_map=content.selector_map)

        self.current_state = BrowserState(
            items=content.items,
            selector_map=content.selector_map,
            url=page.url,
            title=await page.title(),
            tabs=await self.get_tabs_info(),
            screenshot=screenshot_b64,
        )

        return self.current_state

    # region - Browser Actions

    async def take_screenshot(
        self, selector_map: SelectorMap | None, full_page: bool = False
    ) -> str:
        """
        Return a base64 encoded screenshot of the current page.

        Args:
            selector_map: If given and non-empty, its elements are highlighted
                for the duration of the screenshot.
            full_page: Capture the whole scrollable page instead of the viewport.
        """
        page = await self.get_current_page()

        try:
            if selector_map:
                await self.highlight_selector_map_elements(selector_map)

            screenshot = await page.screenshot(
                full_page=full_page,
                animations='disabled',
            )
        finally:
            # Always clean up, so a failed screenshot does not leave overlays on the page.
            if selector_map:
                await self.remove_highlights()

        return base64.b64encode(screenshot).decode('utf-8')

    async def highlight_selector_map_elements(self, selector_map: SelectorMap):
        """
        Outline every element in `selector_map` in red and label it with its index.

        Any existing highlights are removed first. Selectors are evaluated as
        XPath expressions in the page; selectors that match nothing are skipped.
        """
        page = await self.get_current_page()
        await self.remove_highlights()

        # Pass the map as an argument instead of interpolating it into the script:
        # XPaths containing quotes or backslashes would otherwise break the JavaScript.
        highlights = {str(index): str(selector) for index, selector in selector_map.items()}

        await page.evaluate(
            """
            (highlights) => {
                for (const [index, selector] of Object.entries(highlights)) {
                    const el = document.evaluate(selector, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
                    if (!el) continue;  // Skip if element not found
                    el.style.outline = "2px solid red";
                    el.setAttribute('browser-user-highlight-id', 'playwright-highlight');

                    const label = document.createElement("div");
                    label.className = 'playwright-highlight-label';
                    label.style.position = "fixed";
                    label.style.background = "red";
                    label.style.color = "white";
                    label.style.padding = "2px 6px";
                    label.style.borderRadius = "10px";
                    label.style.fontSize = "12px";
                    label.style.zIndex = "9999999";
                    label.textContent = index;
                    const rect = el.getBoundingClientRect();
                    label.style.top = (rect.top - 20) + "px";
                    label.style.left = rect.left + "px";
                    document.body.appendChild(label);
                }
            }
            """,
            highlights,
        )

    async def remove_highlights(self):
        """
        Remove all highlight outlines and labels created by highlight_selector_map_elements.
        """
        page = await self.get_current_page()
        await page.evaluate(
            """
            // Remove all highlight outlines
            const highlightedElements = document.querySelectorAll('[browser-user-highlight-id="playwright-highlight"]');
            highlightedElements.forEach(el => {
                el.style.outline = '';
                el.removeAttribute('browser-user-highlight-id');
            });

            // Remove all labels
            const labels = document.querySelectorAll('.playwright-highlight-label');
            labels.forEach(label => label.remove());
            """
        )

    # endregion

    # region - User Actions

    async def _input_text_by_xpath(self, xpath: str, text: str):
        """
        Clear the element at `xpath`, type `text` into it and wait for the page to settle.

        Raises:
            Exception: If the element is not found or visible in time, or any
                step fails. The original error is attached as the cause.
        """
        page = await self.get_current_page()

        try:
            element = await page.wait_for_selector(f'xpath={xpath}', timeout=5000, state='visible')

            if element is None:
                raise Exception(f'Element with xpath: {xpath} not found')

            await element.scroll_into_view_if_needed(timeout=2500)
            await element.fill('')
            await element.type(text)
            await self.wait_for_page_load()

        except Exception as e:
            raise Exception(
                f'Failed to input text into element with xpath: {xpath}. Error: {str(e)}'
            ) from e

    async def _click_element_by_xpath(self, xpath: str):
        """
        Click the element at `xpath`, falling back to a JavaScript click.

        A regular Playwright click is tried first; only if that click itself
        fails is `el.click()` run in the page. The page-load wait happens once,
        after whichever click succeeded.

        Raises:
            Exception: If the element is not found or visible in time, or both
                click strategies fail. The original error is attached as the cause.
        """
        page = await self.get_current_page()

        try:
            element = await page.wait_for_selector(f'xpath={xpath}', timeout=5000, state='visible')

            if element is None:
                raise Exception(f'Element with xpath: {xpath} not found')

            # await element.scroll_into_view_if_needed()

            try:
                await element.click(timeout=2500)
            except Exception as click_error:
                logger.debug(f'Regular click failed, falling back to JavaScript click: {click_error}')
                try:
                    await page.evaluate('(el) => el.click()', element)
                except Exception as e:
                    raise Exception(f'Failed to click element: {str(e)}') from e

            # Kept outside the click attempts so a failing wait cannot cause a second click.
            await self.wait_for_page_load()

        except Exception as e:
            raise Exception(f'Failed to click element with xpath: {xpath}. Error: {str(e)}') from e

    async def get_tabs_info(self) -> list[TabInfo]:
        """Return a TabInfo (index, url, title) for every open tab in the context."""
        session = await self.get_session()

        return [
            TabInfo(page_id=page_id, url=page.url, title=await page.title())
            for page_id, page in enumerate(session.context.pages)
        ]

    async def switch_to_tab(self, page_id: int) -> None:
        """
        Switch to a specific tab by its page_id.

        Negative indices count from the end, as with Python lists.

        Raises:
            BrowserError: If `page_id` is outside the range of open tabs.
        """
        session = await self.get_session()
        pages = session.context.pages

        # Check both bounds so an out-of-range negative index is also a BrowserError.
        if not -len(pages) <= page_id < len(pages):
            raise BrowserError(f'No tab found with page_id: {page_id}')

        page = pages[page_id]
        session.current_page = page

        await page.bring_to_front()
        await self.wait_for_page_load()

    async def create_new_tab(self, url: str | None = None) -> None:
        """Open a new tab, make it the current page and optionally navigate to `url`."""
        session = await self.get_session()
        new_page = await session.context.new_page()
        session.current_page = new_page

        await self.wait_for_page_load()

        if url:
            await new_page.goto(url)
            await self.wait_for_page_load(timeout_overwrite=1)

    # endregion

    # region - Helper methods for easier access to the DOM

    async def get_selector_map(self) -> SelectorMap:
        """Return the selector map of the most recently cached state."""
        session = await self.get_session()
        return session.cached_state.selector_map

    async def get_xpath(self, index: int) -> str:
        """
        Return the xpath stored for `index` in the cached selector map.

        Raises:
            KeyError: If `index` is not present in the selector map.
        """
        selector_map = await self.get_selector_map()
        return selector_map[index]

    async def get_element_by_index(self, index: int) -> ElementHandle | None:
        """Wait up to 2.5 seconds for the element at `index` to be visible and return its handle."""
        page = await self.get_current_page()
        xpath = await self.get_xpath(index)
        # Explicit `xpath=` prefix, as in the click and input helpers, so the
        # selector is always parsed as XPath.
        return await page.wait_for_selector(f'xpath={xpath}', timeout=2500, state='visible')

    # endregion
|