mirror of
https://github.com/browser-use/browser-use
synced 2026-05-06 17:52:15 +02:00
245 lines
8.5 KiB
Python
245 lines
8.5 KiB
Python
import asyncio
|
|
import logging
|
|
|
|
from main_content_extractor import MainContentExtractor
|
|
|
|
from browser_use.agent.views import ActionModel, ActionResult
|
|
from browser_use.browser.service import Browser, BrowserConfig
|
|
from browser_use.controller.registry.service import Registry
|
|
from browser_use.controller.views import (
|
|
ClickElementAction,
|
|
DoneAction,
|
|
ExtractPageContentAction,
|
|
GoToUrlAction,
|
|
InputTextAction,
|
|
OpenTabAction,
|
|
ScrollAction,
|
|
SearchGoogleAction,
|
|
SwitchTabAction,
|
|
)
|
|
from browser_use.utils import time_execution_async, time_execution_sync
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class Controller:
    """Owns a browser plus an action registry and executes agent actions.

    On construction the controller creates a ``Browser``, a ``Registry`` and
    registers the built-in actions (navigation, element interaction, tab
    management, content extraction, scrolling, task completion). Additional
    actions can be registered through :meth:`action`.
    """

    def __init__(
        self,
        browser_config: 'BrowserConfig | None' = None,
    ):
        """Create the browser, the registry and the default actions.

        @param browser_config: Settings passed to ``Browser``. When omitted, a
            fresh ``BrowserConfig()`` is created for this controller, so
            default-constructed controllers never share one config instance.
        """
        # A ``BrowserConfig()`` default in the signature would be evaluated once
        # at definition time and shared by every controller; build it lazily.
        if browser_config is None:
            browser_config = BrowserConfig()

        self.browser = Browser(config=browser_config)
        self.wait_between_actions = browser_config.wait_between_actions
        self.registry = Registry()
        self._register_default_actions()

    def _register_default_actions(self):
        """Register all default browser actions"""
        # Local import: keeps this block self-contained without touching the
        # file-level import section.
        from urllib.parse import quote_plus

        def _scroll_amount_text(amount) -> str:
            """Describe a scroll distance for the action result message."""
            return f'{amount} pixels' if amount is not None else 'one page'

        # Basic Navigation Actions
        @self.registry.action(
            'Search Google in the current tab',
            param_model=SearchGoogleAction,
            requires_browser=True,
        )
        async def search_google(params: SearchGoogleAction, browser: Browser):
            page = await browser.get_current_page()
            # Encode the query so characters such as '&', '#', '+' or '%'
            # stay part of the search term instead of altering the URL.
            query = quote_plus(str(params.query))
            await page.goto(f'https://www.google.com/search?q={query}')
            await page.wait_for_load_state()

        @self.registry.action(
            'Navigate to URL in the current tab', param_model=GoToUrlAction, requires_browser=True
        )
        async def go_to_url(params: GoToUrlAction, browser: Browser):
            page = await browser.get_current_page()
            await page.goto(params.url)
            await page.wait_for_load_state()

        @self.registry.action('Go back', requires_browser=True)
        async def go_back(browser: Browser):
            page = await browser.get_current_page()
            await page.go_back()
            await page.wait_for_load_state()

        # Element Interaction Actions
        @self.registry.action(
            'Click element', param_model=ClickElementAction, requires_browser=True
        )
        async def click_element(params: ClickElementAction, browser: Browser):
            session = await browser.get_session()
            state = session.cached_state

            if params.index not in state.selector_map:
                raise Exception(
                    f'Element with index {params.index} does not exist - retry or use alternative actions'
                )

            element_node = state.selector_map[params.index]
            # Remember the tab count so a tab opened by the click can be detected.
            initial_pages = len(session.context.pages)

            try:
                await browser._click_element_node(element_node)
                msg = f'🖱️ Clicked element {params.index}: {element_node.xpath}'
                if len(session.context.pages) > initial_pages:
                    # The click opened a new tab - make it the current one.
                    await browser.switch_to_tab(-1)
                return ActionResult(extracted_content=msg, include_in_memory=True)
            except Exception as e:
                # Best effort: report the failure as an action error rather
                # than raising.
                logger.warning(
                    f'Element no longer available with index {params.index} - most likely the page changed'
                )
                return ActionResult(error=str(e))

        @self.registry.action('Input text', param_model=InputTextAction, requires_browser=True)
        async def input_text(params: InputTextAction, browser: Browser):
            session = await browser.get_session()
            state = session.cached_state

            if params.index not in state.selector_map:
                raise Exception(
                    f'Element index {params.index} does not exist - retry or use alternative actions'
                )

            element_node = state.selector_map[params.index]
            await browser._input_text_element_node(element_node, params.text)
            msg = f'⌨️ Input "{params.text}" into {params.index}: {element_node.xpath}'
            return ActionResult(extracted_content=msg, include_in_memory=True)

        # Tab Management Actions
        @self.registry.action('Switch tab', param_model=SwitchTabAction, requires_browser=True)
        async def switch_tab(params: SwitchTabAction, browser: Browser):
            await browser.switch_to_tab(params.page_id)
            # Wait for tab to be ready
            page = await browser.get_current_page()
            await page.wait_for_load_state()

        @self.registry.action(
            'Open url in new tab', param_model=OpenTabAction, requires_browser=True
        )
        async def open_tab(params: OpenTabAction, browser: Browser):
            await browser.create_new_tab(params.url)

        # Content Actions
        @self.registry.action(
            'Extract page content to get the text or markdown ',
            param_model=ExtractPageContentAction,
            requires_browser=True,
        )
        async def extract_content(params: ExtractPageContentAction, browser: Browser):
            page = await browser.get_current_page()

            content = MainContentExtractor.extract(  # type: ignore
                html=await page.content(),
                output_format=params.value,
            )
            return ActionResult(extracted_content=content)

        @self.registry.action('Complete task', param_model=DoneAction, requires_browser=True)
        async def done(params: DoneAction, browser: Browser):
            # The result only needs the final text; the browser session is
            # not consulted (the previous unused session/state lookups are gone).
            return ActionResult(is_done=True, extracted_content=params.text)

        @self.registry.action(
            'Scroll down the page by pixel amount - if no amount is specified, scroll down one page',
            param_model=ScrollAction,
            requires_browser=True,
        )
        async def scroll_down(params: ScrollAction, browser: Browser):
            page = await browser.get_current_page()
            if params.amount is not None:
                await page.evaluate(f'window.scrollBy(0, {params.amount});')
            else:
                await page.keyboard.press('PageDown')

            return ActionResult(
                extracted_content=f'Scrolled down the page by {_scroll_amount_text(params.amount)}',
                include_in_memory=True,
            )

        # scroll up
        @self.registry.action(
            'Scroll up the page by pixel amount - if no amount is specified, scroll up one page',
            param_model=ScrollAction,
            requires_browser=True,
        )
        async def scroll_up(params: ScrollAction, browser: Browser):
            page = await browser.get_current_page()
            if params.amount is not None:
                await page.evaluate(f'window.scrollBy(0, -{params.amount});')
            else:
                await page.keyboard.press('PageUp')

            return ActionResult(
                extracted_content=f'Scrolled up the page by {_scroll_amount_text(params.amount)}',
                include_in_memory=True,
            )

    def action(self, description: str, **kwargs):
        """Decorator for registering custom actions

        @param description: Describe the LLM what the function does (better description == better function calling)
        @param kwargs: Forwarded unchanged to ``Registry.action``.
        """
        return self.registry.action(description, **kwargs)

    @time_execution_async('--multi-act')
    async def multi_act(self, actions: list[ActionModel]) -> list[ActionResult]:
        """Execute multiple actions

        Actions run in order. Execution stops early when an action reports
        ``is_done`` or an error, or when the page gained new elements and the
        next action addresses an element by index.

        @param actions: Actions to execute, in order.
        @return: One ``ActionResult`` per action that was actually executed.
        """
        results = []
        changed = False
        await self.browser.remove_highlights()
        session = await self.browser.get_session()
        cached_selector_map = session.cached_state.selector_map
        # Hashes of the elements known before any action ran.
        cached_att_hashes = {e.hash.attributes_hash for e in cached_selector_map.values()}
        cached_path_hashes = {e.hash.branch_path_hash for e in cached_selector_map.values()}

        for i, action in enumerate(actions):
            if changed and action.get_index() is not None:
                # next action requires index but there are new elements on the page
                break

            results.append(await self.act(action))
            logger.debug(f'Executed action {i + 1} / {len(actions)}')
            if results[-1].is_done or results[-1].error or i == len(actions) - 1:
                break

            await asyncio.sleep(self.wait_between_actions)

            # Hash all current elements. If they are a subset of the cached
            # state it is fine - otherwise new elements appeared on the page.
            new_state = await self.browser.get_state()
            new_selector_map = new_state.selector_map

            new_att_hashes = {e.hash.attributes_hash for e in new_selector_map.values()}
            if not new_att_hashes.issubset(cached_att_hashes):
                logger.debug(f'Attributes changed - stopping after {i + 1} actions')
                changed = True

            new_path_hashes = {e.hash.branch_path_hash for e in new_selector_map.values()}
            if not new_path_hashes.issubset(cached_path_hashes):
                logger.debug(f'Branch path changed - stopping after {i + 1} actions')
                changed = True

        return results

    # ``act`` is a coroutine, so it uses the async timing decorator like
    # ``multi_act`` does.
    @time_execution_async('--act')
    async def act(self, action: ActionModel) -> ActionResult:
        """Execute an action

        Runs the first entry of the action model whose parameters are not
        ``None`` through the registry and normalises the outcome.

        @param action: Action model; its set fields map action names to params.
        @return: The action's ``ActionResult``; a ``str`` result is wrapped as
            ``extracted_content``; ``None`` (or no runnable action) yields an
            empty ``ActionResult``.
        @raise ValueError: If the action returns an unsupported result type.
        """
        for action_name, params in action.model_dump(exclude_unset=True).items():
            if params is None:
                continue

            result = await self.registry.execute_action(
                action_name, params, browser=self.browser
            )
            if isinstance(result, str):
                return ActionResult(extracted_content=result)
            if isinstance(result, ActionResult):
                return result
            if result is None:
                return ActionResult()
            raise ValueError(f'Invalid action result type: {type(result)} of {result}')

        return ActionResult()
|