mirror of
https://github.com/browser-use/browser-use
synced 2026-05-13 17:56:35 +02:00
Merge branch 'speed-ups3' of https://github.com/browser-use/browser-use into speed-ups3
This commit is contained in:
@@ -188,7 +188,7 @@ class CreateAgentTaskEvent(BaseEvent):
|
||||
user_id: str = Field(max_length=255) # Added for authorization checks
|
||||
device_id: str | None = Field(None, max_length=255) # Device ID for auth lookup
|
||||
agent_session_id: str
|
||||
llm_model: str = Field(max_length=100) # LLMModel enum value as string
|
||||
llm_model: str = Field(max_length=200) # LLMModel enum value as string
|
||||
stopped: bool = False
|
||||
paused: bool = False
|
||||
task: str = Field(max_length=MAX_TASK_LENGTH)
|
||||
|
||||
@@ -28,7 +28,7 @@ class SystemPrompt:
|
||||
self.use_thinking = use_thinking
|
||||
self.flash_mode = flash_mode
|
||||
prompt = ''
|
||||
if override_system_message:
|
||||
if override_system_message is not None:
|
||||
prompt = override_system_message
|
||||
else:
|
||||
self._load_prompt_template()
|
||||
@@ -265,7 +265,8 @@ class AgentMessagePrompt:
|
||||
# Check if current page is a PDF viewer and add appropriate message
|
||||
pdf_message = ''
|
||||
if self.browser_state.is_pdf_viewer:
|
||||
pdf_message = 'PDF viewer cannot be rendered. In this page, DO NOT use the extract_structured_data action as PDF content cannot be rendered. Use the read_file action on the downloaded PDF in available_file_paths to read the full content.\n\n'
|
||||
pdf_message = 'PDF viewer cannot be rendered. In this page, DO NOT use the extract_structured_data action as PDF content cannot be rendered. '
|
||||
pdf_message += 'Use the read_file action on the downloaded PDF in available_file_paths to read the full text content or scroll in the page to see images/figures if needed.\n\n'
|
||||
|
||||
# Add recent events if available and requested
|
||||
recent_events_text = ''
|
||||
|
||||
@@ -624,7 +624,10 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
self._message_manager.add_new_task(new_task)
|
||||
# Mark as follow-up task and recreate eventbus (gets shut down after each run)
|
||||
self.state.follow_up_task = True
|
||||
self.eventbus = EventBus(name=f'Agent_{str(self.id)[-self.state.n_steps :]}')
|
||||
agent_id_suffix = str(self.id)[-4:].replace('-', '_')
|
||||
if agent_id_suffix and agent_id_suffix[0].isdigit():
|
||||
agent_id_suffix = 'a' + agent_id_suffix
|
||||
self.eventbus = EventBus(name=f'Agent_{agent_id_suffix}')
|
||||
|
||||
# Re-register cloud sync handler if it exists (if not disabled)
|
||||
if hasattr(self, 'cloud_sync') and self.cloud_sync and self.enable_cloud_sync:
|
||||
@@ -681,7 +684,6 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
# Always take screenshots for all steps
|
||||
self.logger.debug('📸 Requesting browser state with include_screenshot=True')
|
||||
browser_state_summary = await self.browser_session.get_browser_state_summary(
|
||||
cache_clickable_elements_hashes=True,
|
||||
include_screenshot=True, # always capture even if use_vision=False so that cloud sync is useful (it's fast now anyway)
|
||||
include_recent_events=self.include_recent_events,
|
||||
)
|
||||
@@ -1662,7 +1664,6 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
# This prevents stale element detection but doesn't refresh before execution
|
||||
if action.get_index() is not None and i != 0:
|
||||
new_browser_state_summary = await self.browser_session.get_browser_state_summary(
|
||||
cache_clickable_elements_hashes=False,
|
||||
include_screenshot=False,
|
||||
)
|
||||
new_selector_map = new_browser_state_summary.dom_state.selector_map
|
||||
@@ -1888,9 +1889,7 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
async def _execute_history_step(self, history_item: AgentHistory, delay: float) -> list[ActionResult]:
|
||||
"""Execute a single step from history with element validation"""
|
||||
assert self.browser_session is not None, 'BrowserSession is not set up'
|
||||
state = await self.browser_session.get_browser_state_summary(
|
||||
cache_clickable_elements_hashes=False, include_screenshot=False
|
||||
)
|
||||
state = await self.browser_session.get_browser_state_summary(include_screenshot=False)
|
||||
if not state or not history_item.model_output:
|
||||
raise ValueError('Invalid state or model output')
|
||||
updated_actions = []
|
||||
|
||||
@@ -190,7 +190,6 @@ class BrowserStateRequestEvent(BaseEvent[BrowserStateSummary]):
|
||||
|
||||
include_dom: bool = True
|
||||
include_screenshot: bool = True
|
||||
cache_clickable_elements_hashes: bool = True
|
||||
include_recent_events: bool = False
|
||||
|
||||
event_timeout: float | None = _get_timeout('TIMEOUT_BrowserStateRequestEvent', 30.0) # seconds
|
||||
|
||||
@@ -837,6 +837,17 @@ class BrowserSession(BaseModel):
|
||||
self.agent_focus = await self.get_or_create_cdp_session(target_id=last_target_id, focus=True)
|
||||
raise
|
||||
|
||||
# Dispatch NavigationCompleteEvent when tab focus changes
|
||||
# This ensures PDF detection and downloads work when switching tabs
|
||||
if event.target_id and event.url:
|
||||
self.logger.debug(f'🔄 Dispatching NavigationCompleteEvent for tab switch to {event.url[:50]}...')
|
||||
await self.event_bus.dispatch(
|
||||
NavigationCompleteEvent(
|
||||
target_id=event.target_id,
|
||||
url=event.url,
|
||||
)
|
||||
)
|
||||
|
||||
# self.logger.debug('🔄 AgentFocusChangedEvent handler completed successfully')
|
||||
|
||||
async def on_FileDownloadedEvent(self, event: FileDownloadedEvent) -> None:
|
||||
@@ -1061,7 +1072,6 @@ class BrowserSession(BaseModel):
|
||||
@observe_debug(ignore_input=True, ignore_output=True, name='get_browser_state_summary')
|
||||
async def get_browser_state_summary(
|
||||
self,
|
||||
cache_clickable_elements_hashes: bool = True,
|
||||
include_screenshot: bool = True,
|
||||
cached: bool = False,
|
||||
include_recent_events: bool = False,
|
||||
@@ -1088,7 +1098,6 @@ class BrowserSession(BaseModel):
|
||||
BrowserStateRequestEvent(
|
||||
include_dom=True,
|
||||
include_screenshot=include_screenshot,
|
||||
cache_clickable_elements_hashes=cache_clickable_elements_hashes,
|
||||
include_recent_events=include_recent_events,
|
||||
)
|
||||
),
|
||||
|
||||
@@ -56,6 +56,7 @@ class DownloadsWatchdog(BaseWatchdog):
|
||||
_cdp_event_tasks: set[asyncio.Task] = PrivateAttr(default_factory=set) # Track CDP event handler tasks
|
||||
_cdp_downloads_info: dict[str, dict[str, Any]] = PrivateAttr(default_factory=dict) # Map guid -> info
|
||||
_use_js_fetch_for_local: bool = PrivateAttr(default=False) # Guard JS fetch path for local regular downloads
|
||||
_session_pdf_urls: dict[str, str] = PrivateAttr(default_factory=dict) # URL -> path for PDFs downloaded this session
|
||||
|
||||
async def on_BrowserLaunchEvent(self, event: BrowserLaunchEvent) -> None:
|
||||
self.logger.debug(f'[DownloadsWatchdog] Received BrowserLaunchEvent, EventBus ID: {id(self.event_bus)}')
|
||||
@@ -123,6 +124,7 @@ class DownloadsWatchdog(BaseWatchdog):
|
||||
self._sessions_with_listeners.clear()
|
||||
self._active_downloads.clear()
|
||||
self._pdf_viewer_cache.clear()
|
||||
self._session_pdf_urls.clear()
|
||||
|
||||
async def on_NavigationCompleteEvent(self, event: NavigationCompleteEvent) -> None:
|
||||
"""Check for PDFs after navigation completes."""
|
||||
@@ -801,13 +803,26 @@ class DownloadsWatchdog(BaseWatchdog):
|
||||
|
||||
self.logger.debug(f'[DownloadsWatchdog] Generated filename: {pdf_filename}')
|
||||
|
||||
# Check if already downloaded by looking in the downloads directory
|
||||
# Check if already downloaded in this session
|
||||
self.logger.debug(f'[DownloadsWatchdog] PDF_URL: {pdf_url}, session_pdf_urls: {self._session_pdf_urls}')
|
||||
if pdf_url in self._session_pdf_urls:
|
||||
existing_path = self._session_pdf_urls[pdf_url]
|
||||
self.logger.debug(f'[DownloadsWatchdog] PDF already downloaded in session: {existing_path}')
|
||||
return existing_path
|
||||
|
||||
# Generate unique filename if file exists from previous run
|
||||
downloads_dir = str(self.browser_session.browser_profile.downloads_path)
|
||||
if os.path.exists(downloads_dir):
|
||||
existing_files = os.listdir(downloads_dir)
|
||||
if pdf_filename in existing_files:
|
||||
self.logger.debug(f'[DownloadsWatchdog] PDF already downloaded: {pdf_filename}')
|
||||
return None
|
||||
os.makedirs(downloads_dir, exist_ok=True)
|
||||
final_filename = pdf_filename
|
||||
existing_files = os.listdir(downloads_dir)
|
||||
if pdf_filename in existing_files:
|
||||
# Generate unique name with (1), (2), etc.
|
||||
base, ext = os.path.splitext(pdf_filename)
|
||||
counter = 1
|
||||
while f'{base} ({counter}){ext}' in existing_files:
|
||||
counter += 1
|
||||
final_filename = f'{base} ({counter}){ext}'
|
||||
self.logger.debug(f'[DownloadsWatchdog] File exists, using: {final_filename}')
|
||||
|
||||
self.logger.debug(f'[DownloadsWatchdog] Starting PDF download from: {pdf_url[:100]}...')
|
||||
|
||||
@@ -858,12 +873,10 @@ class DownloadsWatchdog(BaseWatchdog):
|
||||
download_result = result.get('result', {}).get('value', {})
|
||||
|
||||
if download_result and download_result.get('data') and len(download_result['data']) > 0:
|
||||
# Ensure unique filename
|
||||
downloads_dir = str(self.browser_session.browser_profile.downloads_path)
|
||||
# Ensure downloads directory exists
|
||||
downloads_dir = str(self.browser_session.browser_profile.downloads_path)
|
||||
os.makedirs(downloads_dir, exist_ok=True)
|
||||
unique_filename = await self._get_unique_filename(downloads_dir, pdf_filename)
|
||||
download_path = os.path.join(downloads_dir, unique_filename)
|
||||
download_path = os.path.join(downloads_dir, final_filename)
|
||||
|
||||
# Save the PDF asynchronously
|
||||
async with await anyio.open_file(download_path, 'wb') as f:
|
||||
@@ -886,13 +899,16 @@ class DownloadsWatchdog(BaseWatchdog):
|
||||
f'[DownloadsWatchdog] ✅ Auto-downloaded PDF ({cache_status}, {response_size:,} bytes): {download_path}'
|
||||
)
|
||||
|
||||
# Store URL->path mapping for this session
|
||||
self._session_pdf_urls[pdf_url] = download_path
|
||||
|
||||
# Emit file downloaded event
|
||||
self.logger.debug(f'[DownloadsWatchdog] Dispatching FileDownloadedEvent for {unique_filename}')
|
||||
self.logger.debug(f'[DownloadsWatchdog] Dispatching FileDownloadedEvent for {final_filename}')
|
||||
self.event_bus.dispatch(
|
||||
FileDownloadedEvent(
|
||||
url=pdf_url,
|
||||
path=download_path,
|
||||
file_name=unique_filename,
|
||||
file_name=final_filename,
|
||||
file_size=response_size,
|
||||
file_type='pdf',
|
||||
mime_type='application/pdf',
|
||||
|
||||
@@ -272,7 +272,7 @@ class FileSystem:
|
||||
|
||||
reader = pypdf.PdfReader(full_filename)
|
||||
num_pages = len(reader.pages)
|
||||
MAX_PDF_PAGES = 10
|
||||
MAX_PDF_PAGES = 20
|
||||
extra_pages = num_pages - MAX_PDF_PAGES
|
||||
extracted_text = ''
|
||||
for page in reader.pages[:MAX_PDF_PAGES]:
|
||||
|
||||
@@ -768,7 +768,7 @@ class BrowserUseServer:
|
||||
if not self.browser_session:
|
||||
return 'Error: No browser session active'
|
||||
|
||||
state = await self.browser_session.get_browser_state_summary(cache_clickable_elements_hashes=False)
|
||||
state = await self.browser_session.get_browser_state_summary()
|
||||
|
||||
result = {
|
||||
'url': state.url,
|
||||
@@ -819,10 +819,15 @@ class BrowserUseServer:
|
||||
ExtractAction = create_model(
|
||||
'ExtractAction',
|
||||
__base__=ActionModel,
|
||||
extract_structured_data=(dict[str, Any], {'query': query, 'extract_links': extract_links}),
|
||||
extract_structured_data=dict[str, Any],
|
||||
)
|
||||
|
||||
action = ExtractAction()
|
||||
# Use model_validate because Pyright does not understand the dynamic model
|
||||
action = ExtractAction.model_validate(
|
||||
{
|
||||
'extract_structured_data': {'query': query, 'extract_links': extract_links},
|
||||
}
|
||||
)
|
||||
action_result = await self.tools.act(
|
||||
action=action,
|
||||
browser_session=self.browser_session,
|
||||
|
||||
@@ -243,6 +243,33 @@ Required environment variables:
|
||||
ALIBABA_CLOUD=
|
||||
```
|
||||
|
||||
## ModelScope [example](https://github.com/browser-use/browser-use/blob/main/examples/models/modelscope_example.py)
|
||||
|
||||
```python
|
||||
from browser_use import Agent, ChatOpenAI
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# Get API key from https://www.modelscope.cn/docs/model-service/API-Inference/intro
|
||||
api_key = os.getenv('MODELSCOPE_API_KEY')
|
||||
base_url = 'https://api-inference.modelscope.cn/v1/'
|
||||
|
||||
llm = ChatOpenAI(model='Qwen/Qwen2.5-VL-72B-Instruct', api_key=api_key, base_url=base_url)
|
||||
|
||||
agent = Agent(
|
||||
task="Your task here",
|
||||
llm=llm,
|
||||
use_vision=True
|
||||
)
|
||||
```
|
||||
|
||||
Required environment variables:
|
||||
|
||||
```bash .env
|
||||
MODELSCOPE_API_KEY=
|
||||
```
|
||||
|
||||
## Other models (DeepSeek, Novita, X...)
|
||||
|
||||
|
||||
312
examples/custom-functions/parallel_agents.py
Normal file
312
examples/custom-functions/parallel_agents.py
Normal file
@@ -0,0 +1,312 @@
|
||||
"""
|
||||
Simple parallel multi-agent example.
|
||||
|
||||
This launches multiple agents in parallel to work on different tasks simultaneously.
|
||||
No complex orchestrator - just direct parallel execution.
|
||||
|
||||
@file purpose: Demonstrates parallel multi-agent execution using asyncio
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from typing import List
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
from browser_use import Agent
|
||||
from browser_use.llm.google import ChatGoogle
|
||||
|
||||
# ============================================================================
|
||||
# 🔧 SIMPLE CONFIGURATION - CHANGE THIS TO YOUR DESIRED TASK
|
||||
# ============================================================================
|
||||
|
||||
MAIN_TASK = "find age of ronaldo and messi"
|
||||
|
||||
# Simple test - let's start with just one person to see what happens
|
||||
# MAIN_TASK = "find age of elon musk"
|
||||
|
||||
# ============================================================================
|
||||
|
||||
|
||||
async def create_subtasks(main_task: str, llm) -> list[str]:
    """Ask the LLM to break ``main_task`` into independent single-item subtasks.

    The model is prompted to return one subtask per line. For example, the
    task "what are ages of musk, altman, bezos, gates" is expected to yield
    four lines such as "Find Elon Musk's age and birth date", one per person.

    Args:
        main_task: Free-form description of the overall task.
        llm: Object exposing an awaitable ``ainvoke(prompt)``. The reply may be
            a plain string or an object carrying ``content`` or ``text``.

    Returns:
        The non-empty reply lines with any leading list marker removed. Falls
        back to ``simple_split_task(main_task)`` when the reply yields no
        usable lines or when the call raises.
    """
    import re

    # Matches only a genuine list marker ("1. ", "2) ", "- ", "* ", "• ").
    # A character-set lstrip would also eat meaningful leading characters,
    # e.g. turn "3M's revenue" into "M's revenue".
    list_marker = re.compile(r'^\s*(?:[-*•]+|\d+[.)])\s+')

    prompt = f"""
Break down this main task into individual, separate subtasks where each subtask focuses on ONLY ONE specific person, company, or item:

Main task: {main_task}

RULES:
- Each subtask must focus on ONLY ONE person/company/item
- Do NOT combine multiple people/companies/items in one subtask
- Each subtask should be completely independent
- If the main task mentions multiple items, create one subtask per item

Return only the subtasks, one per line, without numbering or bullets.
Each line should focus on exactly ONE person/company/item.
"""

    try:
        response = await llm.ainvoke(prompt)

        # Debug output kept from the original example.
        print(f"DEBUG: Response type: {type(response)}")
        print(f"DEBUG: Response content: {response}")

        # Normalise the different reply shapes to a single string.
        if hasattr(response, 'content'):
            content = response.content
        elif isinstance(response, str):
            content = response
        elif hasattr(response, 'text'):
            content = response.text
        else:
            content = str(response)

        cleaned_subtasks = []
        for line in content.strip().split('\n'):
            # The model may still add numbering or bullets despite the prompt.
            cleaned = list_marker.sub('', line.strip(), count=1).strip()
            if cleaned:
                cleaned_subtasks.append(cleaned)

        return cleaned_subtasks if cleaned_subtasks else simple_split_task(main_task)
    except Exception as e:
        # Best-effort: any failure talking to the LLM degrades to the
        # deterministic splitter instead of aborting the example.
        print(f"Error creating subtasks: {e}")
        return simple_split_task(main_task)
|
||||
|
||||
|
||||
def simple_split_task(main_task: str) -> list[str]:
    """Split ``main_task`` into parts without an LLM (fallback path).

    The task is split on commas and on the word "and" (any letter case,
    surrounded by whitespace). Both separators are honoured in the same
    string, so "a, b and c" yields three parts.

    Args:
        main_task: Free-form description of the overall task.

    Returns:
        The stripped, non-empty parts. When no separator is present, or when
        splitting leaves nothing usable, a one-element list containing the
        original task is returned so callers always get at least one subtask.
    """
    import re

    # Case-insensitive so detection and splitting agree on "AND" / "And".
    pieces = re.split(r'\s+and\s+|,', main_task, flags=re.IGNORECASE)
    parts = [piece.strip() for piece in pieces if piece.strip()]

    return parts or [main_task]
|
||||
|
||||
|
||||
async def run_single_agent(task: str, llm, agent_id: int) -> tuple[int, str]:
    """Run one agent in its own browser instance and return its result.

    Each call creates a temporary user-data directory and a dedicated
    ``BrowserSession``, runs the agent with a 5 minute timeout, and then
    always kills the browser and removes the temporary directory, including
    when the run fails part-way.

    Args:
        task: The subtask this agent should work on.
        llm: Chat model instance passed through to ``Agent``.
        agent_id: 1-based identifier used in log output and in the result.

    Returns:
        ``(agent_id, text)`` where ``text`` is the cleaned agent result, or an
        error description if the agent failed.
    """
    import shutil
    import tempfile

    print(f"🚀 Agent {agent_id} starting: {task}")
    print(f" 📝 This agent will focus ONLY on: {task}")
    print(f" 🌐 Creating isolated browser instance for agent {agent_id}")

    temp_dir = None
    browser_session = None

    try:
        # Imported lazily so an import problem is reported as this agent's failure.
        from browser_use.browser import BrowserSession
        from browser_use.browser.profile import BrowserProfile

        # Unique profile directory so parallel browsers do not share state.
        temp_dir = tempfile.mkdtemp(prefix=f"browser_agent_{agent_id}_")

        profile = BrowserProfile()
        profile.user_data_dir = temp_dir
        profile.headless = False  # Set to True if you want headless mode
        profile.keep_alive = False  # Don't keep browser alive after task

        # Extra Chromium flags for each agent's browser.
        # NOTE(review): '--disable-popup-blocking' turns the popup blocker OFF,
        # i.e. it allows popups; confirm this is the intended behaviour.
        profile.args = [
            '--disable-popup-blocking',
            '--disable-extensions',
            '--disable-plugins',
            '--disable-images',  # Faster loading
            '--no-first-run',
            '--disable-default-apps',
            '--disable-background-timer-throttling',
            '--disable-backgrounding-occluded-windows',
            '--disable-renderer-backgrounding',
        ]

        browser_session = BrowserSession(browser_profile=profile)

        # Debug: report the initial tab count (failure here is non-fatal).
        try:
            await browser_session.start()
            initial_tabs = await browser_session._cdp_get_all_pages()
            print(f" 📊 Agent {agent_id} initial tab count: {len(initial_tabs)}")
        except Exception as e:
            print(f" ⚠️ Could not check initial tabs for agent {agent_id}: {e}")

        agent = Agent(task=task, llm=llm, browser_session=browser_session, preload=False)

        # Bound the run so one stuck agent cannot hang the whole gather().
        try:
            result = await asyncio.wait_for(agent.run(), timeout=300)  # 5 minute timeout
        except asyncio.TimeoutError:
            print(f"⏰ Agent {agent_id} timed out after 5 minutes")
            result = "Task timed out"

        # Debug: report the final tab count (failure here is non-fatal).
        try:
            final_tabs = await browser_session._cdp_get_all_pages()
            print(f" 📊 Agent {agent_id} final tab count: {len(final_tabs)}")
            for i, tab in enumerate(final_tabs):
                print(f" Tab {i+1}: {tab.get('url', 'unknown')[:50]}...")
        except Exception as e:
            print(f" ⚠️ Could not check final tabs for agent {agent_id}: {e}")

        clean_result = extract_clean_result(result)

    except Exception as e:
        error_msg = f"Agent {agent_id} failed: {str(e)}"
        print(f"❌ {error_msg}")
        return agent_id, error_msg

    finally:
        # Cleanup runs on success AND failure so neither the browser process
        # nor the temporary profile directory is leaked.
        if browser_session is not None:
            try:
                await browser_session.kill()
            except Exception as e:
                print(f"⚠️ Warning: Error closing browser for agent {agent_id}: {e}")
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)

    print(f"✅ Agent {agent_id} completed and browser closed: {task}")
    return agent_id, clean_result
|
||||
|
||||
|
||||
def extract_clean_result(agent_result) -> str:
    """Extract a short, human-readable result from an agent run.

    Lookup order:
      1. ``agent_result.final_result()`` when that method exists and returns
         something non-empty.
      2. ``extracted_content`` of the last entry in ``agent_result.all_results``.
      3. ``str(agent_result)`` (this also covers plain strings such as the
         timeout placeholder).

    Args:
        agent_result: Whatever the agent run produced; typically a history
            object, or a plain string.

    Returns:
        The extracted text, or "Result extraction failed" if reading the
        result raised.
    """
    try:
        final_result = getattr(agent_result, 'final_result', None)
        if callable(final_result):
            final_text = final_result()
            if final_text:
                return str(final_text)

        all_results = getattr(agent_result, 'all_results', None)
        if all_results:
            extracted = getattr(all_results[-1], 'extracted_content', None)
            if extracted:
                return extracted

        # Fallback to string representation
        return str(agent_result)
    except Exception:
        # Best-effort helper: never let result formatting crash the agent run.
        return "Result extraction failed"
|
||||
|
||||
|
||||
async def run_parallel_agents():
    """Split MAIN_TASK into subtasks and run one agent per subtask concurrently.

    The same LLM instance is used both to create the subtasks and to drive
    every agent. Results are printed once all agents have finished.
    """
    # Use Gemini 1.5 Flash
    llm = ChatGoogle(model="gemini-1.5-flash")

    # The task comes from the module-level configuration constant.
    main_task = MAIN_TASK

    print(f"🎯 Main task: {main_task}")
    print("🧠 Creating subtasks using LLM...")

    subtasks = await create_subtasks(main_task, llm)
    subtask_count = len(subtasks)

    print(f"📋 Created {subtask_count} subtasks:")
    for number, subtask in enumerate(subtasks, start=1):
        print(f" {number}. {subtask}")

    print(f"\n🔥 Starting {subtask_count} agents in parallel...")
    print(f"🔍 Each agent will get its own browser instance with exactly ONE tab")
    print(f"📊 Expected: {subtask_count} browser instances, {subtask_count} tabs total")

    # One coroutine per subtask; agent ids are 1-based.
    pending = []
    for number, subtask in enumerate(subtasks, start=1):
        pending.append(run_single_agent(subtask, llm, number))

    # gather() runs them concurrently and keeps the input order.
    outcomes = await asyncio.gather(*pending)

    banner = "=" * 60
    print("\n" + banner)
    print("📊 PARALLEL EXECUTION RESULTS")
    print(banner)

    for agent_id, outcome in outcomes:
        print(f"\n🤖 Agent {agent_id} result:")
        print(f"Task: {subtasks[agent_id - 1]}")
        print(f"Result: {outcome}")
        print("-" * 50)

    print(f"\n🎉 All {subtask_count} parallel agents completed!")
|
||||
|
||||
|
||||
def main():
    """Validate the environment, print a banner, and run the parallel agents.

    Exits with status 1 when GOOGLE_API_KEY is missing or empty. A key shorter
    than 20 characters only produces a warning.
    """
    api_key = os.getenv('GOOGLE_API_KEY')

    # Guard clause: nothing can run without a key.
    if not api_key:
        for line in (
            '❌ Error: GOOGLE_API_KEY environment variable not set',
            'Please set your Google API key to use parallel agents',
            'You can set it with: export GOOGLE_API_KEY="your-key-here"',
        ):
            print(line)
        sys.exit(1)

    # Sanity check only: a short key is suspicious but not fatal.
    key_length = len(api_key)
    if key_length < 20:
        print(f'⚠️ Warning: GOOGLE_API_KEY seems too short ({key_length} characters)')
        print('Google API keys are typically 39 characters long')
        print('Continuing anyway, but this might cause authentication issues...')

    print('🚀 Starting parallel multi-agent example...')
    print(f'📝 Task: {MAIN_TASK}')
    print('This will dynamically create agents based on task complexity')
    print('-' * 60)

    asyncio.run(run_parallel_agents())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
34
examples/models/modelscope_example.py
Normal file
34
examples/models/modelscope_example.py
Normal file
@@ -0,0 +1,34 @@
|
||||
"""
|
||||
Minimal example: run a browser-use agent with a model hosted on ModelScope.
|
||||
|
||||
@dev You need to add MODELSCOPE_API_KEY to your environment variables.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from browser_use import Agent, ChatOpenAI
|
||||
|
||||
# dotenv
|
||||
load_dotenv()
|
||||
|
||||
api_key = os.getenv('MODELSCOPE_API_KEY', '')
|
||||
if not api_key:
|
||||
raise ValueError('MODELSCOPE_API_KEY is not set')
|
||||
|
||||
|
||||
async def run_search():
    """Run a single agent against a ModelScope-hosted model.

    ModelScope is reached through ``ChatOpenAI`` pointed at its inference base
    URL, authenticated with the module-level ``api_key``.
    """
    # Alternative task: 'go to amazon.com, search for laptop'
    search_task = 'go to google, search for modelscope'

    modelscope_llm = ChatOpenAI(
        base_url='https://api-inference.modelscope.cn/v1/',
        model='Qwen/Qwen2.5-VL-72B-Instruct',
        api_key=api_key,
    )

    agent = Agent(task=search_task, llm=modelscope_llm, use_vision=False)

    await agent.run()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(run_search())
|
||||
@@ -143,7 +143,7 @@ class TestClickElementEvent:
|
||||
await asyncio.sleep(0.5) # Give page time to load
|
||||
|
||||
# Initialize the DOM state to populate the selector map
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
|
||||
# Get the selector map
|
||||
selector_map = await browser_session.get_selector_map()
|
||||
@@ -406,7 +406,7 @@ class TestClickElementEvent:
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
# Get the clickable elements
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
selector_map = await browser_session.get_selector_map()
|
||||
|
||||
# Find the inline element
|
||||
@@ -488,7 +488,7 @@ class TestClickElementEvent:
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
# Get the clickable elements
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
selector_map = await browser_session.get_selector_map()
|
||||
|
||||
# Find the block element inside inline
|
||||
@@ -576,7 +576,7 @@ class TestClickElementEvent:
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
# Get the clickable elements
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
selector_map = await browser_session.get_selector_map()
|
||||
|
||||
# Find the target element
|
||||
@@ -636,7 +636,7 @@ class TestClickElementEvent:
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
# Get the clickable elements
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
selector_map = await browser_session.get_selector_map()
|
||||
|
||||
# Find the file input
|
||||
@@ -699,7 +699,7 @@ class TestClickElementEvent:
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
# Get the clickable elements
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
selector_map = await browser_session.get_selector_map()
|
||||
|
||||
# Find the select element
|
||||
@@ -1098,7 +1098,7 @@ class TestClickElementEvent:
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
# Initialize the DOM state to populate the selector map
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
|
||||
# Get the selector map
|
||||
selector_map = await browser_session.get_selector_map()
|
||||
|
||||
@@ -286,7 +286,7 @@ class TestGetDropdownOptionsEvent:
|
||||
await tools.act(GoToUrlActionModel(**goto_action), browser_session)
|
||||
|
||||
# Initialize the DOM state to populate the selector map
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
|
||||
# Get the selector map and find the select element
|
||||
selector_map = await browser_session.get_selector_map()
|
||||
@@ -344,7 +344,7 @@ class TestGetDropdownOptionsEvent:
|
||||
await tools.act(GoToUrlActionModel(**goto_action), browser_session)
|
||||
|
||||
# Initialize the DOM state
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
|
||||
# Get the selector map and find the ARIA menu
|
||||
selector_map = await browser_session.get_selector_map()
|
||||
@@ -406,7 +406,7 @@ class TestGetDropdownOptionsEvent:
|
||||
await tools.act(GoToUrlActionModel(**goto_action), browser_session)
|
||||
|
||||
# Initialize the DOM state
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
|
||||
# Get the selector map and find the custom dropdown
|
||||
selector_map = await browser_session.get_selector_map()
|
||||
@@ -495,7 +495,7 @@ class TestSelectDropdownOptionEvent:
|
||||
await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0)
|
||||
|
||||
# Initialize the DOM state
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
|
||||
# Get the selector map and find the select element
|
||||
selector_map = await browser_session.get_selector_map()
|
||||
@@ -543,7 +543,7 @@ class TestSelectDropdownOptionEvent:
|
||||
await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0)
|
||||
|
||||
# Initialize the DOM state
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
|
||||
# Get the selector map and find the ARIA menu
|
||||
selector_map = await browser_session.get_selector_map()
|
||||
@@ -595,7 +595,7 @@ class TestSelectDropdownOptionEvent:
|
||||
await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0)
|
||||
|
||||
# Initialize the DOM state
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
|
||||
# Get the selector map and find the custom dropdown
|
||||
selector_map = await browser_session.get_selector_map()
|
||||
@@ -643,7 +643,7 @@ class TestSelectDropdownOptionEvent:
|
||||
await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0)
|
||||
|
||||
# Initialize the DOM state
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
|
||||
# Get the selector map and find the select element
|
||||
selector_map = await browser_session.get_selector_map()
|
||||
|
||||
@@ -165,7 +165,7 @@ class TestARIAMenuDropdown:
|
||||
await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0)
|
||||
|
||||
# Initialize the DOM state to populate the selector map
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
|
||||
# Get the selector map
|
||||
selector_map = await browser_session.get_selector_map()
|
||||
@@ -232,7 +232,7 @@ class TestARIAMenuDropdown:
|
||||
await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0)
|
||||
|
||||
# Initialize the DOM state to populate the selector map
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
|
||||
# Get the selector map
|
||||
selector_map = await browser_session.get_selector_map()
|
||||
@@ -302,7 +302,7 @@ class TestARIAMenuDropdown:
|
||||
await browser_session.event_bus.expect(NavigationCompleteEvent, timeout=10.0)
|
||||
|
||||
# Initialize the DOM state to populate the selector map
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
|
||||
# Get the selector map
|
||||
selector_map = await browser_session.get_selector_map()
|
||||
|
||||
@@ -97,7 +97,7 @@ class TestNavigateToUrlEvent:
|
||||
|
||||
# Test that get_state_summary works
|
||||
try:
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
assert False, 'Expected throw error when navigating to non-existent page'
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
@@ -88,7 +88,7 @@ async def test_assumption_1_dom_processing_works(browser_session, httpserver):
|
||||
await event.event_result(raise_if_any=True, raise_if_none=False)
|
||||
|
||||
# Trigger DOM processing
|
||||
state = await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=False)
|
||||
state = await browser_session.get_browser_state_summary()
|
||||
|
||||
print('DOM processing result:')
|
||||
print(f' - Elements found: {len(state.dom_state.selector_map)}')
|
||||
@@ -109,7 +109,7 @@ async def test_assumption_2_cached_selector_map_persists(browser_session, httpse
|
||||
await event.event_result(raise_if_any=True, raise_if_none=False)
|
||||
|
||||
# Trigger DOM processing and cache
|
||||
state = await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=False)
|
||||
state = await browser_session.get_browser_state_summary()
|
||||
initial_selector_map = dict(state.dom_state.selector_map)
|
||||
|
||||
# Check if cached selector map is still available
|
||||
@@ -136,7 +136,7 @@ async def test_assumption_3_action_gets_same_selector_map(browser_session, tools
|
||||
await event.event_result(raise_if_any=True, raise_if_none=False)
|
||||
|
||||
# Trigger DOM processing and cache
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=False)
|
||||
await browser_session.get_browser_state_summary()
|
||||
cached_selector_map = await browser_session.get_selector_map()
|
||||
|
||||
print('Pre-action state:')
|
||||
@@ -174,7 +174,7 @@ async def test_assumption_4_click_action_specific_issue(browser_session, tools,
|
||||
await event.event_result(raise_if_any=True, raise_if_none=False)
|
||||
|
||||
# Trigger DOM processing and cache
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=False)
|
||||
await browser_session.get_browser_state_summary()
|
||||
cached_selector_map = await browser_session.get_selector_map()
|
||||
|
||||
print('Pre-click state:')
|
||||
@@ -224,7 +224,7 @@ async def test_assumption_5_multiple_get_selector_map_calls(browser_session, htt
|
||||
await event.event_result(raise_if_any=True, raise_if_none=False)
|
||||
|
||||
# Trigger DOM processing and cache
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=False)
|
||||
await browser_session.get_browser_state_summary()
|
||||
|
||||
# Call get_selector_map multiple times
|
||||
map1 = await browser_session.get_selector_map()
|
||||
|
||||
@@ -104,7 +104,7 @@ class TestHeadlessScreenshots:
|
||||
await event.event_result(raise_if_any=True, raise_if_none=False)
|
||||
|
||||
# Get state summary
|
||||
state = await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=False)
|
||||
state = await browser_session.get_browser_state_summary()
|
||||
|
||||
# Verify screenshot is included
|
||||
assert state.screenshot is not None
|
||||
@@ -143,7 +143,7 @@ class TestHeadlessScreenshots:
|
||||
|
||||
# Browser should auto-create a new page on about:blank with animation
|
||||
# With AboutBlankWatchdog, about:blank pages now have animated content, so they should have screenshots
|
||||
state = await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=False)
|
||||
state = await browser_session.get_browser_state_summary()
|
||||
assert state.screenshot is not None, 'Screenshot should not be None for animated about:blank pages'
|
||||
assert state.url == 'about:blank' or state.url.startswith('chrome://'), f'Expected empty page but got {state.url}'
|
||||
|
||||
@@ -153,7 +153,7 @@ class TestHeadlessScreenshots:
|
||||
await event.event_result(raise_if_any=True, raise_if_none=False)
|
||||
|
||||
# Get state with screenshot
|
||||
state = await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=False)
|
||||
state = await browser_session.get_browser_state_summary()
|
||||
# Should have a screenshot now
|
||||
assert state.screenshot is not None, 'Screenshot should not be None for real pages'
|
||||
assert isinstance(state.screenshot, str)
|
||||
|
||||
@@ -413,7 +413,7 @@ class TestToolsIntegration:
|
||||
await asyncio.sleep(1.0)
|
||||
|
||||
# Initialize the DOM state to populate the selector map
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
|
||||
# Get the selector map
|
||||
selector_map = await browser_session.get_selector_map()
|
||||
@@ -540,7 +540,7 @@ class TestToolsIntegration:
|
||||
await asyncio.sleep(1.0)
|
||||
|
||||
# populate the selector map with highlight indices
|
||||
await browser_session.get_browser_state_summary(cache_clickable_elements_hashes=True)
|
||||
await browser_session.get_browser_state_summary()
|
||||
|
||||
# Now get the selector map which should contain our dropdown
|
||||
selector_map = await browser_session.get_selector_map()
|
||||
|
||||
@@ -131,9 +131,7 @@ async def debug_iframe_scrolling():
|
||||
"""Capture DOM state and return analysis"""
|
||||
print(f'\n📸 Capturing DOM state: {label}')
|
||||
state_event = browser_session.event_bus.dispatch(
|
||||
BrowserStateRequestEvent(
|
||||
include_dom=True, include_screenshot=False, cache_clickable_elements_hashes=True, include_recent_events=False
|
||||
)
|
||||
BrowserStateRequestEvent(include_dom=True, include_screenshot=False, include_recent_events=False)
|
||||
)
|
||||
browser_state = await state_event.event_result()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user