mirror of
https://github.com/browser-use/browser-use
synced 2026-05-13 17:56:35 +02:00
Merge branch 'main' into fix-browser-error-recovery
4	.github/workflows/eval.yaml (vendored)
@@ -23,6 +23,7 @@ jobs:
      EVALUATION_TOOL_URL: ${{ secrets.EVALUATION_TOOL_URL }}
      EVALUATION_TOOL_SECRET_KEY: ${{ secrets.EVALUATION_TOOL_SECRET_KEY }}
      ANCHOR_BROWSER_API_KEY: ${{ secrets.ANCHOR_BROWSER_API_KEY }}
+     BRIGHTDATA_CDP_URL: ${{ secrets.BRIGHTDATA_CDP_URL }}
      SERPER_API_KEY: ${{ secrets.SERPER_API_KEY }}
      LMNR_PROJECT_API_KEY: ${{ secrets.LMNR_PROJECT_API_KEY }}
      BROWSER_USE_LOGGING_LEVEL: ${{ secrets.BROWSER_USE_LOGGING_LEVEL }}
@@ -251,6 +252,7 @@ jobs:
          USER_MESSAGE="${{ github.event.client_payload.script_args.user_message }}"
          DEVELOPER_ID="${{ github.event.client_payload.script_args.developer_id }}"
          PLANNER_MODEL="${{ github.event.client_payload.script_args.planner_model }}"
+         BROWSER="${{ github.event.client_payload.script_args.browser }}"
          RUN_ID="${{ github.event.client_payload.script_args.run_id }}"
          LAMINAR_EVAL_ID="${{ github.event.client_payload.script_args.laminar_eval_id }}"
          # Pass raw GitHub Actions object to Python - no parsing in bash
@@ -282,7 +284,6 @@ jobs:
          [[ "${{ github.event.client_payload.script_args.no_vision }}" == "true" ]] && CMD_ARGS+=("--no-vision")
          [[ "$HEADLESS" == "true" ]] && CMD_ARGS+=("--headless")
          [[ "${{ github.event.client_payload.script_args.use_serp }}" == "true" ]] && CMD_ARGS+=("--use-serp")
-         [[ "${{ github.event.client_payload.script_args.use_anchor }}" == "true" ]] && CMD_ARGS+=("--use-anchor")
          [[ "${{ github.event.client_payload.script_args.enable_memory }}" == "true" ]] && CMD_ARGS+=("--enable-memory")
          [[ "${{ github.event.client_payload.script_args.validate_output }}" == "true" ]] && CMD_ARGS+=("--validate-output")
          [[ "${{ github.event.client_payload.script_args.include_result }}" == "true" ]] && CMD_ARGS+=("--include-result")
@@ -294,6 +295,7 @@ jobs:
          [[ -n "$USER_MESSAGE" ]] && CMD_ARGS+=("--user-message" "$USER_MESSAGE")
          [[ -n "$DEVELOPER_ID" ]] && CMD_ARGS+=("--developer-id" "$DEVELOPER_ID")
          [[ -n "$PLANNER_MODEL" ]] && CMD_ARGS+=("--planner-model" "$PLANNER_MODEL")
+         [[ -n "$BROWSER" ]] && CMD_ARGS+=("--browser" "$BROWSER")
          [[ -n "$RUN_ID" ]] && CMD_ARGS+=("--run-id" "$RUN_ID")
          [[ -n "$LAMINAR_EVAL_ID" ]] && CMD_ARGS+=("--laminar-eval-id" "$LAMINAR_EVAL_ID")
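For reference, a sketch of the repository_dispatch client_payload these steps read. The field names are taken from the workflow expressions above; the values are hypothetical and not part of the diff.

# Hypothetical payload; field names come from the workflow expressions above.
payload = {
	'script_args': {
		'user_message': 'smoke test',
		'developer_id': 'dev-42',
		'browser': 'brightdata',
		'headless': 'true',
		'run_id': 'run-123',
	}
}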
315	eval/service.py
@@ -44,6 +44,7 @@ import base64
import io
import json
import logging
import os
import re
import signal
import sys
@@ -54,6 +55,8 @@ from uuid import UUID

import anyio
import psutil
import requests
from dotenv import load_dotenv
from lmnr import AsyncLaminarClient, Laminar, observe
from PIL import Image
from pydantic import BaseModel
@@ -71,6 +74,60 @@ MAX_IMAGE = 5
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(name)s: %(message)s')
logger = logging.getLogger(__name__)

# Load dotenv
load_dotenv()

# Check for Anchor Browser API key
ANCHOR_BROWSER_API_KEY = os.getenv('ANCHOR_BROWSER_API_KEY')
if ANCHOR_BROWSER_API_KEY:
	logger.info('ANCHOR_BROWSER_API_KEY is set. Tasks can use Anchor Browser.')
else:
	logger.warning('ANCHOR_BROWSER_API_KEY is not set. Anchor Browser will not be available.')

# Check for Brightdata CDP URL
BRIGHTDATA_CDP_URL = os.getenv('BRIGHTDATA_CDP_URL')
if BRIGHTDATA_CDP_URL:
	logger.info('BRIGHTDATA_CDP_URL is set. Tasks can use Brightdata browser.')
else:
	logger.warning('BRIGHTDATA_CDP_URL is not set. Brightdata browser will not be available.')


def create_anchor_browser_session(headless: bool = False) -> str:
	"""Create an Anchor Browser session and return CDP URL"""
	browser_configuration = {
		'session': {'proxy': {'type': 'anchor_mobile', 'active': True, 'country_code': 'us'}},
		'browser': {
			'adblock': {'active': True},
			'captcha_solver': {'active': True},
			'headless': {'active': headless},
			'extra_stealth': {'active': True},
		},
	}

	try:
		response = requests.post(
			'https://api.anchorbrowser.io/v1/sessions',
			headers={
				'anchor-api-key': ANCHOR_BROWSER_API_KEY,
				'Content-Type': 'application/json',
			},
			json=browser_configuration,
		)
		response.raise_for_status()
		session_data = response.json()['data']
		session_id = session_data['id']

		# Return only the CDP URL
		return f'wss://connect.anchorbrowser.io?apiKey={ANCHOR_BROWSER_API_KEY}&sessionId={session_id}'

	except requests.RequestException as e:
		logger.error(f'Failed to create Anchor Browser session: {type(e).__name__}: {e}')
		raise
	except KeyError as e:
		logger.error(f'Unexpected response format from Anchor Browser API: {e}')
		raise


Laminar.initialize()
laminar_client = AsyncLaminarClient()
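For orientation, a minimal usage sketch (not part of the diff) showing how the returned CDP URL is consumed. The BrowserProfile/BrowserSession kwargs mirror code later in this commit; the import path is an assumption.

# Sketch only: import path and kwargs assumed from the code in this commit.
import asyncio

from browser_use import BrowserProfile, BrowserSession


async def demo() -> None:
	# Blocking HTTP call kept off the event loop, as the commit does below.
	cdp_url = await asyncio.to_thread(create_anchor_browser_session, True)  # headless=True
	profile = BrowserProfile(user_data_dir=None, headless=True)
	session = BrowserSession(browser_profile=profile, cdp_url=cdp_url)
	await session.start()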
@@ -493,7 +550,6 @@ from dataclasses import dataclass, field
from enum import Enum
from typing import Any

-import requests
from dotenv import load_dotenv

# Import the new comprehensive judge system (conditional import for backwards compatibility)
@@ -1181,6 +1237,7 @@ class Task:
		self.login_type = kwargs.get('login_type', None)
		self.category = kwargs.get('category', None)
		self.output_schema = kwargs.get('output_schema', None)  # Add structured output schema support
+		self.auth_keys = kwargs.get('auth_keys', None)  # List of auth keys to fetch from auth distribution
		if self.output_schema:
			# Convert JSON schema to Pydantic model class
			self.output_model = create_pydantic_model_from_schema(self.output_schema, f'Task_{self.task_id}_Output')
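For concreteness, a hedged sketch of the output_schema path. The schema itself is hypothetical; create_pydantic_model_from_schema is the helper referenced in the hunk above, and its exact field mapping is assumed.

# Hypothetical schema; converted to a Pydantic model class as in __init__ above.
example_schema = {
	'type': 'object',
	'properties': {'title': {'type': 'string'}, 'price': {'type': 'number'}},
	'required': ['title'],
}
OutputModel = create_pydantic_model_from_schema(example_schema, 'Task_demo_Output')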
@@ -1197,6 +1254,7 @@ class Task:
			'login_type',
			'category',
			'output_schema',
+			'auth_keys',
		}
		self.additional_fields = {k: v for k, v in kwargs.items() if k not in known_fields}
@@ -1206,7 +1264,7 @@ class Task:

	def __str__(self):
		# Include main fields and indicate if there are additional fields
-		base_str = f'Task(task_id={self.task_id}, confirmed_task={self.confirmed_task}, website={self.website}, reference_length={self.reference_length}, level={self.level}, cluster_id={self.cluster_id}, login_cookie={self.login_cookie}, login_type={self.login_type}, category={self.category}, output_schema={self.output_schema}'
+		base_str = f'Task(task_id={self.task_id}, confirmed_task={self.confirmed_task}, website={self.website}, reference_length={self.reference_length}, level={self.level}, cluster_id={self.cluster_id}, login_cookie={self.login_cookie}, login_type={self.login_type}, category={self.category}, output_schema={self.output_schema}, auth_keys={self.auth_keys}'
		if self.additional_fields:
			additional_str = ', '.join(f'{k}={v}' for k, v in self.additional_fields.items())
			base_str += f', {additional_str}'
@@ -1390,11 +1448,45 @@ async def run_stage(stage: Stage, stage_func, timeout: int | None = None):
	return await stage_func()


-async def setup_browser_session(task: Task, headless: bool, highlight_elements: bool = True) -> BrowserSession:
+async def setup_browser_session(
+	task: Task, headless: bool, highlight_elements: bool = True, browser: str = 'local'
+) -> BrowserSession:
	"""Setup browser session for the task"""
	logger.debug(f'Browser setup: Initializing BrowserSession for task {task.task_id}')

	# Use incognito mode (user_data_dir=None) for evaluations to avoid state pollution
+	# Validate browser option
+	valid_browsers = ['local', 'anchor-browser', 'brightdata', 'browser-use']
+	if browser not in valid_browsers:
+		logger.warning(f'Browser setup: Invalid browser option "{browser}". Falling back to local browser.')
+		browser = 'local'
+
+	cdp_url = None
+
+	if browser == 'anchor-browser':
+		if ANCHOR_BROWSER_API_KEY:
+			try:
+				logger.debug(f'Browser setup: Creating Anchor Browser session for task {task.task_id}')
+				cdp_url = await asyncio.to_thread(create_anchor_browser_session, headless)
+			except Exception as e:
+				logger.error(
+					f'Browser setup: Failed to create Anchor Browser session for task {task.task_id}: {type(e).__name__}: {e}'
+				)
+				logger.info(f'Browser setup: Falling back to local browser for task {task.task_id}')
+				cdp_url = None
+		else:
+			logger.warning(
+				f'Browser setup: Anchor Browser requested but ANCHOR_BROWSER_API_KEY not set. Using local browser for task {task.task_id}'
+			)
+	elif browser == 'brightdata':
+		if BRIGHTDATA_CDP_URL:
+			logger.debug(f'Browser setup: Using Brightdata CDP URL for task {task.task_id}')
+			cdp_url = BRIGHTDATA_CDP_URL
+		else:
+			logger.warning(
+				f'Browser setup: Brightdata requested but BRIGHTDATA_CDP_URL not set. Using local browser for task {task.task_id}'
+			)
+	elif browser == 'browser-use':
+		logger.warning(f'Browser setup: Browser-use not implemented yet. Falling back to local browser for task {task.task_id}')

	profile_kwargs = {
		'user_data_dir': None,  # Incognito mode - no persistent state
		'headless': headless,
@@ -1433,10 +1525,16 @@ async def setup_browser_session(task: Task, headless: bool, highlight_elements:
		logger.debug(f'Login task {task.task_id}: Configured to save cookies to {storage_state_path}')

	profile = BrowserProfile(**profile_kwargs)
-	browser_session = BrowserSession(browser_profile=profile)

+	if cdp_url:
+		logger.debug(f'Browser setup: Using CDP Browser for task {task.task_id}')
+		browser_session = BrowserSession(browser_profile=profile, cdp_url=cdp_url)
+	else:
+		# Use local browser
+		logger.debug(f'Browser setup: Initializing BrowserSession for task {task.task_id}')
+		browser_session = BrowserSession(browser_profile=profile)

	# Start browser session
	logger.debug(f'Browser setup: Starting browser session for task {task.task_id}')
	await browser_session.start()
	logger.debug(f'Browser setup: Browser session started for task {task.task_id}')
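A short sketch (not part of the diff) of how the new parameter is exercised end to end, assuming an async caller and a Task instance as defined in this file.

# Sketch only: demonstrates the fallback behavior of the branches above.
async def demo_setup(task: Task) -> None:
	# 'brightdata' falls back to a local browser when BRIGHTDATA_CDP_URL is unset.
	session = await setup_browser_session(task, headless=True, highlight_elements=True, browser='brightdata')
	logger.info(f'Session ready for task {task.task_id}: {session}')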
@@ -1570,11 +1668,11 @@ async def evaluate_task_with_login_cookie(login_cookie: str, task_folder: Path)
	If not found in tracking, falls back to checking end-state cookies.

	Args:
-	        login_cookie: String identifier that should appear in cookies if login was successful
-	        task_folder: Path to the task result folder containing saved cookies
+		login_cookie: String identifier that should appear in cookies if login was successful
+		task_folder: Path to the task result folder containing saved cookies

	Returns:
-	        Dictionary containing evaluation results similar to Online_Mind2Web_eval format
+		Dictionary containing evaluation results similar to Online_Mind2Web_eval format
	"""
	task_id = task_folder.name
@@ -1767,8 +1865,10 @@ async def run_task_with_semaphore(
	headless: bool,
	use_vision: bool,
	semaphore_runs: asyncio.Semaphore,  # Pass semaphore as argument
+	auth_distribution: dict | None = None,  # Pre-fetched auth distribution
	github_workflow_url: str | None = None,
	use_serp: bool = False,
+	browser: str = 'local',
	enable_memory: bool = False,
	memory_interval: int = 10,
	max_actions_per_step: int = 10,
@@ -1864,7 +1964,9 @@ async def run_task_with_semaphore(
			)

			browser_session = await run_stage(
-				Stage.SETUP_BROWSER, lambda: setup_browser_session(task, headless, highlight_elements), timeout=120
+				Stage.SETUP_BROWSER,
+				lambda: setup_browser_session(task, headless, highlight_elements, browser),
+				timeout=120,
			)
			task_result.stage_completed(Stage.SETUP_BROWSER)
			logger.info(f'Task {task.task_id}: Browser session started successfully.')
@@ -1892,6 +1994,38 @@ async def run_task_with_semaphore(
				convex_url, secret_key, run_id, task.task_id, 'run_agent', 'active', github_workflow_url
			)

+			# Handle auth information if task requires it
+			task_with_auth = task
+			if hasattr(task, 'auth_keys') and task.auth_keys:
+				# Validate auth_keys is a list
+				if isinstance(task.auth_keys, list) and len(task.auth_keys) > 0:
+					if auth_distribution:
+						logger.info(
+							f'Task {task.task_id}: Using pre-fetched auth distribution for auth_keys: {task.auth_keys}'
+						)
+						auth_info_text = format_auth_info_for_agent(auth_distribution, task.auth_keys)
+						if auth_info_text:
+							# Create a modified task with auth info appended
+							class TaskWithAuth(Task):
+								def __init__(self, original_task: Task, auth_text: str):
+									# Copy all attributes from original task
+									for attr_name in dir(original_task):
+										if not attr_name.startswith('__'):
+											setattr(self, attr_name, getattr(original_task, attr_name))
+									# Modify the confirmed_task to include auth info
+									self.confirmed_task = original_task.confirmed_task + auth_text
+
+							task_with_auth = TaskWithAuth(task, auth_info_text)
+							logger.info(f'Task {task.task_id}: Auth info added to task description')
+						else:
+							logger.warning(
+								f'Task {task.task_id}: No matching auth info found for keys: {task.auth_keys}'
+							)
+					else:
+						logger.warning(f'Task {task.task_id}: Auth keys specified but no auth distribution available')
+				else:
+					logger.warning(f'Task {task.task_id}: auth_keys is not a valid list: {task.auth_keys}')

			# Start timing for agent execution only
			agent_start_time = time.time()
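A toy demonstration (not in the diff) of the attribute-copy pattern above. Note that dir() also returns methods and non-dunder class attributes, which is why the loop filters on dunder names only; all names here are hypothetical.

# Hypothetical classes illustrating the dir()-based copy used by TaskWithAuth.
class Original:
	def __init__(self):
		self.confirmed_task = 'Buy a ticket'
		self.task_id = 't1'


class WithSuffix(Original):
	def __init__(self, original, suffix):
		# Copy every non-dunder attribute from the original instance.
		for attr_name in dir(original):
			if not attr_name.startswith('__'):
				setattr(self, attr_name, getattr(original, attr_name))
		self.confirmed_task = original.confirmed_task + suffix


print(WithSuffix(Original(), ' (credentials appended here)').confirmed_task)
# -> Buy a ticket (credentials appended here)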
@@ -1899,7 +2033,7 @@ async def run_task_with_semaphore(
				Stage.RUN_AGENT,
				lambda: run_agent_with_browser(
					browser_session,
-					task,
+					task_with_auth,
					llm,
					max_steps_per_task,
					use_vision,
@@ -2163,6 +2297,7 @@ async def run_multiple_tasks(
	convex_url: str,
	secret_key: str,
	eval_model: BaseChatModel,
+	auth_distribution: dict | None = None,
	github_workflow_url: str | None = None,
	max_parallel_runs: int = 3,
	max_steps_per_task: int = 25,
@@ -2171,6 +2306,7 @@ async def run_multiple_tasks(
	headless: bool = False,
	use_vision: bool = True,
	use_serp: bool = False,
+	browser: str = 'local',
	enable_memory: bool = False,
	memory_interval: int = 10,
	max_actions_per_step: int = 10,
@@ -2249,8 +2385,10 @@ async def run_multiple_tasks(
				headless=headless,
				use_vision=use_vision,
				semaphore_runs=semaphore_runs,  # Pass the semaphore
+				auth_distribution=auth_distribution,  # Pass the pre-fetched auth distribution
				github_workflow_url=github_workflow_url,
				use_serp=use_serp,
+				browser=browser,
				enable_memory=enable_memory,
				memory_interval=memory_interval,
				max_actions_per_step=max_actions_per_step,
@@ -2373,6 +2511,109 @@ def fetch_tasks_from_server(convex_url: str, secret_key: str, test_case_name: st
	return None


# Helper function to fetch auth distribution from the server
def fetch_auth_distribution_from_server(convex_url: str, secret_key: str):
	"""Fetches an available auth distribution from the Convex HTTP endpoint."""

	if not convex_url:
		logger.error('Error: EVALUATION_TOOL_URL environment variable not set.')
		return None

	if not secret_key:
		logger.error('Error: EVALUATION_TOOL_SECRET_KEY environment variable not set.')
		return None

	endpoint_url = f'{convex_url}/api/getAuthDistribution'
	headers = {
		'Authorization': f'Bearer {secret_key}',
		'Content-Type': 'application/json',
	}

	logger.info(f'Fetching auth distribution from {endpoint_url}...')

	try:
		response = requests.post(endpoint_url, headers=headers, json={})

		logger.info(f'Fetch Auth Distribution Status Code: {response.status_code}')

		if response.status_code == 200:
			try:
				data = response.json()
				logger.info('Successfully fetched auth distribution data.')
				# Verify the response has the expected structure
				if isinstance(data, dict) and 'id' in data and 'loginInfo' in data:
					return data
				else:
					logger.error(
						f'Error: Fetched auth distribution data has unexpected structure. Keys: {list(data.keys()) if isinstance(data, dict) else "Not a dict"}'
					)
					logger.error(f'Raw response: {response.text}')
					return None

			except json.JSONDecodeError:
				logger.error('Error: Failed to decode JSON response for auth distribution.')
				logger.error(f'Raw response text: {response.text}')
				return None
		elif response.status_code == 404:
			logger.warning('No available auth distribution found on server.')
			return None
		else:
			logger.error(f'Error: Failed to fetch auth distribution. Status: {response.status_code}')
			logger.error(f'Response: {response.text}')
			return None

	except requests.exceptions.RequestException as e:
		logger.error(f'Error during request to fetch auth distribution: {type(e).__name__}: {e}')
		return None

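The endpoint contract this implies, inferred from the parsing above (URL and key below are placeholders, not part of the diff): a POST with a Bearer token returns a dict with 'id' and 'loginInfo' keys, or 404 when nothing is available.

# Placeholder values; response shape inferred from the structure checks above.
dist = fetch_auth_distribution_from_server('https://eval.example.convex.site', 'secret-key')
if dist is not None:
	print(list(dist['loginInfo'].keys()))  # e.g. ['google', 'facebook']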
# Helper function to format auth information for the agent
def format_auth_info_for_agent(auth_distribution: dict, auth_keys: list[str]) -> str:
	"""
	Formats auth information from auth distribution for the agent task description.

	Args:
		auth_distribution: Dict with 'loginInfo' key containing auth data
		auth_keys: List of auth keys to extract (e.g., ['google', 'facebook'])

	Returns:
		Formatted string with login credentials or empty string if no matching keys
	"""
	if not auth_distribution or not auth_keys:
		return ''

	login_info = auth_distribution.get('loginInfo', {})
	if not login_info:
		logger.warning('Auth distribution has no loginInfo')
		return ''

	# Extract relevant auth information based on auth_keys
	relevant_auths = []
	for auth_key in auth_keys:
		if auth_key in login_info:
			auth_data = login_info[auth_key]
			if isinstance(auth_data, dict):
				# Format the auth data for this key
				auth_details = []
				for key, value in auth_data.items():
					auth_details.append(f'{key}: {value}')

				if auth_details:
					relevant_auths.append(f'{auth_key} with {", ".join(auth_details)}')
			else:
				logger.warning(f"Auth data for key '{auth_key}' is not a dictionary: {type(auth_data)}")
		else:
			logger.warning(f"Auth key '{auth_key}' not found in available login info. Available keys: {list(login_info.keys())}")

	if relevant_auths:
		auth_text = f'\n\nThe following login credentials can be used to complete this task: {"; ".join(relevant_auths)}.'
		logger.info(f'Formatted auth info: {auth_text}')
		return auth_text
	else:
		logger.warning(f'No matching auth keys found. Requested: {auth_keys}, Available: {list(login_info.keys())}')
		return ''


# Helper function to get git information
def get_git_info():
	"""Retrieves git branch, commit hash, commit timestamp, and repository URL using subprocess."""
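A quick check of what format_auth_info_for_agent produces, with hypothetical credentials (no real data):

# Hypothetical input; the expected output follows from the code above.
auth_distribution = {
	'id': 'dist-1',
	'loginInfo': {'google': {'username': 'eval@example.com', 'password': 'hunter2'}},
}
print(format_auth_info_for_agent(auth_distribution, ['google']))
# -> "\n\nThe following login credentials can be used to complete this task:
#     google with username: eval@example.com, password: hunter2."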
@@ -2602,6 +2843,7 @@ async def run_evaluation_pipeline(
	convex_url: str,
	secret_key: str,
	eval_model: BaseChatModel,
+	auth_distribution: dict | None = None,
	github_workflow_url: str | None = None,
	max_parallel_runs: int = 3,
	max_steps_per_task: int = 25,
@@ -2610,6 +2852,7 @@ async def run_evaluation_pipeline(
	headless: bool = False,
	use_vision: bool = True,
	use_serp: bool = False,
+	browser: str = 'local',
	enable_memory: bool = False,
	memory_interval: int = 10,
	max_actions_per_step: int = 10,
@@ -2656,6 +2899,7 @@ async def run_evaluation_pipeline(
		convex_url=convex_url,
		secret_key=secret_key,
		eval_model=eval_model,
+		auth_distribution=auth_distribution,
		github_workflow_url=github_workflow_url,
		max_parallel_runs=max_parallel_runs,
		max_steps_per_task=max_steps_per_task,
@@ -2664,6 +2908,7 @@ async def run_evaluation_pipeline(
		headless=headless,
		use_vision=use_vision,
		use_serp=use_serp,
+		browser=browser,
		enable_memory=enable_memory,
		memory_interval=memory_interval,
		max_actions_per_step=max_actions_per_step,
@@ -2796,6 +3041,12 @@ if __name__ == '__main__':
	parser.add_argument('--eval-group', type=str, default='', help='Evaluation group to include in the run')
	parser.add_argument('--developer-id', type=str, default=None, help='Name of the developer starting the run')
	parser.add_argument('--use-serp', action='store_true', help='Use SERP search instead of Google search')
+	parser.add_argument(
+		'--browser',
+		type=str,
+		default='local',
+		help='Browser to use: local, anchor-browser, brightdata, browser-use (default: local)',
+	)
	parser.add_argument('--enable-memory', action='store_true', help='Enable mem0 memory system for agents')
	parser.add_argument('--memory-interval', type=int, default=10, help='Memory creation interval (default: 10 steps)')
	parser.add_argument('--max-actions-per-step', type=int, default=10, help='Maximum number of actions per step (default: 10)')
@@ -2837,7 +3088,6 @@ if __name__ == '__main__':
	)
	parser.add_argument('--use-mind2web-judge', action='store_true', help='Use original judge')
	parser.add_argument('--no-thinking', action='store_true', help='Disable thinking in agent system prompt')
-	parser.add_argument('--use-anchor', action='store_true', help='Use anchor to navigate to the page')
	parser.add_argument('--github-workflow-url', type=str, default=None, help='GitHub workflow URL for tracking')

	# Gmail 2FA support arguments
@@ -2934,6 +3184,7 @@ if __name__ == '__main__':
	# --- Load Tasks (Either Single Task or from Server) ---
	tasks = []
	task_id = None  # Initialize for proper scoping
+	auth_distribution = None  # Initialize auth distribution

	# Check if this is single task mode
	if args.task_text:
@@ -2968,10 +3219,28 @@ if __name__ == '__main__':
			logger.info(f'Successfully loaded {len(tasks)} tasks from the server.')
		except (TypeError, ValueError) as e:
			logger.error(
-				f'Error creating Task objects from fetched data. Ensure the data structure includes required fields (task_id, confirmed_task). Known optional fields: website, reference_length, level, cluster_id, login_cookie, login_type, category. Any additional fields will be accepted dynamically. Error: {type(e).__name__}: {e}'
+				f'Error creating Task objects from fetched data. Ensure the data structure includes required fields (task_id, confirmed_task). Known optional fields: website, reference_length, level, cluster_id, login_cookie, login_type, category, auth_keys. Any additional fields will be accepted dynamically. Error: {type(e).__name__}: {e}'
			)
			logger.error(f'First item in fetched data: {fetched_task_data[0] if fetched_task_data else "None"}')
			exit(1)

+	# --- Fetch Auth Distribution Once (if any tasks need auth) ---
+	tasks_with_auth = [
+		task
+		for task in tasks
+		if hasattr(task, 'auth_keys') and task.auth_keys and isinstance(task.auth_keys, list) and len(task.auth_keys) > 0
+	]
+	if tasks_with_auth and CONVEX_URL and SECRET_KEY:
+		logger.info(f'Found {len(tasks_with_auth)} tasks requiring auth. Fetching auth distribution...')
+		auth_distribution = fetch_auth_distribution_from_server(CONVEX_URL, SECRET_KEY)
+		if auth_distribution:
+			logger.info(
+				f'Successfully fetched auth distribution with login info for: {list(auth_distribution.get("loginInfo", {}).keys())}'
+			)
+		else:
+			logger.warning('Failed to fetch auth distribution. Tasks requiring auth may fail.')
+	elif tasks_with_auth:
+		logger.warning(f'Found {len(tasks_with_auth)} tasks requiring auth but no server config available')
	# -----------------------------

	# --- Start Run on Server (with optional existing Run ID) ---
@@ -3048,6 +3317,22 @@ if __name__ == '__main__':
	else:
		logger.info('🔍 Using default Google search')

+	# Log browser mode being used
+	if args.browser == 'anchor-browser':
+		if ANCHOR_BROWSER_API_KEY:
+			logger.info('🌐 Using Anchor Browser (remote browser service)')
+		else:
+			logger.warning('⚠️ --browser anchor-browser provided but ANCHOR_BROWSER_API_KEY not set. Will use local browser!')
+	elif args.browser == 'brightdata':
+		if BRIGHTDATA_CDP_URL:
+			logger.info('🌐 Using Brightdata browser (remote browser service)')
+		else:
+			logger.warning('⚠️ --browser brightdata provided but BRIGHTDATA_CDP_URL not set. Will use local browser!')
+	elif args.browser == 'browser-use':
+		logger.warning('🌐 Browser-use not implemented yet. Will use local browser!')
+	else:
+		logger.info('🌐 Using local browser')
+
	# Log memory configuration
	if args.enable_memory:
		logger.info(f'🧠 Memory enabled: mem0 system with interval={args.memory_interval} steps')
@@ -3137,6 +3422,7 @@ if __name__ == '__main__':
		convex_url=convex_url,
		secret_key=secret_key,
		eval_model=eval_model,
+		auth_distribution=auth_distribution,
		github_workflow_url=args.github_workflow_url,
		max_parallel_runs=parallel_runs,
		max_steps_per_task=args.max_steps,
@@ -3145,6 +3431,7 @@ if __name__ == '__main__':
		headless=args.headless,
		use_vision=not args.no_vision,
		use_serp=args.use_serp,
+		browser=args.browser,
		enable_memory=args.enable_memory,
		memory_interval=args.memory_interval,
		max_actions_per_step=args.max_actions_per_step,