"""
|
||
Cloud Example 5: Search API (Beta) 🔍
|
||
=====================================
|
||
|
||
This example demonstrates the Browser Use Search API (BETA):
|
||
- Simple search: Search Google and extract from multiple results
|
||
- URL search: Extract specific content from a target URL
|
||
- Deep navigation through websites (depth parameter)
|
||
- Real-time content extraction vs cached results
|
||
|
||
Perfect for: Content extraction, research, competitive analysis
|
||
"""
|
||
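
# How to run (the filename below is illustrative; use whatever name you saved
# this script under):
#   export BROWSER_USE_API_KEY=your-key-here
#   python search_api_example.py --demo news
#   python search_api_example.py --demo all
# Valid --demo values: news, competitive, deep, product, realtime, depth, all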

import argparse
import asyncio
import json
import os
import time
from typing import Any

import aiohttp

# Configuration
API_KEY = os.getenv('BROWSER_USE_API_KEY')
if not API_KEY:
    raise ValueError(
        'Please set the BROWSER_USE_API_KEY environment variable. You can create an API key at https://cloud.browser-use.com/new-api-key'
    )

BASE_URL = os.getenv('BROWSER_USE_BASE_URL', 'https://api.browser-use.com/api/v1')
TIMEOUT = int(os.getenv('BROWSER_USE_TIMEOUT', '30'))
HEADERS = {'Authorization': f'Bearer {API_KEY}', 'Content-Type': 'application/json'}
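
# Response shapes assumed by the helpers below. These are inferred from how this
# example consumes the results, not from an official schema:
#   POST /simple-search -> {'results': [{'url': ..., 'content': ...}, ...],
#                           'live_url': ..., 'public_share_url' or 'share_url': ...}
#   POST /search-url    -> {'url': ..., 'content': ...,
#                           'live_url': ..., 'public_share_url' or 'share_url': ...}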


async def simple_search(query: str, max_websites: int = 5, depth: int = 2) -> dict[str, Any]:
    """
    Search Google and extract content from multiple top results.

    Args:
        query: Search query to process
        max_websites: Number of websites to process (1-10)
        depth: How deep to navigate (2-5)

    Returns:
        Dictionary with results from multiple websites
    """
    # Validate input parameters
    max_websites = max(1, min(max_websites, 10))  # Clamp to 1-10
    depth = max(2, min(depth, 5))  # Clamp to 2-5

    start_time = time.time()

    print(f"🔍 Simple Search: '{query}'")
    print(f'📊 Processing {max_websites} websites at depth {depth}')
    print(f'💰 Estimated cost: {depth * max_websites}¢')

    payload = {'query': query, 'max_websites': max_websites, 'depth': depth}

    timeout = aiohttp.ClientTimeout(total=TIMEOUT)
    connector = aiohttp.TCPConnector(limit=10)  # Limit concurrent connections

    async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
        async with session.post(f'{BASE_URL}/simple-search', json=payload, headers=HEADERS) as response:
            elapsed = time.time() - start_time
            if response.status == 200:
                try:
                    result = await response.json()
                    print(f'✅ Found results from {len(result.get("results", []))} websites in {elapsed:.1f}s')
                    return result
                except (aiohttp.ContentTypeError, json.JSONDecodeError) as e:
                    error_text = await response.text()
                    print(f'❌ Invalid JSON response: {e} (after {elapsed:.1f}s)')
                    return {'error': 'Invalid JSON', 'details': error_text}
            else:
                error_text = await response.text()
                print(f'❌ Search failed: {response.status} - {error_text} (after {elapsed:.1f}s)')
                return {'error': f'HTTP {response.status}', 'details': error_text}
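
# Standalone usage sketch (query and parameter values are illustrative):
#   results = asyncio.run(simple_search('best python web frameworks', max_websites=3, depth=2))
#   display_simple_search_results(results)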


async def search_url(url: str, query: str, depth: int = 2) -> dict[str, Any]:
    """
    Extract specific content from a target URL.

    Args:
        url: Target URL to extract from
        query: What specific content to look for
        depth: How deep to navigate (2-5)

    Returns:
        Dictionary with extracted content
    """
    # Validate input parameters
    depth = max(2, min(depth, 5))  # Clamp to 2-5

    start_time = time.time()

    print(f'🎯 URL Search: {url}')
    print(f"🔍 Looking for: '{query}'")
    print(f'📊 Navigation depth: {depth}')
    print(f'💰 Estimated cost: {depth}¢')

    payload = {'url': url, 'query': query, 'depth': depth}

    timeout = aiohttp.ClientTimeout(total=TIMEOUT)
    connector = aiohttp.TCPConnector(limit=10)  # Limit concurrent connections

    async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
        async with session.post(f'{BASE_URL}/search-url', json=payload, headers=HEADERS) as response:
            elapsed = time.time() - start_time
            if response.status == 200:
                try:
                    result = await response.json()
                    print(f'✅ Extracted content from {result.get("url", "website")} in {elapsed:.1f}s')
                    return result
                except (aiohttp.ContentTypeError, json.JSONDecodeError) as e:
                    error_text = await response.text()
                    print(f'❌ Invalid JSON response: {e} (after {elapsed:.1f}s)')
                    return {'error': 'Invalid JSON', 'details': error_text}
            else:
                error_text = await response.text()
                print(f'❌ URL search failed: {response.status} - {error_text} (after {elapsed:.1f}s)')
                return {'error': f'HTTP {response.status}', 'details': error_text}
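
# The raw request search_url sends, for reference (sketch; values illustrative):
#   POST {BASE_URL}/search-url
#   Authorization: Bearer $BROWSER_USE_API_KEY
#   {"url": "https://example.com", "query": "pricing details", "depth": 2}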


def display_simple_search_results(results: dict[str, Any]):
    """Display simple search results in a readable format."""
    if 'error' in results:
        print(f'❌ Error: {results["error"]}')
        return

    websites = results.get('results', [])

    print(f'\n📋 Search Results ({len(websites)} websites)')
    print('=' * 50)

    for i, site in enumerate(websites, 1):
        url = site.get('url', 'Unknown URL')
        content = site.get('content', 'No content')

        print(f'\n{i}. 🌐 {url}')
        print('-' * 40)

        # Show first 300 chars of content
        if len(content) > 300:
            print(f'{content[:300]}...')
            print(f'[Content truncated - {len(content)} total characters]')
        else:
            print(content)

    # Show execution URLs if available
    if results.get('live_url'):
        print(f'\n🔗 Live Preview: {results["live_url"]}')
    if results.get('public_share_url'):
        print(f'🌐 Share URL: {results["public_share_url"]}')
    elif results.get('share_url'):
        print(f'🌐 Share URL: {results["share_url"]}')


def display_url_search_results(results: dict[str, Any]):
    """Display URL search results in a readable format."""
    if 'error' in results:
        print(f'❌ Error: {results["error"]}')
        return

    url = results.get('url', 'Unknown URL')
    content = results.get('content', 'No content')

    print(f'\n📄 Extracted Content from: {url}')
    print('=' * 60)
    print(content)

    # Show execution URLs if available
    if results.get('live_url'):
        print(f'\n🔗 Live Preview: {results["live_url"]}')
    if results.get('public_share_url'):
        print(f'🌐 Share URL: {results["public_share_url"]}')
    elif results.get('share_url'):
        print(f'🌐 Share URL: {results["share_url"]}')


async def demo_news_search():
    """Demo: Search for latest news across multiple sources."""
    print('\n📰 Demo 1: Latest News Search')
    print('-' * 35)

    demo_start = time.time()
    query = 'latest developments in artificial intelligence 2024'
    results = await simple_search(query, max_websites=4, depth=2)
    demo_elapsed = time.time() - demo_start

    display_simple_search_results(results)
    print(f'\n⏱️ Total demo time: {demo_elapsed:.1f}s')

    return results


async def demo_competitive_analysis():
    """Demo: Analyze competitor websites."""
    print('\n🏢 Demo 2: Competitive Analysis')
    print('-' * 35)

    query = 'browser automation tools comparison features pricing'
    results = await simple_search(query, max_websites=3, depth=3)
    display_simple_search_results(results)

    return results


async def demo_deep_website_analysis():
    """Demo: Deep analysis of a specific website."""
    print('\n🎯 Demo 3: Deep Website Analysis')
    print('-' * 35)

    demo_start = time.time()
    url = 'https://docs.browser-use.com'
    query = 'Browser Use features, pricing, and API capabilities'
    results = await search_url(url, query, depth=3)
    demo_elapsed = time.time() - demo_start

    display_url_search_results(results)
    print(f'\n⏱️ Total demo time: {demo_elapsed:.1f}s')

    return results


async def demo_product_research():
    """Demo: Product research and comparison."""
    print('\n🛍️ Demo 4: Product Research')
    print('-' * 30)

    query = 'best wireless headphones 2024 reviews comparison'
    results = await simple_search(query, max_websites=5, depth=2)
    display_simple_search_results(results)

    return results


async def demo_real_time_vs_cached():
    """Demo: Show the difference between real-time and cached results."""
    print('\n⚡ Demo 5: Real-time vs Cached Data')
    print('-' * 40)

    print('🔄 Browser Use Search API benefits:')
    print('• Actually browses websites like a human')
    print('• Gets live, current data (not cached)')
    print('• Navigates deep into sites via clicks')
    print('• Handles JavaScript and dynamic content')
    print('• Accesses pages requiring navigation')

    # Example with live data
    query = 'current Bitcoin price USD live'
    results = await simple_search(query, max_websites=3, depth=2)

    print('\n💰 Live Bitcoin Price Search Results:')
    display_simple_search_results(results)

    return results


async def demo_search_depth_comparison():
    """Demo: Compare different search depths."""
    print('\n📊 Demo 6: Search Depth Comparison')
    print('-' * 40)

    url = 'https://news.ycombinator.com'
    query = 'trending technology discussions'

    depths = [2, 3, 4]
    results = {}

    for depth in depths:
        print(f'\n🔍 Testing depth {depth}:')
        result = await search_url(url, query, depth)
        results[depth] = result

        if 'content' in result:
            content_length = len(result['content'])
            print(f'📏 Content length: {content_length} characters')

        # Brief pause between requests
        await asyncio.sleep(1)

    # Summary
    print('\n📊 Depth Comparison Summary:')
    print('-' * 30)
    for depth, result in results.items():
        if 'content' in result:
            length = len(result['content'])
            print(f'Depth {depth}: {length} characters')
        else:
            print(f'Depth {depth}: Error or no content')

    return results
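
# Rule of thumb from the depth comparison above: each extra depth level multiplies
# cost (1¢ × depth per website, per the pricing notes in main) and adds navigation
# time, so start at depth=2 and raise it only if shallow runs miss the content you need.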


async def main():
    """Demonstrate comprehensive Search API usage."""
    print('🔍 Browser Use Cloud - Search API (BETA)')
    print('=' * 45)

    print('⚠️ Note: This API is in BETA and may change')
    print()
    print('🎯 Search API Features:')
    print('• Real-time website browsing (not cached)')
    print('• Deep navigation through multiple pages')
    print('• Dynamic content and JavaScript handling')
    print('• Multiple result aggregation')
    print('• Cost-effective content extraction')

    print('\n💰 Pricing:')
    print('• Simple Search: 1¢ × depth × websites')
    print('• URL Search: 1¢ × depth')
    print('• Example: depth=2, 5 websites = 10¢')

    try:
        # Parse command line arguments
        parser = argparse.ArgumentParser(description='Search API (BETA) examples')
        parser.add_argument(
            '--demo',
            choices=['news', 'competitive', 'deep', 'product', 'realtime', 'depth', 'all'],
            default='news',
            help='Which demo to run',
        )
        args = parser.parse_args()

        print(f'\n🔍 Running {args.demo} demo(s)...')

        if args.demo == 'news':
            await demo_news_search()
        elif args.demo == 'competitive':
            await demo_competitive_analysis()
        elif args.demo == 'deep':
            await demo_deep_website_analysis()
        elif args.demo == 'product':
            await demo_product_research()
        elif args.demo == 'realtime':
            await demo_real_time_vs_cached()
        elif args.demo == 'depth':
            await demo_search_depth_comparison()
        elif args.demo == 'all':
            await demo_news_search()
            await demo_competitive_analysis()
            await demo_deep_website_analysis()
            await demo_product_research()
            await demo_real_time_vs_cached()
            await demo_search_depth_comparison()

    except aiohttp.ClientError as e:
        print(f'❌ Network Error: {e}')
    except Exception as e:
        print(f'❌ Error: {e}')


if __name__ == '__main__':
    asyncio.run(main())