mirror of
https://github.com/browser-use/browser-use
synced 2026-05-06 17:52:15 +02:00
91 lines
2.3 KiB
Python
Executable File
91 lines
2.3 KiB
Python
Executable File
#!/usr/bin/env -S uv run --script
|
|
# /// script
|
|
# requires-python = ">=3.11"
|
|
# dependencies = ["browser-use", "mistralai"]
|
|
# ///
|
|
|
|
import os
|
|
import sys
|
|
|
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
|
|
|
from dotenv import load_dotenv
|
|
|
|
load_dotenv()
|
|
|
|
import asyncio
|
|
import logging
|
|
|
|
from mistralai import Mistral # type: ignore
|
|
from pydantic import BaseModel, Field
|
|
|
|
from browser_use import Agent, Controller
|
|
from browser_use.agent.views import ActionResult
|
|
from browser_use.browser.context import BrowserContext
|
|
from browser_use.llm import ChatOpenAI
|
|
|
|
# Fail fast when a required credential is missing from the environment.
# MISTRAL_API_KEY is read by the OCR action defined below; OPENAI_API_KEY is
# presumably consumed by ChatOpenAI (not visible in this file).
for _required_key in ('OPENAI_API_KEY', 'MISTRAL_API_KEY'):
    if os.getenv(_required_key):
        continue
    raise ValueError(f'{_required_key} is not set. Please add it to your environment variables.')
del _required_key  # keep the loop variable out of the module namespace

# Module-level logger; main() reports the agent's result through it.
logger = logging.getLogger(__name__)

# Controller whose registry receives the custom 'Extract PDF Text' action and
# which is handed to the Agent in main().
controller = Controller()
|
|
|
|
|
|
# Parameter model for the 'Extract PDF Text' action: a single `url` string.
# Documented with comments rather than a class docstring because pydantic
# copies a model's docstring into its JSON schema description.
class PdfExtractParams(BaseModel):
    url: str = Field(
        description='URL to a PDF document',
    )
|
|
|
|
|
|
@controller.registry.action(
    'Extract PDF Text',
    param_model=PdfExtractParams,
)
def extract_mistral_ocr(params: PdfExtractParams, browser: BrowserContext) -> ActionResult:
    """
    Run Mistral OCR on a PDF URL and return the recognised text as markdown.

    Args:
        params: Action parameters; only ``params.url`` (the PDF location) is read.
        browser: Browser context declared in the action signature; this body
            does not use it.

    Returns:
        ActionResult whose ``extracted_content`` holds every page's markdown,
        each under a ``### Page N`` heading (N starts at 1), separated by
        blank lines. ``include_in_memory`` is False.
    """
    api_key = os.getenv('MISTRAL_API_KEY')
    client = Mistral(api_key=api_key)

    response = client.ocr.process(
        model='mistral-ocr-latest',
        document={
            'type': 'document_url',
            'document_url': params.url,
        },
        include_image_base64=False,
    )

    # Number pages from 1 while iterating them directly instead of indexing
    # response.pages by position.
    markdown = '\n\n'.join(
        f'### Page {page_number}\n{page.markdown}'
        for page_number, page in enumerate(response.pages, start=1)
    )
    return ActionResult(
        extracted_content=markdown,
        include_in_memory=False,  # PDF content can be very large, so we don't include it in memory
    )
|
|
|
|
|
|
async def main():
    """Run an agent on the PDF task with the custom controller and log its result."""
    # Prompt text written as explicit per-line literals so its newlines are visible.
    instructions = (
        '\n'
        'Objective: Navigate to the following URL, extract its contents using the Extract PDF Text action, and explain its historical significance.\n'
        '\n'
        'URL: https://docs.house.gov/meetings/GO/GO00/20220929/115171/HHRG-117-GO00-20220929-SD010.pdf\n'
    )
    model = ChatOpenAI(model='gpt-4.1')
    pdf_agent = Agent(task=instructions, llm=model, controller=controller)

    outcome = await pdf_agent.run()
    logger.info(outcome)
|
|
|
|
|
|
# Script entry point: drive the async main() to completion when this file is
# executed directly rather than imported.
if __name__ == '__main__':
    asyncio.run(main())
|