Files
browser-use/examples/use-cases/google_sheets.py
2025-03-31 07:47:56 +02:00

194 lines
7.4 KiB
Python

import os
import sys
from browser_use.browser.context import BrowserContext
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import asyncio
import pyperclip
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from browser_use import ActionResult, Agent, Controller
from browser_use.browser.browser import Browser, BrowserConfig
# Shared browser handle, pointed at the Google Chrome binary installed on macOS.
browser = Browser(
    config=BrowserConfig(
        browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
    ),
)

# Load environment variables, then fail fast if the OpenAI key is absent or empty.
load_dotenv()
if not os.environ.get('OPENAI_API_KEY'):
    raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')

# Controller whose registry the custom Google Sheets actions in this file are attached to.
controller = Controller()
def is_google_sheet(page) -> bool:
    """Return True when ``page.url`` starts with the Google Sheets URL prefix.

    Only the ``url`` attribute of *page* is read.
    """
    sheets_url_prefix = 'https://docs.google.com/spreadsheets/'
    current_url = page.url
    return current_url.startswith(sheets_url_prefix)
@controller.registry.action('Google Sheets: Open a specific Google Sheet')
async def open_google_sheet(browser: BrowserContext, google_sheet_url: str):
    """Bring the current page to *google_sheet_url* and report whether a sheet is showing.

    Navigation is skipped when the page URL already equals the requested URL.
    Returns an ``ActionResult`` with ``error`` set when the resulting page URL
    is not a Google Sheets URL, otherwise one with a confirmation message.
    """
    page = await browser.get_current_page()

    already_there = page.url == google_sheet_url
    if not already_there:
        await page.goto(google_sheet_url)
        await page.wait_for_load_state()

    if is_google_sheet(page):
        return ActionResult(extracted_content=f'Opened Google Sheet {google_sheet_url}', include_in_memory=False)

    # The page URL is not a Sheets URL after navigation (e.g. a redirect happened).
    return ActionResult(error='Failed to open Google Sheet, are you sure you have permissions to access this sheet?')
@controller.registry.action('Google Sheets: Get the contents of the entire sheet', page_filter=is_google_sheet)
async def get_sheet_contents(browser: BrowserContext):
    """Copy every cell of the open sheet via keyboard shortcuts and return the clipboard text.

    The text is read back from the system clipboard with ``pyperclip`` and
    returned as ``extracted_content`` (presumably tab-separated values, as
    produced by the sheet's copy command).
    """
    page = await browser.get_current_page()

    # Commit any in-progress cell edit, then clear focus before sending shortcuts.
    await page.keyboard.press('Enter')
    await page.keyboard.press('Escape')

    # select all cells and copy them
    await page.keyboard.press('ControlOrMeta+A')
    await page.keyboard.press('ControlOrMeta+C')

    # Give the copy a moment to reach the system clipboard before reading it;
    # without this pause the previous clipboard contents may be returned.
    await asyncio.sleep(0.1)

    extracted_tsv = pyperclip.paste()
    return ActionResult(extracted_content=extracted_tsv, include_in_memory=True)
@controller.registry.action('Google Sheets: Select a specific cell or range of cells', page_filter=is_google_sheet)
async def select_cell_or_range(browser: BrowserContext, cell_or_range: str):
    """Select *cell_or_range* by typing it into the sheet's go-to-range popup.

    The whole interaction is driven with keyboard events on the current page,
    with short pauses between steps. Always returns a confirmation
    ``ActionResult``; the jump itself is not verified.
    """
    page = await browser.get_current_page()
    keys = page.keyboard
    short_pause = 0.1
    popup_pause = 0.2

    # Commit whatever was being edited so the current cell's contents are not lost,
    # then clear focus (otherwise the select-range popup is additive).
    await keys.press('Enter')
    await keys.press('Escape')
    await asyncio.sleep(short_pause)

    # Move the cursor to the top left of the sheet first.
    await keys.press('Home')
    await keys.press('ArrowUp')
    await asyncio.sleep(short_pause)

    # Open the go-to-range popup and type the target reference.
    await keys.press('Control+G')
    await asyncio.sleep(popup_pause)
    # NOTE(review): Playwright documents `delay` in milliseconds, so 0.05 is
    # effectively no pause between key presses; confirm whether 50 was intended.
    await keys.type(cell_or_range, delay=0.05)
    await asyncio.sleep(popup_pause)

    # Confirm the jump, then press Escape so the popup still closes if the jump failed.
    await keys.press('Enter')
    await asyncio.sleep(popup_pause)
    await keys.press('Escape')

    return ActionResult(extracted_content=f'Selected cell {cell_or_range}', include_in_memory=False)
@controller.registry.action('Google Sheets: Get the contents of a specific cell or range of cells', page_filter=is_google_sheet)
async def get_range_contents(browser: BrowserContext, cell_or_range: str):
    """Select *cell_or_range*, copy it, and return the resulting clipboard text.

    The selection is delegated to ``select_cell_or_range``; the copied text is
    read from the system clipboard via ``pyperclip``.
    """
    page = await browser.get_current_page()
    await select_cell_or_range(browser, cell_or_range)

    # Copy the selection and wait briefly before reading the clipboard.
    await page.keyboard.press('ControlOrMeta+C')
    await asyncio.sleep(0.1)

    return ActionResult(extracted_content=pyperclip.paste(), include_in_memory=True)
@controller.registry.action('Google Sheets: Clear the currently selected cells', page_filter=is_google_sheet)
async def clear_selected_range(browser: BrowserContext):
    """Press Backspace on the current page to clear whatever cells are selected."""
    current_page = await browser.get_current_page()
    await current_page.keyboard.press('Backspace')

    outcome = ActionResult(extracted_content='Cleared selected range', include_in_memory=False)
    return outcome
@controller.registry.action('Google Sheets: Input text into the currently selected cell', page_filter=is_google_sheet)
async def input_selected_cell_text(browser: BrowserContext, text: str):
    """Type *text* into the currently selected cell and commit it.

    After typing, Enter commits the value and ArrowUp is pressed once.
    """
    page = await browser.get_current_page()
    keys = page.keyboard

    # NOTE(review): Playwright documents `delay` in milliseconds, so 0.1 is
    # effectively no pause between key presses; confirm whether 100 was intended.
    await keys.type(text, delay=0.1)

    # Commit the input so it doesn't get overwritten by the next action.
    await keys.press('Enter')
    # Presumably moves back to the edited cell, since Enter moves the cursor down; verify.
    await keys.press('ArrowUp')

    return ActionResult(extracted_content=f'Inputted text {text}', include_in_memory=False)
@controller.registry.action('Google Sheets: Batch update a range of cells', page_filter=is_google_sheet)
async def update_range_contents(browser: BrowserContext, range: str, new_contents_tsv: str):
    """Select *range* and paste *new_contents_tsv* into it via a synthetic paste event.

    Args:
        browser: Browser context whose current page holds the sheet.
        range: Cell or range reference handed to ``select_cell_or_range``.
            (The name shadows the builtin but is kept because it is part of
            the action's parameter interface.)
        new_contents_tsv: Text placed on the synthetic clipboard as ``text/plain``.

    Returns:
        An ``ActionResult`` describing the update.
    """
    page = await browser.get_current_page()
    await select_cell_or_range(browser, range)

    # Simulate a paste event from the clipboard with the TSV content.
    # The text is passed as an argument to the page function instead of being
    # interpolated into the script source, so backticks, `${...}` sequences or
    # backslashes in the data can neither break the script nor run as code.
    await page.evaluate(
        """(tsv) => {
            const clipboardData = new DataTransfer();
            clipboardData.setData('text/plain', tsv);
            document.activeElement.dispatchEvent(new ClipboardEvent('paste', {clipboardData}));
        }""",
        new_contents_tsv,
    )

    return ActionResult(extracted_content=f'Updated cell {range} with {new_contents_tsv}', include_in_memory=False)
# Many more snippets for keyboard-shortcut-based Google Sheets automation can be found here:
# - https://github.com/philc/sheetkeys/blob/master/content_scripts/sheet_actions.js
# - https://github.com/philc/sheetkeys/blob/master/content_scripts/commands.js
# - https://support.google.com/docs/answer/181110?hl=en&co=GENIE.Platform%3DDesktop#zippy=%2Cmac-shortcuts
# Tip: LLMs are bad at spatial reasoning, so don't make the model navigate with arrow keys relative to the current cell.
# If given arrow keys, it will try to jump from G1 to A2 by pressing Down alone, without realizing it also needs to press Left six times.
async def main():
    """Run four agents one after another against the same Google Sheet.

    Each agent is built with the same model, browser context and controller
    and is run to completion before the next one is created. The module-level
    browser is closed when the sequence finishes or fails, so it is not left
    open after the script ends.
    """
    # Task prompts, in execution order. The text is kept flush-left so the
    # strings sent to the model stay exactly as written.
    agent_tasks = (
        # eraser: wipe any previous results
        """
Clear all the existing values in columns A through F in this Google Sheet:
https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
""",
        # researcher: look up the data and fill in the sheet
        """
Google to find the full name, nationality, and date of birth of the CEO of the top 10 Fortune 100 companies.
For each company, append a row to this existing Google Sheet: https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
Make sure column headers are present and all existing values in the sheet are formatted correctly.
Columns:
A: Company Name
B: CEO Full Name
C: CEO Country of Birth
D: CEO Date of Birth (YYYY-MM-DD)
E: Source URL where the information was found
""",
        # improvised continuer: extend the existing pattern
        """
Read the Google Sheet https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
Add 3 more rows to the bottom continuing the existing pattern, make sure any data you add is sourced correctly.
""",
        # final fact checker: verify every row
        """
Read the Google Sheet https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
Fact-check every entry, add a new column F with your findings for each row.
Make sure to check the source URL for each row, and make sure the information is correct.
""",
    )

    try:
        async with await browser.new_context() as context:
            model = ChatOpenAI(model='gpt-4o')
            for task in agent_tasks:
                agent = Agent(
                    task=task,
                    llm=model,
                    browser_context=context,
                    controller=controller,
                )
                await agent.run()
    finally:
        # The context manager above only closes the context; close the browser too.
        await browser.close()
# Run the agent sequence only when this file is executed directly as a script.
if __name__ == '__main__':
    asyncio.run(main())