Merge branch 'main' into hot-fix/fix-highlight-interative-elements-in-shadow-dom

2026-05-13 17:56:35 +02:00 · 2025-07-05 10:46:51 +02:00
parent b2dc5dbcb0 3b8f4ff7cf
commit 3d77ef0e1e
78 changed files with 564 additions and 126 deletions
--- a/.cursor/rules/browser-use-rules.mdc
+++ b/.cursor/rules/browser-use-rules.mdc
@@ -76,7 +76,7 @@ from browser_use import Agent
 from browser_use.llm import ChatOpenAI

 task = "Find the CEO of OpenAI and return their name"
-model = ChatOpenAI(model="gpt-4o")
+model = ChatOpenAI(model="gpt-4.1-mini")

 agent = Agent(task=task, llm=model, controller=controller)

--- a/.github/ISSUE_TEMPLATE/1_element_detection_bug.yml
+++ b/.github/ISSUE_TEMPLATE/1_element_detection_bug.yml
@@ -44,6 +44,8 @@ body:
        - gpt-4.1
        - gpt-4.1-mini
        - gpt-4.1-nano
+        - o4-mini
+        - o3
        - claude-3.7-sonnet
        - claude-3.5-sonnet
        - gemini-2.6-flash-preview
@@ -114,7 +116,7 @@ body:

        agent = Agent(
            task='...',
-            llm=ChatOpenAI(model="gpt-4o"),
+            llm=ChatOpenAI(model="gpt-4.1"),
            browser_session=BrowserSession(headless=False),
        )
        ...
--- a/README.md
+++ b/README.md
@@ -46,7 +46,7 @@ from browser_use.llm import ChatOpenAI
 async def main():
    agent = Agent(
        task="Compare the price of gpt-4o and DeepSeek-V3",
-        llm=ChatOpenAI(model="gpt-4o"),
+        llm=ChatOpenAI(model="o4-mini", temperature=1.0),
    )
    await agent.run()

--- a/browser_use/agent/system_prompt.md
+++ b/browser_use/agent/system_prompt.md
@@ -91,6 +91,7 @@ Strictly follow these rules while using the browser and navigating the web:
 <file_system>
 - You have access to a persistent file system which you can use to track progress, store results, and manage long tasks.
 - Your file system is initialized with a `todo.md`: Use this to keep a checklist for known subtasks. Update it to mark completed items and track what remains. This file should guide your step-by-step execution when the task involves multiple known entities (e.g., a list of links or items to visit). ALWAYS use `write_file` to rewrite entire `todo.md` when you want to update your progress. NEVER use `append_file` on `todo.md` as this can explode your context.
+- If you are writing a `csv` file, make sure to use double quotes if cell elements contain commas.
 - Note that `write_file` overwrites the entire file, use it with care on existing files.
 - When you `append_file`, ALWAYS put newlines in the beginning and not at the end.
 - If the file is too large, you are only given a preview of your file. Use `read_file` to see the full content if necessary.
--- a/browser_use/agent/system_prompt_no_thinking.md
+++ b/browser_use/agent/system_prompt_no_thinking.md
@@ -91,6 +91,7 @@ Strictly follow these rules while using the browser and navigating the web:
 <file_system>
 - You have access to a persistent file system which you can use to track progress, store results, and manage long tasks.
 - Your file system is initialized with a `todo.md`: Use this to keep a checklist for known subtasks. Update it to mark completed items and track what remains. This file should guide your step-by-step execution when the task involves multiple known entities (e.g., a list of links or items to visit). ALWAYS use `write_file` to rewrite entire `todo.md` when you want to update your progress. NEVER use `append_file` on `todo.md` as this can explode your context.
+- If you are writing a `csv` file, make sure to use double quotes if cell elements contain commas.
 - Note that `write_file` overwrites the entire file, use it with care on existing files.
 - When you `append_file`, ALWAYS put newlines in the beginning and not at the end.
 - If the file is too large, you are only given a preview of your file. Use `read_file` to see the full content if necessary.
--- a/browser_use/cli.py
+++ b/browser_use/cli.py
@@ -211,11 +211,11 @@ def get_llm(config: dict[str, Any]):

 	# Auto-detect based on available API keys
 	if CONFIG.OPENAI_API_KEY:
-		return ChatOpenAI(model='gpt-4o', temperature=temperature)
+		return ChatOpenAI(model='gpt-4.1', temperature=temperature)
 	elif CONFIG.ANTHROPIC_API_KEY:
-		return ChatAnthropic(model='claude-3.5-sonnet-exp', temperature=temperature)
+		return ChatAnthropic(model='claude-3.5-sonnet', temperature=temperature)
 	elif CONFIG.GOOGLE_API_KEY:
-		return ChatGoogle(model='gemini-2.0-flash-lite', temperature=temperature)
+		return ChatGoogle(model='gemini-2.5-flash', temperature=temperature)
 	else:
 		print(
 			'⚠️  No API keys found. Please update your config or set one of: OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY.'
--- a/browser_use/controller/service.py
+++ b/browser_use/controller/service.py
@@ -566,7 +566,7 @@ Explain the content of the page and that the requested information is not availa
 				return ActionResult(error=msg, include_in_memory=True)

 		# File System Actions
-		@self.registry.action('Write content to file_name in file system, use only .md or .txt extensions.')
+		@self.registry.action('Write content to file_name in file system. Allowed extensions are .md, .txt, .json, .csv.')
 		async def write_file(file_name: str, content: str, file_system: FileSystem):
 			result = await file_system.write_file(file_name, content)
 			logger.info(f'💾 {result}')
@@ -581,13 +581,9 @@ Explain the content of the page and that the requested information is not availa
 		@self.registry.action('Read file_name from file system')
 		async def read_file(file_name: str, available_file_paths: list[str], file_system: FileSystem):
 			if available_file_paths and file_name in available_file_paths:
-				import anyio
-
-				async with await anyio.open_file(file_name, 'r') as f:
-					content = await f.read()
-					result = f'Read from file {file_name}.\n<content>\n{content}\n</content>'
+				result = await file_system.read_file(file_name, external_file=True)
 			else:
-				result = file_system.read_file(file_name)
+				result = await file_system.read_file(file_name)

 			MAX_MEMORY_SIZE = 1000
 			if len(result) > MAX_MEMORY_SIZE:
--- a/browser_use/filesystem/file_system.py
+++ b/browser_use/filesystem/file_system.py
@@ -94,6 +94,22 @@ class TxtFile(BaseFile):
 		return 'txt'


+class JsonFile(BaseFile):
+	"""BaseFile subclass whose `extension` property reports 'json'."""
+
+	@property
+	def extension(self) -> str:
+		return 'json'
+
+
+class CsvFile(BaseFile):
+	"""BaseFile subclass whose `extension` property reports 'csv'."""
+
+	@property
+	def extension(self) -> str:
+		return 'csv'
+
+
 class FileSystemState(BaseModel):
 	"""Serializable state of the file system"""

@@ -120,6 +136,8 @@ class FileSystem:
 		self._file_types: dict[str, type[BaseFile]] = {
 			'md': MarkdownFile,
 			'txt': TxtFile,
+			'json': JsonFile,
+			'csv': CsvFile,
 		}

 		self.files = {}
@@ -188,8 +206,41 @@ class FileSystem:

 		return file_obj.read()

-	def read_file(self, full_filename: str) -> str:
+	async def read_file(self, full_filename: str, external_file: bool = False) -> str:
 		"""Read file content using file-specific read method and return appropriate message to LLM"""
+		if external_file:
+			try:
+				try:
+					_, extension = self._parse_filename(full_filename)
+				except Exception:
+					return f'Error: Invalid filename format {full_filename}. Must be alphanumeric with a supported extension.'
+				if extension in ['md', 'txt', 'json', 'csv']:
+					import anyio
+
+					async with await anyio.open_file(full_filename, 'r') as f:
+						content = await f.read()
+						return f'Read from file {full_filename}.\n<content>\n{content}\n</content>'
+				elif extension == 'pdf':
+					import pypdf
+
+					reader = pypdf.PdfReader(full_filename)
+					num_pages = len(reader.pages)
+					MAX_PDF_PAGES = 5
+					extra_pages = num_pages - MAX_PDF_PAGES
+					extracted_text = ''
+					for page in reader.pages[:MAX_PDF_PAGES]:
+						extracted_text += page.extract_text()
+					extra_pages_text = f'{extra_pages} more pages...' if extra_pages > 0 else ''
+					return f'Read from file {full_filename}.\n<content>\n{extracted_text}\n{extra_pages_text}</content>'
+				else:
+					return f'Error: Cannot read file {full_filename} as {extension} extension is not supported.'
+			except FileNotFoundError:
+				return f"Error: File '{full_filename}' not found."
+			except PermissionError:
+				return f"Error: Permission denied to read file '{full_filename}'."
+			except Exception as e:
+				return f"Error: Could not read file '{full_filename}'."
+
 		if not self._is_valid_filename(full_filename):
 			return INVALID_FILENAME_ERROR_MESSAGE

@@ -367,6 +418,10 @@ class FileSystem:
 				file_obj = MarkdownFile(**file_info)
 			elif file_type == 'TxtFile':
 				file_obj = TxtFile(**file_info)
+			elif file_type == 'JsonFile':
+				file_obj = JsonFile(**file_info)
+			elif file_type == 'CsvFile':
+				file_obj = CsvFile(**file_info)
 			else:
 				# Skip unknown file types
 				continue
--- a/browser_use/integrations/gmail/actions.py
+++ b/browser_use/integrations/gmail/actions.py
@@ -22,15 +22,13 @@ _gmail_service: GmailService | None = None
 class GetRecentEmailsParams(BaseModel):
 	"""Parameters for getting recent emails"""

-	query: str = Field(
-		default='', description='Gmail search query (e.g., "from:noreply@example.com") - optional additional filter'
-	)
-	max_results: int = Field(default=10, ge=1, le=50, description='Maximum number of emails to retrieve (1-50, default: 10)')
+	keyword: str = Field(default='', description='A single keyword for search, e.g. github, airbnb, etc.')
+	max_results: int = Field(default=3, ge=1, le=50, description='Maximum number of emails to retrieve (1-50, default: 3)')


 def register_gmail_actions(
 	controller: Controller, gmail_service: GmailService | None = None, access_token: str | None = None
-) -> None:
+) -> Controller:
 	"""
 	Register Gmail actions with the provided controller
 	Args:
@@ -48,11 +46,8 @@ def register_gmail_actions(
 	else:
 		_gmail_service = GmailService()

-	@controller.action(
-		description='📧 **Get recent emails** - to fetch recent emails from the past 5 minutes with full content. '
-		'Perfect for retrieving verification codes, OTP, 2FA tokens, or any recent email content. '
-		'This action accesses your Gmail inbox to read email messages and extract verification codes. '
-		'Returns complete email content so you can extract verification codes or analyze email details yourself.',
+	@controller.registry.action(
+		description='Get recent emails from the mailbox with a keyword to retrieve verification codes, OTP, 2FA tokens, magic links, or any recent email content. Keep your query a single keyword.',
 		param_model=GetRecentEmailsParams,
 	)
 	async def get_recent_emails(params: GetRecentEmailsParams) -> ActionResult:
@@ -77,8 +72,8 @@ def register_gmail_actions(

 			# Build query with time filter and optional user query
 			query_parts = [f'newer_than:{time_filter}']
-			if params.query.strip():
-				query_parts.append(params.query.strip())
+			if params.keyword.strip():
+				query_parts.append(params.keyword.strip())

 			query = ' '.join(query_parts)
 			logger.info(f'🔍 Gmail search query: {query}')
@@ -87,10 +82,11 @@ def register_gmail_actions(
 			emails = await _gmail_service.get_recent_emails(max_results=max_results, query=query, time_filter=time_filter)

 			if not emails:
-				query_info = f" matching '{params.query}'" if params.query.strip() else ''
+				query_info = f" matching '{params.keyword}'" if params.keyword.strip() else ''
+				memory = f'No recent emails found from last {time_filter}{query_info}'
 				return ActionResult(
-					extracted_content=f'No emails found from the last {time_filter}{query_info}',
-					long_term_memory=f'No recent emails found from last {time_filter}',
+					extracted_content=memory,
+					long_term_memory=memory,
 				)

 			# Format with full email content for large display
@@ -108,7 +104,7 @@ def register_gmail_actions(
 			return ActionResult(
 				extracted_content=content,
 				include_extracted_content_only_once=True,
-				long_term_memory=f'Retrieved {len(emails)} recent emails from last {time_filter}',
+				long_term_memory=f'Retrieved {len(emails)} recent emails from last {time_filter} for query {query}.',
 			)

 		except Exception as e:
@@ -117,3 +113,5 @@ def register_gmail_actions(
 				error=f'Error getting recent emails: {str(e)}',
 				long_term_memory='Failed to get recent emails due to error',
 			)
+
+	return controller
--- a/browser_use/llm/tests/test_single_step.py
+++ b/browser_use/llm/tests/test_single_step.py
@@ -95,9 +95,9 @@ def create_mock_state_message(temp_dir: str):
 	[
 		(ChatGroq, 'meta-llama/llama-4-maverick-17b-128e-instruct'),
 		(ChatGoogle, 'gemini-2.0-flash-exp'),
-		(ChatOpenAI, 'gpt-4o-mini'),
+		(ChatOpenAI, 'gpt-4.1-mini'),
 		(ChatAnthropic, 'claude-3-5-sonnet-latest'),
-		(ChatAzureOpenAI, 'gpt-4o-mini'),
+		(ChatAzureOpenAI, 'gpt-4.1-mini'),
 	],
 )
 async def test_single_step_parametrized(llm_class, model_name):
--- a/docs/quickstart.mdx
+++ b/docs/quickstart.mdx
@@ -50,7 +50,7 @@ load_dotenv()

 import asyncio

-llm = ChatOpenAI(model="gpt-4o")
+llm = ChatOpenAI(model="gpt-4.1")

 async def main():
    agent = Agent(
--- a/eval/service.py
+++ b/eval/service.py
@@ -949,8 +949,8 @@ def create_controller(
 	else:
 		controller = Controller(output_model=output_model)

-	# Add Gmail 2FA support if tokens dict is available and task contains email
-	if gmail_tokens_dict and task:
+	# Add Gmail 2FA support if tokens dict is available and task has login_type OTP
+	if gmail_tokens_dict and task and hasattr(task, 'login_type') and task.login_type == 'OTP':
 		try:
 			# Extract username from task - check multiple possible sources
 			username = None
@@ -979,17 +979,23 @@ def create_controller(
 					from browser_use.integrations.gmail import register_gmail_actions

 					# Register Gmail actions using the access token
-					register_gmail_actions(controller, access_token=access_token)
-					logger.info(f'Gmail 2FA integration registered successfully for user {user_id}')
+					controller = register_gmail_actions(controller, access_token=access_token)
+					logger.info(f'Gmail 2FA integration registered successfully for user {user_id} (OTP task)')
 				else:
 					logger.info(f'No Gmail 2FA token found for user {user_id}, running without Gmail integration')
 			else:
-				logger.info('No email found in task, running without Gmail integration')
+				logger.info('No email found in OTP task, running without Gmail integration')

 		except Exception as e:
 			logger.error(f'Failed to setup Gmail integration: {e}')
 	else:
-		logger.info(f'No Gmail 2FA tokens provided, running without Gmail integration: {gmail_tokens_dict}, {task}')
+		if gmail_tokens_dict and task:
+			if not hasattr(task, 'login_type') or task.login_type != 'OTP':
+				logger.info(f'Task login_type is "{getattr(task, "login_type", "None")}", not OTP - skipping Gmail integration')
+			else:
+				logger.info('Gmail 2FA tokens provided but no task or task missing login_type')
+		else:
+			logger.info('No Gmail 2FA tokens provided or no task, running without Gmail integration')

 	return controller

@@ -3039,7 +3045,7 @@ if __name__ == '__main__':
 		'--model', type=str, default='gpt-4o', choices=list(SUPPORTED_MODELS.keys()), help='Model to use for the agent'
 	)
 	parser.add_argument(
-		'--eval-model', type=str, default='gpt-4o', choices=list(SUPPORTED_MODELS.keys()), help='Model to use for evaluation'
+		'--eval-model', type=str, default='gpt-4.1', choices=list(SUPPORTED_MODELS.keys()), help='Model to use for evaluation'
 	)
 	parser.add_argument('--no-vision', action='store_true', help='Disable vision capabilities in the agent')

--- a/examples/browser/multiple_agents_same_browser.py
+++ b/examples/browser/multiple_agents_same_browser.py
@@ -26,7 +26,7 @@ async def main():
 	await browser_session.start()

 	current_agent = None
-	llm = ChatOpenAI(model='gpt-4o')
+	llm = ChatOpenAI(model='gpt-4.1')

 	task1 = 'find todays weather on San Francisco and extract it as json'
 	task2 = 'find todays weather in Zurich and extract it as json'
--- a/examples/browser/real_browser.py
+++ b/examples/browser/real_browser.py
@@ -24,7 +24,7 @@ browser_session = BrowserSession(browser_profile=browser_profile)
 async def main():
 	agent = Agent(
 		task='Find todays DOW stock price',
-		llm=ChatOpenAI(model='gpt-4o'),
+		llm=ChatOpenAI(model='gpt-4.1'),
 		browser_session=browser_session,
 	)

--- a/examples/browser/stealth.py
+++ b/examples/browser/stealth.py
@@ -18,7 +18,7 @@ from browser_use.browser.profile import BrowserProfile
 from browser_use.browser.types import async_patchright
 from browser_use.llm import ChatOpenAI

-llm = ChatOpenAI(model='gpt-4o')
+llm = ChatOpenAI(model='gpt-4.1')

 terminal_width, terminal_height = shutil.get_terminal_size((80, 20))

--- a/examples/custom-functions/2fa.py
+++ b/examples/custom-functions/2fa.py
@@ -56,7 +56,7 @@ async def main():
 	You are completely FORBIDDEN to use any other method to get the 2FA code.
 	"""

-	model = ChatOpenAI(model='gpt-4o')
+	model = ChatOpenAI(model='gpt-4.1')
 	agent = Agent(task=task, llm=model, controller=controller)

 	result = await agent.run()
--- a/examples/custom-functions/action_filters.py
+++ b/examples/custom-functions/action_filters.py
@@ -68,7 +68,7 @@ async def main():
 	"""Main function to run the example"""
 	browser_session = BrowserSession()
 	await browser_session.start()
-	llm = ChatOpenAI(model='gpt-4o')
+	llm = ChatOpenAI(model='gpt-4.1')

 	# Create the agent
 	agent = Agent(  # disco mode will not be triggered on apple.com because the LLM won't be able to see that action available, it should work on Google.com though.
--- a/examples/custom-functions/advanced_search.py
+++ b/examples/custom-functions/advanced_search.py
@@ -93,7 +93,7 @@ names = [
 async def main():
 	task = 'use search_web with "find email address of the following ETH professor:" for each of the following persons in a list of actions. Finally return the list with name and email if provided - do always 5 at once'
 	task += '\n' + '\n'.join(names)
-	model = ChatOpenAI(model='gpt-4o')
+	model = ChatOpenAI(model='gpt-4.1')
 	browser_profile = BrowserProfile()
 	agent = Agent(task=task, llm=model, controller=controller, browser_profile=browser_profile)

--- a/examples/custom-functions/clipboard.py
+++ b/examples/custom-functions/clipboard.py
@@ -39,7 +39,7 @@ async def paste_from_clipboard(page: Page):

 async def main():
 	task = 'Copy the text "Hello, world!" to the clipboard, then go to google.com and paste the text'
-	model = ChatOpenAI(model='gpt-4o')
+	model = ChatOpenAI(model='gpt-4.1')
 	browser_session = BrowserSession(browser_profile=browser_profile)
 	await browser_session.start()
 	agent = Agent(
--- a/examples/custom-functions/custom_hooks_before_after_step.py
+++ b/examples/custom-functions/custom_hooks_before_after_step.py
@@ -220,8 +220,8 @@ async def record_activity(agent_obj):


 agent = Agent(
-	task='Compare the price of gpt-4o and DeepSeek-V3',
-	llm=ChatOpenAI(model='gpt-4o'),
+	task='Compare the price of gpt-4.1 and DeepSeek-V3',
+	llm=ChatOpenAI(model='gpt-4.1'),
 )


--- a/examples/custom-functions/drag_and_drop.py
+++ b/examples/custom-functions/drag_and_drop.py
@@ -268,7 +268,7 @@ async def example_drag_drop_sortable_list():
 	controller = await create_drag_drop_controller()

 	# Initialize LLM (replace with your preferred model)
-	llm = ChatOpenAI(model='gpt-4o')
+	llm = ChatOpenAI(model='gpt-4.1')

 	# Create the agent
 	agent = Agent(
@@ -288,7 +288,7 @@ async def example_drag_drop_coordinates():
 	"""Example: Direct coordinate-based drag and drop."""

 	controller = await create_drag_drop_controller()
-	llm = ChatOpenAI(model='gpt-4o')
+	llm = ChatOpenAI(model='gpt-4.1')

 	agent = Agent(
 		task='Go to a canvas drawing website and draw a simple line using drag and drop from coordinates (100, 100) to (300, 200)',
--- a/examples/custom-functions/extract_pdf_content.py
+++ b/examples/custom-functions/extract_pdf_content.py
@@ -79,7 +79,7 @@ async def main():

        URL: https://docs.house.gov/meetings/GO/GO00/20220929/115171/HHRG-117-GO00-20220929-SD010.pdf
        """,
-		llm=ChatOpenAI(model='gpt-4o'),
+		llm=ChatOpenAI(model='gpt-4.1'),
 		controller=controller,
 	)
 	result = await agent.run()
--- a/examples/custom-functions/hover_element.py
+++ b/examples/custom-functions/hover_element.py
@@ -74,7 +74,7 @@ async def hover_element(params: HoverAction, browser_session: BrowserSession):
 async def main():
 	task = 'Open https://testpages.eviltester.com/styled/csspseudo/css-hover.html and hover the element with the css selector #hoverdivpara, then click on "Can you click me?"'
 	# task = 'Open https://testpages.eviltester.com/styled/csspseudo/css-hover.html and hover the element with the xpath //*[@id="hoverdivpara"], then click on "Can you click me?"'
-	model = ChatOpenAI(model='gpt-4o')
+	model = ChatOpenAI(model='gpt-4.1')
 	browser_session = BrowserSession(browser_profile=browser_profile)
 	await browser_session.start()
 	agent = Agent(
--- a/examples/custom-functions/notification.py
+++ b/examples/custom-functions/notification.py
@@ -34,7 +34,7 @@ async def done(text: str):

 async def main():
 	task = 'go to brower-use.com and then done'
-	model = ChatOpenAI(model='gpt-4o')
+	model = ChatOpenAI(model='gpt-4.1')
 	agent = Agent(task=task, llm=model, controller=controller)

 	await agent.run()
--- a/examples/custom-functions/onepassword_2fa.py
+++ b/examples/custom-functions/onepassword_2fa.py
@@ -46,7 +46,7 @@ async def main():
 	# Example task using the 1Password 2FA action
 	task = 'Go to account.google.com, enter username and password, then if prompted for 2FA code, get 2FA code from 1Password for and enter it'

-	model = ChatOpenAI(model='gpt-4o')
+	model = ChatOpenAI(model='gpt-4.1')
 	agent = Agent(task=task, llm=model, controller=controller)

 	result = await agent.run()
--- a/examples/custom-functions/perplexity_search.py
+++ b/examples/custom-functions/perplexity_search.py
@@ -71,7 +71,7 @@ names = [
 async def main():
 	task = 'use search_web with "find email address of the following ETH professor:" for each of the persons. Finally return the list with name and email if provided '
 	task += '\n' + '\n'.join(names)
-	model = ChatOpenAI(model='gpt-4o')
+	model = ChatOpenAI(model='gpt-4.1')
 	browser_profile = BrowserProfile()
 	agent = Agent(task=task, llm=model, controller=controller, browser_profile=browser_profile)

--- a/examples/custom-functions/save_to_file_hugging_face.py
+++ b/examples/custom-functions/save_to_file_hugging_face.py
@@ -40,7 +40,7 @@ def save_models(params: Models):
 async def main():
 	task = 'Look up models with a license of cc-by-sa-4.0 and sort by most likes on Hugging face, save top 5 to file.'

-	model = ChatOpenAI(model='gpt-4o')
+	model = ChatOpenAI(model='gpt-4.1')
 	agent = Agent(task=task, llm=model, controller=controller)

 	await agent.run()
--- a/examples/custom-functions/solve_amazon_captcha.py
+++ b/examples/custom-functions/solve_amazon_captcha.py
@@ -71,7 +71,7 @@ async def solve_amazon_captcha(browser_session: BrowserSession):
 async def main():
 	task = 'Go to https://www.amazon.com/errors/validateCaptcha and solve the captcha using the solve_amazon_captcha tool'

-	model = ChatOpenAI(model='gpt-4o')
+	model = ChatOpenAI(model='gpt-4.1')
 	browser_session = BrowserSession(browser_profile=browser_profile)
 	await browser_session.start()
 	agent = Agent(task=task, llm=model, controller=controller, browser_session=browser_session)
--- a/examples/features/click_fallback_options.py
+++ b/examples/features/click_fallback_options.py
@@ -181,7 +181,7 @@ async def main():
 	select_task = 'Open http://localhost:8000/, choose the car BMW'
 	button_task = 'Open http://localhost:8000/, click on the button'

-	llm = ChatOpenAI(model='gpt-4o')
+	llm = ChatOpenAI(model='gpt-4.1')
 	# llm = ChatGoogleGenerativeAI(
 	#     model="gemini-2.0-flash-lite",
 	# )
--- a/examples/features/cross_origin_iframes.py
+++ b/examples/features/cross_origin_iframes.py
@@ -32,7 +32,7 @@ controller = Controller()
 async def main():
 	agent = Agent(
 		task='Click "Go cross-site (simple page)" button on https://csreis.github.io/tests/cross-site-iframe.html then tell me the text within',
-		llm=ChatOpenAI(model='gpt-4o', temperature=0.0),
+		llm=ChatOpenAI(model='gpt-4.1', temperature=0.0),
 		controller=controller,
 		browser_session=browser_session,
 	)
--- a/examples/features/custom_output.py
+++ b/examples/features/custom_output.py
@@ -36,7 +36,7 @@ controller = Controller(output_model=Posts)

 async def main():
 	task = 'Go to hackernews show hn and give me the first  5 posts'
-	model = ChatOpenAI(model='gpt-4o')
+	model = ChatOpenAI(model='gpt-4.1')
 	agent = Agent(task=task, llm=model, controller=controller)

 	history = await agent.run()
--- a/examples/features/custom_system_prompt.py
+++ b/examples/features/custom_system_prompt.py
@@ -29,7 +29,7 @@ extend_system_message = (

 async def main():
 	task = 'do google search to find images of Elon Musk'
-	model = ChatOpenAI(model='gpt-4o')
+	model = ChatOpenAI(model='gpt-4.1')
 	agent = Agent(task=task, llm=model, extend_system_message=extend_system_message)

 	print(
--- a/examples/features/custom_user_agent.py
+++ b/examples/features/custom_user_agent.py
@@ -19,7 +19,7 @@ def get_llm(provider: str):
 	if provider == 'anthropic':
 		return ChatAnthropic(model='claude-3-5-sonnet-20240620', temperature=0.0)
 	elif provider == 'openai':
-		return ChatOpenAI(model='gpt-4o', temperature=0.0)
+		return ChatOpenAI(model='gpt-4.1', temperature=0.0)

 	else:
 		raise ValueError(f'Unsupported provider: {provider}')
--- a/examples/features/follow_up_tasks.py
+++ b/examples/features/follow_up_tasks.py
@@ -14,7 +14,7 @@ from browser_use.llm import ChatOpenAI

 # Initialize the model
 llm = ChatOpenAI(
-	model='gpt-4o',
+	model='gpt-4.1',
 	temperature=0.0,
 )
 # Get your chrome path
--- a/examples/features/initial_actions.py
+++ b/examples/features/initial_actions.py
@@ -11,7 +11,7 @@ load_dotenv()
 from browser_use import Agent
 from browser_use.llm import ChatOpenAI

-llm = ChatOpenAI(model='gpt-4o')
+llm = ChatOpenAI(model='gpt-4.1')

 initial_actions = [
 	{'go_to_url': {'url': 'https://www.google.com', 'new_tab': True}},
--- a/examples/features/multi-tab_handling.py
+++ b/examples/features/multi-tab_handling.py
@@ -18,7 +18,7 @@ from browser_use import Agent
 from browser_use.llm import ChatOpenAI

 # video: https://preview.screen.studio/share/clenCmS6
-llm = ChatOpenAI(model='gpt-4o')
+llm = ChatOpenAI(model='gpt-4.1')
 agent = Agent(
 	task='open 3 tabs with elon musk, trump, and steve jobs, then go back to the first and stop',
 	llm=llm,
--- a/examples/features/outsource_state.py
+++ b/examples/features/outsource_state.py
@@ -35,10 +35,10 @@ async def main():
 	for i in range(10):
 		agent = Agent(
 			task=task,
-			llm=ChatOpenAI(model='gpt-4o'),
+			llm=ChatOpenAI(model='gpt-4.1'),
 			browser_session=browser_session,
 			injected_agent_state=agent_state,
-			page_extraction_llm=ChatOpenAI(model='gpt-4o-mini'),
+			page_extraction_llm=ChatOpenAI(model='gpt-4.1-mini'),
 		)

 		done, valid = await agent.take_step()
--- a/examples/features/parallel_agents.py
+++ b/examples/features/parallel_agents.py
@@ -20,7 +20,7 @@ browser_session = BrowserSession(
 		user_data_dir='~/.config/browseruse/profiles/default',
 	)
 )
-llm = ChatOpenAI(model='gpt-4o')
+llm = ChatOpenAI(model='gpt-4.1')


 async def main():
--- a/examples/features/pause_agent.py
+++ b/examples/features/pause_agent.py
@@ -15,7 +15,7 @@ from browser_use.llm import ChatOpenAI

 class AgentController:
 	def __init__(self):
-		llm = ChatOpenAI(model='gpt-4o')
+		llm = ChatOpenAI(model='gpt-4.1')
 		self.agent = Agent(
 			task='open in one action https://www.google.com, https://www.wikipedia.org, https://www.youtube.com, https://www.github.com, https://amazon.com',
 			llm=llm,
--- a/examples/features/planner.py
+++ b/examples/features/planner.py
@@ -11,7 +11,7 @@ load_dotenv()
 from browser_use import Agent
 from browser_use.llm import ChatOpenAI

-llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
+llm = ChatOpenAI(model='gpt-4.1', temperature=0.0)
 planner_llm = ChatOpenAI(
 	model='o3-mini',
 )
--- a/examples/features/restrict_urls.py
+++ b/examples/features/restrict_urls.py
@@ -12,7 +12,7 @@ from browser_use import Agent
 from browser_use.browser import BrowserProfile, BrowserSession
 from browser_use.llm import ChatOpenAI

-llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
+llm = ChatOpenAI(model='gpt-4.1', temperature=0.0)
 task = (
 	"go to google.com and search for openai.com and click on the first link then extract content and scroll down - what's there?"
 )
--- a/examples/features/result_processing.py
+++ b/examples/features/result_processing.py
@@ -14,7 +14,7 @@ from browser_use.agent.views import AgentHistoryList
 from browser_use.browser import BrowserProfile, BrowserSession
 from browser_use.llm import ChatOpenAI

-llm = ChatOpenAI(model='gpt-4o')
+llm = ChatOpenAI(model='gpt-4.1')


 async def main():
--- a/examples/features/save_trace.py
+++ b/examples/features/save_trace.py
@@ -12,7 +12,7 @@ from browser_use.agent.service import Agent
 from browser_use.browser import BrowserProfile, BrowserSession
 from browser_use.llm import ChatOpenAI

-llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
+llm = ChatOpenAI(model='gpt-4.1', temperature=0.0)


 async def main():
--- a/examples/features/small_model_for_extraction.py
+++ b/examples/features/small_model_for_extraction.py
@@ -11,8 +11,8 @@ load_dotenv()
 from browser_use import Agent
 from browser_use.llm import ChatOpenAI

-llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
-small_llm = ChatOpenAI(model='gpt-4o-mini', temperature=0.0)
+llm = ChatOpenAI(model='gpt-4.1', temperature=0.0)
+small_llm = ChatOpenAI(model='gpt-4.1-mini', temperature=0.0)
 task = 'Find the founders of browser-use in ycombinator, extract all links and open the links one by one'
 agent = Agent(task=task, llm=llm, page_extraction_llm=small_llm)

--- a/examples/features/validate_output.py
+++ b/examples/features/validate_output.py
@@ -39,7 +39,7 @@ async def done(params: DoneResult):

 async def main():
 	task = 'Go to hackernews hn and give me the top 1 post'
-	model = ChatOpenAI(model='gpt-4o')
+	model = ChatOpenAI(model='gpt-4.1')
 	agent = Agent(task=task, llm=model, controller=controller, validate_output=True)
 	# NOTE: this should fail to demonstrate the validator
 	await agent.run(max_steps=5)
--- a/examples/file_system/excel_sheet.py
+++ b/examples/file_system/excel_sheet.py
@@ -0,0 +1,47 @@
+import asyncio
+import os
+import sys
+
+from browser_use.llm.openai.chat import ChatOpenAI
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+from lmnr import Laminar
+
+try:
+	Laminar.initialize(project_api_key=os.getenv('LMNR_PROJECT_API_KEY'))
+except Exception:
+	pass
+
+from browser_use import Agent
+
+# Initialize the model
+llm = ChatOpenAI(
+	model='o4-mini',
+	temperature=1.0,
+)
+
+
+task = (
+	'Find current stock price of companies Meta and Amazon. Then, make me a CSV file with 2 columns: company name, stock price.'
+)
+
+agent = Agent(task=task, llm=llm)
+
+
+async def main():
+	import time  # local import: only used to time this single run
+
+	start_time = time.time()
+	history = await agent.run()
+	# Print the token usage recorded on the history returned by the run
+	print(history.usage)
+	end_time = time.time()
+	print(f'Time taken: {end_time - start_time} seconds')
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/examples/integrations/gmail_2fa_integration.py
+++ b/examples/integrations/gmail_2fa_integration.py
@@ -43,7 +43,7 @@ async def main():
 	print()

 	# Initialize LLM
-	llm = ChatOpenAI(model='gpt-4o')
+	llm = ChatOpenAI(model='gpt-4.1')

 	# Example 1: Basic Gmail authentication test
 	print('📧 Testing Gmail authentication...')
--- a/examples/models/azure_openai.py
+++ b/examples/models/azure_openai.py
@@ -27,7 +27,7 @@ if not azure_openai_api_key or not azure_openai_endpoint:

 # Initialize the Azure OpenAI client
 llm = ChatAzureOpenAI(
-	model='gpt-4o',
+	model='gpt-4.1',
 	api_key=azure_openai_api_key,
 	azure_endpoint=azure_openai_endpoint,  # Corrected to use azure_endpoint instead of openai_api_base
 )
--- a/examples/models/langchain/README.md
+++ b/examples/models/langchain/README.md
@@ -15,7 +15,7 @@ async def main():

 	# Create a LangChain model (OpenAI)
 	langchain_model = ChatOpenAI(
-		model='gpt-4o-mini',
+		model='gpt-4.1-mini',
 		temperature=0.1,
 	)

--- a/examples/models/langchain/example.py
+++ b/examples/models/langchain/example.py
@@ -25,7 +25,7 @@ async def main():

 	# Create a LangChain model (OpenAI)
 	langchain_model = ChatOpenAI(
-		model='gpt-4o-mini',
+		model='gpt-4.1-mini',
 		temperature=0.1,
 	)

--- a/examples/simple.py
+++ b/examples/simple.py
@@ -9,37 +9,31 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from dotenv import load_dotenv

 load_dotenv()
-from lmnr import Laminar

 try:
+	from lmnr import Laminar
+
 	Laminar.initialize(project_api_key=os.getenv('LMNR_PROJECT_API_KEY'))
 except Exception:
+	print('Error initializing Laminar')
 	pass

 from browser_use import Agent

 # Initialize the model
 llm = ChatOpenAI(
-	model='gpt-4o',
+	model='gpt-4.1-mini',
 )


 task = 'Go to google.com/travel/flights and search for flights to Tokyo next week'
-task = """http://www.sadfdsafdssdafd.com/ go here and scroll around"""
-task = 'Go to Louis Vuittons website, find every product and save the product details 1 by 1. Extract product details as JSON: productname (Full name as shown on the webpage), brand (Manufacturer or designer name), model (Specific version or edition), gender (Target audience: Men, Women, Unisex), sku (Unique identifier), releasedate (Launch date in YYYY-MM-DD format), retailprice (Price as a number, no currency symbols), colorway (Color description without spaces around slashes, e.g., White/PinkFoam), sizerange (Available sizes as a list, maintain decimals for half sizes, e.g., 7.5), requesturl (URL where product data is scraped), requesttimestamp (ISO 8601 timestamp of the request), primaryimgurl (URL of the main product image); ensure required fields are present, return null if data is missing.'
-
 agent = Agent(task=task, llm=llm)


 async def main():
-	import time
-
-	start_time = time.time()
 	history = await agent.run()
 	# token usage
 	print(history.usage)
-	end_time = time.time()
-	print(f'Time taken: {end_time - start_time} seconds')


 if __name__ == '__main__':
--- a/examples/ui/command_line.py
+++ b/examples/ui/command_line.py
@@ -45,7 +45,7 @@ def get_llm(provider: str):
 		if not api_key:
 			raise ValueError('Error: OPENAI_API_KEY is not set. Please provide a valid API key.')

-		return ChatOpenAI(model='gpt-4o', temperature=0.0)
+		return ChatOpenAI(model='gpt-4.1', temperature=0.0)

 	else:
 		raise ValueError(f'Unsupported provider: {provider}')
--- a/examples/ui/gradio_demo.py
+++ b/examples/ui/gradio_demo.py
@@ -59,7 +59,7 @@ def parse_agent_history(history_str: str) -> None:
 async def run_browser_task(
 	task: str,
 	api_key: str,
-	model: str = 'gpt-4o',
+	model: str = 'gpt-4.1',
 	headless: bool = True,
 ) -> str:
 	if not api_key.strip():
@@ -70,7 +70,7 @@ async def run_browser_task(
 	try:
 		agent = Agent(
 			task=task,
-			llm=ChatOpenAI(model='gpt-4o'),
+			llm=ChatOpenAI(model='gpt-4.1'),
 		)
 		result = await agent.run()
 		#  TODO: The result could be parsed better
--- a/examples/ui/streamlit_demo.py
+++ b/examples/ui/streamlit_demo.py
@@ -44,7 +44,7 @@ def get_llm(provider: str):
 			st.error('Error: OPENAI_API_KEY is not set. Please provide a valid API key.')
 			st.stop()

-		return ChatOpenAI(model='gpt-4o', temperature=0.0)
+		return ChatOpenAI(model='gpt-4.1', temperature=0.0)
 	else:
 		st.error(f'Unsupported provider: {provider}')
 		st.stop()
--- a/examples/use-cases/captcha.py
+++ b/examples/use-cases/captcha.py
@@ -26,7 +26,7 @@ if not os.getenv('OPENAI_API_KEY'):


 async def main():
-	llm = ChatOpenAI(model='gpt-4o')
+	llm = ChatOpenAI(model='gpt-4.1')
 	agent = Agent(
 		task='go to https://captcha.com/demos/features/captcha-demo.aspx and solve the captcha',
 		llm=llm,
--- a/examples/use-cases/check_appointment.py
+++ b/examples/use-cases/check_appointment.py
@@ -42,7 +42,7 @@ async def main():
 		'If there is no available date in both months, tell me there is no available date.'
 	)

-	model = ChatOpenAI(model='gpt-4o-mini')
+	model = ChatOpenAI(model='gpt-4.1-mini')
 	agent = Agent(task, model, controller=controller, use_vision=True)

 	await agent.run()
--- a/examples/use-cases/find_and_apply_to_jobs.py
+++ b/examples/use-cases/find_and_apply_to_jobs.py
@@ -137,7 +137,7 @@ async def main():
 		# ground_task + '\n' + 'Meta',
 	]
 	model = ChatAzureOpenAI(
-		model='gpt-4o',
+		model='gpt-4.1',
 	)

 	agents = []
--- a/examples/use-cases/find_influencer_profiles.py
+++ b/examples/use-cases/find_influencer_profiles.py
@@ -68,7 +68,7 @@ async def main():
 		'Go to this tiktok video url, open it and extract the @username from the resulting url. Then do a websearch for this username to find all his social media profiles. Return me the links to the social media profiles with the platform name.'
 		' https://www.tiktokv.com/share/video/7470981717659110678/  '
 	)
-	model = ChatOpenAI(model='gpt-4o')
+	model = ChatOpenAI(model='gpt-4.1')
 	agent = Agent(task=task, llm=model, controller=controller)

 	history = await agent.run()
--- a/examples/use-cases/google_sheets.py
+++ b/examples/use-cases/google_sheets.py
@@ -44,7 +44,7 @@ async def main():
 	)

 	async with browser_session:
-		model = ChatOpenAI(model='gpt-4o')
+		model = ChatOpenAI(model='gpt-4.1')

 		# eraser = Agent(
 		# 	task="""
--- a/examples/use-cases/online_coding_agent.py
+++ b/examples/use-cases/online_coding_agent.py
@@ -20,7 +20,7 @@ if not os.getenv('OPENAI_API_KEY'):

 async def main():
 	browser_session = BrowserSession()
-	model = ChatOpenAI(model='gpt-4o')
+	model = ChatOpenAI(model='gpt-4.1')

 	# Initialize browser agent
 	agent1 = Agent(
--- a/examples/use-cases/play_chess.py
+++ b/examples/use-cases/play_chess.py
@@ -327,7 +327,7 @@ async def main():
        8. Repeat steps 4-7 until the game ends. If anything seems wrong, use 'Read Chess Board' again.
        9. Announce the final result.
        """,
-		llm=ChatOpenAI(model='gpt-4o'),
+		llm=ChatOpenAI(model='gpt-4.1'),
 		controller=controller,
 	)
 	result = await agent.run()
--- a/examples/use-cases/post-twitter.py
+++ b/examples/use-cases/post-twitter.py
@@ -49,7 +49,7 @@ class TwitterConfig:
 	message: str
 	reply_url: str
 	headless: bool = False
-	model: str = 'gpt-4o-mini'
+	model: str = 'gpt-4.1-mini'
 	base_url: str = 'https://x.com/home'


--- a/examples/use-cases/scrolling_page.py
+++ b/examples/use-cases/scrolling_page.py
@@ -24,7 +24,7 @@ This script demonstrates how the agent can navigate to a webpage and scroll down
 If no amount is specified, the agent will scroll down by one page height.
 """

-llm = ChatOpenAI(model='gpt-4o')
+llm = ChatOpenAI(model='gpt-4.1')

 browser_profile = BrowserProfile(headless=False)
 browser_session = BrowserSession(browser_profile=browser_profile)
--- a/examples/use-cases/shopping.py
+++ b/examples/use-cases/shopping.py
@@ -113,7 +113,7 @@ browser_session = BrowserSession()

 agent = Agent(
 	task=task,
-	llm=ChatOpenAI(model='gpt-4o'),
+	llm=ChatOpenAI(model='gpt-4.1'),
 	browser_session=browser_session,
 )

--- a/examples/use-cases/web_voyager_agent.py
+++ b/examples/use-cases/web_voyager_agent.py
@@ -19,11 +19,11 @@ from browser_use.llm import ChatAzureOpenAI, ChatOpenAI
 # Set LLM based on defined environment variables
 if os.getenv('OPENAI_API_KEY'):
 	llm = ChatOpenAI(
-		model='gpt-4o',
+		model='gpt-4.1',
 	)
 elif os.getenv('AZURE_OPENAI_KEY') and os.getenv('AZURE_OPENAI_ENDPOINT'):
 	llm = ChatAzureOpenAI(
-		model='gpt-4o',
+		model='gpt-4.1',
 	)
 else:
 	raise ValueError('No LLM found. Please set OPENAI_API_KEY or AZURE_OPENAI_KEY and AZURE_OPENAI_ENDPOINT.')
--- a/examples/use-cases/wikipedia_banana_to_quantum.py
+++ b/examples/use-cases/wikipedia_banana_to_quantum.py
@@ -14,7 +14,7 @@ from browser_use.llm import ChatOpenAI

 # video https://preview.screen.studio/share/vuq91Ej8
 llm = ChatOpenAI(
-	model='gpt-4o',
+	model='gpt-4.1',
 	temperature=0.0,
 )
 task = 'go to https://en.wikipedia.org/wiki/Banana and click on buttons on the wikipedia page to go as fast as possible from banna to Quantum mechanics'
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,6 +39,7 @@ dependencies = [
    "google-api-python-client>=2.174.0",
    "google-auth>=2.40.3",
    "google-auth-oauthlib>=1.2.2",
+    "pypdf>=5.7.0",
 ]
 # google-api-core: only used for Google LLM APIs
 # pyperclip: only used for examples that use copy/paste
--- a/tests/ci/test_filesystem.py
+++ b/tests/ci/test_filesystem.py
@@ -9,8 +9,10 @@ import pytest
 from browser_use.filesystem.file_system import (
 	DEFAULT_FILE_SYSTEM_PATH,
 	INVALID_FILENAME_ERROR_MESSAGE,
+	CsvFile,
 	FileSystem,
 	FileSystemState,
+	JsonFile,
 	MarkdownFile,
 	TxtFile,
 )
@@ -41,6 +43,30 @@ class TestBaseFile:
 		assert txt_file.get_size == 11
 		assert txt_file.get_line_count == 2

+	def test_json_file_creation(self):
+		"""A JsonFile built from a name and a content string exposes the expected properties."""
+		payload = '{"name": "John", "age": 30, "city": "New York"}'
+		doc = JsonFile(name='data', content=payload)
+
+		assert (doc.name, doc.extension) == ('data', 'json')
+		assert doc.full_name == 'data.json'
+		assert doc.content == payload
+		# Single-line payload: size equals len(payload) and the line count is one.
+		assert doc.get_size == len(payload)
+		assert doc.get_line_count == 1
+
+	def test_csv_file_creation(self):
+		"""A CsvFile built from a name and a content string exposes the expected properties."""
+		rows = 'name,age,city\nJohn,30,New York\nJane,25,London'
+		sheet = CsvFile(name='users', content=rows)
+
+		assert (sheet.name, sheet.extension) == ('users', 'csv')
+		assert sheet.full_name == 'users.csv'
+		assert sheet.content == rows
+		# Header plus two data rows, no trailing newline: three lines.
+		assert sheet.get_size == len(rows)
+		assert sheet.get_line_count == 3
+
 	def test_file_content_operations(self):
 		"""Test content update and append operations."""
 		file_obj = TxtFile(name='test')
@@ -88,6 +114,60 @@ class TestBaseFile:
 			assert file_path.read_text() == expected_content
 			assert file_obj.content == expected_content

+	async def test_json_file_disk_operations(self):
+		"""sync_to_disk, write and append on a JsonFile keep disk and memory in step."""
+		with tempfile.TemporaryDirectory() as scratch:
+			root = Path(scratch)
+			initial = '{"users": [{"name": "John", "age": 30}]}'
+			doc = JsonFile(name='data', content=initial)
+
+			# Flush the in-memory content to the temporary directory.
+			await doc.sync_to_disk(root)
+
+			# The file appears under its full name and holds the initial content.
+			on_disk = root / 'data.json'
+			assert on_disk.exists()
+			assert on_disk.read_text() == initial
+
+			# write() replaces the content both on disk and on the object.
+			replacement = '{"users": [{"name": "Jane", "age": 25}]}'
+			await doc.write(replacement, root)
+			assert on_disk.read_text() == replacement
+			assert doc.content == replacement
+
+			# append() adds the raw text to the end; the result need not be valid JSON.
+			suffix = ', {"name": "Bob", "age": 35}'
+			await doc.append(suffix, root)
+			assert on_disk.read_text() == replacement + suffix
+			assert doc.content == replacement + suffix
+
+	async def test_csv_file_disk_operations(self):
+		"""sync_to_disk, write and append on a CsvFile keep disk and memory in step."""
+		with tempfile.TemporaryDirectory() as scratch:
+			root = Path(scratch)
+			initial = 'name,age,city\nJohn,30,New York'
+			sheet = CsvFile(name='users', content=initial)
+
+			# Flush the in-memory content to the temporary directory.
+			await sheet.sync_to_disk(root)
+
+			# The file appears under its full name and holds the initial content.
+			on_disk = root / 'users.csv'
+			assert on_disk.exists()
+			assert on_disk.read_text() == initial
+
+			# write() replaces the content both on disk and on the object.
+			replacement = 'name,age,city\nJane,25,London'
+			await sheet.write(replacement, root)
+			assert on_disk.read_text() == replacement
+			assert sheet.content == replacement
+
+			# append() adds one more row after the existing content.
+			extra_row = '\nBob,35,Paris'
+			await sheet.append(extra_row, root)
+			assert on_disk.read_text() == replacement + extra_row
+			assert sheet.content == replacement + extra_row
+
 	def test_file_sync_to_disk_sync(self):
 		"""Test synchronous disk sync operation."""
 		with tempfile.TemporaryDirectory() as tmp_dir:
@@ -160,7 +240,8 @@ class TestFileSystem:

 		assert 'md' in extensions
 		assert 'txt' in extensions
-		assert len(extensions) == 2
+		assert 'json' in extensions
+		assert 'csv' in extensions

 	def test_filename_validation(self, temp_filesystem):
 		"""Test filename validation."""
@@ -171,6 +252,8 @@ class TestFileSystem:
 		assert fs._is_valid_filename('my_file.txt') is True
 		assert fs._is_valid_filename('file-name.md') is True
 		assert fs._is_valid_filename('file123.txt') is True
+		assert fs._is_valid_filename('data.json') is True
+		assert fs._is_valid_filename('users.csv') is True

 		# Invalid filenames
 		assert fs._is_valid_filename('test.doc') is False  # wrong extension
@@ -179,6 +262,8 @@ class TestFileSystem:
 		assert fs._is_valid_filename('test with spaces.md') is False  # spaces
 		assert fs._is_valid_filename('test@file.md') is False  # special chars
 		assert fs._is_valid_filename('.md') is False  # no name
+		assert fs._is_valid_filename('.json') is False  # no name
+		assert fs._is_valid_filename('.csv') is False  # no name

 	def test_filename_parsing(self, temp_filesystem):
 		"""Test filename parsing into name and extension."""
@@ -192,6 +277,14 @@ class TestFileSystem:
 		assert name == 'my_file'
 		assert ext == 'txt'  # Should be lowercased

+		name, ext = fs._parse_filename('data.json')
+		assert name == 'data'
+		assert ext == 'json'
+
+		name, ext = fs._parse_filename('users.CSV')
+		assert name == 'users'
+		assert ext == 'csv'  # Should be lowercased
+
 	def test_get_file(self, temp_filesystem):
 		"""Test getting files from the filesystem."""
 		fs = temp_filesystem
@@ -228,21 +321,21 @@ class TestFileSystem:
 		content = fs.display_file('invalid@name.md')
 		assert content is None

-	def test_read_file(self, temp_filesystem):
+	async def test_read_file(self, temp_filesystem: FileSystem):
 		"""Test reading file content with proper formatting."""
-		fs = temp_filesystem
+		fs: FileSystem = temp_filesystem

 		# Read existing empty file
-		result = fs.read_file('todo.md')
+		result = await fs.read_file('todo.md')
 		expected = 'Read from file todo.md.\n<content>\n\n</content>'
 		assert result == expected

 		# Read non-existent file
-		result = fs.read_file('nonexistent.md')
+		result = await fs.read_file('nonexistent.md')
 		assert result == "File 'nonexistent.md' not found."

 		# Read file with invalid name
-		result = fs.read_file('invalid@name.md')
+		result = await fs.read_file('invalid@name.md')
 		assert result == INVALID_FILENAME_ERROR_MESSAGE

 	async def test_write_file(self, temp_filesystem):
@@ -254,7 +347,7 @@ class TestFileSystem:
 		assert result == 'Data written to file results.md successfully.'

 		# Verify content was written
-		content = fs.read_file('results.md')
+		content = await fs.read_file('results.md')
 		assert '# Test Results\nThis is a test.' in content

 		# Write to new file
@@ -271,6 +364,56 @@ class TestFileSystem:
 		result = await fs.write_file('test.doc', 'content')
 		assert result == INVALID_FILENAME_ERROR_MESSAGE

+	async def test_write_json_file(self, temp_filesystem):
+		"""write_file stores .json content as a JsonFile and reports success."""
+		store = temp_filesystem
+
+		# Create data.json and check the status message.
+		payload = '{"users": [{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]}'
+		status = await store.write_file('data.json', payload)
+		assert status == 'Data written to file data.json successfully.'
+
+		# Reading it back returns a message that embeds the payload.
+		read_back = await store.read_file('data.json')
+		assert payload in read_back
+
+		# The registered file object has the JSON-specific type and the same content.
+		assert 'data.json' in store.files
+		stored = store.get_file('data.json')
+		assert stored is not None
+		assert isinstance(stored, JsonFile)
+		assert stored.content == payload
+
+		# A second, previously unknown JSON file can be written as well.
+		status = await store.write_file('config.json', '{"debug": true, "port": 8080}')
+		assert status == 'Data written to file config.json successfully.'
+		assert 'config.json' in store.files
+
+	async def test_write_csv_file(self, temp_filesystem):
+		"""write_file stores .csv content as a CsvFile and reports success."""
+		store = temp_filesystem
+
+		# Create users.csv and check the status message.
+		rows = 'name,age,city\nJohn,30,New York\nJane,25,London\nBob,35,Paris'
+		status = await store.write_file('users.csv', rows)
+		assert status == 'Data written to file users.csv successfully.'
+
+		# Reading it back returns a message that embeds the rows.
+		read_back = await store.read_file('users.csv')
+		assert rows in read_back
+
+		# The registered file object has the CSV-specific type and the same content.
+		assert 'users.csv' in store.files
+		stored = store.get_file('users.csv')
+		assert stored is not None
+		assert isinstance(stored, CsvFile)
+		assert stored.content == rows
+
+		# A second, previously unknown CSV file can be written as well.
+		status = await store.write_file('products.csv', 'id,name,price\n1,Laptop,999.99\n2,Mouse,29.99')
+		assert status == 'Data written to file products.csv successfully.'
+		assert 'products.csv' in store.files
+
 	async def test_append_file(self, temp_filesystem):
 		"""Test appending content to files."""
 		fs = temp_filesystem
@@ -294,6 +437,45 @@ class TestFileSystem:
 		result = await fs.append_file('invalid@name.md', 'content')
 		assert result == INVALID_FILENAME_ERROR_MESSAGE

+	async def test_append_json_file(self, temp_filesystem):
+		"""append_file adds raw text to the end of an existing JSON file."""
+		store = temp_filesystem
+
+		# Seed the file with a JSON document.
+		seed = '{"users": [{"name": "John", "age": 30}]}'
+		await store.write_file('data.json', seed)
+		# The appended fragment leaves the file as invalid JSON; only the append mechanics are checked here.
+		fragment = ', {"name": "Jane", "age": 25}'
+		status = await store.append_file('data.json', fragment)
+		assert status == 'Data appended to file data.json successfully.'
+
+		# The stored content is the seed followed by the fragment.
+		stored = store.get_file('data.json')
+		assert stored is not None
+		assert stored.content == seed + fragment
+
+	async def test_append_csv_file(self, temp_filesystem):
+		"""append_file adds rows to the end of an existing CSV file."""
+		store = temp_filesystem
+
+		# Seed the file with a header and one row.
+		await store.write_file('users.csv', 'name,age,city\nJohn,30,New York')
+
+		# The first append reports success.
+		status = await store.append_file('users.csv', '\nJane,25,London')
+		assert status == 'Data appended to file users.csv successfully.'
+
+		# The stored content now ends with the new row.
+		sheet = store.get_file('users.csv')
+		assert sheet is not None
+		two_rows = 'name,age,city\nJohn,30,New York\nJane,25,London'
+		assert sheet.content == two_rows
+
+		# A further append is visible through the same file object.
+		await store.append_file('users.csv', '\nBob,35,Paris')
+		three_rows = two_rows + '\nBob,35,Paris'
+		assert sheet.content == three_rows
+
 	async def test_save_extracted_content(self, temp_filesystem):
 		"""Test saving extracted content with auto-numbering."""
 		fs = temp_filesystem
@@ -412,6 +594,161 @@ class TestFileSystem:
 		# Clean up second filesystem
 		fs2.nuke()

+	async def test_complete_workflow_with_json_csv(self):
+		"""Run an end-to-end workflow on JSON and CSV files: write, append, overwrite, then restore from state."""
+		with tempfile.TemporaryDirectory() as tmp_dir:
+			# Create a filesystem rooted in the temporary directory, with its default files
+			fs = FileSystem(base_dir=tmp_dir, create_default_files=True)
+
+			# Write JSON configuration file
+			config_json = '{"app": {"name": "TestApp", "version": "1.0"}, "database": {"host": "localhost", "port": 5432}}'
+			await fs.write_file('config.json', config_json)
+
+			# Write CSV data file
+			users_csv = 'id,name,email,age\n1,John Doe,john@example.com,30\n2,Jane Smith,jane@example.com,25'
+			await fs.write_file('users.csv', users_csv)
+
+			# Append a third data row to the CSV
+			await fs.append_file('users.csv', '\n3,Bob Johnson,bob@example.com,35')
+
+			# Write config.json a second time; the assertions below expect the new content to replace the old
+			updated_config = '{"app": {"name": "TestApp", "version": "1.1"}, "database": {"host": "localhost", "port": 5432}, "features": {"logging": true}}'
+			await fs.write_file('config.json', updated_config)
+
+			# Create another JSON file for API responses
+			api_response = '{"status": "success", "data": [{"id": 1, "name": "Item 1"}, {"id": 2, "name": "Item 2"}]}'
+			await fs.write_file('api_response.json', api_response)
+
+			# Create a products CSV file
+			products_csv = (
+				'sku,name,price,category\nLAP001,Gaming Laptop,1299.99,Electronics\nMOU001,Wireless Mouse,29.99,Accessories'
+			)
+			await fs.write_file('products.csv', products_csv)
+
+			# Verify file listing: todo.md is expected alongside the four files written above
+			files = fs.list_files()
+			expected_files = ['todo.md', 'config.json', 'users.csv', 'api_response.json', 'products.csv']
+			assert len(files) == len(expected_files)
+			for expected_file in expected_files:
+				assert expected_file in files
+
+			# Verify JSON file contents (config.json must hold the second write)
+			config_file = fs.get_file('config.json')
+			assert config_file is not None
+			assert isinstance(config_file, JsonFile)
+			assert config_file.content == updated_config
+
+			api_file = fs.get_file('api_response.json')
+			assert api_file is not None
+			assert isinstance(api_file, JsonFile)
+			assert api_file.content == api_response
+
+			# Verify CSV file contents (users.csv must include the appended row)
+			users_file = fs.get_file('users.csv')
+			assert users_file is not None
+			assert isinstance(users_file, CsvFile)
+			expected_users_content = 'id,name,email,age\n1,John Doe,john@example.com,30\n2,Jane Smith,jane@example.com,25\n3,Bob Johnson,bob@example.com,35'
+			assert users_file.content == expected_users_content
+
+			products_file = fs.get_file('products.csv')
+			assert products_file is not None
+			assert isinstance(products_file, CsvFile)
+			assert products_file.content == products_csv
+
+			# Capture the state first, then nuke the original so the restore starts from the state alone
+			state = fs.get_state()
+			fs.nuke()
+
+			# Restore from state
+			fs2 = FileSystem.from_state(state)
+
+			# Verify the restored filesystem holds the same number of files
+			assert len(fs2.files) == len(expected_files)
+
+			# Verify JSON files were restored correctly
+			restored_config = fs2.get_file('config.json')
+			assert restored_config is not None
+			assert isinstance(restored_config, JsonFile)
+			assert restored_config.content == updated_config
+
+			restored_api = fs2.get_file('api_response.json')
+			assert restored_api is not None
+			assert isinstance(restored_api, JsonFile)
+			assert restored_api.content == api_response
+
+			# Verify CSV files were restored correctly
+			restored_users = fs2.get_file('users.csv')
+			assert restored_users is not None
+			assert isinstance(restored_users, CsvFile)
+			assert restored_users.content == expected_users_content
+
+			restored_products = fs2.get_file('products.csv')
+			assert restored_products is not None
+			assert isinstance(restored_products, CsvFile)
+			assert restored_products.content == products_csv
+
+			# Verify the restored files exist on disk again
+			for filename in expected_files:
+				if filename != 'todo.md':  # Skip todo.md as it's already tested
+					assert (fs2.data_dir / filename).exists()
+
+			fs2.nuke()
+
+	async def test_from_state_with_json_csv_files(self, temp_filesystem):
+		"""Check that FileSystem.from_state rebuilds JSON and CSV files with the right types and contents."""
+		fs = temp_filesystem
+
+		# Write two JSON files and two CSV files
+		await fs.write_file('data.json', '{"version": "1.0", "users": [{"name": "John", "age": 30}]}')
+		await fs.write_file('users.csv', 'name,age,city\nJohn,30,New York\nJane,25,London')
+		await fs.write_file('config.json', '{"debug": true, "port": 8080}')
+		await fs.write_file('products.csv', 'id,name,price\n1,Laptop,999.99\n2,Mouse,29.99')
+
+		# Get state
+		state = fs.get_state()
+
+		# Create new filesystem from state (the original is left in place here)
+		fs2 = FileSystem.from_state(state)
+
+		# Verify the restored filesystem uses the same base directory and file count
+		assert fs2.base_dir == fs.base_dir
+		assert len(fs2.files) == len(fs.files)
+
+		# Verify JSON file contents
+		json_file = fs2.get_file('data.json')
+		assert json_file is not None
+		assert isinstance(json_file, JsonFile)
+		assert json_file.content == '{"version": "1.0", "users": [{"name": "John", "age": 30}]}'
+
+		config_file = fs2.get_file('config.json')
+		assert config_file is not None
+		assert isinstance(config_file, JsonFile)
+		assert config_file.content == '{"debug": true, "port": 8080}'
+
+		# Verify CSV file contents
+		csv_file = fs2.get_file('users.csv')
+		assert csv_file is not None
+		assert isinstance(csv_file, CsvFile)
+		assert csv_file.content == 'name,age,city\nJohn,30,New York\nJane,25,London'
+
+		products_file = fs2.get_file('products.csv')
+		assert products_file is not None
+		assert isinstance(products_file, CsvFile)
+		assert products_file.content == 'id,name,price\n1,Laptop,999.99\n2,Mouse,29.99'
+
+		# Verify files exist on disk
+		assert (fs2.data_dir / 'data.json').exists()
+		assert (fs2.data_dir / 'users.csv').exists()
+		assert (fs2.data_dir / 'config.json').exists()
+		assert (fs2.data_dir / 'products.csv').exists()
+
+		# Verify disk contents match the in-memory contents for one JSON and one CSV file
+		assert (fs2.data_dir / 'data.json').read_text() == '{"version": "1.0", "users": [{"name": "John", "age": 30}]}'
+		assert (fs2.data_dir / 'users.csv').read_text() == 'name,age,city\nJohn,30,New York\nJane,25,London'
+
+		# Clean up second filesystem
+		fs2.nuke()
+
 	def test_nuke(self, empty_filesystem):
 		"""Test filesystem destruction."""
 		fs = empty_filesystem
--- a/tests/ci/test_sync_agent_events.py
+++ b/tests/ci/test_sync_agent_events.py
@@ -404,7 +404,7 @@ class TestEventValidation:
 				user_id='0683fb03-c5da-79c9-8000-d3a39c47c650',
 				agent_session_id='0683fb03-c5da-79c9-8000-d3a39c47c651',
 				task='test',
-				llm_model='gpt-4o',
+				llm_model='gpt-4.1',
 				done_output=None,
 				user_feedback_type=None,
 				user_comment=None,
--- a/tests/old/test_agent_actions.py
+++ b/tests/old/test_agent_actions.py
@@ -13,9 +13,9 @@ def llm():

 	# return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
 	return ChatAzureOpenAI(
-		model='gpt-4o',
+		model='gpt-4.1',
 	)
-	# return ChatOpenAI(model='gpt-4o-mini')
+	# return ChatOpenAI(model='gpt-4.1-mini')


@pytest.fixture
--- a/tests/old/test_core_functionality.py
+++ b/tests/old/test_core_functionality.py
@@ -79,7 +79,7 @@ class TestCoreFunctionality:
 	def llm(self):
 		"""Initialize language model for testing with minimal settings."""
 		return ChatOpenAI(
-			model='gpt-4o',
+			model='gpt-4.1',
 			temperature=0.0,
 		)

--- a/tests/old/test_dropdown_error.py
+++ b/tests/old/test_dropdown_error.py
@@ -14,7 +14,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from browser_use import Agent, AgentHistoryList
 from browser_use.llm import ChatOpenAI

-llm = ChatOpenAI(model='gpt-4o')
+llm = ChatOpenAI(model='gpt-4.1')
 browser_session = BrowserSession(browser_profile=BrowserProfile(headless=True))

 agent = Agent(
--- a/tests/old/test_gif_path.py
+++ b/tests/old/test_gif_path.py
@@ -14,7 +14,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from browser_use import Agent, AgentHistoryList
 from browser_use.llm import ChatOpenAI

-llm = ChatOpenAI(model='gpt-4o')
+llm = ChatOpenAI(model='gpt-4.1')

 browser_session = BrowserSession(browser_profile=BrowserProfile(headless=True, disable_security=True))

--- a/tests/old/test_mind2web.py
+++ b/tests/old/test_mind2web.py
@@ -50,7 +50,7 @@ def llm():

 	# return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
 	return ChatAzureOpenAI(
-		model='gpt-4o',
+		model='gpt-4.1',
 	)


--- a/tests/old/test_react_dropdown.py
+++ b/tests/old/test_react_dropdown.py
@@ -15,7 +15,7 @@ import asyncio
 from browser_use import Agent, AgentHistoryList
 from browser_use.llm import ChatOpenAI

-llm = ChatOpenAI(model='gpt-4o')
+llm = ChatOpenAI(model='gpt-4.1')

 browser_session = BrowserSession(browser_profile=BrowserProfile(headless=True, disable_security=True))

--- a/tests/old/test_self_registered_actions.py
+++ b/tests/old/test_self_registered_actions.py
@@ -78,7 +78,7 @@ def llm():

 	# return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
 	return ChatAzureOpenAI(
-		model='gpt-4o',
+		model='gpt-4.1',
 	)


--- a/tests/old/test_vision.py
+++ b/tests/old/test_vision.py
@@ -16,7 +16,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from browser_use import Agent, AgentHistoryList, BrowserSession, Controller
 from browser_use.llm import ChatOpenAI

-llm = ChatOpenAI(model='gpt-4o')
+llm = ChatOpenAI(model='gpt-4.1')
 controller = Controller()

 # use this test to ask the model questions about the page like
--- a/tests/old/test_wait_for_element.py
+++ b/tests/old/test_wait_for_element.py
@@ -21,7 +21,7 @@ from browser_use.browser import BrowserProfile, BrowserSession
 load_dotenv()

 # Initialize language model and controller.
-llm = ChatOpenAI(model='gpt-4o')
+llm = ChatOpenAI(model='gpt-4.1')
 controller = Controller()