From f8686f401217464889cfd775e8066f853d63d01c Mon Sep 17 00:00:00 2001 From: skools-here Date: Tue, 4 Nov 2025 19:43:06 +0530 Subject: [PATCH 01/17] Fix: display Chinese characters correctly in conversation logs by using ensure_ascii=False --- browser_use/agent/message_manager/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/browser_use/agent/message_manager/utils.py b/browser_use/agent/message_manager/utils.py index f83eba735..1fc25e534 100644 --- a/browser_use/agent/message_manager/utils.py +++ b/browser_use/agent/message_manager/utils.py @@ -42,8 +42,7 @@ async def _format_conversation(messages: list[BaseMessage], response: Any) -> st lines.append('') # Empty line after each message # Format response - lines.append(' RESPONSE') - lines.append(json.dumps(json.loads(response.model_dump_json(exclude_unset=True)), indent=2)) + lines.append(json.dumps(json.loads(response.model_dump_json(exclude_unset=True)), indent=2, ensure_ascii=False)) return '\n'.join(lines) From 6a378758e1e3768f5dcd58d4fc4aba7fe8b0d226 Mon Sep 17 00:00:00 2001 From: Alezander9 Date: Wed, 5 Nov 2025 13:37:56 -0800 Subject: [PATCH 02/17] Add cloud.md file --- CLOUD.md | 2701 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2701 insertions(+) create mode 100644 CLOUD.md diff --git a/CLOUD.md b/CLOUD.md new file mode 100644 index 000000000..2fb6e0252 --- /dev/null +++ b/CLOUD.md @@ -0,0 +1,2701 @@ +# Cloud.md +Instructions for AI Agents to assist the user in using Browser Use Cloud + +## What is Browser Use Cloud? +Browser Use is a framework for AI Agents that interact with web browsers. +Browser Use Cloud is the fully hosted product by Browser Use that lets users automate web-based tasks. +Users submit tasks in the form of prompts (text and optionally files and images) and through API requests, remote browsers and agents are spun up to complete these tasks on-demand. +Pricing is usage-based and adjudicated through an API key system. 
+Billing, API Key management, live session viewing, task results, account settings, and profile management are done through the Browser Use Cloud web app at https://cloud.browser-use.com/ + +## Core Concepts: +The key product of Browser Use Cloud is the completion of user tasks. +- A Session is the complete package of infrastructure Browser Use Cloud provides. Sessions are currently limited to 15 minutes of runtime. A session has a Browser running, and users can run Agents in a session to complete tasks. A Session is limited to one and only one Browser, which will be open for the entire duration of the Session. Users can run a maximum of one Agent on a Session at a time, which will control the Browser. After one Agent is done, the user can run another within the same Session, limited only by the Session maximum duration. +- A Browser is simply a browser running on Browser Use Cloud infrastructure (a Session). Browsers (as a service) are controllable via CDP url. The user can use an Agent to control a Browser, or can request the CDP url and control the hosted browser with whatever scripts or external automations they desire. However, we mainly encourage controlling Browsers with Browser Use Agents, as they are optimized to work together. These official Browser Use browsers are forked from Chromium, but have a lot of proprietary optimizations made to them so that they are extremely fast and lightweight, untraceable and not detectable as bots, and come preloaded with adblockers and other quality-of-life features. Using Browser Use hosted browsers provides significant performance improvements. +- An Agent is the collection of tools, prompts, and framework that enables a Large Language Model to interact with a Browser. The Agent's goal is to complete a given user Task. The Agent goes through an iterative process of many steps to complete this. For each step, the Agent is given the page state (including a screenshot) of the Browser, and then it calls tools to interact with the Browser. 
After many steps, the Agent will mark the task as complete, either successfully or unsuccessfully and return a result, which is a block of text and optionally files. After completion, an independent strict judge will examine the Agent's trajectory and give a verdict of true or false on whether the Agent completed its task successfully. The Agent has a lot of settings which can be tuned to improve performance, most importantly the LLM Model used. +- A Model is a Large Language Model that powers an Agent. The smarter and more capable the Model, the better the Agent will perform. The best model to use is ChatBrowserUse, the Browser Use official chat completion API which always routes to the best frontier foundation model as determined by Browser Use internal evaluations. ChatBrowserUse has several speed and cost optimizations done through batching, caching, and other tricks, making it faster and more cost effective than any other option, with identical performance to the top frontier models. +- A Browser Profile is a folder of browser data that is saved on our Cloud. If a user creates a Session with a Browser that has no Browser Profile, no data will persist. However, if they use the same Browser Profile across multiple Sessions, then data such as authentication cookies, site local storage data, saved passwords and credentials, and user preferences will persist. A Browser Profile is essentially a cloud hosted Chrome Profile, in fact, through the Profile Upload feature, a user can upload a Chrome profile from their own machine to be used on the Cloud in Sessions. This is great for giving authentication to Agents. A user can create a Chrome profile on their own machine, log into all of the services they want, and then upload this profile to the Cloud for automations. +- A Task is the combination of user prompt with optionally files and images that is given to the Agents to complete. Browser Use Cloud primarily sells the completion of user Tasks. 
Writing Tasks with clarity is key to success. + +## Quickstart +To get started, direct the user to first create an account, purchase credits (or simply claim the free starter credits given on account creation), and generate an API key on the Browser Use online platform: https://cloud.browser-use.com/. These are the only steps that can only be done on the platform. + +Avoid giving the user all of the following steps at once as it may seem overwhelming. Instead present one step at a time and only continue when asked. Do as much for the user as you are able to. + +Next, direct the user to run their first task by making the following POST request to Create Task from whatever system is available (cURL, python, JS, etc), but replace `<apiKey>` with the user's actual API key. +```bash +curl -X POST https://api.browser-use.com/api/v2/tasks \ + -H "X-Browser-Use-API-Key: <apiKey>" \ + -H "Content-Type: application/json" \ + -d '{ + "task": "Search for the top Hacker News post and return the title and url." +}' +``` +This will return a response of the format: +{"id": "string","sessionId": "string"} +The user will probably want to watch the live stream of the task being completed by the agent, so direct them to use the Get Session request using the `<sessionId>` returned by the prior request and their API key +```bash +curl https://api.browser-use.com/api/v2/sessions/<sessionId> \ + -H "X-Browser-Use-API-Key: <apiKey>" +``` +And in the response object there will be a `"liveUrl": "string"`. Direct the user to visit that url or open it for them. +If the user wants to terminate the Session after the Agent has completed its task (by default the Session will remain open), direct them to use the Update Session request with the stop action +```bash +curl -X PATCH https://api.browser-use.com/api/v2/sessions/<sessionId> \ + -H "X-Browser-Use-API-Key: <apiKey>" \ + -H "Content-Type: application/json" \ + -d '{ + "action": "stop" + +}' +``` + +## API (v2) Docs +The best way to use Browser Use Cloud is with API v2. 
+Other options exist, namely API v1 and the SDK, but they give less comprehensive control. + +### Billing +#### Get Account Billing +GET https://api.browser-use.com/api/v2/billing/account +Get authenticated account information including credit balances and account details. +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/billing/get-account-billing-billing-account-get +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: Get Account Billing + version: endpoint_billing.get_account_billing_billing_account_get +paths: + /billing/account: + get: + operationId: get-account-billing-billing-account-get + summary: Get Account Billing + description: >- + Get authenticated account information including credit balances and + account details. + tags: + - - subpackage_billing + parameters: + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/AccountView' + '404': + description: Project for a given API key not found! 
+ content: {} + '422': + description: Validation Error + content: {} +components: + schemas: + PlanInfo: + type: object + properties: + planName: + type: string + subscriptionStatus: + type: + - string + - 'null' + subscriptionId: + type: + - string + - 'null' + subscriptionCurrentPeriodEnd: + type: + - string + - 'null' + subscriptionCanceledAt: + type: + - string + - 'null' + required: + - planName + - subscriptionStatus + - subscriptionId + - subscriptionCurrentPeriodEnd + - subscriptionCanceledAt + AccountView: + type: object + properties: + name: + type: + - string + - 'null' + monthlyCreditsBalanceUsd: + type: number + format: double + additionalCreditsBalanceUsd: + type: number + format: double + totalCreditsBalanceUsd: + type: number + format: double + rateLimit: + type: integer + planInfo: + $ref: '#/components/schemas/PlanInfo' + projectId: + type: string + format: uuid + required: + - monthlyCreditsBalanceUsd + - additionalCreditsBalanceUsd + - totalCreditsBalanceUsd + - rateLimit + - planInfo + - projectId + +``` + +### Tasks + +#### List Tasks +GET https://api.browser-use.com/api/v2/tasks +Get paginated list of AI agent tasks with optional filtering by session and status. +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/tasks/list-tasks-tasks-get +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: List Tasks + version: endpoint_tasks.list_tasks_tasks_get +paths: + /tasks: + get: + operationId: list-tasks-tasks-get + summary: List Tasks + description: >- + Get paginated list of AI agent tasks with optional filtering by session + and status. 
+ tags: + - - subpackage_tasks + parameters: + - name: pageSize + in: query + required: false + schema: + type: integer + - name: pageNumber + in: query + required: false + schema: + type: integer + - name: sessionId + in: query + required: false + schema: + type: + - string + - 'null' + format: uuid + - name: filterBy + in: query + required: false + schema: + oneOf: + - $ref: '#/components/schemas/TaskStatus' + - type: 'null' + - name: after + in: query + required: false + schema: + type: + - string + - 'null' + format: date-time + - name: before + in: query + required: false + schema: + type: + - string + - 'null' + format: date-time + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/TaskListResponse' + '422': + description: Validation Error + content: {} +components: + schemas: + TaskStatus: + type: string + enum: + - value: started + - value: paused + - value: finished + - value: stopped + TaskItemView: + type: object + properties: + id: + type: string + format: uuid + sessionId: + type: string + format: uuid + llm: + type: string + task: + type: string + status: + $ref: '#/components/schemas/TaskStatus' + startedAt: + type: string + format: date-time + finishedAt: + type: + - string + - 'null' + format: date-time + metadata: + type: object + additionalProperties: + description: Any type + output: + type: + - string + - 'null' + browserUseVersion: + type: + - string + - 'null' + isSuccess: + type: + - boolean + - 'null' + required: + - id + - sessionId + - llm + - task + - status + - startedAt + TaskListResponse: + type: object + properties: + items: + type: array + items: + $ref: '#/components/schemas/TaskItemView' + totalItems: + type: integer + pageNumber: + type: integer + pageSize: + type: integer + required: + - items + - totalItems + - pageNumber + - pageSize + +``` + +#### Create Task +POST 
https://api.browser-use.com/api/v2/tasks +Content-Type: application/json +You can either: +1. Start a new task (auto creates a new simple session) +2. Start a new task in an existing session (you can create a custom session before starting the task and reuse it for follow-up tasks) +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/tasks/create-task-tasks-post +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: Create Task + version: endpoint_tasks.create_task_tasks_post +paths: + /tasks: + post: + operationId: create-task-tasks-post + summary: Create Task + description: >- + You can either: + + 1. Start a new task (auto creates a new simple session) + + 2. Start a new task in an existing session (you can create a custom + session before starting the task and reuse it for follow-up tasks) + tags: + - - subpackage_tasks + parameters: + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '202': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/TaskCreatedResponse' + '400': + description: Session is stopped or has running task + content: {} + '404': + description: Session not found + content: {} + '422': + description: Request validation failed + content: {} + '429': + description: Too many concurrent active sessions + content: {} + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateTaskRequest' +components: + schemas: + SupportedLLMs: + type: string + enum: + - value: browser-use-llm + - value: gpt-4.1 + - value: gpt-4.1-mini + - value: o4-mini + - value: o3 + - value: gemini-2.5-flash + - value: gemini-2.5-pro + - value: gemini-flash-latest + - value: gemini-flash-lite-latest + - value: claude-sonnet-4-20250514 + - value: gpt-4o + - value: gpt-4o-mini + - value: llama-4-maverick-17b-128e-instruct + - value: claude-3-7-sonnet-20250219 + CreateTaskRequestVision: + oneOf: + - type: boolean + 
- type: string + enum: + - type: stringLiteral + value: auto + CreateTaskRequest: + type: object + properties: + task: + type: string + llm: + $ref: '#/components/schemas/SupportedLLMs' + startUrl: + type: + - string + - 'null' + maxSteps: + type: integer + structuredOutput: + type: + - string + - 'null' + sessionId: + type: + - string + - 'null' + format: uuid + metadata: + type: + - object + - 'null' + additionalProperties: + type: string + secrets: + type: + - object + - 'null' + additionalProperties: + type: string + allowedDomains: + type: + - array + - 'null' + items: + type: string + opVaultId: + type: + - string + - 'null' + highlightElements: + type: boolean + flashMode: + type: boolean + thinking: + type: boolean + vision: + $ref: '#/components/schemas/CreateTaskRequestVision' + systemPromptExtension: + type: string + required: + - task + TaskCreatedResponse: + type: object + properties: + id: + type: string + format: uuid + sessionId: + type: string + format: uuid + required: + - id + - sessionId + +``` + +#### Get Task +GET https://api.browser-use.com/api/v2/tasks/{task_id} +Get detailed task information including status, progress, steps, and file outputs. +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/tasks/get-task-tasks-task-id-get +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: Get Task + version: endpoint_tasks.get_task_tasks__task_id__get +paths: + /tasks/{task_id}: + get: + operationId: get-task-tasks-task-id-get + summary: Get Task + description: >- + Get detailed task information including status, progress, steps, and + file outputs. 
+ tags: + - - subpackage_tasks + parameters: + - name: task_id + in: path + required: true + schema: + type: string + format: uuid + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/TaskView' + '404': + description: Task not found + content: {} + '422': + description: Validation Error + content: {} +components: + schemas: + TaskStatus: + type: string + enum: + - value: started + - value: paused + - value: finished + - value: stopped + TaskStepView: + type: object + properties: + number: + type: integer + memory: + type: string + evaluationPreviousGoal: + type: string + nextGoal: + type: string + url: + type: string + screenshotUrl: + type: + - string + - 'null' + actions: + type: array + items: + type: string + required: + - number + - memory + - evaluationPreviousGoal + - nextGoal + - url + - actions + FileView: + type: object + properties: + id: + type: string + format: uuid + fileName: + type: string + required: + - id + - fileName + TaskView: + type: object + properties: + id: + type: string + format: uuid + sessionId: + type: string + format: uuid + llm: + type: string + task: + type: string + status: + $ref: '#/components/schemas/TaskStatus' + startedAt: + type: string + format: date-time + finishedAt: + type: + - string + - 'null' + format: date-time + metadata: + type: object + additionalProperties: + description: Any type + steps: + type: array + items: + $ref: '#/components/schemas/TaskStepView' + output: + type: + - string + - 'null' + outputFiles: + type: array + items: + $ref: '#/components/schemas/FileView' + browserUseVersion: + type: + - string + - 'null' + isSuccess: + type: + - boolean + - 'null' + required: + - id + - sessionId + - llm + - task + - status + - startedAt + - steps + - outputFiles +``` + +#### Update Task +PATCH https://api.browser-use.com/api/v2/tasks/{task_id} 
+Content-Type: application/json +Control task execution with stop, pause, resume, or stop task and session actions. +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/tasks/update-task-tasks-task-id-patch +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: Update Task + version: endpoint_tasks.update_task_tasks__task_id__patch +paths: + /tasks/{task_id}: + patch: + operationId: update-task-tasks-task-id-patch + summary: Update Task + description: >- + Control task execution with stop, pause, resume, or stop task and + session actions. + tags: + - - subpackage_tasks + parameters: + - name: task_id + in: path + required: true + schema: + type: string + format: uuid + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/TaskView' + '404': + description: Task not found + content: {} + '422': + description: Request validation failed + content: {} + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateTaskRequest' +components: + schemas: + TaskUpdateAction: + type: string + enum: + - value: stop + - value: pause + - value: resume + - value: stop_task_and_session + UpdateTaskRequest: + type: object + properties: + action: + $ref: '#/components/schemas/TaskUpdateAction' + required: + - action + TaskStatus: + type: string + enum: + - value: started + - value: paused + - value: finished + - value: stopped + TaskStepView: + type: object + properties: + number: + type: integer + memory: + type: string + evaluationPreviousGoal: + type: string + nextGoal: + type: string + url: + type: string + screenshotUrl: + type: + - string + - 'null' + actions: + type: array + items: + type: string + required: + - number + - memory + - evaluationPreviousGoal + - nextGoal + - url + - actions + FileView: + type: object + properties: + id: + type: string + format: 
uuid + fileName: + type: string + required: + - id + - fileName + TaskView: + type: object + properties: + id: + type: string + format: uuid + sessionId: + type: string + format: uuid + llm: + type: string + task: + type: string + status: + $ref: '#/components/schemas/TaskStatus' + startedAt: + type: string + format: date-time + finishedAt: + type: + - string + - 'null' + format: date-time + metadata: + type: object + additionalProperties: + description: Any type + steps: + type: array + items: + $ref: '#/components/schemas/TaskStepView' + output: + type: + - string + - 'null' + outputFiles: + type: array + items: + $ref: '#/components/schemas/FileView' + browserUseVersion: + type: + - string + - 'null' + isSuccess: + type: + - boolean + - 'null' + required: + - id + - sessionId + - llm + - task + - status + - startedAt + - steps + - outputFiles +``` + +#### Get Task Logs +GET https://api.browser-use.com/api/v2/tasks/{task_id}/logs +Get secure download URL for task execution logs with step-by-step details. +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/tasks/get-task-logs-tasks-task-id-logs-get +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: Get Task Logs + version: endpoint_tasks.get_task_logs_tasks__task_id__logs_get +paths: + /tasks/{task_id}/logs: + get: + operationId: get-task-logs-tasks-task-id-logs-get + summary: Get Task Logs + description: >- + Get secure download URL for task execution logs with step-by-step + details. 
+ tags: + - - subpackage_tasks + parameters: + - name: task_id + in: path + required: true + schema: + type: string + format: uuid + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/TaskLogFileResponse' + '404': + description: Task not found + content: {} + '422': + description: Validation Error + content: {} + '500': + description: Failed to generate download URL + content: {} +components: + schemas: + TaskLogFileResponse: + type: object + properties: + downloadUrl: + type: string + required: + - downloadUrl +``` + +### Sessions + +#### List Sessions +GET https://api.browser-use.com/api/v2/sessions +Get paginated list of AI agent sessions with optional status filtering. +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/sessions/list-sessions-sessions-get +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: List Sessions + version: endpoint_sessions.list_sessions_sessions_get +paths: + /sessions: + get: + operationId: list-sessions-sessions-get + summary: List Sessions + description: Get paginated list of AI agent sessions with optional status filtering. 
+ tags: + - - subpackage_sessions + parameters: + - name: pageSize + in: query + required: false + schema: + type: integer + - name: pageNumber + in: query + required: false + schema: + type: integer + - name: filterBy + in: query + required: false + schema: + oneOf: + - $ref: '#/components/schemas/SessionStatus' + - type: 'null' + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/SessionListResponse' + '422': + description: Validation Error + content: {} +components: + schemas: + SessionStatus: + type: string + enum: + - value: active + - value: stopped + SessionItemView: + type: object + properties: + id: + type: string + format: uuid + status: + $ref: '#/components/schemas/SessionStatus' + liveUrl: + type: + - string + - 'null' + startedAt: + type: string + format: date-time + finishedAt: + type: + - string + - 'null' + format: date-time + required: + - id + - status + - startedAt + SessionListResponse: + type: object + properties: + items: + type: array + items: + $ref: '#/components/schemas/SessionItemView' + totalItems: + type: integer + pageNumber: + type: integer + pageSize: + type: integer + required: + - items + - totalItems + - pageNumber + - pageSize +``` + +#### Create Session +POST https://api.browser-use.com/api/v2/sessions +Content-Type: application/json +Create a new session with a new task. +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/sessions/create-session-sessions-post +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: Create Session + version: endpoint_sessions.create_session_sessions_post +paths: + /sessions: + post: + operationId: create-session-sessions-post + summary: Create Session + description: Create a new session with a new task. 
+ tags: + - - subpackage_sessions + parameters: + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '201': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/SessionItemView' + '404': + description: Profile not found + content: {} + '422': + description: Request validation failed + content: {} + '429': + description: Too many concurrent active sessions + content: {} + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateSessionRequest' +components: + schemas: + ProxyCountryCode: + type: string + enum: + - value: us + - value: uk + - value: fr + - value: it + - value: jp + - value: au + - value: de + - value: fi + - value: ca + - value: in + CreateSessionRequest: + type: object + properties: + profileId: + type: + - string + - 'null' + format: uuid + proxyCountryCode: + oneOf: + - $ref: '#/components/schemas/ProxyCountryCode' + - type: 'null' + startUrl: + type: + - string + - 'null' + SessionStatus: + type: string + enum: + - value: active + - value: stopped + SessionItemView: + type: object + properties: + id: + type: string + format: uuid + status: + $ref: '#/components/schemas/SessionStatus' + liveUrl: + type: + - string + - 'null' + startedAt: + type: string + format: date-time + finishedAt: + type: + - string + - 'null' + format: date-time + required: + - id + - status + - startedAt +``` + +#### Get Session +GET https://api.browser-use.com/api/v2/sessions/{session_id} +Get detailed session information including status, URLs, and task details. 
+Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/sessions/get-session-sessions-session-id-get +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: Get Session + version: endpoint_sessions.get_session_sessions__session_id__get +paths: + /sessions/{session_id}: + get: + operationId: get-session-sessions-session-id-get + summary: Get Session + description: >- + Get detailed session information including status, URLs, and task + details. + tags: + - - subpackage_sessions + parameters: + - name: session_id + in: path + required: true + schema: + type: string + format: uuid + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/SessionView' + '404': + description: Session not found + content: {} + '422': + description: Validation Error + content: {} +components: + schemas: + SessionStatus: + type: string + enum: + - value: active + - value: stopped + TaskStatus: + type: string + enum: + - value: started + - value: paused + - value: finished + - value: stopped + TaskItemView: + type: object + properties: + id: + type: string + format: uuid + sessionId: + type: string + format: uuid + llm: + type: string + task: + type: string + status: + $ref: '#/components/schemas/TaskStatus' + startedAt: + type: string + format: date-time + finishedAt: + type: + - string + - 'null' + format: date-time + metadata: + type: object + additionalProperties: + description: Any type + output: + type: + - string + - 'null' + browserUseVersion: + type: + - string + - 'null' + isSuccess: + type: + - boolean + - 'null' + required: + - id + - sessionId + - llm + - task + - status + - startedAt + SessionView: + type: object + properties: + id: + type: string + format: uuid + status: + $ref: '#/components/schemas/SessionStatus' + liveUrl: + type: + - string + - 'null' + startedAt: + type: string + format: 
date-time + finishedAt: + type: + - string + - 'null' + format: date-time + tasks: + type: array + items: + $ref: '#/components/schemas/TaskItemView' + publicShareUrl: + type: + - string + - 'null' + required: + - id + - status + - startedAt + - tasks +``` + +#### Update Session +PATCH https://api.browser-use.com/api/v2/sessions/{session_id} +Content-Type: application/json +Stop a session and all its running tasks. +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/sessions/update-session-sessions-session-id-patch +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: Update Session + version: endpoint_sessions.update_session_sessions__session_id__patch +paths: + /sessions/{session_id}: + patch: + operationId: update-session-sessions-session-id-patch + summary: Update Session + description: Stop a session and all its running tasks. + tags: + - - subpackage_sessions + parameters: + - name: session_id + in: path + required: true + schema: + type: string + format: uuid + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/SessionView' + '404': + description: Session not found + content: {} + '422': + description: Request validation failed + content: {} + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateSessionRequest' +components: + schemas: + SessionUpdateAction: + type: string + enum: + - value: stop + UpdateSessionRequest: + type: object + properties: + action: + $ref: '#/components/schemas/SessionUpdateAction' + required: + - action + SessionStatus: + type: string + enum: + - value: active + - value: stopped + TaskStatus: + type: string + enum: + - value: started + - value: paused + - value: finished + - value: stopped + TaskItemView: + type: object + properties: + id: + type: string + format: uuid + sessionId: + type: string + 
format: uuid + llm: + type: string + task: + type: string + status: + $ref: '#/components/schemas/TaskStatus' + startedAt: + type: string + format: date-time + finishedAt: + type: + - string + - 'null' + format: date-time + metadata: + type: object + additionalProperties: + description: Any type + output: + type: + - string + - 'null' + browserUseVersion: + type: + - string + - 'null' + isSuccess: + type: + - boolean + - 'null' + required: + - id + - sessionId + - llm + - task + - status + - startedAt + SessionView: + type: object + properties: + id: + type: string + format: uuid + status: + $ref: '#/components/schemas/SessionStatus' + liveUrl: + type: + - string + - 'null' + startedAt: + type: string + format: date-time + finishedAt: + type: + - string + - 'null' + format: date-time + tasks: + type: array + items: + $ref: '#/components/schemas/TaskItemView' + publicShareUrl: + type: + - string + - 'null' + required: + - id + - status + - startedAt + - tasks +``` + +#### Get Session Public Share +GET https://api.browser-use.com/api/v2/sessions/{session_id}/public-share +Get public share information including URL and usage statistics. +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/sessions/get-session-public-share-sessions-session-id-public-share-get +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: Get Session Public Share + version: >- + endpoint_sessions.get_session_public_share_sessions__session_id__public_share_get +paths: + /sessions/{session_id}/public-share: + get: + operationId: get-session-public-share-sessions-session-id-public-share-get + summary: Get Session Public Share + description: Get public share information including URL and usage statistics. 
+ tags: + - - subpackage_sessions + parameters: + - name: session_id + in: path + required: true + schema: + type: string + format: uuid + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/ShareView' + '404': + description: Session or share not found + content: {} + '422': + description: Validation Error + content: {} +components: + schemas: + ShareView: + type: object + properties: + shareToken: + type: string + shareUrl: + type: string + viewCount: + type: integer + lastViewedAt: + type: + - string + - 'null' + format: date-time + required: + - shareToken + - shareUrl + - viewCount +``` + +#### Create Session Public Share +POST https://api.browser-use.com/api/v2/sessions/{session_id}/public-share +Create or return existing public share for a session. +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/sessions/create-session-public-share-sessions-session-id-public-share-post +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: Create Session Public Share + version: >- + endpoint_sessions.create_session_public_share_sessions__session_id__public_share_post +paths: + /sessions/{session_id}/public-share: + post: + operationId: create-session-public-share-sessions-session-id-public-share-post + summary: Create Session Public Share + description: Create or return existing public share for a session. 
+ tags: + - - subpackage_sessions + parameters: + - name: session_id + in: path + required: true + schema: + type: string + format: uuid + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '201': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/ShareView' + '404': + description: Session not found + content: {} + '422': + description: Validation Error + content: {} +components: + schemas: + ShareView: + type: object + properties: + shareToken: + type: string + shareUrl: + type: string + viewCount: + type: integer + lastViewedAt: + type: + - string + - 'null' + format: date-time + required: + - shareToken + - shareUrl + - viewCount +``` + +#### Delete Session Public Share +DELETE https://api.browser-use.com/api/v2/sessions/{session_id}/public-share +Remove public share for a session. +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/sessions/delete-session-public-share-sessions-session-id-public-share-delete +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: Delete Session Public Share + version: >- + endpoint_sessions.delete_session_public_share_sessions__session_id__public_share_delete +paths: + /sessions/{session_id}/public-share: + delete: + operationId: delete-session-public-share-sessions-session-id-public-share-delete + summary: Delete Session Public Share + description: Remove public share for a session. 
+ tags: + - - subpackage_sessions + parameters: + - name: session_id + in: path + required: true + schema: + type: string + format: uuid + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '204': + description: Successful Response + content: + application/json: + schema: + $ref: >- + #/components/schemas/Sessions_delete_session_public_share_sessions__session_id__public_share_delete_Response_204 + '404': + description: Session not found + content: {} + '422': + description: Validation Error + content: {} +components: + schemas: + Sessions_delete_session_public_share_sessions__session_id__public_share_delete_Response_204: + type: object + properties: {} +``` + +### Files + +#### User Upload File Presigned Url +POST https://api.browser-use.com/api/v2/files/sessions/{session_id}/presigned-url +Content-Type: application/json +Generate a secure presigned URL for uploading files that AI agents can use during tasks. +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/files/user-upload-file-presigned-url-files-sessions-session-id-presigned-url-post +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: User Upload File Presigned Url + version: >- + endpoint_files.user_upload_file_presigned_url_files_sessions__session_id__presigned_url_post +paths: + /files/sessions/{session_id}/presigned-url: + post: + operationId: >- + user-upload-file-presigned-url-files-sessions-session-id-presigned-url-post + summary: User Upload File Presigned Url + description: >- + Generate a secure presigned URL for uploading files that AI agents can + use during tasks. 
+ tags: + - - subpackage_files + parameters: + - name: session_id + in: path + required: true + schema: + type: string + format: uuid + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/UploadFilePresignedUrlResponse' + '400': + description: Unsupported content type + content: {} + '404': + description: Session not found + content: {} + '422': + description: Validation Error + content: {} + '500': + description: Failed to generate upload URL + content: {} + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/UploadFileRequest' +components: + schemas: + UploadFileRequestContentType: + type: string + enum: + - value: image/jpg + - value: image/jpeg + - value: image/png + - value: image/gif + - value: image/webp + - value: image/svg+xml + - value: application/pdf + - value: application/msword + - value: >- + application/vnd.openxmlformats-officedocument.wordprocessingml.document + - value: application/vnd.ms-excel + - value: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet + - value: text/plain + - value: text/csv + - value: text/markdown + UploadFileRequest: + type: object + properties: + fileName: + type: string + contentType: + $ref: '#/components/schemas/UploadFileRequestContentType' + sizeBytes: + type: integer + required: + - fileName + - contentType + - sizeBytes + UploadFilePresignedUrlResponse: + type: object + properties: + url: + type: string + method: + type: string + enum: + - type: stringLiteral + value: POST + fields: + type: object + additionalProperties: + type: string + fileName: + type: string + expiresIn: + type: integer + required: + - url + - method + - fields + - fileName + - expiresIn +``` + +#### User Upload File Presigned Url Browser +POST https://api.browser-use.com/api/v2/files/browsers/{session_id}/presigned-url +Content-Type: 
application/json +Generate a secure presigned URL for uploading files that AI agents can use during tasks. +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/files/user-upload-file-presigned-url-browser-files-browsers-session-id-presigned-url-post +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: User Upload File Presigned Url Browser + version: >- + endpoint_files.user_upload_file_presigned_url_browser_files_browsers__session_id__presigned_url_post +paths: + /files/browsers/{session_id}/presigned-url: + post: + operationId: >- + user-upload-file-presigned-url-browser-files-browsers-session-id-presigned-url-post + summary: User Upload File Presigned Url Browser + description: >- + Generate a secure presigned URL for uploading files that AI agents can + use during tasks. + tags: + - - subpackage_files + parameters: + - name: session_id + in: path + required: true + schema: + type: string + format: uuid + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/UploadFilePresignedUrlResponse' + '400': + description: Unsupported content type + content: {} + '404': + description: Session not found + content: {} + '422': + description: Validation Error + content: {} + '500': + description: Failed to generate upload URL + content: {} + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/UploadFileRequest' +components: + schemas: + UploadFileRequestContentType: + type: string + enum: + - value: image/jpg + - value: image/jpeg + - value: image/png + - value: image/gif + - value: image/webp + - value: image/svg+xml + - value: application/pdf + - value: application/msword + - value: >- + application/vnd.openxmlformats-officedocument.wordprocessingml.document + - value: application/vnd.ms-excel + - value: 
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet + - value: text/plain + - value: text/csv + - value: text/markdown + UploadFileRequest: + type: object + properties: + fileName: + type: string + contentType: + $ref: '#/components/schemas/UploadFileRequestContentType' + sizeBytes: + type: integer + required: + - fileName + - contentType + - sizeBytes + UploadFilePresignedUrlResponse: + type: object + properties: + url: + type: string + method: + type: string + enum: + - type: stringLiteral + value: POST + fields: + type: object + additionalProperties: + type: string + fileName: + type: string + expiresIn: + type: integer + required: + - url + - method + - fields + - fileName + - expiresIn +``` + +#### Get Task Output File Presigned Url +GET https://api.browser-use.com/api/v2/files/tasks/{task_id}/output-files/{file_id} +Get secure download URL for an output file generated by the AI agent. +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/files/get-task-output-file-presigned-url-files-tasks-task-id-output-files-file-id-get +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: Get Task Output File Presigned Url + version: >- + endpoint_files.get_task_output_file_presigned_url_files_tasks__task_id__output_files__file_id__get +paths: + /files/tasks/{task_id}/output-files/{file_id}: + get: + operationId: >- + get-task-output-file-presigned-url-files-tasks-task-id-output-files-file-id-get + summary: Get Task Output File Presigned Url + description: Get secure download URL for an output file generated by the AI agent. 
+ tags: + - - subpackage_files + parameters: + - name: task_id + in: path + required: true + schema: + type: string + format: uuid + - name: file_id + in: path + required: true + schema: + type: string + format: uuid + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/TaskOutputFileResponse' + '404': + description: Task or file not found + content: {} + '422': + description: Validation Error + content: {} + '500': + description: Failed to generate download URL + content: {} +components: + schemas: + TaskOutputFileResponse: + type: object + properties: + id: + type: string + format: uuid + fileName: + type: string + downloadUrl: + type: string + required: + - id + - fileName + - downloadUrl +``` + +### Profiles + +#### List Profiles +GET https://api.browser-use.com/api/v2/profiles +Get paginated list of profiles. +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/profiles/list-profiles-profiles-get +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: List Profiles + version: endpoint_profiles.list_profiles_profiles_get +paths: + /profiles: + get: + operationId: list-profiles-profiles-get + summary: List Profiles + description: Get paginated list of profiles. 
+ tags: + - - subpackage_profiles + parameters: + - name: pageSize + in: query + required: false + schema: + type: integer + - name: pageNumber + in: query + required: false + schema: + type: integer + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/ProfileListResponse' + '422': + description: Validation Error + content: {} +components: + schemas: + ProfileView: + type: object + properties: + id: + type: string + format: uuid + name: + type: + - string + - 'null' + lastUsedAt: + type: + - string + - 'null' + format: date-time + createdAt: + type: string + format: date-time + updatedAt: + type: string + format: date-time + cookieDomains: + type: + - array + - 'null' + items: + type: string + required: + - id + - createdAt + - updatedAt + ProfileListResponse: + type: object + properties: + items: + type: array + items: + $ref: '#/components/schemas/ProfileView' + totalItems: + type: integer + pageNumber: + type: integer + pageSize: + type: integer + required: + - items + - totalItems + - pageNumber + - pageSize +``` + +#### Create Profile +POST https://api.browser-use.com/api/v2/profiles +Content-Type: application/json +Profiles allow you to preserve the state of the browser between tasks. +They are most commonly used to allow users to preserve the log-in state in the agent between tasks. +You'd normally create one profile per user and then use it for all their tasks. +You can create a new profile by calling this endpoint. 
+Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/profiles/create-profile-profiles-post +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: Create Profile + version: endpoint_profiles.create_profile_profiles_post +paths: + /profiles: + post: + operationId: create-profile-profiles-post + summary: Create Profile + description: >- + Profiles allow you to preserve the state of the browser between tasks. + They are most commonly used to allow users to preserve the log-in state + in the agent between tasks. + You'd normally create one profile per user and then use it for all their + tasks. + You can create a new profile by calling this endpoint. + tags: + - - subpackage_profiles + parameters: + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '201': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/ProfileView' + '402': + description: Subscription required for additional profiles + content: {} + '422': + description: Request validation failed + content: {} + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/ProfileCreateRequest' +components: + schemas: + ProfileCreateRequest: + type: object + properties: + name: + type: + - string + - 'null' + ProfileView: + type: object + properties: + id: + type: string + format: uuid + name: + type: + - string + - 'null' + lastUsedAt: + type: + - string + - 'null' + format: date-time + createdAt: + type: string + format: date-time + updatedAt: + type: string + format: date-time + cookieDomains: + type: + - array + - 'null' + items: + type: string + required: + - id + - createdAt + - updatedAt +``` + +#### Get Profile +GET https://api.browser-use.com/api/v2/profiles/{profile_id} +Get profile details. 
+Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/profiles/get-profile-profiles-profile-id-get +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: Get Profile + version: endpoint_profiles.get_profile_profiles__profile_id__get +paths: + /profiles/{profile_id}: + get: + operationId: get-profile-profiles-profile-id-get + summary: Get Profile + description: Get profile details. + tags: + - - subpackage_profiles + parameters: + - name: profile_id + in: path + required: true + schema: + type: string + format: uuid + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/ProfileView' + '404': + description: Profile not found + content: {} + '422': + description: Validation Error + content: {} +components: + schemas: + ProfileView: + type: object + properties: + id: + type: string + format: uuid + name: + type: + - string + - 'null' + lastUsedAt: + type: + - string + - 'null' + format: date-time + createdAt: + type: string + format: date-time + updatedAt: + type: string + format: date-time + cookieDomains: + type: + - array + - 'null' + items: + type: string + required: + - id + - createdAt + - updatedAt +``` + +#### Delete Browser Profile +DELETE https://api.browser-use.com/api/v2/profiles/{profile_id} +Permanently delete a browser profile and its configuration. +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/profiles/delete-browser-profile-profiles-profile-id-delete +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: Delete Browser Profile + version: endpoint_profiles.delete_browser_profile_profiles__profile_id__delete +paths: + /profiles/{profile_id}: + delete: + operationId: delete-browser-profile-profiles-profile-id-delete + summary: Delete Browser Profile + description: Permanently delete a browser profile and its configuration. 
+ tags: + - - subpackage_profiles + parameters: + - name: profile_id + in: path + required: true + schema: + type: string + format: uuid + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '204': + description: Successful Response + content: + application/json: + schema: + $ref: >- + #/components/schemas/Profiles_delete_browser_profile_profiles__profile_id__delete_Response_204 + '422': + description: Validation Error + content: {} +components: + schemas: + Profiles_delete_browser_profile_profiles__profile_id__delete_Response_204: + type: object + properties: {} +``` + +#### Update Profile +PATCH https://api.browser-use.com/api/v2/profiles/{profile_id} +Content-Type: application/json +Update a browser profile's information. +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/profiles/update-profile-profiles-profile-id-patch +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: Update Profile + version: endpoint_profiles.update_profile_profiles__profile_id__patch +paths: + /profiles/{profile_id}: + patch: + operationId: update-profile-profiles-profile-id-patch + summary: Update Profile + description: Update a browser profile's information. 
+ tags: + - - subpackage_profiles + parameters: + - name: profile_id + in: path + required: true + schema: + type: string + format: uuid + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/ProfileView' + '404': + description: Profile not found + content: {} + '422': + description: Validation Error + content: {} + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/ProfileUpdateRequest' +components: + schemas: + ProfileUpdateRequest: + type: object + properties: + name: + type: + - string + - 'null' + ProfileView: + type: object + properties: + id: + type: string + format: uuid + name: + type: + - string + - 'null' + lastUsedAt: + type: + - string + - 'null' + format: date-time + createdAt: + type: string + format: date-time + updatedAt: + type: string + format: date-time + cookieDomains: + type: + - array + - 'null' + items: + type: string + required: + - id + - createdAt + - updatedAt +``` + +### Browsers + +#### List Browser Sessions +GET https://api.browser-use.com/api/v2/browsers +Get paginated list of browser sessions with optional status filtering. +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/browsers/list-browser-sessions-browsers-get +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: List Browser Sessions + version: endpoint_browsers.list_browser_sessions_browsers_get +paths: + /browsers: + get: + operationId: list-browser-sessions-browsers-get + summary: List Browser Sessions + description: Get paginated list of browser sessions with optional status filtering. 
+ tags: + - - subpackage_browsers + parameters: + - name: pageSize + in: query + required: false + schema: + type: integer + - name: pageNumber + in: query + required: false + schema: + type: integer + - name: filterBy + in: query + required: false + schema: + oneOf: + - $ref: '#/components/schemas/BrowserSessionStatus' + - type: 'null' + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/BrowserSessionListResponse' + '422': + description: Validation Error + content: {} +components: + schemas: + BrowserSessionStatus: + type: string + enum: + - value: active + - value: stopped + BrowserSessionItemView: + type: object + properties: + id: + type: string + format: uuid + status: + $ref: '#/components/schemas/BrowserSessionStatus' + liveUrl: + type: + - string + - 'null' + cdpUrl: + type: + - string + - 'null' + timeoutAt: + type: string + format: date-time + startedAt: + type: string + format: date-time + finishedAt: + type: + - string + - 'null' + format: date-time + required: + - id + - status + - timeoutAt + - startedAt + BrowserSessionListResponse: + type: object + properties: + items: + type: array + items: + $ref: '#/components/schemas/BrowserSessionItemView' + totalItems: + type: integer + pageNumber: + type: integer + pageSize: + type: integer + required: + - items + - totalItems + - pageNumber + - pageSize +``` + +#### Create Browser Session +POST https://api.browser-use.com/api/v2/browsers +Content-Type: application/json +Create a new browser session. +**Pricing:** Browser sessions are charged at $0.05 per hour. +The full hourly rate is charged upfront when the session starts. +When you stop the session, any unused time is automatically refunded proportionally. +Billing is rounded to the nearest minute (minimum 1 minute). 
+For example, if you stop a session after 30 minutes, you'll be refunded $0.025. +**Session Limits:** +- Free users (without active subscription): Maximum 15 minutes per session +- Paid subscribers: Up to 4 hours per session +Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/browsers/create-browser-session-browsers-post +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: Create Browser Session + version: endpoint_browsers.create_browser_session_browsers_post +paths: + /browsers: + post: + operationId: create-browser-session-browsers-post + summary: Create Browser Session + description: >- + Create a new browser session. + **Pricing:** Browser sessions are charged at $0.05 per hour. + The full hourly rate is charged upfront when the session starts. + When you stop the session, any unused time is automatically refunded + proportionally. + Billing is rounded to the nearest minute (minimum 1 minute). + For example, if you stop a session after 30 minutes, you'll be refunded + $0.025. 
+ **Session Limits:** + - Free users (without active subscription): Maximum 15 minutes per + session + - Paid subscribers: Up to 4 hours per session + tags: + - - subpackage_browsers + parameters: + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '201': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/BrowserSessionItemView' + '403': + description: Session timeout limit exceeded for free users + content: {} + '404': + description: Profile not found + content: {} + '422': + description: Request validation failed + content: {} + '429': + description: Too many concurrent active sessions + content: {} + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateBrowserSessionRequest' +components: + schemas: + ProxyCountryCode: + type: string + enum: + - value: us + - value: uk + - value: fr + - value: it + - value: jp + - value: au + - value: de + - value: fi + - value: ca + - value: in + CreateBrowserSessionRequest: + type: object + properties: + profileId: + type: + - string + - 'null' + format: uuid + proxyCountryCode: + oneOf: + - $ref: '#/components/schemas/ProxyCountryCode' + - type: 'null' + timeout: + type: integer + BrowserSessionStatus: + type: string + enum: + - value: active + - value: stopped + BrowserSessionItemView: + type: object + properties: + id: + type: string + format: uuid + status: + $ref: '#/components/schemas/BrowserSessionStatus' + liveUrl: + type: + - string + - 'null' + cdpUrl: + type: + - string + - 'null' + timeoutAt: + type: string + format: date-time + startedAt: + type: string + format: date-time + finishedAt: + type: + - string + - 'null' + format: date-time + required: + - id + - status + - timeoutAt + - startedAt +``` + +#### Get Browser Session +GET https://api.browser-use.com/api/v2/browsers/{session_id} +Get detailed browser session information including status and URLs. 
+Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/browsers/get-browser-session-browsers-session-id-get +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: Get Browser Session + version: endpoint_browsers.get_browser_session_browsers__session_id__get +paths: + /browsers/{session_id}: + get: + operationId: get-browser-session-browsers-session-id-get + summary: Get Browser Session + description: Get detailed browser session information including status and URLs. + tags: + - - subpackage_browsers + parameters: + - name: session_id + in: path + required: true + schema: + type: string + format: uuid + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/BrowserSessionView' + '404': + description: Session not found + content: {} + '422': + description: Validation Error + content: {} +components: + schemas: + BrowserSessionStatus: + type: string + enum: + - value: active + - value: stopped + BrowserSessionView: + type: object + properties: + id: + type: string + format: uuid + status: + $ref: '#/components/schemas/BrowserSessionStatus' + liveUrl: + type: + - string + - 'null' + cdpUrl: + type: + - string + - 'null' + timeoutAt: + type: string + format: date-time + startedAt: + type: string + format: date-time + finishedAt: + type: + - string + - 'null' + format: date-time + required: + - id + - status + - timeoutAt + - startedAt +``` + +#### Update Browser Session +PATCH https://api.browser-use.com/api/v2/browsers/{session_id} +Content-Type: application/json +Stop a browser session. +**Refund:** When you stop a session, unused time is automatically refunded. +If the session ran for less than 1 hour, you'll receive a proportional refund. +Billing is rounded up to the nearest minute (minimum 1 minute).
+Reference: https://docs.cloud.browser-use.com/api-reference/v-2-api-current/browsers/update-browser-session-browsers-session-id-patch +OpenAPI Specification +```yaml +openapi: 3.1.1 +info: + title: Update Browser Session + version: endpoint_browsers.update_browser_session_browsers__session_id__patch +paths: + /browsers/{session_id}: + patch: + operationId: update-browser-session-browsers-session-id-patch + summary: Update Browser Session + description: >- + Stop a browser session. + **Refund:** When you stop a session, unused time is automatically + refunded. + If the session ran for less than 1 hour, you'll receive a proportional + refund. + Billing is rounded up to the nearest minute (minimum 1 minute). + tags: + - - subpackage_browsers + parameters: + - name: session_id + in: path + required: true + schema: + type: string + format: uuid + - name: X-Browser-Use-API-Key + in: header + required: true + schema: + type: string + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/BrowserSessionView' + '404': + description: Session not found + content: {} + '422': + description: Request validation failed + content: {} + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateBrowserSessionRequest' +components: + schemas: + BrowserSessionUpdateAction: + type: string + enum: + - value: stop + UpdateBrowserSessionRequest: + type: object + properties: + action: + $ref: '#/components/schemas/BrowserSessionUpdateAction' + required: + - action + BrowserSessionStatus: + type: string + enum: + - value: active + - value: stopped + BrowserSessionView: + type: object + properties: + id: + type: string + format: uuid + status: + $ref: '#/components/schemas/BrowserSessionStatus' + liveUrl: + type: + - string + - 'null' + cdpUrl: + type: + - string + - 'null' + timeoutAt: + type: string + format: date-time + startedAt: + type: string + format: date-time + finishedAt: + type: + -
string + - 'null' + format: date-time + required: + - id + - status + - timeoutAt + - startedAt +``` From 15b650b4c9d18c46bb9dc5ece49057b58adc6930 Mon Sep 17 00:00:00 2001 From: Alezander9 Date: Wed, 5 Nov 2025 13:43:21 -0800 Subject: [PATCH 03/17] reroute docs quickstart to agents.md --- docs/quickstart_llm.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/quickstart_llm.mdx b/docs/quickstart_llm.mdx index 53dc2d137..d4c936037 100644 --- a/docs/quickstart_llm.mdx +++ b/docs/quickstart_llm.mdx @@ -6,5 +6,5 @@ icon: "brain" -1. Copy all content [🔗 from here](https://docs.browser-use.com/llms-full.txt) (~32k tokens) +1. Copy all content [🔗 from here](https://github.com/browser-use/browser-use/blob/main/AGENTS.MD) (~32k tokens) 2. Paste it into your favorite coding agent (Cursor, Claude, ChatGPT ...). From b2bff5f0fc9c705762232eeeee55801150e164bd Mon Sep 17 00:00:00 2001 From: Alezander9 Date: Wed, 5 Nov 2025 14:07:49 -0800 Subject: [PATCH 04/17] fix typo --- CLOUD.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CLOUD.md b/CLOUD.md index 2fb6e0252..213961c6f 100644 --- a/CLOUD.md +++ b/CLOUD.md @@ -52,7 +52,7 @@ curl -X PATCH https://api.browser-use.com/api/v2/sessions/ \ ## API (v2) Docs The best way to use Browser Use Cloud is with API v2. -Other options exist, namely API v2 and the SDK, but are give less comprehensive control. +Other options exist, namely API v1 and the SDK, but give less comprehensive control.
### Billing ##### Get Account Billing From 667331115d2b278c5b1909f646c90693fe024fb8 Mon Sep 17 00:00:00 2001 From: Alezander9 Date: Wed, 5 Nov 2025 14:28:46 -0800 Subject: [PATCH 05/17] link to new agents md from docs, cleanup styling --- docs/docs.json | 10 ++++++++-- docs/quickstart_llm.mdx | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/docs/docs.json b/docs/docs.json index e4e47e2b2..6768f5d8f 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -4,8 +4,14 @@ "name": "Browser Use", "colors": { "primary": "#FE750E", - "light": "#FFF7ED", - "dark": "#C2410C" + "light": "#FE750E", + "dark": "#FE750E" + }, + "background": { + "color": { + "light": "#FFFFFF", + "dark": "#09090B" + } }, "favicon": "/favicon.ico", "contextual": { diff --git a/docs/quickstart_llm.mdx b/docs/quickstart_llm.mdx index d4c936037..cf88e82e2 100644 --- a/docs/quickstart_llm.mdx +++ b/docs/quickstart_llm.mdx @@ -6,5 +6,5 @@ icon: "brain" -1. Copy all content [🔗 from here](https://github.com/browser-use/browser-use/blob/main/AGENTS.MD) (~32k tokens) +1. Copy all content [🔗 from here](https://github.com/browser-use/browser-use/blob/main/AGENTS.md) (~32k tokens) 2. Paste it into your favorite coding agent (Cursor, Claude, ChatGPT ...). From 7b67fe546fd23353d6be38e6c6e7a64d2e170195 Mon Sep 17 00:00:00 2001 From: Alezander9 Date: Wed, 5 Nov 2025 14:39:31 -0800 Subject: [PATCH 06/17] add versionn nmumber to AGENTS.md --- AGENTS.MD | 1 + 1 file changed, 1 insertion(+) diff --git a/AGENTS.MD b/AGENTS.MD index e96d67efd..f5dca30ea 100644 --- a/AGENTS.MD +++ b/AGENTS.MD @@ -1,3 +1,4 @@ +# AGENTS.md Version 1 Browser-Use is an AI agent that autonomously interacts with the web. It takes a user-defined task, navigates web pages using Chromium via CDP, processes HTML, and repeatedly queries a language model to decide the next action—until the task is completed.
From 5e996f0c63a299e22f4295cee1d2e25aa7eee2a9 Mon Sep 17 00:00:00 2001 From: Alexander Yue <43824272+Alezander9@users.noreply.github.com> Date: Wed, 5 Nov 2025 14:40:48 -0800 Subject: [PATCH 07/17] Rename AGENTS.MD to AGENTS.md --- AGENTS.MD => AGENTS.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename AGENTS.MD => AGENTS.md (100%) diff --git a/AGENTS.MD b/AGENTS.md similarity index 100% rename from AGENTS.MD rename to AGENTS.md From ffd02959440d9e2a923f347bf855a9ab13f1875d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Thu, 6 Nov 2025 09:17:11 -0800 Subject: [PATCH 08/17] pricing change --- README.md | 10 +++++----- docs/supported-models.mdx | 11 ++++------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 4407ed19d..c753640d9 100644 --- a/README.md +++ b/README.md @@ -156,7 +156,7 @@ https://github.com/user-attachments/assets/a6813fa7-4a7c-40a6-b4aa-382bf88b1850 [Example code ↗](https://github.com/browser-use/browser-use/blob/main/examples/use-cases/buy_groceries.py) -### 💻 Personal-Assistant. +### 💻 Personal-Assistant. #### Task = "Help me find parts for a custom PC." https://github.com/user-attachments/assets/ac34f75c-057a-43ef-ad06-5b2c9d42bf06 @@ -180,9 +180,9 @@ https://github.com/user-attachments/assets/ac34f75c-057a-43ef-ad06-5b2c9d42bf06 We optimized **ChatBrowserUse()** specifically for browser automation tasks. On avg it completes tasks 3-5x faster than other models with SOTA accuracy. **Pricing (per 1M tokens):** -- Input tokens: $0.50 -- Output tokens: $3.00 -- Cached tokens: $0.10 +- Input tokens: $0.20 +- Output tokens: $2.00 +- Cached tokens: $0.02 For other LLM providers, see our [supported models documentation](https://docs.browser-use.com/supported-models). @@ -251,7 +251,7 @@ For production use cases, use our [Browser Use Cloud API](https://cloud.browser-
- + **Tell your computer what to do, and it gets it done.** diff --git a/docs/supported-models.mdx b/docs/supported-models.mdx index 3e6b8e5bc..d9d6bcbdd 100644 --- a/docs/supported-models.mdx +++ b/docs/supported-models.mdx @@ -32,17 +32,14 @@ Get your API key from the [Browser Use Cloud](https://cloud.browser-use.com/new- #### Pricing -ChatBrowserUse offers competitive pricing per 1 million tokens: +ChatBrowserUse offers the best pricing per 1 million tokens: | Token Type | Price per 1M tokens | |------------|---------------------| -| Input tokens | $0.50 | -| Output tokens | $3.00 | -| Cached tokens | $0.10 | +| Input tokens | $0.20 | +| Cached tokens | $0.02 | +| Output tokens | $2.00 | - - Cached tokens provide significant cost savings on repeated context, reducing input costs by 80%. - ### Google Gemini [example](https://github.com/browser-use/browser-use/blob/main/examples/models/gemini.py) From cb814da33ec19a130c83cb14868d34abe5fe2448 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 6 Nov 2025 21:38:18 +0000 Subject: [PATCH 09/17] Add impossible_task and reached_captcha to JudgementResult Co-authored-by: mailmertunsal --- browser_use/agent/judge.py | 26 +++++++- browser_use/agent/service.py | 4 ++ browser_use/agent/views.py | 8 +++ tests/ci/test_judge.py | 118 +++++++++++++++++++++++++++++++++++ 4 files changed, 155 insertions(+), 1 deletion(-) create mode 100644 tests/ci/test_judge.py diff --git a/browser_use/agent/judge.py b/browser_use/agent/judge.py index a58eefc27..a6220f0f2 100644 --- a/browser_use/agent/judge.py +++ b/browser_use/agent/judge.py @@ -122,6 +122,28 @@ def construct_judge_messages( - The agent made up content that is not in the screenshot or the page state - The agent calls done action before completing all key points of the task +**IMPOSSIBLE TASK DETECTION:** +Set `impossible_task` to true when the task fundamentally could not be completed due to: +- Vague or ambiguous task instructions that cannot be reasonably interpreted +- 
Website genuinely broken or non-functional (be conservative - temporary issues don't count) +- Required links/pages truly inaccessible (404, 403, etc.) +- Task requires authentication/login but no credentials were provided +- Task asks for functionality that doesn't exist on the target site +- Other insurmountable external obstacles beyond the agent's control + +Do NOT mark as impossible if: +- Agent made poor decisions but task was achievable +- Temporary page loading issues that could be retried +- Agent didn't try the right approach +- Website works but agent struggled with it + +**CAPTCHA DETECTION:** +Set `reached_captcha` to true if: +- Screenshots show captcha challenges (reCAPTCHA, hCaptcha, etc.) +- Agent reports being blocked by bot detection +- Error messages indicate captcha/verification requirements +- Any evidence the agent encountered anti-bot measures during execution + **IMPORTANT EVALUATION NOTES:** - **evaluate for action** - For each key step of the trace, double check whether the action that the agent tried to performed actually happened. If the required action did not actually occur, the verdict should be false. - **screenshot is not entire content** - The agent has the entire DOM content, but the screenshot is only part of the content. If the agent extracts information from the page, but you do not see it in the screenshot, you can assume this information is there. @@ -138,7 +160,9 @@ Respond with EXACTLY this JSON structure (no additional text before or after): {{ "reasoning": "Breakdown of user task into key points. Detailed analysis covering: what went well, what didn't work, trajectory quality assessment, tool usage evaluation, output quality review, and overall user satisfaction prediction", "verdict": true or false, - "failure_reason": "If verdict is false, provide the key reason why the task was not completed successfully. If verdict is true, use an empty string." 
+ "failure_reason": "If verdict is false, provide the key reason why the task was not completed successfully. If verdict is true, use an empty string.", + "impossible_task": true or false, + "reached_captcha": true or false }} """ diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index 5bde05406..112da3849 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -974,6 +974,10 @@ class Agent(Generic[Context, AgentStructuredOutput]): judge_log += f'βš–οΈ {verdict_color}Judge Verdict: {verdict_text}\033[0m\n' if judgement.failure_reason: judge_log += f' Failure: {judgement.failure_reason}\n' + if judgement.impossible_task: + judge_log += ' 🚫 Impossible Task: Task was fundamentally impossible to complete\n' + if judgement.reached_captcha: + judge_log += ' πŸ€– Captcha Detected: Agent encountered captcha challenges\n' judge_log += f' {judgement.reasoning}\n' self.logger.info(judge_log) diff --git a/browser_use/agent/views.py b/browser_use/agent/views.py index 0d2744d6f..470f92340 100644 --- a/browser_use/agent/views.py +++ b/browser_use/agent/views.py @@ -94,6 +94,14 @@ class JudgementResult(BaseModel): reasoning: str | None = Field(default=None, description='Explanation of the judgement') verdict: bool = Field(description='Whether the trace was successful or not') failure_reason: str | None = Field(default=None, description='If the trace was not successful, the reason why') + impossible_task: bool = Field( + default=False, + description='True if the task was impossible to complete due to vague instructions, broken website, inaccessible links, missing login credentials, or other insurmountable obstacles', + ) + reached_captcha: bool = Field( + default=False, + description='True if the agent encountered captcha challenges during task execution', + ) class ActionResult(BaseModel): diff --git a/tests/ci/test_judge.py b/tests/ci/test_judge.py new file mode 100644 index 000000000..6e209b240 --- /dev/null +++ 
b/tests/ci/test_judge.py @@ -0,0 +1,118 @@ +"""Tests for judge functionality.""" + +import json + +import pytest + +from browser_use.agent.judge import construct_judge_messages +from browser_use.agent.views import JudgementResult + + +def test_judgement_result_schema(): + """Test that JudgementResult has all required fields.""" + # Test with all fields + result = JudgementResult( + reasoning='Task completed successfully', + verdict=True, + failure_reason='', + impossible_task=False, + reached_captcha=False, + ) + + assert result.reasoning == 'Task completed successfully' + assert result.verdict is True + assert result.failure_reason == '' + assert result.impossible_task is False + assert result.reached_captcha is False + + # Test with defaults + result_defaults = JudgementResult( + verdict=False, + failure_reason='Task failed due to missing login credentials', + ) + + assert result_defaults.verdict is False + assert result_defaults.failure_reason == 'Task failed due to missing login credentials' + assert result_defaults.impossible_task is False # Default + assert result_defaults.reached_captcha is False # Default + + # Test serialization includes new fields + data = result.model_dump() + assert 'impossible_task' in data + assert 'reached_captcha' in data + + +def test_judgement_result_impossible_task(): + """Test impossible_task field scenarios.""" + # Impossible task due to missing credentials + result_impossible = JudgementResult( + reasoning='Task requires login but no credentials were provided', + verdict=False, + failure_reason='Missing login credentials', + impossible_task=True, + reached_captcha=False, + ) + + assert result_impossible.impossible_task is True + assert result_impossible.verdict is False + + # Achievable task that just failed + result_achievable = JudgementResult( + reasoning='Agent made poor navigation choices', + verdict=False, + failure_reason='Navigation error', + impossible_task=False, + reached_captcha=False, + ) + + assert 
result_achievable.impossible_task is False + + +def test_judgement_result_reached_captcha(): + """Test reached_captcha field scenarios.""" + # Task blocked by captcha + result_captcha = JudgementResult( + reasoning='Agent was blocked by reCAPTCHA on the login page', + verdict=False, + failure_reason='Blocked by captcha', + impossible_task=True, + reached_captcha=True, + ) + + assert result_captcha.reached_captcha is True + assert result_captcha.verdict is False + + # Task without captcha + result_no_captcha = JudgementResult( + reasoning='Task completed without any anti-bot measures', + verdict=True, + failure_reason='', + impossible_task=False, + reached_captcha=False, + ) + + assert result_no_captcha.reached_captcha is False + + +def test_judge_prompt_includes_new_fields(): + """Test that the judge system prompt includes instructions for new fields.""" + messages = construct_judge_messages( + task='Test task', + final_result='Test result', + agent_steps=['Step 1: Navigate', 'Step 2: Click'], + screenshot_paths=[], + max_images=10, + ) + + # Get system prompt + system_prompt = messages[0].content + + # Check that the prompt mentions the new fields + assert 'impossible_task' in system_prompt + assert 'reached_captcha' in system_prompt + assert 'IMPOSSIBLE TASK DETECTION' in system_prompt + assert 'CAPTCHA DETECTION' in system_prompt + + # Check that response format includes the new fields + assert '"impossible_task": true or false' in system_prompt + assert '"reached_captcha": true or false' in system_prompt From 2a9eae7ec5b3a94143d790ec25fa790200fbc419 Mon Sep 17 00:00:00 2001 From: Mert Unsal Date: Thu, 6 Nov 2025 13:41:06 -0800 Subject: [PATCH 10/17] Update browser_use/agent/service.py --- browser_use/agent/service.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index 112da3849..e3a90fce7 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -974,8 +974,6 @@ class 
Agent(Generic[Context, AgentStructuredOutput]): judge_log += f'⚖️ {verdict_color}Judge Verdict: {verdict_text}\033[0m\n' if judgement.failure_reason: judge_log += f' Failure: {judgement.failure_reason}\n' - if judgement.impossible_task: - judge_log += ' 🚫 Impossible Task: Task was fundamentally impossible to complete\n' if judgement.reached_captcha: judge_log += ' 🤖 Captcha Detected: Agent encountered captcha challenges\n' judge_log += f' {judgement.reasoning}\n' From c4e2dfb39a43d7ebd4ab9a001b5a1a2c2ef3039a Mon Sep 17 00:00:00 2001 From: Mert Unsal Date: Thu, 6 Nov 2025 13:42:38 -0800 Subject: [PATCH 11/17] Update browser_use/agent/views.py --- browser_use/agent/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/browser_use/agent/views.py b/browser_use/agent/views.py index 470f92340..90c16d83a 100644 --- a/browser_use/agent/views.py +++ b/browser_use/agent/views.py @@ -93,7 +93,7 @@ class JudgementResult(BaseModel): reasoning: str | None = Field(default=None, description='Explanation of the judgement') verdict: bool = Field(description='Whether the trace was successful or not') - failure_reason: str | None = Field(default=None, description='If the trace was not successful, the reason why') + failure_reason: str | None = Field(default=None, description='If the trace was not successful, the reason why. 
Otherwise empty.') impossible_task: bool = Field( default=False, description='True if the task was impossible to complete due to vague instructions, broken website, inaccessible links, missing login credentials, or other insurmountable obstacles', From 604295678f9147e4840a04c3f48e6fd6ddbd1d30 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 6 Nov 2025 21:42:41 +0000 Subject: [PATCH 12/17] Auto-commit pending changes before rebase - PR synchronize --- tests/ci/test_judge.py | 118 ----------------------------------------- 1 file changed, 118 deletions(-) delete mode 100644 tests/ci/test_judge.py diff --git a/tests/ci/test_judge.py b/tests/ci/test_judge.py deleted file mode 100644 index 6e209b240..000000000 --- a/tests/ci/test_judge.py +++ /dev/null @@ -1,118 +0,0 @@ -"""Tests for judge functionality.""" - -import json - -import pytest - -from browser_use.agent.judge import construct_judge_messages -from browser_use.agent.views import JudgementResult - - -def test_judgement_result_schema(): - """Test that JudgementResult has all required fields.""" - # Test with all fields - result = JudgementResult( - reasoning='Task completed successfully', - verdict=True, - failure_reason='', - impossible_task=False, - reached_captcha=False, - ) - - assert result.reasoning == 'Task completed successfully' - assert result.verdict is True - assert result.failure_reason == '' - assert result.impossible_task is False - assert result.reached_captcha is False - - # Test with defaults - result_defaults = JudgementResult( - verdict=False, - failure_reason='Task failed due to missing login credentials', - ) - - assert result_defaults.verdict is False - assert result_defaults.failure_reason == 'Task failed due to missing login credentials' - assert result_defaults.impossible_task is False # Default - assert result_defaults.reached_captcha is False # Default - - # Test serialization includes new fields - data = result.model_dump() - assert 'impossible_task' in data - assert 
'reached_captcha' in data - - -def test_judgement_result_impossible_task(): - """Test impossible_task field scenarios.""" - # Impossible task due to missing credentials - result_impossible = JudgementResult( - reasoning='Task requires login but no credentials were provided', - verdict=False, - failure_reason='Missing login credentials', - impossible_task=True, - reached_captcha=False, - ) - - assert result_impossible.impossible_task is True - assert result_impossible.verdict is False - - # Achievable task that just failed - result_achievable = JudgementResult( - reasoning='Agent made poor navigation choices', - verdict=False, - failure_reason='Navigation error', - impossible_task=False, - reached_captcha=False, - ) - - assert result_achievable.impossible_task is False - - -def test_judgement_result_reached_captcha(): - """Test reached_captcha field scenarios.""" - # Task blocked by captcha - result_captcha = JudgementResult( - reasoning='Agent was blocked by reCAPTCHA on the login page', - verdict=False, - failure_reason='Blocked by captcha', - impossible_task=True, - reached_captcha=True, - ) - - assert result_captcha.reached_captcha is True - assert result_captcha.verdict is False - - # Task without captcha - result_no_captcha = JudgementResult( - reasoning='Task completed without any anti-bot measures', - verdict=True, - failure_reason='', - impossible_task=False, - reached_captcha=False, - ) - - assert result_no_captcha.reached_captcha is False - - -def test_judge_prompt_includes_new_fields(): - """Test that the judge system prompt includes instructions for new fields.""" - messages = construct_judge_messages( - task='Test task', - final_result='Test result', - agent_steps=['Step 1: Navigate', 'Step 2: Click'], - screenshot_paths=[], - max_images=10, - ) - - # Get system prompt - system_prompt = messages[0].content - - # Check that the prompt mentions the new fields - assert 'impossible_task' in system_prompt - assert 'reached_captcha' in system_prompt - assert 
'IMPOSSIBLE TASK DETECTION' in system_prompt - assert 'CAPTCHA DETECTION' in system_prompt - - # Check that response format includes the new fields - assert '"impossible_task": true or false' in system_prompt - assert '"reached_captcha": true or false' in system_prompt From 3cf6e1046b381548365965c58c511978356d991d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Thu, 6 Nov 2025 13:49:20 -0800 Subject: [PATCH 13/17] include kimi-2 --- browser_use/llm/openai/chat.py | 12 ++++++- browser_use/llm/schema.py | 60 +++++++++++++++++++++++++++++----- examples/models/moonshot.py | 38 +++++++++++++++++++++ 3 files changed, 100 insertions(+), 10 deletions(-) create mode 100644 examples/models/moonshot.py diff --git a/browser_use/llm/openai/chat.py b/browser_use/llm/openai/chat.py index ecf05b654..7cddca24b 100644 --- a/browser_use/llm/openai/chat.py +++ b/browser_use/llm/openai/chat.py @@ -42,6 +42,12 @@ class ChatOpenAI(BaseChatModel): top_p: float | None = None add_schema_to_system_prompt: bool = False # Add JSON schema to system prompt instead of using response_format dont_force_structured_output: bool = False # If True, the model will not be forced to output a structured output + remove_min_items_from_schema: bool = ( + False # If True, remove minItems from JSON schema (for compatibility with some providers) + ) + remove_defaults_from_schema: bool = ( + False # If True, remove default values from JSON schema (for compatibility with some providers) + ) # Client initialization parameters api_key: str | None = None @@ -206,7 +212,11 @@ class ChatOpenAI(BaseChatModel): response_format: JSONSchema = { 'name': 'agent_output', 'strict': True, - 'schema': SchemaOptimizer.create_optimized_json_schema(output_format), + 'schema': SchemaOptimizer.create_optimized_json_schema( + output_format, + remove_min_items=self.remove_min_items_from_schema, + remove_defaults=self.remove_defaults_from_schema, + ), } # Add JSON 
schema to system prompt if requested diff --git a/browser_use/llm/schema.py b/browser_use/llm/schema.py index 8467e09d4..5477cc4b7 100644 --- a/browser_use/llm/schema.py +++ b/browser_use/llm/schema.py @@ -9,13 +9,20 @@ from pydantic import BaseModel class SchemaOptimizer: @staticmethod - def create_optimized_json_schema(model: type[BaseModel]) -> dict[str, Any]: + def create_optimized_json_schema( + model: type[BaseModel], + *, + remove_min_items: bool = False, + remove_defaults: bool = False, + ) -> dict[str, Any]: """ Create the most optimized schema by flattening all $ref/$defs while preserving FULL descriptions and ALL action definitions. Also ensures OpenAI strict mode compatibility. Args: model: The Pydantic model to optimize + remove_min_items: If True, remove minItems from the schema + remove_defaults: If True, remove default values from the schema Returns: Optimized schema with all $refs resolved and strict mode compatibility @@ -26,12 +33,9 @@ class SchemaOptimizer: # Extract $defs for reference resolution, then flatten everything defs_lookup = original_schema.get('$defs', {}) - def optimize_schema( - obj: Any, - defs_lookup: dict[str, Any] | None = None, - *, - in_properties: bool = False, # NEW: track context - ) -> Any: + # Create optimized schema with flattening + # Pass flags to optimize_schema via closure + def optimize_schema(obj: Any, defs_lookup: dict[str, Any] | None = None, *, in_properties: bool = False) -> Any: """Apply all optimization techniques including flattening all $ref/$defs""" if isinstance(obj, dict): optimized: dict[str, Any] = {} @@ -65,6 +69,12 @@ class SchemaOptimizer: referenced_def = defs_lookup[ref_path] flattened_ref = optimize_schema(referenced_def, defs_lookup) + # Skip minItems/min_items and default if requested (check BEFORE processing) + elif key in ('minItems', 'min_items') and remove_min_items: + continue # Skip minItems/min_items + elif key == 'default' and remove_defaults: + continue # Skip default values + # Keep 
all anyOf structures (action unions) and resolve any $refs within elif key == 'anyOf' and isinstance(value, list): optimized[key] = [optimize_schema(item, defs_lookup) for item in value] @@ -78,7 +88,17 @@ class SchemaOptimizer: ) # Keep essential validation fields - elif key in ['type', 'required', 'minimum', 'maximum', 'minItems', 'maxItems', 'pattern', 'default']: + elif key in [ + 'type', + 'required', + 'minimum', + 'maximum', + 'minItems', + 'min_items', + 'maxItems', + 'pattern', + 'default', + ]: optimized[key] = value if not isinstance(value, (dict, list)) else optimize_schema(value, defs_lookup) # Recursively process all other fields @@ -111,7 +131,6 @@ class SchemaOptimizer: return [optimize_schema(item, defs_lookup, in_properties=in_properties) for item in obj] return obj - # Create optimized schema with flattening optimized_result = optimize_schema(original_schema, defs_lookup) # Ensure we have a dictionary (should always be the case for schema root) @@ -140,6 +159,29 @@ class SchemaOptimizer: ensure_additional_properties_false(optimized_schema) SchemaOptimizer._make_strict_compatible(optimized_schema) + # Final pass to remove minItems/min_items and default values if requested + if remove_min_items or remove_defaults: + + def remove_forbidden_fields(obj: Any) -> None: + """Recursively remove minItems/min_items and default values""" + if isinstance(obj, dict): + # Remove forbidden keys + if remove_min_items: + obj.pop('minItems', None) + obj.pop('min_items', None) + if remove_defaults: + obj.pop('default', None) + # Recursively process all values + for value in obj.values(): + if isinstance(value, (dict, list)): + remove_forbidden_fields(value) + elif isinstance(obj, list): + for item in obj: + if isinstance(item, (dict, list)): + remove_forbidden_fields(item) + + remove_forbidden_fields(optimized_schema) + return optimized_schema @staticmethod diff --git a/examples/models/moonshot.py b/examples/models/moonshot.py new file mode 100644 index 
000000000..b88842c89 --- /dev/null +++ b/examples/models/moonshot.py @@ -0,0 +1,38 @@ +import asyncio +import os + +from dotenv import load_dotenv + +from browser_use import Agent, ChatOpenAI + +load_dotenv() + +# Get API key from environment variable +api_key = os.getenv('MOONSHOT_API_KEY') +if api_key is None: + print('Make sure you have MOONSHOT_API_KEY set in your .env file') + print('Get your API key from https://platform.moonshot.ai/console/api-keys ') + exit(1) + +# Configure Moonshot AI model +llm = ChatOpenAI( + model='kimi-k2-thinking', + base_url='https://api.moonshot.ai/v1', + api_key=api_key, + add_schema_to_system_prompt=True, + remove_min_items_from_schema=True, # Moonshot doesn't support minItems in JSON schema + remove_defaults_from_schema=True, # Moonshot doesn't allow default values with anyOf +) + + +async def main(): + agent = Agent( + task='Search for the latest news about AI and summarize the top 3 articles', + llm=llm, + flash_mode=True, + ) + await agent.run() + + +if __name__ == '__main__': + asyncio.run(main()) From 714232bddd93b30a360966a13a4dfefe9dcd3af5 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 6 Nov 2025 21:52:11 +0000 Subject: [PATCH 14/17] Add impossible task and captcha detection logging Co-authored-by: mailmertunsal --- browser_use/agent/service.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index e3a90fce7..c572c6caa 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -974,8 +974,11 @@ class Agent(Generic[Context, AgentStructuredOutput]): judge_log += f'⚖️ {verdict_color}Judge Verdict: {verdict_text}\033[0m\n' if judgement.failure_reason: judge_log += f' Failure: {judgement.failure_reason}\n' + if judgement.impossible_task: + judge_log += ' 🚫 Impossible Task: Task was fundamentally impossible to complete\n' if judgement.reached_captcha: judge_log += ' 🤖 Captcha Detected: Agent encountered captcha challenges\n' + 
judge_log += ' 👉 🥷 Use Browser Use Cloud for the most stealth browser infra: https://docs.browser-use.com/customize/browser/remote\n' judge_log += f' {judgement.reasoning}\n' self.logger.info(judge_log) From 8973e98ee0c6fe3ef980bfdafb151d96b862a819 Mon Sep 17 00:00:00 2001 From: Mert Unsal Date: Thu, 6 Nov 2025 13:53:31 -0800 Subject: [PATCH 15/17] Update browser_use/agent/service.py --- browser_use/agent/service.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index c572c6caa..b9e706a8f 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -974,8 +974,6 @@ class Agent(Generic[Context, AgentStructuredOutput]): judge_log += f'⚖️ {verdict_color}Judge Verdict: {verdict_text}\033[0m\n' if judgement.failure_reason: judge_log += f' Failure: {judgement.failure_reason}\n' - if judgement.impossible_task: - judge_log += ' 🚫 Impossible Task: Task was fundamentally impossible to complete\n' if judgement.reached_captcha: judge_log += ' 🤖 Captcha Detected: Agent encountered captcha challenges\n' judge_log += ' 👉 🥷 Use Browser Use Cloud for the most stealth browser infra: https://docs.browser-use.com/customize/browser/remote\n' From e68114662f5300d07b3aec1dcedc6cf0eb7cd33d Mon Sep 17 00:00:00 2001 From: mertunsall Date: Thu, 6 Nov 2025 13:59:23 -0800 Subject: [PATCH 16/17] fix linter --- browser_use/agent/views.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/browser_use/agent/views.py b/browser_use/agent/views.py index 90c16d83a..4289c8c47 100644 --- a/browser_use/agent/views.py +++ b/browser_use/agent/views.py @@ -93,7 +93,9 @@ class JudgementResult(BaseModel): reasoning: str | None = Field(default=None, description='Explanation of the judgement') verdict: bool = Field(description='Whether the trace was successful or not') - failure_reason: str | None = Field(default=None, description='If the trace was not successful, the reason why. 
Otherwise empty.') + failure_reason: str | None = Field( + default=None, description='If the trace was not successful, the reason why. Otherwise empty.' + ) impossible_task: bool = Field( default=False, description='True if the task was impossible to complete due to vague instructions, broken website, inaccessible links, missing login credentials, or other insurmountable obstacles', From 67d2f126652b27c3d0ad16c0d5834964243e9a52 Mon Sep 17 00:00:00 2001 From: mertunsall Date: Thu, 6 Nov 2025 14:51:52 -0800 Subject: [PATCH 17/17] default to PNG everywhere --- browser_use/actor/README.md | 8 ++++---- browser_use/actor/element.py | 2 +- browser_use/actor/page.py | 2 +- browser_use/agent/cloud_events.py | 2 +- browser_use/agent/judge.py | 4 ++-- browser_use/agent/prompts.py | 4 ++-- browser_use/agent/service.py | 2 +- browser_use/agent/views.py | 3 ++- browser_use/browser/watchdogs/screenshot_watchdog.py | 2 +- browser_use/code_use/service.py | 4 ++-- browser_use/llm/messages.py | 2 +- 11 files changed, 18 insertions(+), 17 deletions(-) diff --git a/browser_use/actor/README.md b/browser_use/actor/README.md index 24363ac13..82e5cc367 100644 --- a/browser_use/actor/README.md +++ b/browser_use/actor/README.md @@ -75,7 +75,7 @@ await element.drag_to(target_element) # Drag and drop value = await element.get_attribute("value") box = await element.get_bounding_box() # Returns BoundingBox or None info = await element.get_basic_info() # Comprehensive element info -screenshot_b64 = await element.screenshot(format='jpeg') +screenshot_b64 = await element.screenshot(format='png') # Execute JavaScript on element (this context is the element) text = await element.evaluate("() => this.textContent") @@ -108,7 +108,7 @@ await page.press("Escape") # Single keys # Page controls await page.set_viewport_size(width=1920, height=1080) -page_screenshot = await page.screenshot() # JPEG by default +page_screenshot = await page.screenshot() # PNG by default page_png = await 
page.screenshot(format="png", quality=90) # Page information @@ -166,7 +166,7 @@ products = await page.extract_content( - `evaluate(page_function: str, *args)` → `str` - Execute JavaScript (MUST use (...args) => format) - `press(key: str)` - Press key on page (supports "Control+A" format) - `set_viewport_size(width: int, height: int)` - Set viewport dimensions -- `screenshot(format='jpeg', quality=None)` → `str` - Take page screenshot, return base64 +- `screenshot(format='png', quality=None)` → `str` - Take page screenshot, return base64 - `get_url()` → `str`, `get_title()` → `str` - Get page information - `mouse` → `Mouse` - Get mouse interface for this page @@ -181,7 +181,7 @@ products = await page.extract_content( - `evaluate(page_function: str, *args)` → `str` - Execute JavaScript on element (this = element) - `get_attribute(name: str)` → `str | None` - Get attribute value - `get_bounding_box()` → `BoundingBox | None` - Get element position/size -- `screenshot(format='jpeg', quality=None)` → `str` - Take element screenshot, return base64 +- `screenshot(format='png', quality=None)` → `str` - Take element screenshot, return base64 - `get_basic_info()` → `ElementInfo` - Get comprehensive element information diff --git a/browser_use/actor/element.py b/browser_use/actor/element.py index 755995047..217a09cf2 100644 --- a/browser_use/actor/element.py +++ b/browser_use/actor/element.py @@ -679,7 +679,7 @@ class Element: except Exception: return None - async def screenshot(self, format: str = 'jpeg', quality: int | None = None) -> str: + async def screenshot(self, format: str = 'png', quality: int | None = None) -> str: """Take a screenshot of this element and return base64 encoded image. 
Args: diff --git a/browser_use/actor/page.py b/browser_use/actor/page.py index 71904010c..e76da29fc 100644 --- a/browser_use/actor/page.py +++ b/browser_use/actor/page.py @@ -188,7 +188,7 @@ class Page: return js_code - async def screenshot(self, format: str = 'jpeg', quality: int | None = None) -> str: + async def screenshot(self, format: str = 'png', quality: int | None = None) -> str: """Take a screenshot and return base64 encoded image. Args: diff --git a/browser_use/agent/cloud_events.py b/browser_use/agent/cloud_events.py index 4ff893df4..ed7b3c4b3 100644 --- a/browser_use/agent/cloud_events.py +++ b/browser_use/agent/cloud_events.py @@ -155,7 +155,7 @@ class CreateAgentStepEvent(BaseEvent): # Capture screenshot as base64 data URL if available screenshot_url = None if browser_state_summary.screenshot: - screenshot_url = f'data:image/jpeg;base64,{browser_state_summary.screenshot}' + screenshot_url = f'data:image/png;base64,{browser_state_summary.screenshot}' import logging logger = logging.getLogger(__name__) diff --git a/browser_use/agent/judge.py b/browser_use/agent/judge.py index a6220f0f2..32cf63690 100644 --- a/browser_use/agent/judge.py +++ b/browser_use/agent/judge.py @@ -158,9 +158,9 @@ Set `reached_captcha` to true if: Respond with EXACTLY this JSON structure (no additional text before or after): {{ - "reasoning": "Breakdown of user task into key points. Detailed analysis covering: what went well, what didn't work, trajectory quality assessment, tool usage evaluation, output quality review, and overall user satisfaction prediction", + "reasoning": "Breakdown of user task into key points. Detailed analysis covering: what went well, what didn't work, trajectory quality assessment, tool usage evaluation, output quality review, and overall user satisfaction prediction.", "verdict": true or false, - "failure_reason": "If verdict is false, provide the key reason why the task was not completed successfully. 
If verdict is true, use an empty string.", + "failure_reason": "A brief explanation of key reasons why the task was not completed successfully in case of failure. If verdict is true, use an empty string. Keep it concise and easy to read.", "impossible_task": true or false, "reached_captcha": true or false }} diff --git a/browser_use/agent/prompts.py b/browser_use/agent/prompts.py index c01a40413..86fc0c1ac 100644 --- a/browser_use/agent/prompts.py +++ b/browser_use/agent/prompts.py @@ -373,8 +373,8 @@ Available tabs: content_parts.append( ContentPartImageParam( image_url=ImageURL( - url=f'data:image/jpeg;base64,{screenshot}', - media_type='image/jpeg', + url=f'data:image/png;base64,{screenshot}', + media_type='image/png', detail=self.vision_detail_level, ), ) diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index b9e706a8f..a5fe7b578 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -973,7 +973,7 @@ class Agent(Generic[Context, AgentStructuredOutput]): verdict_text = '✅ PASS' if judgement.verdict else '❌ FAIL' judge_log += f'⚖️ {verdict_color}Judge Verdict: {verdict_text}\033[0m\n' if judgement.failure_reason: - judge_log += f' Failure: {judgement.failure_reason}\n' + judge_log += f' Failure Reason: {judgement.failure_reason}\n' if judgement.reached_captcha: judge_log += ' 🤖 Captcha Detected: Agent encountered captcha challenges\n' judge_log += ' 👉 🥷 Use Browser Use Cloud for the most stealth browser infra: https://docs.browser-use.com/customize/browser/remote\n' diff --git a/browser_use/agent/views.py b/browser_use/agent/views.py index 4289c8c47..66246c81d 100644 --- a/browser_use/agent/views.py +++ b/browser_use/agent/views.py @@ -94,7 +94,8 @@ class JudgementResult(BaseModel): reasoning: str | None = Field(default=None, description='Explanation of the judgement') verdict: bool = Field(description='Whether the trace was successful or not') failure_reason: str | None = Field( - default=None, 
description='If the trace was not successful, the reason why. Otherwise empty.' + default=None, + description='A brief explanation of key reasons why the task was not completed successfully in case of failure. If verdict is true, use an empty string. Keep it concise and easy to read.', ) impossible_task: bool = Field( default=False, diff --git a/browser_use/browser/watchdogs/screenshot_watchdog.py b/browser_use/browser/watchdogs/screenshot_watchdog.py index 7fea7a226..9cbd8fff6 100644 --- a/browser_use/browser/watchdogs/screenshot_watchdog.py +++ b/browser_use/browser/watchdogs/screenshot_watchdog.py @@ -39,7 +39,7 @@ class ScreenshotWatchdog(BaseWatchdog): cdp_session = await self.browser_session.get_or_create_cdp_session() # Prepare screenshot parameters - params = CaptureScreenshotParameters(format='jpeg', quality=60, captureBeyondViewport=False) + params = CaptureScreenshotParameters(format='png', captureBeyondViewport=False) # Take screenshot using CDP self.logger.debug(f'[ScreenshotWatchdog] Taking screenshot with params: {params}') diff --git a/browser_use/code_use/service.py b/browser_use/code_use/service.py index 3456fd230..f7b1cfb02 100644 --- a/browser_use/code_use/service.py +++ b/browser_use/code_use/service.py @@ -614,8 +614,8 @@ class CodeAgent: content_parts.append( ContentPartImageParam( image_url=ImageURL( - url=f'data:image/jpeg;base64,{self._last_screenshot}', - media_type='image/jpeg', + url=f'data:image/png;base64,{self._last_screenshot}', + media_type='image/png', detail='auto', ), ) diff --git a/browser_use/llm/messages.py b/browser_use/llm/messages.py index f2b09ac75..75a5d8f6e 100644 --- a/browser_use/llm/messages.py +++ b/browser_use/llm/messages.py @@ -61,7 +61,7 @@ class ImageURL(BaseModel): [Vision guide](https://platform.openai.com/docs/guides/vision#low-or-high-fidelity-image-understanding). 
""" # needed for Anthropic - media_type: SupportedImageMediaType = 'image/jpeg' + media_type: SupportedImageMediaType = 'image/png' def __str__(self) -> str: url_display = _format_image_url(self.url)