From 8acf706a2f8d0016ae05f310195f184fae315740 Mon Sep 17 00:00:00 2001 From: ShawnPana Date: Sat, 21 Feb 2026 20:28:18 -0800 Subject: [PATCH 01/15] add Browser Use as bundled skill --- skills/browser-use/SKILL.md | 601 ++++++++++++++++++++++++++++++++++++ 1 file changed, 601 insertions(+) create mode 100644 skills/browser-use/SKILL.md diff --git a/skills/browser-use/SKILL.md b/skills/browser-use/SKILL.md new file mode 100644 index 00000000000..595fc047c18 --- /dev/null +++ b/skills/browser-use/SKILL.md @@ -0,0 +1,601 @@ +--- +name: browser-use +description: Automates browser interactions for web testing, form filling, screenshots, and data extraction. Use when the user needs to navigate websites, interact with web pages, fill forms, take screenshots, or extract information from web pages. +homepage: https://github.com/browser-use/browser-use +metadata: + { + "openclaw": + { + "emoji": "🌐", + "requires": { "bins": ["browser-use"] }, + "install": + [ + { + "id": "uv", + "kind": "uv", + "package": "browser-use", + "bins": ["browser-use"], + "label": "Install browser-use (uv)", + }, + ], + }, + } +--- + +# Browser Automation with browser-use CLI + +The `browser-use` command provides fast, persistent browser automation. It maintains browser sessions across commands, enabling complex multi-step workflows. + +## Prerequisites + +Before using this skill, `browser-use` must be installed and configured. Run diagnostics to verify: + +```bash +browser-use doctor +``` + +For more information, see https://github.com/browser-use/browser-use/blob/main/browser_use/skill_cli/README.md + +## Core Workflow + +1. **Navigate**: `browser-use open ` - Opens URL (starts browser if needed) +2. **Inspect**: `browser-use state` - Returns clickable elements with indices +3. **Interact**: Use indices from state to interact (`browser-use click 5`, `browser-use input 3 "text"`) +4. **Verify**: `browser-use state` or `browser-use screenshot` to confirm actions +5. 
**Repeat**: Browser stays open between commands + +## Browser Modes + +```bash +browser-use --browser chromium open <url> # Default: headless Chromium +browser-use --browser chromium --headed open <url> # Visible Chromium window +browser-use --browser real open <url> # Real Chrome (no profile = fresh) +browser-use --browser real --profile "Default" open <url> # Real Chrome with your login sessions +browser-use --browser remote open <url> # Cloud browser +``` + +- **chromium**: Fast, isolated, headless by default +- **real**: Uses a real Chrome binary. Without `--profile`, uses a persistent but empty CLI profile at `~/.config/browseruse/profiles/cli/`. With `--profile "ProfileName"`, copies your actual Chrome profile (cookies, logins, extensions) +- **remote**: Cloud-hosted browser with proxy support. **Two distinct usage modes:** + - **Interactive** (`browser-use --browser remote open/state/click/cookies`): Spawns a persistent local server process that holds a live connection to a cloud browser. Each interactive connection consumes a concurrent cloud session slot and persists until you run `browser-use close`. + - **Cloud agent tasks** (`browser-use -b remote run "task"`, `session create`, `task status`): Pure API calls — no local process, no held connection. The agent runs entirely in the cloud. Use `session create` to configure the browser (profile, proxy), then `run --session-id` to launch tasks. + - **Do NOT mix these.** If you use interactive remote commands (e.g. `cookies import`) on a cloud session, the local server process holds that session open and blocks `run` from using it. For subagents, always use `session create` → `run --session-id` — never interactive remote commands. 
+ +## Essential Commands + +```bash +# Navigation +browser-use open <url> # Navigate to URL +browser-use back # Go back +browser-use scroll down # Scroll down (--amount N for pixels) + +# Page State (always run state first to get element indices) +browser-use state # Get URL, title, clickable elements +browser-use screenshot # Take screenshot (base64) +browser-use screenshot path.png # Save screenshot to file + +# Interactions (use indices from state) +browser-use click <index> # Click element +browser-use type "text" # Type into focused element +browser-use input <index> "text" # Click element, then type +browser-use keys "Enter" # Send keyboard keys +browser-use select <index> "option" # Select dropdown option + +# Data Extraction +browser-use eval "document.title" # Execute JavaScript +browser-use get text <index> # Get element text +browser-use get html --selector "h1" # Get scoped HTML + +# Wait +browser-use wait selector "h1" # Wait for element +browser-use wait text "Success" # Wait for text + +# Session +browser-use sessions # List active sessions +browser-use close # Close current session +browser-use close --all # Close all sessions + +# AI Agent +browser-use -b remote run "task" # Run agent in cloud (async by default) +browser-use task status <id> # Check cloud task progress +``` + +## Commands + +### Navigation & Tabs + +```bash +browser-use open <url> # Navigate to URL +browser-use back # Go back in history +browser-use scroll down # Scroll down +browser-use scroll up # Scroll up +browser-use scroll down --amount 1000 # Scroll by specific pixels (default: 500) +browser-use switch <tab> # Switch to tab by index +browser-use close-tab # Close current tab +browser-use close-tab <tab> # Close specific tab +``` + +### Page State + +```bash +browser-use state # Get URL, title, and clickable elements +browser-use screenshot # Take screenshot (outputs base64) +browser-use screenshot path.png # Save screenshot to file +browser-use screenshot --full path.png # Full page screenshot +``` + +### Interactions + +```bash 
+browser-use click # Click element +browser-use type "text" # Type text into focused element +browser-use input "text" # Click element, then type text +browser-use keys "Enter" # Send keyboard keys +browser-use keys "Control+a" # Send key combination +browser-use select "option" # Select dropdown option +browser-use hover # Hover over element (triggers CSS :hover) +browser-use dblclick # Double-click element +browser-use rightclick # Right-click element (context menu) +``` + +Use indices from `browser-use state`. + +### JavaScript & Data + +```bash +browser-use eval "document.title" # Execute JavaScript, return result +browser-use get title # Get page title +browser-use get html # Get full page HTML +browser-use get html --selector "h1" # Get HTML of specific element +browser-use get text # Get text content of element +browser-use get value # Get value of input/textarea +browser-use get attributes # Get all attributes of element +browser-use get bbox # Get bounding box (x, y, width, height) +``` + +### Cookies + +```bash +browser-use cookies get # Get all cookies +browser-use cookies get --url # Get cookies for specific URL +browser-use cookies set # Set a cookie +browser-use cookies set name val --domain .example.com --secure --http-only +browser-use cookies set name val --same-site Strict # SameSite: Strict, Lax, or None +browser-use cookies set name val --expires 1735689600 # Expiration timestamp +browser-use cookies clear # Clear all cookies +browser-use cookies clear --url # Clear cookies for specific URL +browser-use cookies export # Export all cookies to JSON file +browser-use cookies export --url # Export cookies for specific URL +browser-use cookies import # Import cookies from JSON file +``` + +### Wait Conditions + +```bash +browser-use wait selector "h1" # Wait for element to be visible +browser-use wait selector ".loading" --state hidden # Wait for element to disappear +browser-use wait selector "#btn" --state attached # Wait for element in DOM 
+browser-use wait text "Success" # Wait for text to appear +browser-use wait selector "h1" --timeout 5000 # Custom timeout in ms +``` + +### Python Execution + +```bash +browser-use python "x = 42" # Set variable +browser-use python "print(x)" # Access variable (outputs: 42) +browser-use python "print(browser.url)" # Access browser object +browser-use python --vars # Show defined variables +browser-use python --reset # Clear Python namespace +browser-use python --file script.py # Execute Python file +``` + +The Python session maintains state across commands. The `browser` object provides: + +- `browser.url`, `browser.title`, `browser.html` — page info +- `browser.goto(url)`, `browser.back()` — navigation +- `browser.click(index)`, `browser.type(text)`, `browser.input(index, text)`, `browser.keys(keys)` — interactions +- `browser.screenshot(path)`, `browser.scroll(direction, amount)` — visual +- `browser.wait(seconds)`, `browser.extract(query)` — utilities + +### Agent Tasks + +#### Remote Mode Options + +When using `--browser remote`, additional options are available: + +```bash +# Specify LLM model +browser-use -b remote run "task" --llm gpt-4o +browser-use -b remote run "task" --llm claude-sonnet-4-20250514 + +# Proxy configuration (default: us) +browser-use -b remote run "task" --proxy-country uk + +# Session reuse +browser-use -b remote run "task 1" --keep-alive # Keep session alive after task +browser-use -b remote run "task 2" --session-id abc-123 # Reuse existing session + +# Execution modes +browser-use -b remote run "task" --flash # Fast execution mode +browser-use -b remote run "task" --wait # Wait for completion (default: async) + +# Advanced options +browser-use -b remote run "task" --thinking # Extended reasoning mode +browser-use -b remote run "task" --no-vision # Disable vision (enabled by default) + +# Using a cloud profile (create session first, then run with --session-id) +browser-use session create --profile --keep-alive +# → returns session_id 
+browser-use -b remote run "task" --session-id + +# Task configuration +browser-use -b remote run "task" --start-url https://example.com # Start from specific URL +browser-use -b remote run "task" --allowed-domain example.com # Restrict navigation (repeatable) +browser-use -b remote run "task" --metadata key=value # Task metadata (repeatable) +browser-use -b remote run "task" --skill-id skill-123 # Enable skills (repeatable) +browser-use -b remote run "task" --secret key=value # Secret metadata (repeatable) + +# Structured output and evaluation +browser-use -b remote run "task" --structured-output '{"type":"object"}' # JSON schema for output +browser-use -b remote run "task" --judge # Enable judge mode +browser-use -b remote run "task" --judge-ground-truth "expected answer" +``` + +### Task Management + +```bash +browser-use task list # List recent tasks +browser-use task list --limit 20 # Show more tasks +browser-use task list --status finished # Filter by status (finished, stopped) +browser-use task list --session # Filter by session ID +browser-use task list --json # JSON output + +browser-use task status # Get task status (latest step only) +browser-use task status -c # All steps with reasoning +browser-use task status -v # All steps with URLs + actions +browser-use task status --last 5 # Last N steps only +browser-use task status --step 3 # Specific step number +browser-use task status --reverse # Newest first + +browser-use task stop # Stop a running task +browser-use task logs # Get task execution logs +``` + +### Cloud Session Management + +```bash +browser-use session list # List cloud sessions +browser-use session list --limit 20 # Show more sessions +browser-use session list --status active # Filter by status +browser-use session list --json # JSON output + +browser-use session get # Get session details + live URL +browser-use session get --json + +browser-use session stop # Stop a session +browser-use session stop --all # Stop all active sessions + 
+browser-use session create # Create with defaults +browser-use session create --profile # With cloud profile +browser-use session create --proxy-country uk # With geographic proxy +browser-use session create --start-url https://example.com +browser-use session create --screen-size 1920x1080 +browser-use session create --keep-alive +browser-use session create --persist-memory + +browser-use session share # Create public share URL +browser-use session share --delete # Delete public share +``` + +### Tunnels + +```bash +browser-use tunnel # Start tunnel (returns URL) +browser-use tunnel # Idempotent - returns existing URL +browser-use tunnel list # Show active tunnels +browser-use tunnel stop # Stop tunnel +browser-use tunnel stop --all # Stop all tunnels +``` + +### Session Management + +```bash +browser-use sessions # List active sessions +browser-use close # Close current session +browser-use close --all # Close all sessions +``` + +### Profile Management + +#### Local Chrome Profiles (`--browser real`) + +```bash +browser-use -b real profile list # List local Chrome profiles +browser-use -b real profile cookies "Default" # Show cookie domains in profile +``` + +#### Cloud Profiles (`--browser remote`) + +```bash +browser-use -b remote profile list # List cloud profiles +browser-use -b remote profile list --page 2 --page-size 50 +browser-use -b remote profile get # Get profile details +browser-use -b remote profile create # Create new cloud profile +browser-use -b remote profile create --name "My Profile" +browser-use -b remote profile update --name "New" +browser-use -b remote profile delete +``` + +#### Syncing + +```bash +browser-use profile sync --from "Default" --domain github.com # Domain-specific +browser-use profile sync --from "Default" # Full profile +browser-use profile sync --from "Default" --name "Custom Name" # With custom name +``` + +### Server Control + +```bash +browser-use server logs # View server logs +``` + +## Common Workflows + +### 
Exposing Local Dev Servers + +Use when you have a local dev server and need a cloud browser to reach it. + +**Core workflow:** Start dev server → create tunnel → browse the tunnel URL remotely. + +```bash +# 1. Start your dev server +npm run dev & # localhost:3000 + +# 2. Expose it via Cloudflare tunnel +browser-use tunnel 3000 +# → url: https://abc.trycloudflare.com + +# 3. Now the cloud browser can reach your local server +browser-use --browser remote open https://abc.trycloudflare.com +browser-use state +browser-use screenshot +``` + +**Note:** Tunnels are independent of browser sessions. They persist across `browser-use close` and can be managed separately. Cloudflared must be installed — run `browser-use doctor` to check. + +### Authenticated Browsing with Profiles + +Use when a task requires browsing a site the user is already logged into (e.g. Gmail, GitHub, internal tools). + +**Core workflow:** Check existing profiles → ask user which profile and browser mode → browse with that profile. Only sync cookies if no suitable profile exists. + +**Before browsing an authenticated site, the agent MUST:** + +1. Ask the user whether to use **real** (local Chrome) or **remote** (cloud) browser +2. List available profiles for that mode +3. Ask which profile to use +4. 
If no profile has the right cookies, offer to sync (see below) + +#### Step 1: Check existing profiles + +```bash +# Option A: Local Chrome profiles (--browser real) +browser-use -b real profile list +# → Default: Person 1 (user@gmail.com) +# → Profile 1: Work (work@company.com) + +# Option B: Cloud profiles (--browser remote) +browser-use -b remote profile list +# → abc-123: "Chrome - Default (github.com)" +# → def-456: "Work profile" +``` + +#### Step 2: Browse with the chosen profile + +```bash +# Real browser — uses local Chrome with existing login sessions +browser-use --browser real --profile "Default" open https://github.com + +# Cloud browser — uses cloud profile with synced cookies +browser-use --browser remote --profile abc-123 open https://github.com +``` + +The user is already authenticated — no login needed. + +**Note:** Cloud profile cookies can expire over time. If authentication fails, re-sync cookies from the local Chrome profile. + +#### Step 3: Syncing cookies (only if needed) + +If the user wants to use a cloud browser but no cloud profile has the right cookies, sync them from a local Chrome profile. + +**Before syncing, the agent MUST:** + +1. Ask which local Chrome profile to use +2. Ask which domain(s) to sync — do NOT default to syncing the full profile +3. Confirm before proceeding + +**Check what cookies a local profile has:** + +```bash +browser-use -b real profile cookies "Default" +# → youtube.com: 23 +# → google.com: 18 +# → github.com: 2 +``` + +**Domain-specific sync (recommended):** + +```bash +browser-use profile sync --from "Default" --domain github.com +# Creates new cloud profile: "Chrome - Default (github.com)" +# Only syncs github.com cookies +``` + +**Full profile sync (use with caution):** + +```bash +browser-use profile sync --from "Default" +# Syncs ALL cookies — includes sensitive data, tracking cookies, every session token +``` + +Only use when the user explicitly needs their entire browser state. 
+ +**Fine-grained control (advanced):** + +```bash +# Export cookies to file, manually edit, then import +browser-use --browser real --profile "Default" cookies export /tmp/cookies.json +browser-use --browser remote --profile <id> cookies import /tmp/cookies.json +``` + +**Use the synced profile:** + +```bash +browser-use --browser remote --profile <id> open https://github.com +``` + +### Running Subagents + +Use cloud sessions to run autonomous browser agents in parallel. + +**Core workflow:** `session create` (with profile/proxy settings) → `run --session-id` → poll with `task status` → collect results → clean up sessions. + +**IMPORTANT:** Subagents use `session create` and `run` — these are API calls with no local process. Do NOT use interactive remote browser commands (`--browser remote open`, `cookies import`, `state`, etc.) to configure sessions before running tasks. Interactive commands spawn local server processes that hold cloud session slots open and will cause `run` to fail with HTTP 429 errors. 
+ +- **Session = Agent**: Each cloud session is a browser agent with its own state +- **Task = Work**: Jobs given to an agent; an agent can run multiple tasks sequentially +- **Session lifecycle**: Once stopped, a session cannot be revived — start a new one + +#### Launching Tasks + +```bash +# Single task (async by default — returns immediately) +browser-use -b remote run "Search for AI news and summarize top 3 articles" +# → task_id: task-abc, session_id: sess-123 + +# Parallel tasks with profile + proxy — create sessions first, then launch tasks +browser-use session create --profile <id> --proxy-country us --keep-alive +# → session_id: sess-a +browser-use session create --profile <id> --proxy-country us --keep-alive +# → session_id: sess-b +browser-use session create --profile <id> --proxy-country us --keep-alive +# → session_id: sess-c +browser-use -b remote run "Research competitor A pricing" --session-id sess-a +browser-use -b remote run "Research competitor B pricing" --session-id sess-b +browser-use -b remote run "Research competitor C pricing" --session-id sess-c + +# Sequential tasks in same session (reuses cookies, login state, etc.) 
+browser-use -b remote run "Log into example.com" --keep-alive +# → task_id: task-1, session_id: sess-123 +browser-use task status task-1 # Wait for completion +browser-use -b remote run "Export settings" --session-id sess-123 +# → task_id: task-2, session_id: sess-123 (same session) +``` + +#### Managing & Stopping + +```bash +browser-use task list --status finished # See completed tasks +browser-use task stop task-abc # Stop a task (session may continue if --keep-alive) +browser-use session stop sess-123 # Stop an entire session (terminates its tasks) +browser-use session stop --all # Stop all sessions +``` + +#### Monitoring + +**Task status is designed for token efficiency.** Default output is minimal — only expand when needed: + +| Mode | Flag | Tokens | Use When | +| ------- | ------ | ------ | ------------------- | +| Default | (none) | Low | Polling progress | +| Compact | `-c` | Medium | Need full reasoning | +| Verbose | `-v` | High | Debugging actions | + +```bash +# For long tasks (50+ steps) +browser-use task status <id> -c --last 5 # Last 5 steps only +browser-use task status <id> -v --step 10 # Inspect specific step +``` + +**Live view**: `browser-use session get <session-id>` returns a live URL to watch the agent. + +**Detect stuck tasks**: If cost/duration in `task status` stops increasing, the task is stuck — stop it and start a new agent. + +**Logs**: `browser-use task logs <id>` — only available after task completes. + +## Global Options + +| Option | Description | +| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------- | +| `--session NAME` | Use named session (default: "default") | +| `--browser MODE` | Browser mode: chromium, real, remote | +| `--headed` | Show browser window (chromium mode) | +| `--profile NAME` | Browser profile (local name or cloud ID). Works with `open`, `session create`, etc. 
— does NOT work with `run` (use `--session-id` instead) | +| `--json` | Output as JSON | +| `--mcp` | Run as MCP server via stdin/stdout | + +**Session behavior**: All commands without `--session` use the same "default" session. The browser stays open and is reused across commands. Use `--session NAME` to run multiple browsers in parallel. + +## Tips + +1. **Always run `browser-use state` first** to see available elements and their indices +2. **Use `--headed` for debugging** to see what the browser is doing +3. **Sessions persist** — the browser stays open between commands +4. **Use `--json`** for programmatic parsing +5. **Python variables persist** across `browser-use python` commands within a session +6. **CLI aliases**: `bu`, `browser`, and `browseruse` all work identically to `browser-use` + +## Troubleshooting + +**Run diagnostics first:** + +```bash +browser-use doctor +``` + +**Browser won't start?** + +```bash +browser-use close --all # Close all sessions +browser-use --headed open # Try with visible window +``` + +**Element not found?** + +```bash +browser-use state # Check current elements +browser-use scroll down # Element might be below fold +browser-use state # Check again +``` + +**Session issues?** + +```bash +browser-use sessions # Check active sessions +browser-use close --all # Clean slate +browser-use open # Fresh start +``` + +**Session reuse fails after `task stop`**: +If you stop a task and try to reuse its session, the new task may get stuck at "created" status. Create a new session instead: + +```bash +browser-use session create --profile --keep-alive +browser-use -b remote run "new task" --session-id +``` + +**Task stuck at "started"**: Check cost with `task status` — if not increasing, the task is stuck. View live URL with `session get`, then stop and start a new agent. + +**Sessions persist after tasks complete**: Tasks finishing doesn't auto-stop sessions. Run `browser-use session stop --all` to clean up. 
+ +## Cleanup + +**Always close the browser when done:** + +```bash +browser-use close # Close browser session +browser-use session stop --all # Stop cloud sessions (if any) +browser-use tunnel stop --all # Stop tunnels (if any) +``` From 236a49b98c39c3fabf9266776f21744fcf90ac67 Mon Sep 17 00:00:00 2001 From: ShawnPana Date: Mon, 9 Mar 2026 20:34:55 -0700 Subject: [PATCH 02/15] docs: add Browser Use as a cloud browser provider Co-Authored-By: Claude Opus 4.6 --- docs/tools/browser.md | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/docs/tools/browser.md b/docs/tools/browser.md index d632e713068..bfb12777f0a 100644 --- a/docs/tools/browser.md +++ b/docs/tools/browser.md @@ -205,9 +205,45 @@ the standard HTTP-based CDP discovery (`/json/version`). OpenClaw supports both: discover the WebSocket debugger URL, then connects. - **WebSocket endpoints** (`ws://` / `wss://`) — OpenClaw connects directly, skipping `/json/version`. Use this for services like - [Browserbase](https://www.browserbase.com) or any provider that hands you a + [Browser Use](https://www.browser-use.com), + [Browserbase](https://www.browserbase.com), or any provider that hands you a WebSocket URL. +### Browser Use + +[Browser Use](https://www.browser-use.com) is a cloud browser platform with +anti-detect stealth, CAPTCHA solving, persistent profiles, and residential +proxies. + +```json5 +{ + browser: { + enabled: true, + defaultProfile: "browseruse", + remoteCdpTimeoutMs: 5000, + remoteCdpHandshakeTimeoutMs: 15000, + profiles: { + browseruse: { + // All Browser Use session params can be added as query params. + // See: https://docs.browser-use.com/cloud/api-v2/browsers/create-browser-session + cdpUrl: "wss://connect.browser-use.com?apiKey=<BROWSER_USE_API_KEY>&timeout=240&profileId=<PROFILE_ID>&proxyCountryCode=us", + color: "#ff750e", + }, + }, + }, +} +``` + +Notes: + +- [Sign up](https://www.browser-use.com) and copy your **API Key** from the + dashboard. 
+- Replace `<BROWSER_USE_API_KEY>` with your real Browser Use API key. +- All [Browser Use session parameters](https://docs.browser-use.com/cloud/api-v2/browsers/create-browser-session) + (timeout, profileId, proxyCountryCode, screen size, custom proxy, etc.) + are passed as query params in the `cdpUrl`. +- See [browser-use.com](https://www.browser-use.com) for more information. + ### Browserbase [Browserbase](https://www.browserbase.com) is a cloud platform for running From f8edbf76e413d1d41955ece03d7189765787f00f Mon Sep 17 00:00:00 2001 From: ShawnPana Date: Mon, 9 Mar 2026 20:38:14 -0700 Subject: [PATCH 03/15] docs: adjust Browser Use handshake timeout to 8000ms Co-Authored-By: Claude Opus 4.6 --- docs/tools/browser.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/browser.md b/docs/tools/browser.md index bfb12777f0a..1a9d0634dc4 100644 --- a/docs/tools/browser.md +++ b/docs/tools/browser.md @@ -221,7 +221,7 @@ proxies. enabled: true, defaultProfile: "browseruse", remoteCdpTimeoutMs: 5000, - remoteCdpHandshakeTimeoutMs: 15000, + remoteCdpHandshakeTimeoutMs: 8000, profiles: { browseruse: { // All Browser Use session params can be added as query params. From 48e52ca7c41300eacbd1bbb51ae1cdd0d996de7e Mon Sep 17 00:00:00 2001 From: ShawnPana Date: Mon, 9 Mar 2026 20:38:57 -0700 Subject: [PATCH 04/15] docs: remove profileId placeholder from Browser Use example URL Co-Authored-By: Claude Opus 4.6 --- docs/tools/browser.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/browser.md b/docs/tools/browser.md index 1a9d0634dc4..091fa5cd7e0 100644 --- a/docs/tools/browser.md +++ b/docs/tools/browser.md @@ -226,7 +226,7 @@ proxies. browseruse: { // All Browser Use session params can be added as query params. 
// See: https://docs.browser-use.com/cloud/api-v2/browsers/create-browser-session - cdpUrl: "wss://connect.browser-use.com?apiKey=<BROWSER_USE_API_KEY>&timeout=240&profileId=<PROFILE_ID>&proxyCountryCode=us", + cdpUrl: "wss://connect.browser-use.com?apiKey=<BROWSER_USE_API_KEY>&timeout=240&proxyCountryCode=us", color: "#ff750e", }, }, From 4cb24d82639f4b53e297a7a7f83104b2f9a7a4ad Mon Sep 17 00:00:00 2001 From: ShawnPana Date: Mon, 9 Mar 2026 20:40:54 -0700 Subject: [PATCH 05/15] docs: restore profileId in URL and add note about replacing it Co-Authored-By: Claude Opus 4.6 --- docs/tools/browser.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/tools/browser.md b/docs/tools/browser.md index 091fa5cd7e0..8c26bea5274 100644 --- a/docs/tools/browser.md +++ b/docs/tools/browser.md @@ -226,7 +226,7 @@ proxies. browseruse: { // All Browser Use session params can be added as query params. // See: https://docs.browser-use.com/cloud/api-v2/browsers/create-browser-session - cdpUrl: "wss://connect.browser-use.com?apiKey=<BROWSER_USE_API_KEY>&timeout=240&proxyCountryCode=us", + cdpUrl: "wss://connect.browser-use.com?apiKey=<BROWSER_USE_API_KEY>&timeout=240&profileId=<PROFILE_ID>&proxyCountryCode=us", color: "#ff750e", }, }, @@ -239,6 +239,8 @@ Notes: - [Sign up](https://www.browser-use.com) and copy your **API Key** from the dashboard. - Replace `<BROWSER_USE_API_KEY>` with your real Browser Use API key. +- Replace `<PROFILE_ID>` with a persistent profile ID, or remove the + `profileId` param if you don't need profile persistence. - All [Browser Use session parameters](https://docs.browser-use.com/cloud/api-v2/browsers/create-browser-session) (timeout, profileId, proxyCountryCode, screen size, custom proxy, etc.) are passed as query params in the `cdpUrl`. 
From db2993ca92a773bc91c1280853fe0f96684453b7 Mon Sep 17 00:00:00 2001 From: ShawnPana Date: Tue, 10 Mar 2026 20:38:48 -0700 Subject: [PATCH 06/15] browser: skip throwaway WebSocket probes for wss:// cloud CDP profiles For cloud browser providers like Browser Use and Browserbase, each raw WebSocket health check provisions a new ephemeral browser session. Check the cached Playwright connection state instead, which reflects whether we actually have an active connection without side effects. --- src/browser/pw-session.ts | 7 +++++++ src/browser/server-context.availability.ts | 11 +++++++++++ 2 files changed, 18 insertions(+) diff --git a/src/browser/pw-session.ts b/src/browser/pw-session.ts index a7103c1174c..840499ccea5 100644 --- a/src/browser/pw-session.ts +++ b/src/browser/pw-session.ts @@ -119,6 +119,13 @@ const MAX_NETWORK_REQUESTS = 500; const cachedByCdpUrl = new Map(); const connectingByCdpUrl = new Map>(); +/** + * Returns true if there is an active cached Playwright connection for the given CDP URL. 
+ */ +export function hasActivePlaywrightConnection(cdpUrl: string): boolean { + return cachedByCdpUrl.has(cdpUrl.replace(/\/$/, "")); +} + function normalizeCdpUrl(raw: string) { return raw.replace(/\/$/, ""); } diff --git a/src/browser/server-context.availability.ts b/src/browser/server-context.availability.ts index 3b00ff99dff..b8d617867ea 100644 --- a/src/browser/server-context.availability.ts +++ b/src/browser/server-context.availability.ts @@ -3,6 +3,7 @@ import { PROFILE_POST_RESTART_WS_TIMEOUT_MS, resolveCdpReachabilityTimeouts, } from "./cdp-timeouts.js"; +import { isWebSocketUrl } from "./cdp.helpers.js"; import { isChromeCdpReady, isChromeReachable, @@ -16,6 +17,7 @@ import { stopChromeExtensionRelayServer, } from "./extension-relay.js"; import { getBrowserProfileCapabilities } from "./profile-capabilities.js"; +import { hasActivePlaywrightConnection } from "./pw-session.js"; import { CDP_READY_AFTER_LAUNCH_MAX_TIMEOUT_MS, CDP_READY_AFTER_LAUNCH_MIN_TIMEOUT_MS, @@ -60,11 +62,20 @@ export function createProfileAvailability({ }); const isReachable = async (timeoutMs?: number) => { + // For direct WebSocket endpoints (e.g. Browser Use), each raw CDP health check + // opens a new WebSocket which may provision a new browser session. Check the + // cached Playwright connection instead — it reflects the actual connection state. 
+ if (isWebSocketUrl(profile.cdpUrl)) { + return hasActivePlaywrightConnection(profile.cdpUrl); + } const { httpTimeoutMs, wsTimeoutMs } = resolveTimeouts(timeoutMs); return await isChromeCdpReady(profile.cdpUrl, httpTimeoutMs, wsTimeoutMs); }; const isHttpReachable = async (timeoutMs?: number) => { + if (isWebSocketUrl(profile.cdpUrl)) { + return hasActivePlaywrightConnection(profile.cdpUrl); + } const { httpTimeoutMs } = resolveTimeouts(timeoutMs); return await isChromeReachable(profile.cdpUrl, httpTimeoutMs); }; From 5c85f24d4be06e2f1f6d46dd168e0336394e4901 Mon Sep 17 00:00:00 2001 From: ShawnPana Date: Tue, 10 Mar 2026 20:39:21 -0700 Subject: [PATCH 07/15] browser: allow wss:// profiles to connect lazily in ensureBrowserAvailable Cloud WebSocket endpoints like Browser Use provision sessions on demand. Skip the up-front reachability gate so the first tab operation triggers the Playwright connectOverCDP call instead of erroring with unreachable. --- src/browser/server-context.availability.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/browser/server-context.availability.ts b/src/browser/server-context.availability.ts index b8d617867ea..5fd3d28d28d 100644 --- a/src/browser/server-context.availability.ts +++ b/src/browser/server-context.availability.ts @@ -180,6 +180,12 @@ export function createProfileAvailability({ } if (!httpReachable) { + // Direct WebSocket endpoints (e.g. Browser Use) are on-demand — no need to probe + // reachability up front. The Playwright connection is established lazily in + // connectBrowser when a tab operation actually needs it. 
+ if (isWebSocketUrl(profile.cdpUrl)) { + return; + } if ((attachOnly || remoteCdp) && opts.onEnsureAttachTarget) { await opts.onEnsureAttachTarget(profile); if (await isHttpReachable(PROFILE_ATTACH_RETRY_TIMEOUT_MS)) { From 54523327a08f5e58838de818993cad5f23689522 Mon Sep 17 00:00:00 2001 From: ShawnPana Date: Tue, 10 Mar 2026 20:39:44 -0700 Subject: [PATCH 08/15] browser: enable action=stop to close Playwright connection for wss:// profiles Previously stop was a no-op for remote WebSocket profiles since there is no local Chrome process to kill. Now it closes the cached Playwright CDP connection, properly disconnecting from the cloud browser session. --- src/browser/server-context.availability.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/browser/server-context.availability.ts b/src/browser/server-context.availability.ts index 5fd3d28d28d..414b103263e 100644 --- a/src/browser/server-context.availability.ts +++ b/src/browser/server-context.availability.ts @@ -255,6 +255,13 @@ export function createProfileAvailability({ const stopRunningBrowser = async (): Promise<{ stopped: boolean }> => { await reconcileProfileRuntime(); + // For direct WebSocket endpoints (e.g. Browser Use), there's no local Chrome process + // to stop. Instead, close the cached Playwright connection to the cloud provider. 
+ if (isWebSocketUrl(profile.cdpUrl)) { + const { closePlaywrightBrowserConnection } = await import("./pw-session.js"); + await closePlaywrightBrowserConnection({ cdpUrl: profile.cdpUrl }); + return { stopped: true }; + } if (capabilities.requiresRelay) { const stopped = await stopChromeExtensionRelayServer({ cdpUrl: profile.cdpUrl, From 70979ede75dab70b0ae7a4547c9bc411ace65bc7 Mon Sep 17 00:00:00 2001 From: ShawnPana Date: Tue, 10 Mar 2026 20:42:00 -0700 Subject: [PATCH 09/15] browser: use static import for closePlaywrightBrowserConnection Replace dynamic import with static import since pw-session.ts is already statically imported in this module, eliminating INEFFECTIVE_DYNAMIC_IMPORT build warnings. --- src/browser/server-context.availability.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/browser/server-context.availability.ts b/src/browser/server-context.availability.ts index 414b103263e..9831bc25751 100644 --- a/src/browser/server-context.availability.ts +++ b/src/browser/server-context.availability.ts @@ -17,7 +17,7 @@ import { stopChromeExtensionRelayServer, } from "./extension-relay.js"; import { getBrowserProfileCapabilities } from "./profile-capabilities.js"; -import { hasActivePlaywrightConnection } from "./pw-session.js"; +import { closePlaywrightBrowserConnection, hasActivePlaywrightConnection } from "./pw-session.js"; import { CDP_READY_AFTER_LAUNCH_MAX_TIMEOUT_MS, CDP_READY_AFTER_LAUNCH_MIN_TIMEOUT_MS, @@ -258,7 +258,6 @@ export function createProfileAvailability({ // For direct WebSocket endpoints (e.g. Browser Use), there's no local Chrome process // to stop. Instead, close the cached Playwright connection to the cloud provider. 
if (isWebSocketUrl(profile.cdpUrl)) { - const { closePlaywrightBrowserConnection } = await import("./pw-session.js"); await closePlaywrightBrowserConnection({ cdpUrl: profile.cdpUrl }); return { stopped: true }; } From 0aeddd8af9ac799ed904a38c94f45d69ff2a51ba Mon Sep 17 00:00:00 2001 From: ShawnPana Date: Wed, 11 Mar 2026 11:29:56 -0700 Subject: [PATCH 10/15] docs(browser): update Browser Use links, pricing, and shared auto-create note - Sign up and dashboard links now point to cloud.browser-use.com - Added pay-as-you-go pricing bullet with concurrent session info - Moved auto-create-on-connect note to shared WebSocket endpoints section - Restructured opening paragraph to fix Mintlify underline rendering --- docs/tools/browser.md | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/docs/tools/browser.md b/docs/tools/browser.md index 8c26bea5274..ae66d46409d 100644 --- a/docs/tools/browser.md +++ b/docs/tools/browser.md @@ -204,16 +204,15 @@ the standard HTTP-based CDP discovery (`/json/version`). OpenClaw supports both: - **HTTP(S) endpoints** (e.g. Browserless) — OpenClaw calls `/json/version` to discover the WebSocket debugger URL, then connects. - **WebSocket endpoints** (`ws://` / `wss://`) — OpenClaw connects directly, - skipping `/json/version`. Use this for services like - [Browser Use](https://www.browser-use.com), + skipping `/json/version`. These providers auto-create a browser session on + WebSocket connect, so no manual session creation step is needed. Use this for + services like [Browser Use](https://cloud.browser-use.com), [Browserbase](https://www.browserbase.com), or any provider that hands you a WebSocket URL. ### Browser Use -[Browser Use](https://www.browser-use.com) is a cloud browser platform with -anti-detect stealth, CAPTCHA solving, persistent profiles, and residential -proxies. 
+Set up a [Browser Use](https://cloud.browser-use.com) cloud browser profile with anti-detect stealth, CAPTCHA solving, persistent profiles, and residential proxies. ```json5 { @@ -236,15 +235,16 @@ proxies. Notes: -- [Sign up](https://www.browser-use.com) and copy your **API Key** from the - dashboard. +- [Sign up](https://cloud.browser-use.com) and copy your **API Key** from the + [dashboard](https://cloud.browser-use.com/settings?tab=api-keys&new=1). - Replace `<BROWSER_USE_API_KEY>` with your real Browser Use API key. - Replace `<PROFILE_ID>` with a persistent profile ID, or remove the `profileId` param if you don't need profile persistence. - All [Browser Use session parameters](https://docs.browser-use.com/cloud/api-v2/browsers/create-browser-session) (timeout, profileId, proxyCountryCode, screen size, custom proxy, etc.) are passed as query params in the `cdpUrl`. -- See [browser-use.com](https://www.browser-use.com) for more information. +- [Pay-as-you-go pricing](https://browser-use.com/pricing) starts at $0.06/hr per browser session with up to 25 concurrent sessions and no monthly commitment — just buy credits and go. +- See [Browser Use docs](https://docs.browser-use.com) for full API reference, SDK guides, and integration examples. ### Browserbase @@ -274,8 +274,6 @@ Notes: - [Sign up](https://www.browserbase.com/sign-up) and copy your **API Key** from the [Overview dashboard](https://www.browserbase.com/overview). - Replace `<BROWSERBASE_API_KEY>` with your real Browserbase API key. -- Browserbase auto-creates a browser session on WebSocket connect, so no - manual session creation step is needed. - The free tier allows one concurrent session and one browser hour per month. See [pricing](https://www.browserbase.com/pricing) for paid plan limits.
- See the [Browserbase docs](https://docs.browserbase.com) for full API From a3fdb664d2b078fafa4de0ed31452a0f4e36bc4b Mon Sep 17 00:00:00 2001 From: ShawnPana Date: Wed, 11 Mar 2026 11:30:08 -0700 Subject: [PATCH 11/15] browser: scope wss:// special handling to remote profiles only Loopback ws:// profiles are locally-managed and need the normal launch/attach/probe flow. Only skip throwaway WebSocket probes and use lazy connection for remote (non-loopback) WebSocket profiles. --- src/browser/server-context.availability.ts | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/browser/server-context.availability.ts b/src/browser/server-context.availability.ts index 9831bc25751..5afcec8be1c 100644 --- a/src/browser/server-context.availability.ts +++ b/src/browser/server-context.availability.ts @@ -62,10 +62,11 @@ export function createProfileAvailability({ }); const isReachable = async (timeoutMs?: number) => { - // For direct WebSocket endpoints (e.g. Browser Use), each raw CDP health check - // opens a new WebSocket which may provision a new browser session. Check the - // cached Playwright connection instead — it reflects the actual connection state. - if (isWebSocketUrl(profile.cdpUrl)) { + // For remote WebSocket endpoints (e.g. Browser Use, Browserbase), each raw CDP + // health check opens a new WebSocket which may provision a new browser session. + // Check the cached Playwright connection instead — it reflects the actual state. + // Loopback ws:// profiles are locally-managed and use normal HTTP-based probes. 
+ if (capabilities.isRemote && isWebSocketUrl(profile.cdpUrl)) { return hasActivePlaywrightConnection(profile.cdpUrl); } const { httpTimeoutMs, wsTimeoutMs } = resolveTimeouts(timeoutMs); @@ -73,7 +74,7 @@ export function createProfileAvailability({ }; const isHttpReachable = async (timeoutMs?: number) => { - if (isWebSocketUrl(profile.cdpUrl)) { + if (capabilities.isRemote && isWebSocketUrl(profile.cdpUrl)) { return hasActivePlaywrightConnection(profile.cdpUrl); } const { httpTimeoutMs } = resolveTimeouts(timeoutMs); @@ -180,10 +181,11 @@ export function createProfileAvailability({ } if (!httpReachable) { - // Direct WebSocket endpoints (e.g. Browser Use) are on-demand — no need to probe + // Remote WebSocket endpoints (e.g. Browser Use) are on-demand — no need to probe // reachability up front. The Playwright connection is established lazily in // connectBrowser when a tab operation actually needs it. - if (isWebSocketUrl(profile.cdpUrl)) { + // Loopback ws:// profiles still need the normal launch/attach flow below. + if (capabilities.isRemote && isWebSocketUrl(profile.cdpUrl)) { return; } if ((attachOnly || remoteCdp) && opts.onEnsureAttachTarget) { @@ -255,9 +257,9 @@ export function createProfileAvailability({ const stopRunningBrowser = async (): Promise<{ stopped: boolean }> => { await reconcileProfileRuntime(); - // For direct WebSocket endpoints (e.g. Browser Use), there's no local Chrome process + // For remote WebSocket endpoints (e.g. Browser Use), there's no local Chrome process // to stop. Instead, close the cached Playwright connection to the cloud provider. 
- if (isWebSocketUrl(profile.cdpUrl)) { + if (capabilities.isRemote && isWebSocketUrl(profile.cdpUrl)) { await closePlaywrightBrowserConnection({ cdpUrl: profile.cdpUrl }); return { stopped: true }; } From 74177d480e266bea2644dce2709555a4e04eb2a9 Mon Sep 17 00:00:00 2001 From: ShawnPana Date: Wed, 11 Mar 2026 11:54:44 -0700 Subject: [PATCH 12/15] browser: return accurate stop status for remote WebSocket profiles Check hasActivePlaywrightConnection before closing so repeated or cold-start stop calls correctly return { stopped: false } instead of always claiming a session was terminated. --- src/browser/server-context.availability.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/browser/server-context.availability.ts b/src/browser/server-context.availability.ts index 5afcec8be1c..bea8c54b33e 100644 --- a/src/browser/server-context.availability.ts +++ b/src/browser/server-context.availability.ts @@ -260,8 +260,9 @@ export function createProfileAvailability({ // For remote WebSocket endpoints (e.g. Browser Use), there's no local Chrome process // to stop. Instead, close the cached Playwright connection to the cloud provider. 
if (capabilities.isRemote && isWebSocketUrl(profile.cdpUrl)) { + const wasConnected = hasActivePlaywrightConnection(profile.cdpUrl); await closePlaywrightBrowserConnection({ cdpUrl: profile.cdpUrl }); - return { stopped: true }; + return { stopped: wasConnected }; } if (capabilities.requiresRelay) { const stopped = await stopChromeExtensionRelayServer({ From 3c3199278432bac9aa9008b30c0bdd7b79c1c767 Mon Sep 17 00:00:00 2001 From: ShawnPana Date: Wed, 11 Mar 2026 12:44:42 -0700 Subject: [PATCH 13/15] browser: reconnect remote websocket tab routes lazily --- docs/tools/browser.md | 4 +- src/browser/routes/tabs.test.ts | 194 ++++++++++++++++++++++++++++++++ src/browser/routes/tabs.ts | 35 ++++++ 3 files changed, 231 insertions(+), 2 deletions(-) create mode 100644 src/browser/routes/tabs.test.ts diff --git a/docs/tools/browser.md b/docs/tools/browser.md index ae66d46409d..d1727c5e533 100644 --- a/docs/tools/browser.md +++ b/docs/tools/browser.md @@ -219,8 +219,8 @@ Set up a [Browser Use](https://cloud.browser-use.com) cloud browser profile with browser: { enabled: true, defaultProfile: "browseruse", - remoteCdpTimeoutMs: 5000, - remoteCdpHandshakeTimeoutMs: 8000, + remoteCdpTimeoutMs: 3000, + remoteCdpHandshakeTimeoutMs: 5000, profiles: { browseruse: { // All Browser Use session params can be added as query params. 
diff --git a/src/browser/routes/tabs.test.ts b/src/browser/routes/tabs.test.ts new file mode 100644 index 00000000000..e009f3c580f --- /dev/null +++ b/src/browser/routes/tabs.test.ts @@ -0,0 +1,194 @@ +import { describe, expect, it, vi } from "vitest"; +import type { ResolvedBrowserProfile } from "../config.js"; +import type { BrowserRouteContext, ProfileContext } from "../server-context.js"; +import { registerBrowserTabRoutes } from "./tabs.js"; +import type { + BrowserRequest, + BrowserResponse, + BrowserRouteHandler, + BrowserRouteRegistrar, +} from "./types.js"; + +function makeProfile(overrides: Partial): ResolvedBrowserProfile { + return { + name: "remote", + cdpPort: 443, + cdpUrl: "wss://connect.browser-use.com", + cdpHost: "connect.browser-use.com", + cdpIsLoopback: false, + color: "#00AA00", + driver: "openclaw", + attachOnly: false, + ...overrides, + }; +} + +function makeProfileContext(overrides: Partial = {}): ProfileContext { + return { + profile: makeProfile({}), + ensureBrowserAvailable: vi.fn(async () => {}), + ensureTabAvailable: vi.fn(async () => ({ + targetId: "T1", + title: "Tab 1", + url: "https://example.com", + type: "page", + })), + isHttpReachable: vi.fn(async () => false), + isReachable: vi.fn(async () => false), + listTabs: vi.fn(async () => []), + openTab: vi.fn(async () => ({ + targetId: "T1", + title: "Tab 1", + url: "https://example.com", + type: "page", + })), + focusTab: vi.fn(async () => {}), + closeTab: vi.fn(async () => {}), + stopRunningBrowser: vi.fn(async () => ({ stopped: false })), + resetProfile: vi.fn(async () => ({ moved: false, from: "/tmp/profile" })), + ...overrides, + }; +} + +function createRegistrar() { + const routes = new Map(); + const registrar: BrowserRouteRegistrar = { + get: (path, handler) => void routes.set(`GET ${path}`, handler), + post: (path, handler) => void routes.set(`POST ${path}`, handler), + delete: (path, handler) => void routes.set(`DELETE ${path}`, handler), + }; + return { routes, registrar 
}; +} + +function makeResponse() { + const result: { statusCode: number; body: unknown } = { statusCode: 200, body: null }; + const res: BrowserResponse = { + status: (code) => { + result.statusCode = code; + return res; + }, + json: (body) => { + result.body = body; + }, + }; + return { res, result }; +} + +function makeContext(profileCtx: ProfileContext): BrowserRouteContext { + return { + state: vi.fn(), + forProfile: vi.fn(() => profileCtx), + listProfiles: vi.fn(async () => []), + mapTabError: vi.fn(() => null), + ensureBrowserAvailable: vi.fn(async () => {}), + ensureTabAvailable: vi.fn(async () => ({ + targetId: "T1", + title: "Tab 1", + url: "https://example.com", + type: "page", + })), + isHttpReachable: vi.fn(async () => false), + isReachable: vi.fn(async () => false), + listTabs: vi.fn(async () => []), + openTab: vi.fn(async () => ({ + targetId: "T1", + title: "Tab 1", + url: "https://example.com", + type: "page", + })), + focusTab: vi.fn(async () => {}), + closeTab: vi.fn(async () => {}), + stopRunningBrowser: vi.fn(async () => ({ stopped: false })), + resetProfile: vi.fn(async () => ({ moved: false, from: "/tmp/profile" })), + }; +} + +describe("browser tab routes", () => { + it("lists tabs for remote websocket profiles without requiring a cached connection", async () => { + const listTabs = vi.fn(async () => [ + { targetId: "T1", title: "Tab 1", url: "https://example.com", type: "page" }, + ]); + const profileCtx = makeProfileContext({ listTabs }); + const ctx = makeContext(profileCtx); + const { routes, registrar } = createRegistrar(); + registerBrowserTabRoutes(registrar, ctx); + + const handler = routes.get("GET /tabs"); + expect(handler).toBeTypeOf("function"); + + const { res, result } = makeResponse(); + await handler!( + { + params: {}, + query: {}, + } satisfies BrowserRequest, + res, + ); + + expect(profileCtx.isReachable).not.toHaveBeenCalled(); + expect(listTabs).toHaveBeenCalledTimes(1); + expect(result.body).toEqual({ + running: true, + 
tabs: [{ targetId: "T1", title: "Tab 1", url: "https://example.com", type: "page" }], + }); + }); + + it("focuses tabs for remote websocket profiles without the browser-not-running preflight", async () => { + const focusTab = vi.fn(async () => {}); + const listTabs = vi.fn(async () => [ + { targetId: "T1", title: "Tab 1", url: "https://example.com", type: "page" }, + ]); + const profileCtx = makeProfileContext({ listTabs, focusTab }); + const ctx = makeContext(profileCtx); + const { routes, registrar } = createRegistrar(); + registerBrowserTabRoutes(registrar, ctx); + + const handler = routes.get("POST /tabs/focus"); + expect(handler).toBeTypeOf("function"); + + const { res, result } = makeResponse(); + await handler!( + { + params: {}, + query: {}, + body: { targetId: "T1" }, + } satisfies BrowserRequest, + res, + ); + + expect(profileCtx.isReachable).not.toHaveBeenCalled(); + expect(listTabs).toHaveBeenCalledTimes(1); + expect(focusTab).toHaveBeenCalledWith("T1"); + expect(result.body).toEqual({ ok: true }); + }); + + it("lists tabs via action=list for remote websocket profiles without requiring a cached connection", async () => { + const listTabs = vi.fn(async () => [ + { targetId: "T1", title: "Tab 1", url: "https://example.com", type: "page" }, + ]); + const profileCtx = makeProfileContext({ listTabs }); + const ctx = makeContext(profileCtx); + const { routes, registrar } = createRegistrar(); + registerBrowserTabRoutes(registrar, ctx); + + const handler = routes.get("POST /tabs/action"); + expect(handler).toBeTypeOf("function"); + + const { res, result } = makeResponse(); + await handler!( + { + params: {}, + query: {}, + body: { action: "list" }, + } satisfies BrowserRequest, + res, + ); + + expect(profileCtx.isReachable).not.toHaveBeenCalled(); + expect(listTabs).toHaveBeenCalledTimes(1); + expect(result.body).toEqual({ + ok: true, + tabs: [{ targetId: "T1", title: "Tab 1", url: "https://example.com", type: "page" }], + }); + }); +}); diff --git 
a/src/browser/routes/tabs.ts b/src/browser/routes/tabs.ts index 87cb36c562c..5b3be7ea93c 100644 --- a/src/browser/routes/tabs.ts +++ b/src/browser/routes/tabs.ts @@ -1,3 +1,4 @@ +import { isWebSocketUrl } from "../cdp.helpers.js"; import { BrowserProfileUnavailableError, BrowserTabNotFoundError } from "../errors.js"; import type { BrowserRouteContext, ProfileContext } from "../server-context.js"; import type { BrowserRequest, BrowserResponse, BrowserRouteRegistrar } from "./types.js"; @@ -49,7 +50,25 @@ async function withTabsProfileRoute(params: { } } +function usesLazyRemoteWebSocketReconnect(profileCtx: ProfileContext) { + return !profileCtx.profile.cdpIsLoopback && isWebSocketUrl(profileCtx.profile.cdpUrl); +} + async function ensureBrowserRunning(profileCtx: ProfileContext, res: BrowserResponse) { + if (usesLazyRemoteWebSocketReconnect(profileCtx)) { + try { + // Remote WebSocket profiles reconnect on demand through the Playwright-backed tab ops. + await profileCtx.listTabs(); + return true; + } catch { + jsonError( + res, + new BrowserProfileUnavailableError("browser not running").status, + "browser not running", + ); + return false; + } + } if (!(await profileCtx.isReachable(300))) { jsonError( res, @@ -106,6 +125,14 @@ export function registerBrowserTabRoutes(app: BrowserRouteRegistrar, ctx: Browse res, ctx, run: async (profileCtx) => { + if (usesLazyRemoteWebSocketReconnect(profileCtx)) { + try { + const tabs = await profileCtx.listTabs(); + return res.json({ running: true, tabs }); + } catch { + return res.json({ running: false, tabs: [] as unknown[] }); + } + } const reachable = await profileCtx.isReachable(300); if (!reachable) { return res.json({ running: false, tabs: [] as unknown[] }); @@ -178,6 +205,14 @@ export function registerBrowserTabRoutes(app: BrowserRouteRegistrar, ctx: Browse mapTabError: true, run: async (profileCtx) => { if (action === "list") { + if (usesLazyRemoteWebSocketReconnect(profileCtx)) { + try { + const tabs = await 
profileCtx.listTabs(); + return res.json({ ok: true, tabs }); + } catch { + return res.json({ ok: true, tabs: [] as unknown[] }); + } + } const reachable = await profileCtx.isReachable(300); if (!reachable) { return res.json({ ok: true, tabs: [] as unknown[] }); From c007b444dd49f258a08ea91e470ec7f27f8fe378 Mon Sep 17 00:00:00 2001 From: ShawnPana Date: Fri, 13 Mar 2026 19:22:04 -0700 Subject: [PATCH 14/15] Rename browser-use profile from "browseruse" to "browser-use" Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/tools/browser.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/tools/browser.md b/docs/tools/browser.md index d1727c5e533..4d0ff576def 100644 --- a/docs/tools/browser.md +++ b/docs/tools/browser.md @@ -218,11 +218,11 @@ Set up a [Browser Use](https://cloud.browser-use.com) cloud browser profile with { browser: { enabled: true, - defaultProfile: "browseruse", + defaultProfile: "browser-use", remoteCdpTimeoutMs: 3000, remoteCdpHandshakeTimeoutMs: 5000, profiles: { - browseruse: { + browser-use: { // All Browser Use session params can be added as query params. // See: https://docs.browser-use.com/cloud/api-v2/browsers/create-browser-session cdpUrl: "wss://connect.browser-use.com?apiKey=&timeout=240&profileId=&proxyCountryCode=us", From 8f58982830d3b95430b70f5fc4d1e8a82a99fd4a Mon Sep 17 00:00:00 2001 From: ShawnPana Date: Fri, 13 Mar 2026 21:58:49 -0700 Subject: [PATCH 15/15] Quote browser-use key in JSON5 config example Hyphens are invalid in unquoted JSON5 identifiers. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/tools/browser.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/browser.md b/docs/tools/browser.md index 4d0ff576def..8b7cf3fec89 100644 --- a/docs/tools/browser.md +++ b/docs/tools/browser.md @@ -222,7 +222,7 @@ Set up a [Browser Use](https://cloud.browser-use.com) cloud browser profile with remoteCdpTimeoutMs: 3000, remoteCdpHandshakeTimeoutMs: 5000, profiles: { - browser-use: { + "browser-use": { // All Browser Use session params can be added as query params. // See: https://docs.browser-use.com/cloud/api-v2/browsers/create-browser-session cdpUrl: "wss://connect.browser-use.com?apiKey=<BROWSER_USE_API_KEY>&timeout=240&profileId=<PROFILE_ID>&proxyCountryCode=us",