2026-01-04 05:07:37 +01:00
import { Type } from "@sinclair/typebox" ;
import {
browserCloseTab ,
browserFocusTab ,
browserOpenTab ,
browserSnapshot ,
browserStart ,
browserStatus ,
browserStop ,
browserTabs ,
} from "../../browser/client.js" ;
import {
browserAct ,
browserArmDialog ,
browserArmFileChooser ,
browserConsoleMessages ,
browserNavigate ,
browserPdfSave ,
browserScreenshotAction ,
} from "../../browser/client-actions.js" ;
import { resolveBrowserConfig } from "../../browser/config.js" ;
import { loadConfig } from "../../config/config.js" ;
import {
type AnyAgentTool ,
imageResultFromFile ,
jsonResult ,
readStringParam ,
} from "./common.js" ;
2026-01-07 17:54:19 +00:00
/**
 * Every interaction kind that an `act` request may carry.
 *
 * Declared `as const` so the element type stays a union of string literals
 * (see `BrowserActKind`) instead of widening to `string`.
 */
const BROWSER_ACT_KINDS = [
  "click", "type", "press", "hover",
  "drag", "select", "fill", "resize",
  "wait", "evaluate", "close",
] as const;

/** Union of the string literals listed in `BROWSER_ACT_KINDS`. */
type BrowserActKind = (typeof BROWSER_ACT_KINDS)[number];
2026-01-07 16:54:13 +00:00
// Schema for the `request` payload of the "act" action.
//
// NOTE: This is intentionally ONE flat object rather than
// Type.Union([Type.Object(...), ...]): the Claude API on Vertex AI rejects
// nested anyOf schemas as invalid JSON Schema. `kind` acts as the
// discriminator that decides which of the optional properties are relevant;
// runtime validates.
const BrowserActSchema = (() => {
  // Small factories so every property gets its own fresh schema instance.
  const optionalString = () => Type.Optional(Type.String());
  const optionalNumber = () => Type.Optional(Type.Number());
  const optionalBoolean = () => Type.Optional(Type.Boolean());
  const optionalStringList = () => Type.Optional(Type.Array(Type.String()));

  return Type.Object({
    // Discriminator: a plain string enum built from BROWSER_ACT_KINDS.
    kind: Type.Unsafe<BrowserActKind>({
      type: "string",
      enum: [...BROWSER_ACT_KINDS],
    }),

    // --- shared by several kinds ---
    targetId: optionalString(),
    ref: optionalString(),

    // --- click ---
    doubleClick: optionalBoolean(),
    button: optionalString(),
    modifiers: optionalStringList(),

    // --- type ---
    text: optionalString(),
    submit: optionalBoolean(),
    slowly: optionalBoolean(),

    // --- press ---
    key: optionalString(),

    // --- drag ---
    startRef: optionalString(),
    endRef: optionalString(),

    // --- select ---
    values: optionalStringList(),

    // --- fill: deliberately permissive, any array of objects is accepted ---
    fields: Type.Optional(
      Type.Array(Type.Object({}, { additionalProperties: true })),
    ),

    // --- resize ---
    width: optionalNumber(),
    height: optionalNumber(),

    // --- wait ---
    timeMs: optionalNumber(),
    textGone: optionalString(),

    // --- evaluate ---
    fn: optionalString(),
  });
})();
2026-01-04 05:07:37 +01:00
2026-01-05 00:15:42 +01:00
// Parameter schema for the `browser` tool.
//
// IMPORTANT: The root MUST remain a `Type.Object`. OpenAI function tool
// schemas require a top-level `type: "object"`; a root-level
// `Type.Union([...])` compiles to `{ anyOf: [...] }` (no `type`), which OpenAI
// rejects ("Invalid schema ... type: None").
const BrowserToolSchema = (() => {
  const optionalString = () => Type.Optional(Type.String());
  const optionalNumber = () => Type.Optional(Type.Number());
  const optionalBoolean = () => Type.Optional(Type.Boolean());

  // The only required property: selects which operation the tool performs.
  const action = Type.Union([
    Type.Literal("status"),
    Type.Literal("start"),
    Type.Literal("stop"),
    Type.Literal("tabs"),
    Type.Literal("open"),
    Type.Literal("focus"),
    Type.Literal("close"),
    Type.Literal("snapshot"),
    Type.Literal("screenshot"),
    Type.Literal("navigate"),
    Type.Literal("console"),
    Type.Literal("pdf"),
    Type.Literal("upload"),
    Type.Literal("dialog"),
    Type.Literal("act"),
  ]);

  // Snapshot output format.
  const snapshotFormat = Type.Optional(
    Type.Union([Type.Literal("aria"), Type.Literal("ai")]),
  );

  // Screenshot image encoding.
  const screenshotType = Type.Optional(
    Type.Union([Type.Literal("png"), Type.Literal("jpeg")]),
  );

  return Type.Object({
    action,
    profile: optionalString(),
    controlUrl: optionalString(),
    targetUrl: optionalString(),
    targetId: optionalString(),
    limit: optionalNumber(),
    format: snapshotFormat,
    fullPage: optionalBoolean(),
    ref: optionalString(),
    element: optionalString(),
    type: screenshotType,
    level: optionalString(),
    paths: Type.Optional(Type.Array(Type.String())),
    inputRef: optionalString(),
    timeoutMs: optionalNumber(),
    accept: optionalBoolean(),
    promptText: optionalString(),
    // Payload of the "act" action; see BrowserActSchema.
    request: Type.Optional(BrowserActSchema),
  });
})();
2026-01-04 05:07:37 +01:00
/**
 * Picks the base URL of the browser control server.
 *
 * A non-blank `controlUrl` (after trimming) takes precedence; otherwise the
 * `controlUrl` from the resolved browser configuration is used. A single
 * trailing slash is removed from the result.
 *
 * @param controlUrl - Optional explicit control URL override.
 * @returns The chosen URL without one trailing `/`.
 * @throws Error when browser control is disabled in the configuration and no
 *   non-blank override was supplied.
 */
function resolveBrowserBaseUrl(controlUrl?: string) {
  const browserConfig = resolveBrowserConfig(loadConfig().browser);
  const explicitUrl = controlUrl?.trim();

  // An explicit URL is honoured even when the configuration disables the browser.
  if (!explicitUrl && !browserConfig.enabled) {
    throw new Error(
      "Browser control is disabled. Set browser.enabled=true in ~/.clawdbot/clawdbot.json.",
    );
  }

  const baseUrl = explicitUrl ? explicitUrl : browserConfig.controlUrl;
  return baseUrl.replace(/\/$/, "");
}
/**
 * Creates the `browser` agent tool.
 *
 * The tool reads `action` from its arguments and forwards the call to the
 * matching browser client helper. The control server URL is taken from the
 * `controlUrl` argument, falling back to `opts.defaultControlUrl`, and is then
 * resolved through `resolveBrowserBaseUrl`. The optional `profile` argument is
 * passed along to every client call.
 *
 * @param opts - Optional settings; `defaultControlUrl` is used when a call
 *   does not carry its own `controlUrl`.
 * @returns The tool definition (label, name, description, parameters, execute).
 *
 * `execute` throws an `Error` for an unrecognised action, for "upload" without
 * any `paths` ("paths required"), and for "act" without an object `request`
 * ("request required"). Errors raised by the helpers it calls are not caught.
 */
export function createBrowserTool(opts?: {
  defaultControlUrl?: string;
}): AnyAgentTool {
  // Trimmed copy of a string value; anything that is not a string becomes undefined.
  const trimmedOrUndefined = (value: unknown): string | undefined =>
    typeof value === "string" ? value.trim() : undefined;

  // Passes finite numbers through; everything else (NaN, Infinity, non-numbers) becomes undefined.
  const finiteOrUndefined = (value: unknown): number | undefined =>
    typeof value === "number" && Number.isFinite(value) ? value : undefined;

  return {
    label: "Browser",
    name: "browser",
    description:
      "Control clawd's dedicated browser (status/start/stop/tabs/open/snapshot/screenshot/actions). Use snapshot+act for UI automation. Avoid act:wait by default; use only in exceptional cases when no reliable UI state exists.",
    parameters: BrowserToolSchema,
    execute: async (_toolCallId, args) => {
      const params = args as Record<string, unknown>;
      const action = readStringParam(params, "action", { required: true });
      const controlUrl = readStringParam(params, "controlUrl");
      const profile = readStringParam(params, "profile");
      const baseUrl = resolveBrowserBaseUrl(
        controlUrl ?? opts?.defaultControlUrl,
      );

      switch (action) {
        case "status": {
          const status = await browserStatus(baseUrl, { profile });
          return jsonResult(status);
        }

        case "start": {
          // Start, then report the resulting status.
          await browserStart(baseUrl, { profile });
          const status = await browserStatus(baseUrl, { profile });
          return jsonResult(status);
        }

        case "stop": {
          // Stop, then report the resulting status.
          await browserStop(baseUrl, { profile });
          const status = await browserStatus(baseUrl, { profile });
          return jsonResult(status);
        }

        case "tabs": {
          const tabs = await browserTabs(baseUrl, { profile });
          return jsonResult({ tabs });
        }

        case "open": {
          const targetUrl = readStringParam(params, "targetUrl", {
            required: true,
          });
          const opened = await browserOpenTab(baseUrl, targetUrl, { profile });
          return jsonResult(opened);
        }

        case "focus": {
          const targetId = readStringParam(params, "targetId", {
            required: true,
          });
          await browserFocusTab(baseUrl, targetId, { profile });
          return jsonResult({ ok: true });
        }

        case "close": {
          const targetId = readStringParam(params, "targetId");
          if (targetId) {
            await browserCloseTab(baseUrl, targetId, { profile });
          } else {
            // No tab id given: issue a "close" act request instead.
            await browserAct(baseUrl, { kind: "close" }, { profile });
          }
          return jsonResult({ ok: true });
        }

        case "snapshot": {
          // Anything other than "aria" falls back to the "ai" format.
          const format: "ai" | "aria" =
            params.format === "aria" ? "aria" : "ai";
          const targetId = trimmedOrUndefined(params.targetId);
          const limit = finiteOrUndefined(params.limit);
          const snapshot = await browserSnapshot(baseUrl, {
            format,
            targetId,
            limit,
            profile,
          });
          if (snapshot.format !== "ai") {
            return jsonResult(snapshot);
          }
          // "ai" snapshots are returned as raw text, with the full result in details.
          return {
            content: [{ type: "text", text: snapshot.snapshot }],
            details: snapshot,
          };
        }

        case "screenshot": {
          const targetId = readStringParam(params, "targetId");
          const fullPage = !!params.fullPage;
          const ref = readStringParam(params, "ref");
          const element = readStringParam(params, "element");
          // Only an explicit "jpeg" selects JPEG; everything else yields PNG.
          const imageType = params.type === "jpeg" ? "jpeg" : "png";
          const shot = await browserScreenshotAction(baseUrl, {
            targetId,
            fullPage,
            ref,
            element,
            type: imageType,
            profile,
          });
          return await imageResultFromFile({
            label: "browser:screenshot",
            path: shot.path,
            details: shot,
          });
        }

        case "navigate": {
          const targetUrl = readStringParam(params, "targetUrl", {
            required: true,
          });
          const targetId = readStringParam(params, "targetId");
          const navigated = await browserNavigate(baseUrl, {
            url: targetUrl,
            targetId,
            profile,
          });
          return jsonResult(navigated);
        }

        case "console": {
          const level = trimmedOrUndefined(params.level);
          const targetId = trimmedOrUndefined(params.targetId);
          const messages = await browserConsoleMessages(baseUrl, {
            level,
            targetId,
            profile,
          });
          return jsonResult(messages);
        }

        case "pdf": {
          const targetId = trimmedOrUndefined(params.targetId);
          const saved = await browserPdfSave(baseUrl, { targetId, profile });
          // NOTE(review): the literal text of this template (including the
          // surrounding spaces) is reproduced exactly as it appears in the
          // source; confirm whether those spaces are intended.
          return {
            content: [{ type: "text", text: ` FILE: ${saved.path} ` }],
            details: saved,
          };
        }

        case "upload": {
          const rawPaths = params.paths;
          const paths = Array.isArray(rawPaths)
            ? rawPaths.map((entry) => String(entry))
            : [];
          if (paths.length === 0) {
            throw new Error("paths required");
          }
          const ref = readStringParam(params, "ref");
          const inputRef = readStringParam(params, "inputRef");
          const element = readStringParam(params, "element");
          const targetId = trimmedOrUndefined(params.targetId);
          const timeoutMs = finiteOrUndefined(params.timeoutMs);
          const armed = await browserArmFileChooser(baseUrl, {
            paths,
            ref,
            inputRef,
            element,
            targetId,
            timeoutMs,
            profile,
          });
          return jsonResult(armed);
        }

        case "dialog": {
          const accept = !!params.accept;
          // promptText is forwarded untrimmed.
          const promptText =
            typeof params.promptText === "string"
              ? params.promptText
              : undefined;
          const targetId = trimmedOrUndefined(params.targetId);
          const timeoutMs = finiteOrUndefined(params.timeoutMs);
          const armed = await browserArmDialog(baseUrl, {
            accept,
            promptText,
            targetId,
            timeoutMs,
            profile,
          });
          return jsonResult(armed);
        }

        case "act": {
          const request = params.request as
            | Record<string, unknown>
            | undefined;
          if (!request || typeof request !== "object") {
            throw new Error("request required");
          }
          // The payload is forwarded as-is; only its presence is checked here.
          const outcome = await browserAct(
            baseUrl,
            request as Parameters<typeof browserAct>[1],
            { profile },
          );
          return jsonResult(outcome);
        }

        default:
          // NOTE(review): literal text of this template (including the
          // surrounding spaces) is reproduced exactly as it appears in the
          // source; confirm whether those spaces are intended.
          throw new Error(` Unknown action: ${action} `);
      }
    },
  };
}