Merge remote-tracking branch 'upstream/main' into feat/add_qwen_official_api

# Conflicts:
#	extensions/irc/src/runtime-api.ts
#	extensions/telegram/runtime-api.ts
#	extensions/whatsapp/src/runtime-api.ts
This commit is contained in:
wenmengzhou 2026-03-18 23:36:25 +08:00
commit 99ec56ebeb
297 changed files with 4905 additions and 9390 deletions

View File

@ -309,8 +309,6 @@ jobs:
needs: [docs-scope, changed-scope]
if: needs.docs-scope.outputs.docs_only != 'true' && needs.changed-scope.outputs.run_node == 'true'
runs-on: blacksmith-16vcpu-ubuntu-2404
env:
PLUGIN_EXTENSION_BOUNDARY_ENFORCE_AFTER: "2026-03-24T05:00:00Z"
steps:
- name: Checkout
uses: actions/checkout@v6
@ -323,41 +321,14 @@ jobs:
install-bun: "false"
use-sticky-disk: "false"
- name: Run plugin extension boundary guard with grace period
shell: bash
run: |
set -euo pipefail
tmp_output="$(mktemp)"
if pnpm run lint:plugins:no-extension-imports >"$tmp_output" 2>&1; then
cat "$tmp_output"
rm -f "$tmp_output"
exit 0
fi
status=$?
cat "$tmp_output"
rm -f "$tmp_output"
now_epoch="$(date -u +%s)"
enforce_epoch="$(date -u -d "$PLUGIN_EXTENSION_BOUNDARY_ENFORCE_AFTER" +%s)"
fix_instructions="If you are an LLM agent fixing this: run 'pnpm run lint:plugins:no-extension-imports', remove src/plugins/** -> extensions/** imports where possible, and if the remaining inventory is intentional for now update test/fixtures/plugin-extension-import-boundary-inventory.json in the same PR."
if [ "$now_epoch" -lt "$enforce_epoch" ]; then
echo "::warning::Plugin extension import boundary violations are temporarily allowed until ${PLUGIN_EXTENSION_BOUNDARY_ENFORCE_AFTER}. This grace period ends in one week from the rollout date. After that timestamp this job will fail unless the inventory is reduced or the baseline is intentionally updated. ${fix_instructions}"
exit 0
fi
echo "::error::Plugin extension import boundary grace period ended at ${PLUGIN_EXTENSION_BOUNDARY_ENFORCE_AFTER}. ${fix_instructions}"
exit "$status"
- name: Run plugin extension boundary guard
run: pnpm run lint:plugins:no-extension-imports
web-search-provider-boundary:
name: "web-search-provider-boundary"
needs: [docs-scope, changed-scope]
if: needs.docs-scope.outputs.docs_only != 'true' && needs.changed-scope.outputs.run_node == 'true'
runs-on: blacksmith-16vcpu-ubuntu-2404
env:
WEB_SEARCH_PROVIDER_BOUNDARY_ENFORCE_AFTER: "2026-03-24T05:00:00Z"
steps:
- name: Checkout
uses: actions/checkout@v6
@ -370,41 +341,14 @@ jobs:
install-bun: "false"
use-sticky-disk: "false"
- name: Run web search provider boundary guard with grace period
shell: bash
run: |
set -euo pipefail
tmp_output="$(mktemp)"
if pnpm run lint:web-search-provider-boundaries >"$tmp_output" 2>&1; then
cat "$tmp_output"
rm -f "$tmp_output"
exit 0
fi
status=$?
cat "$tmp_output"
rm -f "$tmp_output"
now_epoch="$(date -u +%s)"
enforce_epoch="$(date -u -d "$WEB_SEARCH_PROVIDER_BOUNDARY_ENFORCE_AFTER" +%s)"
fix_instructions="If you are an LLM agent fixing this: run 'pnpm run lint:web-search-provider-boundaries', move provider-specific web-search logic out of core, and if the remaining inventory is intentional for now update test/fixtures/web-search-provider-boundary-inventory.json in the same PR."
if [ "$now_epoch" -lt "$enforce_epoch" ]; then
echo "::warning::Web search provider boundary violations are temporarily allowed until ${WEB_SEARCH_PROVIDER_BOUNDARY_ENFORCE_AFTER}. This grace period ends in one week from the rollout date. After that timestamp this job will fail unless the inventory is reduced or the baseline is intentionally updated. ${fix_instructions}"
exit 0
fi
echo "::error::Web search provider boundary grace period ended at ${WEB_SEARCH_PROVIDER_BOUNDARY_ENFORCE_AFTER}. ${fix_instructions}"
exit "$status"
- name: Run web search provider boundary guard
run: pnpm run lint:web-search-provider-boundaries
extension-src-outside-plugin-sdk-boundary:
name: "extension-src-outside-plugin-sdk-boundary"
needs: [docs-scope, changed-scope]
if: needs.docs-scope.outputs.docs_only != 'true' && needs.changed-scope.outputs.run_node == 'true'
runs-on: blacksmith-16vcpu-ubuntu-2404
env:
EXTENSION_PLUGIN_SDK_BOUNDARY_ENFORCE_AFTER: "2026-03-24T05:00:00Z"
steps:
- name: Checkout
uses: actions/checkout@v6
@ -417,41 +361,14 @@ jobs:
install-bun: "false"
use-sticky-disk: "false"
- name: Run extension src boundary guard with grace period
shell: bash
run: |
set -euo pipefail
tmp_output="$(mktemp)"
if pnpm run lint:extensions:no-src-outside-plugin-sdk >"$tmp_output" 2>&1; then
cat "$tmp_output"
rm -f "$tmp_output"
exit 0
fi
status=$?
cat "$tmp_output"
rm -f "$tmp_output"
now_epoch="$(date -u +%s)"
enforce_epoch="$(date -u -d "$EXTENSION_PLUGIN_SDK_BOUNDARY_ENFORCE_AFTER" +%s)"
fix_instructions="If you are an LLM agent fixing this: run 'pnpm run lint:extensions:no-src-outside-plugin-sdk', move extension imports off core src paths and onto src/plugin-sdk/**, and if the remaining inventory is intentional for now update test/fixtures/extension-src-outside-plugin-sdk-inventory.json in the same PR."
if [ "$now_epoch" -lt "$enforce_epoch" ]; then
echo "::warning::Extension src boundary violations are temporarily allowed until ${EXTENSION_PLUGIN_SDK_BOUNDARY_ENFORCE_AFTER}. This grace period ends in one week from the rollout date. After that timestamp this job will fail unless the inventory is reduced or the baseline is intentionally updated. ${fix_instructions}"
exit 0
fi
echo "::error::Extension src boundary grace period ended at ${EXTENSION_PLUGIN_SDK_BOUNDARY_ENFORCE_AFTER}. ${fix_instructions}"
exit "$status"
- name: Run extension src boundary guard
run: pnpm run lint:extensions:no-src-outside-plugin-sdk
extension-plugin-sdk-internal-boundary:
name: "extension-plugin-sdk-internal-boundary"
needs: [docs-scope, changed-scope]
if: needs.docs-scope.outputs.docs_only != 'true' && needs.changed-scope.outputs.run_node == 'true'
runs-on: blacksmith-16vcpu-ubuntu-2404
env:
EXTENSION_PLUGIN_SDK_INTERNAL_ENFORCE_AFTER: "2026-03-24T05:00:00Z"
steps:
- name: Checkout
uses: actions/checkout@v6
@ -464,33 +381,8 @@ jobs:
install-bun: "false"
use-sticky-disk: "false"
- name: Run extension plugin-sdk-internal guard with grace period
shell: bash
run: |
set -euo pipefail
tmp_output="$(mktemp)"
if pnpm run lint:extensions:no-plugin-sdk-internal >"$tmp_output" 2>&1; then
cat "$tmp_output"
rm -f "$tmp_output"
exit 0
fi
status=$?
cat "$tmp_output"
rm -f "$tmp_output"
now_epoch="$(date -u +%s)"
enforce_epoch="$(date -u -d "$EXTENSION_PLUGIN_SDK_INTERNAL_ENFORCE_AFTER" +%s)"
fix_instructions="If you are an LLM agent fixing this: run 'pnpm run lint:extensions:no-plugin-sdk-internal', remove extension imports of src/plugin-sdk-internal/** in favor of src/plugin-sdk/**, and if the remaining inventory is intentional for now update test/fixtures/extension-plugin-sdk-internal-inventory.json in the same PR."
if [ "$now_epoch" -lt "$enforce_epoch" ]; then
echo "::warning::Extension plugin-sdk-internal boundary violations are temporarily allowed until ${EXTENSION_PLUGIN_SDK_INTERNAL_ENFORCE_AFTER}. This grace period ends in one week from the rollout date. After that timestamp this job will fail unless the inventory is reduced or the baseline is intentionally updated. ${fix_instructions}"
exit 0
fi
echo "::error::Extension plugin-sdk-internal boundary grace period ended at ${EXTENSION_PLUGIN_SDK_INTERNAL_ENFORCE_AFTER}. ${fix_instructions}"
exit "$status"
- name: Run extension plugin-sdk-internal guard
run: pnpm run lint:extensions:no-plugin-sdk-internal
build-smoke:
name: "build-smoke"

214
.github/workflows/plugin-npm-release.yml vendored Normal file
View File

@ -0,0 +1,214 @@
name: Plugin NPM Release
on:
push:
branches:
- main
paths:
- ".github/workflows/plugin-npm-release.yml"
- "extensions/**"
- "package.json"
- "scripts/lib/plugin-npm-release.ts"
- "scripts/plugin-npm-publish.sh"
- "scripts/plugin-npm-release-check.ts"
- "scripts/plugin-npm-release-plan.ts"
workflow_dispatch:
inputs:
publish_scope:
description: Publish the selected plugins or all publishable plugins from the ref
required: true
default: selected
type: choice
options:
- selected
- all-publishable
ref:
description: Commit SHA on main to publish from (copy from the preview run)
required: true
type: string
plugins:
description: Comma-separated plugin package names to publish when publish_scope=selected
required: false
type: string
concurrency:
group: plugin-npm-release-${{ github.event_name == 'workflow_dispatch' && inputs.ref || github.sha }}
cancel-in-progress: false
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
NODE_VERSION: "24.x"
PNPM_VERSION: "10.23.0"
jobs:
preview_plugins_npm:
runs-on: ubuntu-latest
permissions:
contents: read
outputs:
ref_sha: ${{ steps.ref.outputs.sha }}
has_candidates: ${{ steps.plan.outputs.has_candidates }}
candidate_count: ${{ steps.plan.outputs.candidate_count }}
matrix: ${{ steps.plan.outputs.matrix }}
steps:
- name: Checkout
uses: actions/checkout@v6
with:
ref: ${{ github.event_name == 'workflow_dispatch' && inputs.ref || github.sha }}
fetch-depth: 0
- name: Setup Node environment
uses: ./.github/actions/setup-node-env
with:
node-version: ${{ env.NODE_VERSION }}
pnpm-version: ${{ env.PNPM_VERSION }}
install-bun: "false"
use-sticky-disk: "false"
- name: Resolve checked-out ref
id: ref
run: echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
- name: Validate ref is on main
run: |
set -euo pipefail
git fetch --no-tags origin +refs/heads/main:refs/remotes/origin/main
git merge-base --is-ancestor HEAD origin/main
- name: Validate publishable plugin metadata
env:
PUBLISH_SCOPE: ${{ github.event_name == 'workflow_dispatch' && inputs.publish_scope || '' }}
RELEASE_PLUGINS: ${{ github.event_name == 'workflow_dispatch' && inputs.plugins || '' }}
BASE_REF: ${{ github.event_name != 'workflow_dispatch' && github.event.before || '' }}
HEAD_REF: ${{ steps.ref.outputs.sha }}
run: |
set -euo pipefail
if [[ -n "${PUBLISH_SCOPE}" ]]; then
release_args=(--selection-mode "${PUBLISH_SCOPE}")
if [[ -n "${RELEASE_PLUGINS}" ]]; then
release_args+=(--plugins "${RELEASE_PLUGINS}")
fi
pnpm release:plugins:npm:check -- "${release_args[@]}"
elif [[ -n "${BASE_REF}" ]]; then
pnpm release:plugins:npm:check -- --base-ref "${BASE_REF}" --head-ref "${HEAD_REF}"
else
pnpm release:plugins:npm:check
fi
- name: Resolve plugin release plan
id: plan
env:
PUBLISH_SCOPE: ${{ github.event_name == 'workflow_dispatch' && inputs.publish_scope || '' }}
RELEASE_PLUGINS: ${{ github.event_name == 'workflow_dispatch' && inputs.plugins || '' }}
BASE_REF: ${{ github.event_name != 'workflow_dispatch' && github.event.before || '' }}
HEAD_REF: ${{ steps.ref.outputs.sha }}
run: |
set -euo pipefail
mkdir -p .local
if [[ -n "${PUBLISH_SCOPE}" ]]; then
plan_args=(--selection-mode "${PUBLISH_SCOPE}")
if [[ -n "${RELEASE_PLUGINS}" ]]; then
plan_args+=(--plugins "${RELEASE_PLUGINS}")
fi
node --import tsx scripts/plugin-npm-release-plan.ts "${plan_args[@]}" > .local/plugin-npm-release-plan.json
elif [[ -n "${BASE_REF}" ]]; then
node --import tsx scripts/plugin-npm-release-plan.ts --base-ref "${BASE_REF}" --head-ref "${HEAD_REF}" > .local/plugin-npm-release-plan.json
else
node --import tsx scripts/plugin-npm-release-plan.ts > .local/plugin-npm-release-plan.json
fi
cat .local/plugin-npm-release-plan.json
candidate_count="$(jq -r '.candidates | length' .local/plugin-npm-release-plan.json)"
has_candidates="false"
if [[ "${candidate_count}" != "0" ]]; then
has_candidates="true"
fi
matrix_json="$(jq -c '.candidates' .local/plugin-npm-release-plan.json)"
{
echo "candidate_count=${candidate_count}"
echo "has_candidates=${has_candidates}"
echo "matrix=${matrix_json}"
} >> "$GITHUB_OUTPUT"
echo "Plugin release candidates:"
jq -r '.candidates[]? | "- \(.packageName)@\(.version) [\(.publishTag)] from \(.packageDir)"' .local/plugin-npm-release-plan.json
echo "Already published / skipped:"
jq -r '.skippedPublished[]? | "- \(.packageName)@\(.version)"' .local/plugin-npm-release-plan.json
preview_plugin_pack:
needs: preview_plugins_npm
if: needs.preview_plugins_npm.outputs.has_candidates == 'true'
runs-on: ubuntu-latest
permissions:
contents: read
strategy:
fail-fast: false
matrix:
plugin: ${{ fromJson(needs.preview_plugins_npm.outputs.matrix) }}
steps:
- name: Checkout
uses: actions/checkout@v6
with:
ref: ${{ needs.preview_plugins_npm.outputs.ref_sha }}
fetch-depth: 1
- name: Setup Node environment
uses: ./.github/actions/setup-node-env
with:
node-version: ${{ env.NODE_VERSION }}
pnpm-version: ${{ env.PNPM_VERSION }}
install-bun: "false"
use-sticky-disk: "false"
install-deps: "false"
- name: Preview publish command
run: bash scripts/plugin-npm-publish.sh --dry-run "${{ matrix.plugin.packageDir }}"
- name: Preview npm pack contents
working-directory: ${{ matrix.plugin.packageDir }}
run: npm pack --dry-run --json --ignore-scripts
publish_plugins_npm:
needs: [preview_plugins_npm, preview_plugin_pack]
if: github.event_name == 'workflow_dispatch' && needs.preview_plugins_npm.outputs.has_candidates == 'true'
runs-on: ubuntu-latest
environment: npm-release
permissions:
contents: read
id-token: write
strategy:
fail-fast: false
matrix:
plugin: ${{ fromJson(needs.preview_plugins_npm.outputs.matrix) }}
steps:
- name: Checkout
uses: actions/checkout@v6
with:
ref: ${{ needs.preview_plugins_npm.outputs.ref_sha }}
fetch-depth: 1
- name: Setup Node environment
uses: ./.github/actions/setup-node-env
with:
node-version: ${{ env.NODE_VERSION }}
pnpm-version: ${{ env.PNPM_VERSION }}
install-bun: "false"
use-sticky-disk: "false"
install-deps: "false"
- name: Ensure version is not already published
env:
PACKAGE_NAME: ${{ matrix.plugin.packageName }}
PACKAGE_VERSION: ${{ matrix.plugin.version }}
run: |
set -euo pipefail
if npm view "${PACKAGE_NAME}@${PACKAGE_VERSION}" version >/dev/null 2>&1; then
echo "${PACKAGE_NAME}@${PACKAGE_VERSION} is already published on npm."
exit 1
fi
- name: Publish
run: bash scripts/plugin-npm-publish.sh --publish "${{ matrix.plugin.packageDir }}"

View File

@ -140,7 +140,7 @@
- Do not set test workers above 16; tried already.
- If local Vitest runs cause memory pressure (common on non-Mac-Studio hosts), use `OPENCLAW_TEST_PROFILE=low OPENCLAW_TEST_SERIAL_GATEWAY=1 pnpm test` for land/gate runs.
- Live tests (real keys): `CLAWDBOT_LIVE_TEST=1 pnpm test:live` (OpenClaw-only) or `LIVE=1 pnpm test:live` (includes provider live tests). Docker: `pnpm test:docker:live-models`, `pnpm test:docker:live-gateway`. Onboarding Docker E2E: `pnpm test:docker:onboard`.
- Full kit + what's covered: `docs/testing.md`.
- Full kit + what's covered: `docs/help/testing.md`.
- Changelog: user-facing changes only; no internal/meta notes (version alignment, appcast reminders, release process).
- Changelog placement: in the active version block, append new entries to the end of the target section (`### Changes` or `### Fixes`); do not insert new entries at the top of a section.
- Changelog attribution: use at most one contributor mention per line; prefer `Thanks @author` and do not also add `by @author` on the same entry.
@ -281,7 +281,7 @@
- If staged+unstaged diffs are formatting-only, auto-resolve without asking.
- If commit/push already requested, auto-stage and include formatting-only follow-ups in the same commit (or a tiny follow-up commit if needed), no extra confirmation.
- Only ask when changes are semantic (logic/data/behavior).
- Lobster seam: use the shared CLI palette in `src/terminal/palette.ts` (no hardcoded colors); apply palette to onboarding/config prompts and other TTY UI output as needed.
- Lobster palette: use the shared CLI palette in `src/terminal/palette.ts` (no hardcoded colors); apply palette to onboarding/config prompts and other TTY UI output as needed.
- **Multi-agent safety:** focus reports on your edits; avoid guard-rail disclaimers unless truly blocked; when multiple agents touch the same file, continue if safe; end with a brief “other files present” note only if relevant.
- Bug investigations: read source code of relevant npm dependencies and all related local code before concluding; aim for high-confidence root cause.
- Code style: add brief comments for tricky logic; keep files under ~500 LOC when feasible (split/refactor as needed).

View File

@ -35,7 +35,7 @@ Docs: https://docs.openclaw.ai
- Models/OpenAI: add native forward-compat support for `gpt-5.4-mini` and `gpt-5.4-nano` in the OpenAI provider catalog, runtime resolution, and reasoning capability gates. Thanks @vincentkoc.
- Plugins/bundles: make enabled bundle MCP servers expose runnable tools in embedded Pi, and default relative bundle MCP launches to the bundle root so marketplace bundles like Context7 work through Pi instead of stopping at config import.
- Scope message SecretRef resolution and harden doctor/status paths. (#48728) Thanks @joshavant.
- Plugins/testing: add a public `openclaw/plugin-sdk/testing` seam for plugin-author test helpers, and move bundled-extension-only test bridges out of `extensions/` into private repo test helpers.
- Plugins/testing: add a public `openclaw/plugin-sdk/testing` surface for plugin-author test helpers, and move bundled-extension-only test bridges out of `extensions/` into private repo test helpers.
- Plugins/Chutes: add a bundled Chutes provider with plugin-owned OAuth/API-key auth, dynamic model discovery, and default-on extension wiring. (#41416) Thanks @Veightor.
- Plugins/binding: add `onConversationBindingResolved(...)` so plugins can react immediately after bind approvals or denies without blocking channel interaction acknowledgements. (#48678) Thanks @huntharo.
- CLI/config: expand `config set` with SecretRef and provider builder modes, JSON/batch assignment support, and `--dry-run` validation with structured JSON output. (#49296) Thanks @joshavant.
@ -128,6 +128,7 @@ Docs: https://docs.openclaw.ai
- Plugins/subagents: forward per-run provider and model overrides through gateway plugin subagent dispatch so plugin-launched agent delegations honor explicit model selection again. (#48277) Thanks @jalehman.
- Agents/compaction: write minimal boundary summaries for empty preparations while keeping split-turn prefixes on the normal path, so no-summarizable-message sessions stop retriggering the safeguard loop. (#42215) thanks @lml2468.
- Models/chat commands: keep `/model ...@YYYYMMDD` version suffixes intact by default, but still honor matching stored numeric auth-profile overrides for the same provider. (#48896) Thanks @Alix-007.
- Gateway/channels: serialize per-account channel startup so overlapping starts do not boot the same provider twice, preventing MS Teams `EADDRINUSE` crash loops during startup and restart. (#49583) Thanks @sudie-codes.
### Fixes
@ -141,7 +142,7 @@ Docs: https://docs.openclaw.ai
- Telegram/network: unify API and media fetches under the same sticky IPv4 and pinned-IP fallback chain, and re-validate pinned override addresses against SSRF policy. (#49148) Thanks @obviyus.
- Agents/prompt composition: append bootstrap truncation warnings to the current-turn prompt and add regression coverage for stable system-prompt cache invariants. (#49237) Thanks @scoootscooob.
- Gateway/auth: add regression coverage that keeps device-less trusted-proxy Control UI sessions off privileged pairing approval RPCs. Thanks @vincentkoc.
- Plugins/runtime-api: pin extension runtime-api export seams with explicit guardrail coverage so future surface creep becomes a deliberate diff. Thanks @vincentkoc.
- Plugins/runtime-api: pin extension runtime-api export surfaces with explicit guardrail coverage so future surface creep becomes a deliberate diff. Thanks @vincentkoc.
- Telegram/security: add regression coverage proving pinned fallback host overrides stay bound to Telegram and delegate non-matching hostnames back to the original lookup path. Thanks @vincentkoc.
- Secrets/exec refs: require explicit `--allow-exec` for `secrets apply` write plans that contain exec SecretRefs/providers, and align audit/configure/apply dry-run behavior to skip exec checks unless opted in to prevent unexpected command side effects. (#49417) Thanks @restriction and @joshavant.
- Tools/image generation: add bundled fal image generation support so `image_generate` can target `fal/*` models with `FAL_KEY`, including single-image edit flows via FLUX image-to-image. Thanks @vincentkoc.
@ -151,10 +152,14 @@ Docs: https://docs.openclaw.ai
### Breaking
- Skills/image generation: remove the bundled `nano-banana-pro` skill wrapper. Use `agents.defaults.imageGenerationModel.primary: "google/gemini-3-pro-image-preview"` for the native Nano Banana-style path instead.
- Browser/Chrome MCP: remove the legacy Chrome extension relay path, bundled extension assets, `driver: "extension"`, and `browser.relayBindHost`. Run `openclaw doctor --fix` to migrate host-local browser config to `existing-session` / `user`; Docker, headless, sandbox, and remote browser flows still use raw CDP. (#47893) Thanks @vincentkoc.
- Plugins/runtime: remove the public `openclaw/extension-api` surface with no compatibility shim. Bundled plugins must use injected runtime for host-side operations (for example `api.runtime.agent.runEmbeddedPiAgent`) and any remaining direct imports must come from narrow `openclaw/plugin-sdk/*` subpaths instead of the monolithic SDK root.
- Tools/image generation: standardize the stock image create/edit path on the core `image_generate` tool. The old `nano-banana-pro` docs/examples are gone; if you previously copied that sample-skill config, switch to `agents.defaults.imageGenerationModel` for built-in image generation or install a separate third-party skill explicitly.
- Skills/image generation: remove the bundled `nano-banana-pro` skill wrapper. Use `agents.defaults.imageGenerationModel.primary: "google/gemini-3-pro-image-preview"` for the native Nano Banana-style path instead.
- Plugins/message discovery: require `ChannelMessageActionAdapter.describeMessageTool(...)` for shared `message` tool discovery. The legacy `listActions`, `getCapabilities`, and `getToolSchema` adapter methods are removed. Plugin authors should migrate message discovery to `describeMessageTool(...)` and keep channel-specific action runtime code inside the owning plugin package. Thanks @gumadeiras.
- Exec/env sandbox: block build-tool JVM injection (`MAVEN_OPTS`, `SBT_OPTS`, `GRADLE_OPTS`, `ANT_OPTS`), glibc tunable exploitation (`GLIBC_TUNABLES`), and .NET dependency resolution hijack (`DOTNET_ADDITIONAL_DEPS`) from the host exec environment, and restrict Gradle init script redirect (`GRADLE_USER_HOME`) as an override-only block so user-configured Gradle homes still propagate. (#49702)
## 2026.3.13

View File

@ -293,7 +293,7 @@ If you plan to build/run companion apps, follow the platform runbooks below.
- WebChat + debug tools.
- Remote gateway control over SSH.
Note: signed builds required for macOS permissions to stick across rebuilds (see `docs/mac/permissions.md`).
Note: signed builds required for macOS permissions to stick across rebuilds (see [macOS Permissions](https://docs.openclaw.ai/platforms/mac/permissions)).
### iOS node (optional)

View File

@ -28,11 +28,18 @@ enum HostEnvSecurityPolicy {
"_JAVA_OPTIONS",
"JDK_JAVA_OPTIONS",
"PYTHONBREAKPOINT",
"DOTNET_STARTUP_HOOKS"
"DOTNET_STARTUP_HOOKS",
"DOTNET_ADDITIONAL_DEPS",
"GLIBC_TUNABLES",
"MAVEN_OPTS",
"SBT_OPTS",
"GRADLE_OPTS",
"ANT_OPTS"
]
static let blockedOverrideKeys: Set<String> = [
"HOME",
"GRADLE_USER_HOME",
"ZDOTDIR",
"GIT_SSH_COMMAND",
"GIT_SSH",

View File

@ -1,3 +1,11 @@
---
title: "Auth Credential Semantics"
summary: "Canonical credential eligibility and resolution semantics for auth profiles"
read_when:
- Working on auth profile resolution or credential routing
- Debugging model auth failures or profile order
---
# Auth Credential Semantics
This document defines the canonical credential eligibility and resolution semantics used across:

View File

@ -700,7 +700,7 @@ openclaw system event --mode now --text "Next heartbeat: check battery."
## Troubleshooting
### “Nothing runs”
### "Nothing runs"
- Check cron is enabled: `cron.enabled` and `OPENCLAW_SKIP_CRON`.
- Check the Gateway is running continuously (cron runs inside the Gateway process).

View File

@ -11,7 +11,7 @@ Goal: let Clawd sit in WhatsApp groups, wake up only when pinged, and keep that
Note: `agents.list[].groupChat.mentionPatterns` is now used by Telegram/Discord/Slack/iMessage as well; this doc focuses on WhatsApp-specific behavior. For multi-agent setups, set `agents.list[].groupChat.mentionPatterns` per agent (or use `messages.groupChat.mentionPatterns` as a global fallback).
## Whats implemented (2025-12-03)
## Current implementation (2025-12-03)
- Activation modes: `mention` (default) or `always`. `mention` requires a ping (real WhatsApp @-mentions via `mentionedJids`, safe regex patterns, or the bot's E.164 anywhere in the text). `always` wakes the agent on every message but it should reply only when it can add meaningful value; otherwise it returns the silent token `NO_REPLY`. Defaults can be set in config (`channels.whatsapp.groups`) and overridden per group via `/activation`. When `channels.whatsapp.groups` is set, it also acts as a group allowlist (include `"*"` to allow all).
- Group policy: `channels.whatsapp.groupPolicy` controls whether group messages are accepted (`open|disabled|allowlist`). `allowlist` uses `channels.whatsapp.groupAllowFrom` (fallback: explicit `channels.whatsapp.allowFrom`). Default is `allowlist` (blocked until you add senders).

View File

@ -255,7 +255,7 @@ openclaw doctor
openclaw channels status --probe
```
### Bot doesn't respond to messages
### Bot does not respond to messages
**Check access control:** Ensure your user ID is in `allowFrom`, or temporarily remove
`allowFrom` and set `allowedRoles: ["all"]` to test.

View File

@ -40,7 +40,7 @@ openclaw message send --channel slack --target user:U012ABCDEF --message "hello"
- Zalo (plugin): user id (Bot API)
- Zalo Personal / `zalouser` (plugin): thread id (DM/group) from `zca` (`me`, `friend list`, `group list`)
## Self (“me”)
## Self ("me")
```bash
openclaw directory self --channel zalouser

View File

@ -88,7 +88,7 @@ OpenClaw uses a lobster palette for CLI output.
- `error` (#E23D2D): errors, failures.
- `muted` (#8B7F77): de-emphasis, metadata.
Palette source of truth: `src/terminal/palette.ts` (aka “lobster seam”).
Palette source of truth: `src/terminal/palette.ts` (the “lobster palette”).
## Command tree

View File

@ -116,7 +116,7 @@ Large files are truncated per-file using `agents.defaults.bootstrapMaxChars` (de
When truncation occurs, the runtime can inject an in-prompt warning block under Project Context. Configure this with `agents.defaults.bootstrapPromptTruncationWarning` (`off`, `once`, `always`; default `once`).
## Skills: whats injected vs loaded on-demand
## Skills: injected vs loaded on-demand
The system prompt includes a compact **skills list** (name + description + location). This list has real overhead.
@ -131,7 +131,7 @@ Tools affect context in two ways:
`/context detail` breaks down the biggest tool schemas so you can see what dominates.
## Commands, directives, and “inline shortcuts”
## Commands, directives, and "inline shortcuts"
Slash commands are handled by the Gateway. There are a few different behaviors:

View File

@ -70,7 +70,7 @@ they are tried first, but OpenClaw may rotate to another profile on rate limits/
User-pinned profiles stay locked to that profile; if it fails and model fallbacks
are configured, OpenClaw moves to the next model instead of switching profiles.
### Why OAuth can “look lost”
### Why OAuth can "look lost"
If you have both an OAuth profile and an API key profile for the same provider, round-robin can switch between them across messages unless pinned. To force a single profile:

View File

@ -60,7 +60,7 @@ to `zai/*`.
Provider configuration examples (including OpenCode) live in
[/gateway/configuration](/gateway/configuration#opencode).
## “Model is not allowed” (and why replies stop)
## "Model is not allowed" (and why replies stop)
If `agents.defaults.models` is set, it becomes the **allowlist** for `/model` and for
session overrides. When a user selects a model that isn't in that allowlist,

View File

@ -9,7 +9,7 @@ status: active
Goal: multiple _isolated_ agents (separate workspace + `agentDir` + sessions), plus multiple channel accounts (e.g. two WhatsApps) in one running Gateway. Inbound is routed to an agent via bindings.
## What is “one agent”?
## What is "one agent"?
An **agent** is a fully scoped brain with its own:

View File

@ -45,7 +45,7 @@ even before any clients connect.
Every WS client begins with a `connect` request. On successful handshake the
Gateway upserts a presence entry for that connection.
#### Why oneoff CLI commands dont show up
#### Why one-off CLI commands do not show up
The CLI often connects for short, one-off commands. To avoid spamming the
Instances list, `client.mode === "cli"` is **not** turned into a presence entry.

View File

@ -90,7 +90,7 @@ more natural.
- Modes: `off` (default), `natural` (800–2500ms), `custom` (`minMs`/`maxMs`).
- Applies only to **block replies**, not final replies or tool summaries.
## “Stream chunks or everything”
## "Stream chunks or everything"
This maps to:

View File

@ -185,7 +185,7 @@ ws.on("message", (data) => {
});
```
## Worked example: add a method endtoend
## Worked example: add a method end-to-end
Example: add a new `system.echo` request that returns `{ ok: true, text }`.

View File

@ -1,534 +0,0 @@
# Kilo Gateway Provider Integration Design
## Overview
This document outlines the design for integrating "Kilo Gateway" as a first-class provider in OpenClaw, modeled after the existing OpenRouter implementation. Kilo Gateway uses an OpenAI-compatible completions API with a different base URL.
## Design Decisions
### 1. Provider Naming
**Recommendation: `kilocode`**
Rationale:
- Matches the user config example provided (`kilocode` provider key)
- Consistent with existing provider naming patterns (e.g., `openrouter`, `opencode`, `moonshot`)
- Short and memorable
- Avoids confusion with generic "kilo" or "gateway" terms
Alternative considered: `kilo-gateway` - rejected because hyphenated names are less common in the codebase and `kilocode` is more concise.
### 2. Default Model Reference
**Recommendation: `kilocode/anthropic/claude-opus-4.6`**
Rationale:
- Based on user config example
- Claude Opus 4.6 is a capable default model
- Explicit model selection avoids reliance on auto-routing
### 3. Base URL Configuration
**Recommendation: Hardcoded default with config override**
- **Default Base URL:** `https://api.kilo.ai/api/gateway/`
- **Configurable:** Yes, via `models.providers.kilocode.baseUrl`
This matches the pattern used by other providers like Moonshot, Venice, and Synthetic.
### 4. Model Scanning
**Recommendation: No dedicated model scanning endpoint initially**
Rationale:
- Kilo Gateway proxies to OpenRouter, so models are dynamic
- Users can manually configure models in their config
- If Kilo Gateway exposes a `/models` endpoint in the future, scanning can be added
### 5. Special Handling
**Recommendation: Inherit OpenRouter behavior for Anthropic models**
Since Kilo Gateway proxies to OpenRouter, the same special handling should apply:
- Cache TTL eligibility for `anthropic/*` models
- Extra params (cacheControlTtl) for `anthropic/*` models
- Transcript policy follows OpenRouter patterns
## Files to Modify
### Core Credential Management
#### 1. `src/commands/onboard-auth.credentials.ts`
Add:
```typescript
export const KILOCODE_DEFAULT_MODEL_REF = "kilocode/anthropic/claude-opus-4.6";
export async function setKilocodeApiKey(key: string, agentDir?: string) {
upsertAuthProfile({
profileId: "kilocode:default",
credential: {
type: "api_key",
provider: "kilocode",
key,
},
agentDir: resolveAuthAgentDir(agentDir),
});
}
```
#### 2. `src/agents/model-auth.ts`
Add to `envMap` in `resolveEnvApiKey()`:
```typescript
const envMap: Record<string, string> = {
// ... existing entries
kilocode: "KILOCODE_API_KEY",
};
```
#### 3. `src/config/io.ts`
Add to `SHELL_ENV_EXPECTED_KEYS`:
```typescript
const SHELL_ENV_EXPECTED_KEYS = [
// ... existing entries
"KILOCODE_API_KEY",
];
```
### Config Application
#### 4. `src/commands/onboard-auth.config-core.ts`
Add new functions:
```typescript
export const KILOCODE_BASE_URL = "https://api.kilo.ai/api/gateway/";
export function applyKilocodeProviderConfig(cfg: OpenClawConfig): OpenClawConfig {
const models = { ...cfg.agents?.defaults?.models };
models[KILOCODE_DEFAULT_MODEL_REF] = {
...models[KILOCODE_DEFAULT_MODEL_REF],
alias: models[KILOCODE_DEFAULT_MODEL_REF]?.alias ?? "Kilo Gateway",
};
const providers = { ...cfg.models?.providers };
const existingProvider = providers.kilocode;
const { apiKey: existingApiKey, ...existingProviderRest } = (existingProvider ?? {}) as Record<
string,
unknown
> as { apiKey?: string };
const resolvedApiKey = typeof existingApiKey === "string" ? existingApiKey : undefined;
const normalizedApiKey = resolvedApiKey?.trim();
providers.kilocode = {
...existingProviderRest,
baseUrl: KILOCODE_BASE_URL,
api: "openai-completions",
...(normalizedApiKey ? { apiKey: normalizedApiKey } : {}),
};
return {
...cfg,
agents: {
...cfg.agents,
defaults: {
...cfg.agents?.defaults,
models,
},
},
models: {
mode: cfg.models?.mode ?? "merge",
providers,
},
};
}
export function applyKilocodeConfig(cfg: OpenClawConfig): OpenClawConfig {
const next = applyKilocodeProviderConfig(cfg);
const existingModel = next.agents?.defaults?.model;
return {
...next,
agents: {
...next.agents,
defaults: {
...next.agents?.defaults,
model: {
...(existingModel && "fallbacks" in (existingModel as Record<string, unknown>)
? {
fallbacks: (existingModel as { fallbacks?: string[] }).fallbacks,
}
: undefined),
primary: KILOCODE_DEFAULT_MODEL_REF,
},
},
},
};
}
```
### Auth Choice System
#### 5. `src/commands/onboard-types.ts`
Add to `AuthChoice` type:
```typescript
export type AuthChoice =
// ... existing choices
"kilocode-api-key";
// ...
```
Add to `OnboardOptions`:
```typescript
export type OnboardOptions = {
// ... existing options
kilocodeApiKey?: string;
// ...
};
```
#### 6. `src/commands/auth-choice-options.ts`
Add to `AuthChoiceGroupId`:
```typescript
export type AuthChoiceGroupId =
// ... existing groups
"kilocode";
// ...
```
Add to `AUTH_CHOICE_GROUP_DEFS`:
```typescript
{
value: "kilocode",
label: "Kilo Gateway",
hint: "API key (OpenRouter-compatible)",
choices: ["kilocode-api-key"],
},
```
Add to `buildAuthChoiceOptions()`:
```typescript
options.push({
value: "kilocode-api-key",
label: "Kilo Gateway API key",
hint: "OpenRouter-compatible gateway",
});
```
#### 7. `src/commands/auth-choice.preferred-provider.ts`
Add mapping:
```typescript
const PREFERRED_PROVIDER_BY_AUTH_CHOICE: Partial<Record<AuthChoice, string>> = {
// ... existing mappings
"kilocode-api-key": "kilocode",
};
```
### Auth Choice Application
#### 8. `src/commands/auth-choice.apply.api-providers.ts`
Add import:
```typescript
import {
// ... existing imports
applyKilocodeConfig,
applyKilocodeProviderConfig,
KILOCODE_DEFAULT_MODEL_REF,
setKilocodeApiKey,
} from "./onboard-auth.js";
```
Add handling for `kilocode-api-key`:
```typescript
if (authChoice === "kilocode-api-key") {
const store = ensureAuthProfileStore(params.agentDir, {
allowKeychainPrompt: false,
});
const profileOrder = resolveAuthProfileOrder({
cfg: nextConfig,
store,
provider: "kilocode",
});
const existingProfileId = profileOrder.find((profileId) => Boolean(store.profiles[profileId]));
const existingCred = existingProfileId ? store.profiles[existingProfileId] : undefined;
let profileId = "kilocode:default";
let mode: "api_key" | "oauth" | "token" = "api_key";
let hasCredential = false;
if (existingProfileId && existingCred?.type) {
profileId = existingProfileId;
mode =
existingCred.type === "oauth" ? "oauth" : existingCred.type === "token" ? "token" : "api_key";
hasCredential = true;
}
if (!hasCredential && params.opts?.token && params.opts?.tokenProvider === "kilocode") {
await setKilocodeApiKey(normalizeApiKeyInput(params.opts.token), params.agentDir);
hasCredential = true;
}
if (!hasCredential) {
const envKey = resolveEnvApiKey("kilocode");
if (envKey) {
const useExisting = await params.prompter.confirm({
message: `Use existing KILOCODE_API_KEY (${envKey.source}, ${formatApiKeyPreview(envKey.apiKey)})?`,
initialValue: true,
});
if (useExisting) {
await setKilocodeApiKey(envKey.apiKey, params.agentDir);
hasCredential = true;
}
}
}
if (!hasCredential) {
const key = await params.prompter.text({
message: "Enter Kilo Gateway API key",
validate: validateApiKeyInput,
});
await setKilocodeApiKey(normalizeApiKeyInput(String(key)), params.agentDir);
hasCredential = true;
}
if (hasCredential) {
nextConfig = applyAuthProfileConfig(nextConfig, {
profileId,
provider: "kilocode",
mode,
});
}
{
const applied = await applyDefaultModelChoice({
config: nextConfig,
setDefaultModel: params.setDefaultModel,
defaultModel: KILOCODE_DEFAULT_MODEL_REF,
applyDefaultConfig: applyKilocodeConfig,
applyProviderConfig: applyKilocodeProviderConfig,
noteDefault: KILOCODE_DEFAULT_MODEL_REF,
noteAgentModel,
prompter: params.prompter,
});
nextConfig = applied.config;
agentModelOverride = applied.agentModelOverride ?? agentModelOverride;
}
return { config: nextConfig, agentModelOverride };
}
```
Also add tokenProvider mapping at the top of the function:
```typescript
if (params.opts.tokenProvider === "kilocode") {
authChoice = "kilocode-api-key";
}
```
### CLI Registration
#### 9. `src/cli/program/register.onboard.ts`
Add CLI option:
```typescript
.option("--kilocode-api-key <key>", "Kilo Gateway API key")
```
Add to action handler:
```typescript
kilocodeApiKey: opts.kilocodeApiKey as string | undefined,
```
Update auth-choice help text:
```typescript
.option(
"--auth-choice <choice>",
"Auth: setup-token|token|chutes|openai-codex|openai-api-key|openrouter-api-key|kilocode-api-key|ai-gateway-api-key|...",
)
```
### Non-Interactive Onboarding
#### 10. `src/commands/onboard-non-interactive/local/auth-choice.ts`
Add handling for `kilocode-api-key`:
```typescript
if (authChoice === "kilocode-api-key") {
const resolved = await resolveNonInteractiveApiKey({
provider: "kilocode",
cfg: baseConfig,
flagValue: opts.kilocodeApiKey,
flagName: "--kilocode-api-key",
envVar: "KILOCODE_API_KEY",
});
await setKilocodeApiKey(resolved.apiKey, agentDir);
nextConfig = applyAuthProfileConfig(nextConfig, {
profileId: "kilocode:default",
provider: "kilocode",
mode: "api_key",
});
// ... apply default model
}
```
### Export Updates
#### 11. `src/commands/onboard-auth.ts`
Add exports:
```typescript
export {
// ... existing exports
applyKilocodeConfig,
applyKilocodeProviderConfig,
KILOCODE_BASE_URL,
} from "./onboard-auth.config-core.js";
export {
// ... existing exports
KILOCODE_DEFAULT_MODEL_REF,
setKilocodeApiKey,
} from "./onboard-auth.credentials.js";
```
### Special Handling (Optional)
#### 12. `src/agents/pi-embedded-runner/cache-ttl.ts`
Add Kilo Gateway support for Anthropic models:
```typescript
export function isCacheTtlEligibleProvider(provider: string, modelId: string): boolean {
const normalizedProvider = provider.toLowerCase();
const normalizedModelId = modelId.toLowerCase();
if (normalizedProvider === "anthropic") return true;
if (normalizedProvider === "openrouter" && normalizedModelId.startsWith("anthropic/"))
return true;
if (normalizedProvider === "kilocode" && normalizedModelId.startsWith("anthropic/")) return true;
return false;
}
```
#### 13. `src/agents/transcript-policy.ts`
Add Kilo Gateway handling (similar to OpenRouter):
```typescript
const isKilocodeGemini = provider === "kilocode" && modelId.toLowerCase().includes("gemini");
// Include in needsNonImageSanitize check
const needsNonImageSanitize =
isGoogle || isAnthropic || isMistral || isOpenRouterGemini || isKilocodeGemini;
```
## Configuration Structure
### User Config Example
```json
{
"models": {
"mode": "merge",
"providers": {
"kilocode": {
"baseUrl": "https://api.kilo.ai/api/gateway/",
"apiKey": "xxxxx",
"api": "openai-completions",
"models": [
{
"id": "anthropic/claude-opus-4.6",
"name": "Anthropic: Claude Opus 4.6"
},
{ "id": "minimax/minimax-m2.5:free", "name": "Minimax: Minimax M2.5" }
]
}
}
}
}
```
### Auth Profile Structure
```json
{
"profiles": {
"kilocode:default": {
"type": "api_key",
"provider": "kilocode",
"key": "xxxxx"
}
}
}
```
## Testing Considerations
1. **Unit Tests:**
- Test `setKilocodeApiKey()` writes correct profile
- Test `applyKilocodeConfig()` sets correct defaults
- Test `resolveEnvApiKey("kilocode")` returns correct env var
2. **Integration Tests:**
- Test setup flow with `--auth-choice kilocode-api-key`
- Test non-interactive setup with `--kilocode-api-key`
- Test model selection with `kilocode/` prefix
3. **E2E Tests:**
- Test actual API calls through Kilo Gateway (live tests)
## Migration Notes
- No migration needed for existing users
- New users can immediately use `kilocode-api-key` auth choice
- Existing manual config with `kilocode` provider will continue to work
## Future Considerations
1. **Model Catalog:** If Kilo Gateway exposes a `/models` endpoint, add scanning support similar to `scanOpenRouterModels()`
2. **OAuth Support:** If Kilo Gateway adds OAuth, extend the auth system accordingly
3. **Rate Limiting:** Consider adding rate limit handling specific to Kilo Gateway if needed
4. **Documentation:** Add docs at `docs/providers/kilocode.md` explaining setup and usage
## Summary of Changes
| File | Change Type | Description |
| ----------------------------------------------------------- | ----------- | ----------------------------------------------------------------------- |
| `src/commands/onboard-auth.credentials.ts` | Add | `KILOCODE_DEFAULT_MODEL_REF`, `setKilocodeApiKey()` |
| `src/agents/model-auth.ts` | Modify | Add `kilocode` to `envMap` |
| `src/config/io.ts` | Modify | Add `KILOCODE_API_KEY` to shell env keys |
| `src/commands/onboard-auth.config-core.ts` | Add | `applyKilocodeProviderConfig()`, `applyKilocodeConfig()` |
| `src/commands/onboard-types.ts` | Modify | Add `kilocode-api-key` to `AuthChoice`, add `kilocodeApiKey` to options |
| `src/commands/auth-choice-options.ts` | Modify | Add `kilocode` group and option |
| `src/commands/auth-choice.preferred-provider.ts` | Modify | Add `kilocode-api-key` mapping |
| `src/commands/auth-choice.apply.api-providers.ts` | Modify | Add `kilocode-api-key` handling |
| `src/cli/program/register.onboard.ts` | Modify | Add `--kilocode-api-key` option |
| `src/commands/onboard-non-interactive/local/auth-choice.ts` | Modify | Add non-interactive handling |
| `src/commands/onboard-auth.ts` | Modify | Export new functions |
| `src/agents/pi-embedded-runner/cache-ttl.ts` | Modify | Add kilocode support |
| `src/agents/transcript-policy.ts` | Modify | Add kilocode Gemini handling |

View File

@ -65,7 +65,7 @@
},
{
"source": "/cron",
"destination": "/cron-jobs"
"destination": "/automation/cron-jobs"
},
{
"source": "/minimax",
@ -513,11 +513,11 @@
},
{
"source": "/model",
"destination": "/models"
"destination": "/concepts/models"
},
{
"source": "/model/",
"destination": "/models"
"destination": "/concepts/models"
},
{
"source": "/models",
@ -535,10 +535,6 @@
"source": "/onboarding",
"destination": "/start/onboarding"
},
{
"source": "/onboarding-config-protocol",
"destination": "/experiments/onboarding-config-protocol"
},
{
"source": "/pairing",
"destination": "/channels/pairing"
@ -559,10 +555,6 @@
"source": "/presence",
"destination": "/concepts/presence"
},
{
"source": "/proposals/model-config",
"destination": "/experiments/proposals/model-config"
},
{
"source": "/provider-routing",
"destination": "/channels/channel-routing"
@ -583,10 +575,6 @@
"source": "/remote-gateway-readme",
"destination": "/gateway/remote-gateway-readme"
},
{
"source": "/research/memory",
"destination": "/experiments/research/memory"
},
{
"source": "/rpc",
"destination": "/reference/rpc"
@ -1358,21 +1346,6 @@
{
"group": "Release policy",
"pages": ["reference/RELEASING", "reference/test"]
},
{
"group": "Experiments",
"pages": [
"design/kilo-gateway-integration",
"experiments/onboarding-config-protocol",
"experiments/plans/acp-thread-bound-agents",
"experiments/plans/acp-unified-streaming-refactor",
"experiments/plans/browser-evaluate-cdp-refactor",
"experiments/plans/openresponses-gateway",
"experiments/plans/pty-process-supervision",
"experiments/plans/session-binding-channel-agnostic",
"experiments/research/memory",
"experiments/proposals/model-config"
]
}
]
},
@ -1938,27 +1911,6 @@
{
"group": "发布策略",
"pages": ["zh-CN/reference/RELEASING", "zh-CN/reference/test"]
},
{
"group": "实验性功能",
"pages": [
"zh-CN/experiments/onboarding-config-protocol",
"zh-CN/experiments/plans/openresponses-gateway",
"zh-CN/experiments/plans/cron-add-hardening",
"zh-CN/experiments/plans/group-policy-hardening",
"zh-CN/experiments/research/memory",
"zh-CN/experiments/proposals/model-config"
]
},
{
"group": "重构方案",
"pages": [
"zh-CN/refactor/clawnet",
"zh-CN/refactor/exec-host",
"zh-CN/refactor/outbound-session-mirroring",
"zh-CN/refactor/plugin-sdk",
"zh-CN/refactor/strict-config"
]
}
]
},

View File

@ -1,43 +0,0 @@
---
summary: "RPC protocol notes for setup wizard and config schema"
read_when: "Changing setup wizard steps or config schema endpoints"
title: "Onboarding and Config Protocol"
---
# Onboarding + Config Protocol
Purpose: shared onboarding + config surfaces across CLI, macOS app, and Web UI.
## Components
- Wizard engine (shared session + prompts + onboarding state).
- CLI onboarding uses the same wizard flow as the UI clients.
- Gateway RPC exposes wizard + config schema endpoints.
- macOS onboarding uses the wizard step model.
- Web UI renders config forms from JSON Schema + UI hints.
## Gateway RPC
- `wizard.start` params: `{ mode?: "local"|"remote", workspace?: string }`
- `wizard.next` params: `{ sessionId, answer?: { stepId, value? } }`
- `wizard.cancel` params: `{ sessionId }`
- `wizard.status` params: `{ sessionId }`
- `config.schema` params: `{}`
- `config.schema.lookup` params: `{ path }`
- `path` accepts standard config segments plus slash-delimited plugin ids, for example `plugins.entries.pack/one.config`.
Responses (shape)
- Wizard: `{ sessionId, done, step?, status?, error? }`
- Config schema: `{ schema, uiHints, version, generatedAt }`
- Config schema lookup: `{ path, schema, hint?, hintPath?, children[] }`
## UI Hints
- `uiHints` keyed by path; optional metadata (label/help/group/order/advanced/sensitive/placeholder).
- Sensitive fields render as password inputs; no redaction layer.
- Unsupported schema nodes fall back to the raw JSON editor.
## Notes
- This doc is the single place to track protocol refactors for onboarding/config.

View File

@ -1,375 +0,0 @@
# ACP Persistent Bindings for Discord Channels and Telegram Topics
Status: Draft
## Summary
Introduce persistent ACP bindings that map:
- Discord channels (and existing threads, where needed), and
- Telegram forum topics in groups/supergroups (`chatId:topic:topicId`)
to long-lived ACP sessions, with binding state stored in top-level `bindings[]` entries using explicit binding types.
This makes ACP usage in high-traffic messaging channels predictable and durable, so users can create dedicated channels/topics such as `codex`, `claude-1`, or `claude-myrepo`.
## Why
Current thread-bound ACP behavior is optimized for ephemeral Discord thread workflows. Telegram does not have the same thread model; it has forum topics in groups/supergroups. Users want stable, always-on ACP “workspaces” in chat surfaces, not only temporary thread sessions.
## Goals
- Support durable ACP binding for:
- Discord channels/threads
- Telegram forum topics (groups/supergroups)
- Make binding source-of-truth config-driven.
- Keep `/acp`, `/new`, `/reset`, `/focus`, and delivery behavior consistent across Discord and Telegram.
- Preserve existing temporary binding flows for ad-hoc usage.
## Non-Goals
- Full redesign of ACP runtime/session internals.
- Removing existing ephemeral binding flows.
- Expanding to every channel in the first iteration.
- Implementing Telegram channel direct-messages topics (`direct_messages_topic_id`) in this phase.
- Implementing Telegram private-chat topic variants in this phase.
## UX Direction
### 1) Two binding types
- **Persistent binding**: saved in config, reconciled on startup, intended for “named workspace” channels/topics.
- **Temporary binding**: runtime-only, expires by idle/max-age policy.
### 2) Command behavior
- `/acp spawn ... --thread here|auto|off` remains available.
- Add explicit bind lifecycle controls:
- `/acp bind [session|agent] [--persist]`
- `/acp unbind [--persist]`
- `/acp status` includes whether binding is `persistent` or `temporary`.
- In bound conversations, `/new` and `/reset` reset the bound ACP session in place and keep the binding attached.
### 3) Conversation identity
- Use canonical conversation IDs:
- Discord: channel/thread ID.
- Telegram topic: `chatId:topic:topicId`.
- Never key Telegram bindings by bare topic ID alone.
## Config Model (Proposed)
Unify routing and persistent ACP binding configuration in top-level `bindings[]` with explicit `type` discriminator:
```jsonc
{
"agents": {
"list": [
{
"id": "main",
"default": true,
"workspace": "~/.openclaw/workspace-main",
"runtime": { "type": "embedded" },
},
{
"id": "codex",
"workspace": "~/.openclaw/workspace-codex",
"runtime": {
"type": "acp",
"acp": {
"agent": "codex",
"backend": "acpx",
"mode": "persistent",
"cwd": "/workspace/repo-a",
},
},
},
{
"id": "claude",
"workspace": "~/.openclaw/workspace-claude",
"runtime": {
"type": "acp",
"acp": {
"agent": "claude",
"backend": "acpx",
"mode": "persistent",
"cwd": "/workspace/repo-b",
},
},
},
],
},
"acp": {
"enabled": true,
"backend": "acpx",
"allowedAgents": ["codex", "claude"],
},
"bindings": [
// Route bindings (existing behavior)
{
"type": "route",
"agentId": "main",
"match": { "channel": "discord", "accountId": "default" },
},
{
"type": "route",
"agentId": "main",
"match": { "channel": "telegram", "accountId": "default" },
},
// Persistent ACP conversation bindings
{
"type": "acp",
"agentId": "codex",
"match": {
"channel": "discord",
"accountId": "default",
"peer": { "kind": "channel", "id": "222222222222222222" },
},
"acp": {
"label": "codex-main",
"mode": "persistent",
"cwd": "/workspace/repo-a",
"backend": "acpx",
},
},
{
"type": "acp",
"agentId": "claude",
"match": {
"channel": "discord",
"accountId": "default",
"peer": { "kind": "channel", "id": "333333333333333333" },
},
"acp": {
"label": "claude-repo-b",
"mode": "persistent",
"cwd": "/workspace/repo-b",
},
},
{
"type": "acp",
"agentId": "codex",
"match": {
"channel": "telegram",
"accountId": "default",
"peer": { "kind": "group", "id": "-1001234567890:topic:42" },
},
"acp": {
"label": "tg-codex-42",
"mode": "persistent",
},
},
],
"channels": {
"discord": {
"guilds": {
"111111111111111111": {
"channels": {
"222222222222222222": {
"enabled": true,
"requireMention": false,
},
"333333333333333333": {
"enabled": true,
"requireMention": false,
},
},
},
},
},
"telegram": {
"groups": {
"-1001234567890": {
"topics": {
"42": {
"requireMention": false,
},
},
},
},
},
},
}
```
### Minimal Example (No Per-Binding ACP Overrides)
```jsonc
{
"agents": {
"list": [
{ "id": "main", "default": true, "runtime": { "type": "embedded" } },
{
"id": "codex",
"runtime": {
"type": "acp",
"acp": { "agent": "codex", "backend": "acpx", "mode": "persistent" },
},
},
{
"id": "claude",
"runtime": {
"type": "acp",
"acp": { "agent": "claude", "backend": "acpx", "mode": "persistent" },
},
},
],
},
"acp": { "enabled": true, "backend": "acpx" },
"bindings": [
{
"type": "route",
"agentId": "main",
"match": { "channel": "discord", "accountId": "default" },
},
{
"type": "route",
"agentId": "main",
"match": { "channel": "telegram", "accountId": "default" },
},
{
"type": "acp",
"agentId": "codex",
"match": {
"channel": "discord",
"accountId": "default",
"peer": { "kind": "channel", "id": "222222222222222222" },
},
},
{
"type": "acp",
"agentId": "claude",
"match": {
"channel": "discord",
"accountId": "default",
"peer": { "kind": "channel", "id": "333333333333333333" },
},
},
{
"type": "acp",
"agentId": "codex",
"match": {
"channel": "telegram",
"accountId": "default",
"peer": { "kind": "group", "id": "-1009876543210:topic:5" },
},
},
],
}
```
Notes:
- `bindings[].type` is explicit:
- `route`: normal agent routing.
- `acp`: persistent ACP harness binding for a matched conversation.
- For `type: "acp"`, `match.peer.id` is the canonical conversation key:
- Discord channel/thread: raw channel/thread ID.
- Telegram topic: `chatId:topic:topicId`.
- `bindings[].acp.backend` is optional. Backend fallback order:
1. `bindings[].acp.backend`
2. `agents.list[].runtime.acp.backend`
3. global `acp.backend`
- `mode`, `cwd`, and `label` follow the same override pattern (`binding override -> agent runtime default -> global/default behavior`).
- Keep existing `session.threadBindings.*` and `channels.discord.threadBindings.*` for temporary binding policies.
- Persistent entries declare desired state; runtime reconciles to actual ACP sessions/bindings.
- One active ACP binding per conversation node is the intended model.
- Backward compatibility: missing `type` is interpreted as `route` for legacy entries.
### Backend Selection
- ACP session initialization already uses configured backend selection during spawn (`acp.backend` today).
- This proposal extends spawn/reconcile logic to prefer typed ACP binding overrides:
- `bindings[].acp.backend` for conversation-local override.
- `agents.list[].runtime.acp.backend` for per-agent defaults.
- If no override exists, keep current behavior (`acp.backend` default).
## Architecture Fit in Current System
### Reuse existing components
- `SessionBindingService` already supports channel-agnostic conversation references.
- ACP spawn/bind flows already support binding through service APIs.
- Telegram already carries topic/thread context via `MessageThreadId` and `chatId`.
### New/extended components
- **Telegram binding adapter** (parallel to Discord adapter):
- register adapter per Telegram account,
- resolve/list/bind/unbind/touch by canonical conversation ID.
- **Typed binding resolver/index**:
- split `bindings[]` into `route` and `acp` views,
- keep `resolveAgentRoute` on `route` bindings only,
- resolve persistent ACP intent from `acp` bindings only.
- **Inbound binding resolution for Telegram**:
- resolve bound session before route finalization (Discord already does this).
- **Persistent binding reconciler**:
- on startup: load configured top-level `type: "acp"` bindings, ensure ACP sessions exist, ensure bindings exist.
- on config change: apply deltas safely.
- **Cutover model**:
- no channel-local ACP binding fallback is read,
- persistent ACP bindings are sourced only from top-level `bindings[].type="acp"` entries.
## Phased Delivery
### Phase 1: Typed binding schema foundation
- Extend config schema to support `bindings[].type` discriminator:
- `route`,
- `acp` with optional `acp` override object (`mode`, `backend`, `cwd`, `label`).
- Extend agent schema with runtime descriptor to mark ACP-native agents (`agents.list[].runtime.type`).
- Add parser/indexer split for route vs ACP bindings.
### Phase 2: Runtime resolution + Discord/Telegram parity
- Resolve persistent ACP bindings from top-level `type: "acp"` entries for:
- Discord channels/threads,
- Telegram forum topics (`chatId:topic:topicId` canonical IDs).
- Implement Telegram binding adapter and inbound bound-session override parity with Discord.
- Do not include Telegram direct/private topic variants in this phase.
### Phase 3: Command parity and resets
- Align `/acp`, `/new`, `/reset`, and `/focus` behavior in bound Telegram/Discord conversations.
- Ensure binding survives reset flows as configured.
### Phase 4: Hardening
- Better diagnostics (`/acp status`, startup reconciliation logs).
- Conflict handling and health checks.
## Guardrails and Policy
- Respect ACP enablement and sandbox restrictions exactly as today.
- Keep explicit account scoping (`accountId`) to avoid cross-account bleed.
- Fail closed on ambiguous routing.
- Keep mention/access policy behavior explicit per channel config.
## Testing Plan
- Unit:
- conversation ID normalization (especially Telegram topic IDs),
- reconciler create/update/delete paths,
- `/acp bind --persist` and unbind flows.
- Integration:
- inbound Telegram topic -> bound ACP session resolution,
- inbound Discord channel/thread -> persistent binding precedence.
- Regression:
- temporary bindings continue to work,
- unbound channels/topics keep current routing behavior.
## Open Questions
- Should `/acp spawn --thread auto` in Telegram topic default to `here`?
- Should persistent bindings always bypass mention-gating in bound conversations, or require explicit `requireMention=false`?
- Should `/focus` gain `--persist` as an alias for `/acp bind --persist`?
## Rollout
- Ship as opt-in per conversation (`bindings[].type="acp"` entry present).
- Start with Discord + Telegram only.
- Add docs with examples for:
  - “one channel/topic per agent”
  - “multiple channels/topics per same agent with different `cwd`”
  - “team naming patterns (`codex-1`, `claude-repo-x`)”.

View File

@ -1,800 +0,0 @@
---
summary: "Integrate ACP coding agents via a first-class ACP control plane in core and plugin-backed runtimes (acpx first)"
owner: "onutc"
status: "draft"
last_updated: "2026-02-25"
title: "ACP Thread Bound Agents"
---
# ACP Thread Bound Agents
## Overview
This plan defines how OpenClaw should support ACP coding agents in thread-capable channels (Discord first) with production-level lifecycle and recovery.
Related document:
- [Unified Runtime Streaming Refactor Plan](/experiments/plans/acp-unified-streaming-refactor)
Target user experience:
- a user spawns or focuses an ACP session into a thread
- user messages in that thread route to the bound ACP session
- agent output streams back to the same thread persona
- session can be persistent or one shot with explicit cleanup controls
## Decision summary
Long term recommendation is a hybrid architecture:
- OpenClaw core owns ACP control plane concerns
- session identity and metadata
- thread binding and routing decisions
- delivery invariants and duplicate suppression
- lifecycle cleanup and recovery semantics
- ACP runtime backend is pluggable
- first backend is an acpx-backed plugin service
- runtime does ACP transport, queueing, cancel, reconnect
OpenClaw should not reimplement ACP transport internals in core.
OpenClaw should not rely on a pure plugin-only interception path for routing.
## North-star architecture (holy grail)
Treat ACP as a first-class control plane in OpenClaw, with pluggable runtime adapters.
Non-negotiable invariants:
- every ACP thread binding references a valid ACP session record
- every ACP session has explicit lifecycle state (`creating`, `idle`, `running`, `cancelling`, `closed`, `error`)
- every ACP run has explicit run state (`queued`, `running`, `completed`, `failed`, `cancelled`)
- spawn, bind, and initial enqueue are atomic
- command retries are idempotent (no duplicate runs or duplicate Discord outputs)
- bound-thread channel output is a projection of ACP run events, never ad-hoc side effects
Long-term ownership model:
- `AcpSessionManager` is the single ACP writer and orchestrator
- manager lives in gateway process first; can be moved to a dedicated sidecar later behind the same interface
- per ACP session key, manager owns one in-memory actor (serialized command execution)
- adapters (`acpx`, future backends) are transport/runtime implementations only
Long-term persistence model:
- move ACP control-plane state to a dedicated SQLite store (WAL mode) under OpenClaw state dir
- keep `SessionEntry.acp` as compatibility projection during migration, not source-of-truth
- store ACP events append-only to support replay, crash recovery, and deterministic delivery
### Delivery strategy (bridge to holy-grail)
- short-term bridge
- keep current thread binding mechanics and existing ACP config surface
- fix metadata-gap bugs and route ACP turns through a single core ACP branch
- add idempotency keys and fail-closed routing checks immediately
- long-term cutover
- move ACP source-of-truth to control-plane DB + actors
- make bound-thread delivery purely event-projection based
- remove legacy fallback behavior that depends on opportunistic session-entry metadata
## Why not pure plugin only
Current plugin hooks are not sufficient for end to end ACP session routing without core changes.
- inbound routing from thread binding resolves to a session key in core dispatch first
- message hooks are fire-and-forget and cannot short-circuit the main reply path
- plugin commands are good for control operations but not for replacing core per-turn dispatch flow
Result:
- ACP runtime can be pluginized
- ACP routing branch must exist in core
## Existing foundation to reuse
Already implemented and should remain canonical:
- thread binding target supports `subagent` and `acp`
- inbound thread routing override resolves by binding before normal dispatch
- outbound thread identity via webhook in reply delivery
- `/focus` and `/unfocus` flow with ACP target compatibility
- persistent binding store with restore on startup
- unbind lifecycle on archive, delete, unfocus, reset, and delete
This plan extends that foundation rather than replacing it.
## Architecture
### Boundary model
Core (must be in OpenClaw core):
- ACP session-mode dispatch branch in the reply pipeline
- delivery arbitration to avoid parent plus thread duplication
- ACP control-plane persistence (with `SessionEntry.acp` compatibility projection during migration)
- lifecycle unbind and runtime detach semantics tied to session reset/delete
Plugin backend (acpx implementation):
- ACP runtime worker supervision
- acpx process invocation and event parsing
- ACP command handlers (`/acp ...`) and operator UX
- backend-specific config defaults and diagnostics
### Runtime ownership model
- one gateway process owns ACP orchestration state
- ACP execution runs in supervised child processes via acpx backend
- process strategy is long lived per active ACP session key, not per message
This avoids startup cost on every prompt and keeps cancel and reconnect semantics reliable.
### Core runtime contract
Add a core ACP runtime contract so routing code does not depend on CLI details and can switch backends without changing dispatch logic:
```ts
export type AcpRuntimePromptMode = "prompt" | "steer";
export type AcpRuntimeHandle = {
sessionKey: string;
backend: string;
runtimeSessionName: string;
};
export type AcpRuntimeEvent =
| { type: "text_delta"; stream: "output" | "thought"; text: string }
| { type: "tool_call"; name: string; argumentsText: string }
| { type: "done"; usage?: Record<string, number> }
| { type: "error"; code: string; message: string; retryable?: boolean };
export interface AcpRuntime {
ensureSession(input: {
sessionKey: string;
agent: string;
mode: "persistent" | "oneshot";
cwd?: string;
env?: Record<string, string>;
idempotencyKey: string;
}): Promise<AcpRuntimeHandle>;
submit(input: {
handle: AcpRuntimeHandle;
text: string;
mode: AcpRuntimePromptMode;
idempotencyKey: string;
}): Promise<{ runtimeRunId: string }>;
stream(input: {
handle: AcpRuntimeHandle;
runtimeRunId: string;
onEvent: (event: AcpRuntimeEvent) => Promise<void> | void;
signal?: AbortSignal;
}): Promise<void>;
cancel(input: {
handle: AcpRuntimeHandle;
runtimeRunId?: string;
reason?: string;
idempotencyKey: string;
}): Promise<void>;
close(input: { handle: AcpRuntimeHandle; reason: string; idempotencyKey: string }): Promise<void>;
health?(): Promise<{ ok: boolean; details?: string }>;
}
```
Implementation detail:
- first backend: `AcpxRuntime` shipped as a plugin service
- core resolves runtime via registry and fails with explicit operator error when no ACP runtime backend is available
### Control-plane data model and persistence
Long-term source-of-truth is a dedicated ACP SQLite database (WAL mode), for transactional updates and crash-safe recovery:
- `acp_sessions`
- `session_key` (pk), `backend`, `agent`, `mode`, `cwd`, `state`, `created_at`, `updated_at`, `last_error`
- `acp_runs`
- `run_id` (pk), `session_key` (fk), `state`, `requester_message_id`, `idempotency_key`, `started_at`, `ended_at`, `error_code`, `error_message`
- `acp_bindings`
- `binding_key` (pk), `thread_id`, `channel_id`, `account_id`, `session_key` (fk), `expires_at`, `bound_at`
- `acp_events`
- `event_id` (pk), `run_id` (fk), `seq`, `kind`, `payload_json`, `created_at`
- `acp_delivery_checkpoint`
- `run_id` (pk/fk), `last_event_seq`, `last_discord_message_id`, `updated_at`
- `acp_idempotency`
- `scope`, `idempotency_key`, `result_json`, `created_at`, unique `(scope, idempotency_key)`
```ts
/**
 * Durable per-session metadata, mirroring the `acp_sessions` row described
 * above. Source-of-truth lives in the ACP SQLite store; `SessionEntry.acp`
 * is only a compatibility projection of this shape during migration.
 */
export type AcpSessionMeta = {
  // Runtime backend id (e.g. "acpx").
  backend: string;
  // ACP agent name the session was spawned with.
  agent: string;
  // Backend-encoded session handle (see the acpx runtime handle encoding).
  runtimeSessionName: string;
  mode: "persistent" | "oneshot";
  // Workspace root for the session, when restricted/configured.
  cwd?: string;
  // Coarse session state; full lifecycle lives in the session state machine.
  state: "idle" | "running" | "error";
  // Last activity timestamp — presumably epoch millis; confirm against store.
  lastActivityAt: number;
  // Last user-safe error summary, if the session entered `error`.
  lastError?: string;
};
```
Storage rules:
- keep `SessionEntry.acp` as a compatibility projection during migration
- process ids and sockets stay in memory only
- durable lifecycle and run status live in ACP DB, not generic session JSON
- if runtime owner dies, gateway rehydrates from ACP DB and resumes from checkpoints
### Routing and delivery
Inbound:
- keep current thread binding lookup as first routing step
- if bound target is ACP session, route to ACP runtime branch instead of `getReplyFromConfig`
- explicit `/acp steer` command uses `mode: "steer"`
Outbound:
- ACP event stream is normalized to OpenClaw reply chunks
- delivery target is resolved through existing bound destination path
- when a bound thread is active for that session turn, parent channel completion is suppressed
Streaming policy:
- stream partial output with coalescing window
- configurable min interval and max chunk bytes to stay under Discord rate limits
- final message always emitted on completion or failure
### State machines and transaction boundaries
Session state machine:
- `creating -> idle -> running -> idle`
- `running -> cancelling -> idle | error`
- `idle -> closed`
- `error -> idle | closed`
Run state machine:
- `queued -> running -> completed`
- `running -> failed | cancelled`
- `queued -> cancelled`
Required transaction boundaries:
- spawn transaction
- create ACP session row
- create/update ACP thread binding row
- enqueue initial run row
- close transaction
- mark session closed
- delete/expire binding rows
- write final close event
- cancel transaction
- mark target run cancelling/cancelled with idempotency key
No partial success is allowed across these boundaries.
### Per-session actor model
`AcpSessionManager` runs one actor per ACP session key:
- actor mailbox serializes `submit`, `cancel`, `close`, and `stream` side effects
- actor owns runtime handle hydration and runtime adapter process lifecycle for that session
- actor writes run events in-order (`seq`) before any Discord delivery
- actor updates delivery checkpoints after successful outbound send
This removes cross-turn races and prevents duplicate or out-of-order thread output.
### Idempotency and delivery projection
All external ACP actions must carry idempotency keys:
- spawn idempotency key
- prompt/steer idempotency key
- cancel idempotency key
- close idempotency key
Delivery rules:
- Discord messages are derived from `acp_events` plus `acp_delivery_checkpoint`
- retries resume from checkpoint without re-sending already delivered chunks
- final reply emission is exactly-once per run from projection logic
### Recovery and self-healing
On gateway start:
- load non-terminal ACP sessions (`creating`, `idle`, `running`, `cancelling`, `error`)
- recreate actors lazily on first inbound event or eagerly under configured cap
- reconcile any `running` runs missing heartbeats and mark `failed` or recover via adapter
On inbound Discord thread message:
- if binding exists but ACP session is missing, fail closed with explicit stale-binding message
- optionally auto-unbind stale binding after operator-safe validation
- never silently route stale ACP bindings to normal LLM path
### Lifecycle and safety
Supported operations:
- cancel current run: `/acp cancel`
- unbind thread: `/unfocus`
- close ACP session: `/acp close`
- auto close idle sessions by effective TTL
TTL policy:
- effective TTL is minimum of
- global/session TTL
- Discord thread binding TTL
- ACP runtime owner TTL
Safety controls:
- allowlist ACP agents by name
- restrict workspace roots for ACP sessions
- env allowlist passthrough
- max concurrent ACP sessions per account and globally
- bounded restart backoff for runtime crashes
## Config surface
Core keys:
- `acp.enabled`
- `acp.dispatch.enabled` (independent ACP routing kill switch)
- `acp.backend` (default `acpx`)
- `acp.defaultAgent`
- `acp.allowedAgents[]`
- `acp.maxConcurrentSessions`
- `acp.stream.coalesceIdleMs`
- `acp.stream.maxChunkChars`
- `acp.runtime.ttlMinutes`
- `acp.controlPlane.store` (`sqlite` default)
- `acp.controlPlane.storePath`
- `acp.controlPlane.recovery.eagerActors`
- `acp.controlPlane.recovery.reconcileRunningAfterMs`
- `acp.controlPlane.checkpoint.flushEveryEvents`
- `acp.controlPlane.checkpoint.flushEveryMs`
- `acp.idempotency.ttlHours`
- `channels.discord.threadBindings.spawnAcpSessions`
Plugin/backend keys (acpx plugin section):
- backend command/path overrides
- backend env allowlist
- backend per-agent presets
- backend startup/stop timeouts
- backend max inflight runs per session
## Implementation specification
### Control-plane modules (new)
Add dedicated ACP control-plane modules in core:
- `src/acp/control-plane/manager.ts`
- owns ACP actors, lifecycle transitions, command serialization
- `src/acp/control-plane/store.ts`
- SQLite schema management, transactions, query helpers
- `src/acp/control-plane/events.ts`
- typed ACP event definitions and serialization
- `src/acp/control-plane/checkpoint.ts`
- durable delivery checkpoints and replay cursors
- `src/acp/control-plane/idempotency.ts`
- idempotency key reservation and response replay
- `src/acp/control-plane/recovery.ts`
- boot-time reconciliation and actor rehydrate plan
Compatibility bridge modules:
- `src/acp/runtime/session-meta.ts`
- remains temporarily for projection into `SessionEntry.acp`
- must stop being source-of-truth after migration cutover
### Required invariants (must enforce in code)
- ACP session creation and thread bind are atomic (single transaction)
- there is at most one active run per ACP session actor at a time
- event `seq` is strictly increasing per run
- delivery checkpoint never advances past last committed event
- idempotency replay returns previous success payload for duplicate command keys
- stale/missing ACP metadata cannot route into normal non-ACP reply path
### Core touchpoints
Core files to change:
- `src/auto-reply/reply/dispatch-from-config.ts`
- ACP branch calls `AcpSessionManager.submit` and event-projection delivery
- remove direct ACP fallback that bypasses control-plane invariants
- `src/auto-reply/reply/inbound-context.ts` (or nearest normalized context boundary)
- expose normalized routing keys and idempotency seeds for ACP control plane
- `src/config/sessions/types.ts`
- keep `SessionEntry.acp` as projection-only compatibility field
- `src/gateway/server-methods/sessions.ts`
- reset/delete/archive must call ACP manager close/unbind transaction path
- `src/infra/outbound/bound-delivery-router.ts`
- enforce fail-closed destination behavior for ACP bound session turns
- `src/discord/monitor/thread-bindings.ts`
- add ACP stale-binding validation helpers wired to control-plane lookups
- `src/auto-reply/reply/commands-acp.ts`
- route spawn/cancel/close/steer through ACP manager APIs
- `src/agents/acp-spawn.ts`
- stop ad-hoc metadata writes; call ACP manager spawn transaction
- `src/plugin-sdk/**` and plugin runtime bridge
- expose ACP backend registration and health semantics cleanly
Core files explicitly not replaced:
- `src/discord/monitor/message-handler.preflight.ts`
- keep thread binding override behavior as the canonical session-key resolver
### ACP runtime registry API
Add a core registry module:
- `src/acp/runtime/registry.ts`
Required API:
```ts
/** A registered runtime backend: stable id plus its `AcpRuntime` implementation. */
export type AcpRuntimeBackend = {
  id: string;
  runtime: AcpRuntime;
  // Optional synchronous health hint; richer checks go through AcpRuntime.health.
  healthy?: () => boolean;
};
// Registered by the plugin service on `start`; process-local only.
export function registerAcpRuntimeBackend(backend: AcpRuntimeBackend): void;
// Unregistered on plugin `stop`.
export function unregisterAcpRuntimeBackend(id: string): void;
// Read-only lookup; returns null when the backend is not registered.
export function getAcpRuntimeBackend(id?: string): AcpRuntimeBackend | null;
// Like getAcpRuntimeBackend, but throws a typed ACP_BACKEND_MISSING error.
export function requireAcpRuntimeBackend(id?: string): AcpRuntimeBackend;
```
Behavior:
- `requireAcpRuntimeBackend` throws a typed ACP backend missing error when unavailable
- plugin service registers backend on `start` and unregisters on `stop`
- runtime lookups are read-only and process-local
### acpx runtime plugin contract (implementation detail)
For the first production backend (`extensions/acpx`), OpenClaw and acpx are
connected with a strict command contract:
- backend id: `acpx`
- plugin service id: `acpx-runtime`
- runtime handle encoding: `runtimeSessionName = acpx:v1:<base64url(json)>`
- encoded payload fields:
- `name` (acpx named session; uses OpenClaw `sessionKey`)
- `agent` (acpx agent command)
- `cwd` (session workspace root)
- `mode` (`persistent | oneshot`)
Command mapping:
- ensure session:
- `acpx --format json --json-strict --cwd <cwd> <agent> sessions ensure --name <name>`
- prompt turn:
- `acpx --format json --json-strict --cwd <cwd> <agent> prompt --session <name> --file -`
- cancel:
- `acpx --format json --json-strict --cwd <cwd> <agent> cancel --session <name>`
- close:
- `acpx --format json --json-strict --cwd <cwd> <agent> sessions close <name>`
Streaming:
- OpenClaw consumes ndjson events from `acpx --format json --json-strict`
- `text` => `text_delta/output`
- `thought` => `text_delta/thought`
- `tool_call` => `tool_call`
- `done` => `done`
- `error` => `error`
### Session schema patch
Patch `SessionEntry` in `src/config/sessions/types.ts`:
```ts
/**
 * Projection-only ACP metadata persisted on `SessionEntry.acp` for
 * compatibility during migration. The ACP SQLite store is source-of-truth;
 * this shape intentionally matches `AcpSessionMeta`.
 */
type SessionAcpMeta = {
  // Runtime backend id (e.g. "acpx").
  backend: string;
  // ACP agent name used at spawn time.
  agent: string;
  // Backend-encoded runtime session handle.
  runtimeSessionName: string;
  mode: "persistent" | "oneshot";
  cwd?: string;
  state: "idle" | "running" | "error";
  // Last activity timestamp — presumably epoch millis; confirm against store.
  lastActivityAt: number;
  lastError?: string;
};
```
Persisted field:
- `SessionEntry.acp?: SessionAcpMeta`
Migration rules:
- phase A: dual-write (`acp` projection + ACP SQLite source-of-truth)
- phase B: read-primary from ACP SQLite, fallback-read from legacy `SessionEntry.acp`
- phase C: migration command backfills missing ACP rows from valid legacy entries
- phase D: remove fallback-read and keep projection optional for UX only
- legacy fields (`cliSessionIds`, `claudeCliSessionId`) remain untouched
### Error contract
Add stable ACP error codes and user-facing messages:
- `ACP_BACKEND_MISSING`
- message: `ACP runtime backend is not configured. Install and enable the acpx runtime plugin.`
- `ACP_BACKEND_UNAVAILABLE`
- message: `ACP runtime backend is currently unavailable. Try again in a moment.`
- `ACP_SESSION_INIT_FAILED`
- message: `Could not initialize ACP session runtime.`
- `ACP_TURN_FAILED`
- message: `ACP turn failed before completion.`
Rules:
- return actionable user-safe message in-thread
- log detailed backend/system error only in runtime logs
- never silently fall back to normal LLM path when ACP routing was explicitly selected
### Duplicate delivery arbitration
Single routing rule for ACP bound turns:
- if an active thread binding exists for the target ACP session and requester context, deliver only to that bound thread
- do not also send to parent channel for the same turn
- if bound destination selection is ambiguous, fail closed with explicit error (no implicit parent fallback)
- if no active binding exists, use normal session destination behavior
### Observability and operational readiness
Required metrics:
- ACP spawn success/failure count by backend and error code
- ACP run latency percentiles (queue wait, runtime turn time, delivery projection time)
- ACP actor restart count and restart reason
- stale-binding detection count
- idempotency replay hit rate
- Discord delivery retry and rate-limit counters
Required logs:
- structured logs keyed by `sessionKey`, `runId`, `backend`, `threadId`, `idempotencyKey`
- explicit state transition logs for session and run state machines
- adapter command logs with redaction-safe arguments and exit summary
Required diagnostics:
- `/acp sessions` includes state, active run, last error, and binding status
- `/acp doctor` (or equivalent) validates backend registration, store health, and stale bindings
### Config precedence and effective values
ACP enablement precedence:
- account override: `channels.discord.accounts.<id>.threadBindings.spawnAcpSessions`
- channel override: `channels.discord.threadBindings.spawnAcpSessions`
- global ACP gate: `acp.enabled`
- dispatch gate: `acp.dispatch.enabled`
- backend availability: registered backend for `acp.backend`
Auto-enable behavior:
- when ACP is configured (`acp.enabled=true`, `acp.dispatch.enabled=true`, or
`acp.backend=acpx`), plugin auto-enable marks `plugins.entries.acpx.enabled=true`
unless denylisted or explicitly disabled
TTL effective value:
- `min(session ttl, discord thread binding ttl, acp runtime ttl)`
### Test map
Unit tests:
- `src/acp/runtime/registry.test.ts` (new)
- `src/auto-reply/reply/dispatch-from-config.acp.test.ts` (new)
- `src/infra/outbound/bound-delivery-router.test.ts` (extend ACP fail-closed cases)
- `src/config/sessions/types.test.ts` or nearest session-store tests (ACP metadata persistence)
Integration tests:
- `src/discord/monitor/reply-delivery.test.ts` (bound ACP delivery target behavior)
- `src/discord/monitor/message-handler.preflight*.test.ts` (bound ACP session-key routing continuity)
- acpx plugin runtime tests in backend package (service register/start/stop + event normalization)
Gateway e2e tests:
- `src/gateway/server.sessions.gateway-server-sessions-a.e2e.test.ts` (extend ACP reset/delete lifecycle coverage)
- ACP thread turn roundtrip e2e for spawn, message, stream, cancel, unfocus, restart recovery
### Rollout guard
Add independent ACP dispatch kill switch:
- `acp.dispatch.enabled` default `false` for first release
- when disabled:
- ACP spawn/focus control commands may still bind sessions
- ACP dispatch path does not activate
- user receives explicit message that ACP dispatch is disabled by policy
- after canary validation, default can be flipped to `true` in a later release
## Command and UX plan
### New commands
- `/acp spawn <agent-id> [--mode persistent|oneshot] [--thread auto|here|off]`
- `/acp cancel [session]`
- `/acp steer <instruction>`
- `/acp close [session]`
- `/acp sessions`
### Existing command compatibility
- `/focus <sessionKey>` continues to support ACP targets
- `/unfocus` keeps current semantics
- `/session idle` and `/session max-age` replace the old TTL override
## Phased rollout
### Phase 0 ADR and schema freeze
- ship ADR for ACP control-plane ownership and adapter boundaries
- freeze DB schema (`acp_sessions`, `acp_runs`, `acp_bindings`, `acp_events`, `acp_delivery_checkpoint`, `acp_idempotency`)
- define stable ACP error codes, event contract, and state-transition guards
### Phase 1 Control-plane foundation in core
- implement `AcpSessionManager` and per-session actor runtime
- implement ACP SQLite store and transaction helpers
- implement idempotency store and replay helpers
- implement event append + delivery checkpoint modules
- wire spawn/cancel/close APIs to manager with transactional guarantees
### Phase 2 Core routing and lifecycle integration
- route thread-bound ACP turns from dispatch pipeline into ACP manager
- enforce fail-closed routing when ACP binding/session invariants fail
- integrate reset/delete/archive/unfocus lifecycle with ACP close/unbind transactions
- add stale-binding detection and optional auto-unbind policy
### Phase 3 acpx backend adapter/plugin
- implement `acpx` adapter against runtime contract (`ensureSession`, `submit`, `stream`, `cancel`, `close`)
- add backend health checks and startup/teardown registration
- normalize acpx ndjson events into ACP runtime events
- enforce backend timeouts, process supervision, and restart/backoff policy
### Phase 4 Delivery projection and channel UX (Discord first)
- implement event-driven channel projection with checkpoint resume (Discord first)
- coalesce streaming chunks with rate-limit aware flush policy
- guarantee exactly-once final completion message per run
- ship `/acp spawn`, `/acp cancel`, `/acp steer`, `/acp close`, `/acp sessions`
### Phase 5 Migration and cutover
- introduce dual-write to `SessionEntry.acp` projection plus ACP SQLite source-of-truth
- add migration utility for legacy ACP metadata rows
- flip read path to ACP SQLite primary
- remove legacy fallback routing that depends on missing `SessionEntry.acp`
### Phase 6 Hardening, SLOs, and scale limits
- enforce concurrency limits (global/account/session), queue policies, and timeout budgets
- add full telemetry, dashboards, and alert thresholds
- chaos-test crash recovery and duplicate-delivery suppression
- publish runbook for backend outage, DB corruption, and stale-binding remediation
### Full implementation checklist
- core control-plane modules and tests
- DB migrations and rollback plan
- ACP manager API integration across dispatch and commands
- adapter registration interface in plugin runtime bridge
- acpx adapter implementation and tests
- thread-capable channel delivery projection logic with checkpoint replay (Discord first)
- lifecycle hooks for reset/delete/archive/unfocus
- stale-binding detector and operator-facing diagnostics
- config validation and precedence tests for all new ACP keys
- operational docs and troubleshooting runbook
## Test plan
Unit tests:
- ACP DB transaction boundaries (spawn/bind/enqueue atomicity, cancel, close)
- ACP state-machine transition guards for sessions and runs
- idempotency reservation/replay semantics across all ACP commands
- per-session actor serialization and queue ordering
- acpx event parser and chunk coalescer
- runtime supervisor restart and backoff policy
- config precedence and effective TTL calculation
- core ACP routing branch selection and fail-closed behavior when backend/session is invalid
Integration tests:
- fake ACP adapter process for deterministic streaming and cancel behavior
- ACP manager + dispatch integration with transactional persistence
- thread-bound inbound routing to ACP session key
- thread-bound outbound delivery suppresses parent channel duplication
- checkpoint replay recovers after delivery failure and resumes from last event
- plugin service registration and teardown of ACP runtime backend
Gateway e2e tests:
- spawn ACP with thread, exchange multi-turn prompts, unfocus
- gateway restart with persisted ACP DB and bindings, then continue same session
- concurrent ACP sessions in multiple threads have no cross-talk
- duplicate command retries (same idempotency key) do not create duplicate runs or replies
- stale-binding scenario yields explicit error and optional auto-clean behavior
## Risks and mitigations
- Duplicate deliveries during transition
- Mitigation: single destination resolver and idempotent event checkpoint
- Runtime process churn under load
- Mitigation: long-lived per-session owners + concurrency caps + backoff
- Plugin absent or misconfigured
- Mitigation: explicit operator-facing error and fail-closed ACP routing (no implicit fallback to normal session path)
- Config confusion between subagent and ACP gates
- Mitigation: explicit ACP keys and command feedback that includes effective policy source
- Control-plane store corruption or migration bugs
- Mitigation: WAL mode, backup/restore hooks, migration smoke tests, and read-only fallback diagnostics
- Actor deadlocks or mailbox starvation
- Mitigation: watchdog timers, actor health probes, and bounded mailbox depth with rejection telemetry
## Acceptance checklist
- ACP session spawn can create or bind a thread in a supported channel adapter (currently Discord)
- all thread messages route to bound ACP session only
- ACP outputs appear in the same thread identity with streaming or batches
- no duplicate output in parent channel for bound turns
- spawn+bind+initial enqueue are atomic in persistent store
- ACP command retries are idempotent and do not duplicate runs or outputs
- cancel, close, unfocus, archive, reset, and delete perform deterministic cleanup
- crash restart preserves the mapping and resumes multi-turn continuity
- concurrent thread-bound ACP sessions work independently
- ACP backend missing state produces clear actionable error
- stale bindings are detected and surfaced explicitly (with optional safe auto-clean)
- control-plane metrics and diagnostics are available for operators
- new unit, integration, and e2e coverage passes
## Addendum: targeted refactors for current implementation (status)
These are non-blocking follow-ups to keep the ACP path maintainable after the current feature set lands.
### 1) Centralize ACP dispatch policy evaluation (completed)
- implemented via shared ACP policy helpers in `src/acp/policy.ts`
- dispatch, ACP command lifecycle handlers, and ACP spawn path now consume shared policy logic
### 2) Split ACP command handler by subcommand domain (completed)
- `src/auto-reply/reply/commands-acp.ts` is now a thin router
- subcommand behavior is split into:
- `src/auto-reply/reply/commands-acp/lifecycle.ts`
- `src/auto-reply/reply/commands-acp/runtime-options.ts`
- `src/auto-reply/reply/commands-acp/diagnostics.ts`
- shared helpers in `src/auto-reply/reply/commands-acp/shared.ts`
### 3) Split ACP session manager by responsibility (completed)
- manager is split into:
- `src/acp/control-plane/manager.ts` (public facade + singleton)
- `src/acp/control-plane/manager.core.ts` (manager implementation)
- `src/acp/control-plane/manager.types.ts` (manager types/deps)
- `src/acp/control-plane/manager.utils.ts` (normalization + helper functions)
### 4) Optional acpx runtime adapter cleanup
- `extensions/acpx/src/runtime.ts` can be split into:
- process execution/supervision
- ndjson event parsing/normalization
- runtime API surface (`submit`, `cancel`, `close`, etc.)
- improves testability and makes backend behavior easier to audit

View File

@ -1,96 +0,0 @@
---
summary: "Holy grail refactor plan for one unified runtime streaming pipeline across main, subagent, and ACP"
owner: "onutc"
status: "draft"
last_updated: "2026-02-25"
title: "Unified Runtime Streaming Refactor Plan"
---
# Unified Runtime Streaming Refactor Plan
## Objective
Deliver one shared streaming pipeline for `main`, `subagent`, and `acp` so all runtimes get identical coalescing, chunking, delivery ordering, and crash recovery behavior.
## Why this exists
- Current behavior is split across multiple runtime-specific shaping paths.
- Formatting/coalescing bugs can be fixed in one path but remain in others.
- Delivery consistency, duplicate suppression, and recovery semantics are harder to reason about.
## Target architecture
Single pipeline, runtime-specific adapters:
1. Runtime adapters emit canonical events only.
2. Shared stream assembler coalesces and finalizes text/tool/status events.
3. Shared channel projector applies channel-specific chunking/formatting once.
4. Shared delivery ledger enforces idempotent send/replay semantics.
5. Outbound channel adapter executes sends and records delivery checkpoints.
Canonical event contract:
- `turn_started`
- `text_delta`
- `block_final`
- `tool_started`
- `tool_finished`
- `status`
- `turn_completed`
- `turn_failed`
- `turn_cancelled`
## Workstreams
### 1) Canonical streaming contract
- Define strict event schema + validation in core.
- Add adapter contract tests to guarantee each runtime emits compatible events.
- Reject malformed runtime events early and surface structured diagnostics.
### 2) Shared stream processor
- Replace runtime-specific coalescer/projector logic with one processor.
- Processor owns text delta buffering, idle flush, max-chunk splitting, and completion flush.
- Move ACP/main/subagent config resolution into one helper to prevent drift.
### 3) Shared channel projection
- Keep channel adapters dumb: accept finalized blocks and send.
- Move Discord-specific chunking quirks to channel projector only.
- Keep pipeline channel-agnostic before projection.
### 4) Delivery ledger + replay
- Add per-turn/per-chunk delivery IDs.
- Record checkpoints before and after physical send.
- On restart, replay pending chunks idempotently and avoid duplicates.
### 5) Migration and cutover
- Phase 1: shadow mode (new pipeline computes output but old path sends; compare).
- Phase 2: runtime-by-runtime cutover (`acp`, then `subagent`, then `main` or reverse by risk).
- Phase 3: delete legacy runtime-specific streaming code.
## Non-goals
- No changes to ACP policy/permissions model in this refactor.
- No channel-specific feature expansion outside projection compatibility fixes.
- No transport/backend redesign (acpx plugin contract remains as-is unless needed for event parity).
## Risks and mitigations
- Risk: behavioral regressions in existing main/subagent paths.
Mitigation: shadow mode diffing + adapter contract tests + channel e2e tests.
- Risk: duplicate sends during crash recovery.
Mitigation: durable delivery IDs + idempotent replay in delivery adapter.
- Risk: runtime adapters diverge again.
Mitigation: required shared contract test suite for all adapters.
## Acceptance criteria
- All runtimes pass shared streaming contract tests.
- Discord ACP/main/subagent produce equivalent spacing/chunking behavior for tiny deltas.
- Crash/restart replay sends no duplicate chunk for the same delivery ID.
- Legacy ACP projector/coalescer path is removed.
- Streaming config resolution is shared and runtime-independent.

View File

@ -1,232 +0,0 @@
---
summary: "Plan: isolate browser act:evaluate from Playwright queue using CDP, with end-to-end deadlines and safer ref resolution"
read_when:
- Working on browser `act:evaluate` timeout, abort, or queue blocking issues
- Planning CDP based isolation for evaluate execution
owner: "openclaw"
status: "draft"
last_updated: "2026-02-10"
title: "Browser Evaluate CDP Refactor"
---
# Browser Evaluate CDP Refactor Plan
## Context
`act:evaluate` executes user provided JavaScript in the page. Today it runs via Playwright
(`page.evaluate` or `locator.evaluate`). Playwright serializes CDP commands per page, so a
stuck or long running evaluate can block the page command queue and make every later action
on that tab look "stuck".
PR #13498 adds a pragmatic safety net (bounded evaluate, abort propagation, and best-effort
recovery). This document describes a larger refactor that makes `act:evaluate` inherently
isolated from Playwright so a stuck evaluate cannot wedge normal Playwright operations.
## Goals
- `act:evaluate` cannot permanently block later browser actions on the same tab.
- Timeouts are single source of truth end to end so a caller can rely on a budget.
- Abort and timeout are treated the same way across HTTP and in-process dispatch.
- Element targeting for evaluate is supported without switching everything off Playwright.
- Maintain backward compatibility for existing callers and payloads.
## Non-goals
- Replace all browser actions (click, type, wait, etc.) with CDP implementations.
- Remove the existing safety net introduced in PR #13498 (it remains a useful fallback).
- Introduce new unsafe capabilities beyond the existing `browser.evaluateEnabled` gate.
- Add process isolation (worker process/thread) for evaluate. If we still see hard to recover
stuck states after this refactor, that is a follow-up idea.
## Current Architecture (Why It Gets Stuck)
At a high level:
- Callers send `act:evaluate` to the browser control service.
- The route handler calls into Playwright to execute the JavaScript.
- Playwright serializes page commands, so an evaluate that never finishes blocks the queue.
- A stuck queue means later click/type/wait operations on the tab can appear to hang.
## Proposed Architecture
### 1. Deadline Propagation
Introduce a single budget concept and derive everything from it:
- Caller sets `timeoutMs` (or a deadline in the future).
- The outer request timeout, route handler logic, and the execution budget inside the page
all use the same budget, with small headroom where needed for serialization overhead.
- Abort is propagated as an `AbortSignal` everywhere so cancellation is consistent.
Implementation direction:
- Add a small helper (for example `createBudget({ timeoutMs, signal })`) that returns:
- `signal`: the linked AbortSignal
- `deadlineAtMs`: absolute deadline
- `remainingMs()`: remaining budget for child operations
- Use this helper in:
- `src/browser/client-fetch.ts` (HTTP and in-process dispatch)
- `src/node-host/runner.ts` (proxy path)
- browser action implementations (Playwright and CDP)
### 2. Separate Evaluate Engine (CDP Path)
Add a CDP based evaluate implementation that does not share Playwright's per page command
queue. The key property is that the evaluate transport is a separate WebSocket connection
and a separate CDP session attached to the target.
Implementation direction:
- New module, for example `src/browser/cdp-evaluate.ts`, that:
- Connects to the configured CDP endpoint (browser level socket).
- Uses `Target.attachToTarget({ targetId, flatten: true })` to get a `sessionId`.
- Runs either:
- `Runtime.evaluate` for page level evaluate, or
- `DOM.resolveNode` plus `Runtime.callFunctionOn` for element evaluate.
- On timeout or abort:
- Sends `Runtime.terminateExecution` best-effort for the session.
- Closes the WebSocket and returns a clear error.
Notes:
- This still executes JavaScript in the page, so termination can have side effects. The win
is that it does not wedge the Playwright queue, and it is cancelable at the transport
layer by killing the CDP session.
### 3. Ref Story (Element Targeting Without A Full Rewrite)
The hard part is element targeting. CDP needs a DOM handle or `backendDOMNodeId`, while
today most browser actions use Playwright locators based on refs from snapshots.
Recommended approach: keep existing refs, but attach an optional CDP resolvable id.
#### 3.1 Extend Stored Ref Info
Extend the stored role ref metadata to optionally include a CDP id:
- Today: `{ role, name, nth }`
- Proposed: `{ role, name, nth, backendDOMNodeId?: number }`
This keeps all existing Playwright based actions working and allows CDP evaluate to accept
the same `ref` value when the `backendDOMNodeId` is available.
#### 3.2 Populate backendDOMNodeId At Snapshot Time
When producing a role snapshot:
1. Generate the existing role ref map as today (role, name, nth).
2. Fetch the AX tree via CDP (`Accessibility.getFullAXTree`) and compute a parallel map of
`(role, name, nth) -> backendDOMNodeId` using the same duplicate handling rules.
3. Merge the id back into the stored ref info for the current tab.
If mapping fails for a ref, leave `backendDOMNodeId` undefined. This makes the feature
best-effort and safe to roll out.
#### 3.3 Evaluate Behavior With Ref
In `act:evaluate`:
- If `ref` is present and has `backendDOMNodeId`, run element evaluate via CDP.
- If `ref` is present but has no `backendDOMNodeId`, fall back to the Playwright path (with
the safety net).
Optional escape hatch:
- Extend the request shape to accept `backendDOMNodeId` directly for advanced callers (and
for debugging), while keeping `ref` as the primary interface.
### 4. Keep A Last Resort Recovery Path
Even with CDP evaluate, there are other ways to wedge a tab or a connection. Keep the
existing recovery mechanisms (terminate execution + disconnect Playwright) as a last resort
for:
- legacy callers
- environments where CDP attach is blocked
- unexpected Playwright edge cases
## Implementation Plan (Single Iteration)
### Deliverables
- A CDP based evaluate engine that runs outside the Playwright per-page command queue.
- A single end-to-end timeout/abort budget used consistently by callers and handlers.
- Ref metadata that can optionally carry `backendDOMNodeId` for element evaluate.
- `act:evaluate` prefers the CDP engine when possible and falls back to Playwright when not.
- Tests that prove a stuck evaluate does not wedge later actions.
- Logs/metrics that make failures and fallbacks visible.
### Implementation Checklist
1. Add a shared "budget" helper to link `timeoutMs` + upstream `AbortSignal` into:
- a single `AbortSignal`
- an absolute deadline
- a `remainingMs()` helper for downstream operations
2. Update all caller paths to use that helper so `timeoutMs` means the same thing everywhere:
- `src/browser/client-fetch.ts` (HTTP and in-process dispatch)
- `src/node-host/runner.ts` (node proxy path)
- CLI wrappers that call `/act` (add `--timeout-ms` to `browser evaluate`)
3. Implement `src/browser/cdp-evaluate.ts`:
- connect to the browser-level CDP socket
- `Target.attachToTarget` to get a `sessionId`
- run `Runtime.evaluate` for page evaluate
- run `DOM.resolveNode` + `Runtime.callFunctionOn` for element evaluate
- on timeout/abort: best-effort `Runtime.terminateExecution` then close the socket
4. Extend stored role ref metadata to optionally include `backendDOMNodeId`:
- keep existing `{ role, name, nth }` behavior for Playwright actions
- add `backendDOMNodeId?: number` for CDP element targeting
5. Populate `backendDOMNodeId` during snapshot creation (best-effort):
- fetch AX tree via CDP (`Accessibility.getFullAXTree`)
- compute `(role, name, nth) -> backendDOMNodeId` and merge into the stored ref map
- if mapping is ambiguous or missing, leave the id undefined
6. Update `act:evaluate` routing:
- if no `ref`: always use CDP evaluate
- if `ref` resolves to a `backendDOMNodeId`: use CDP element evaluate
- otherwise: fall back to Playwright evaluate (still bounded and abortable)
7. Keep the existing "last resort" recovery path as a fallback, not the default path.
8. Add tests:
- stuck evaluate times out within budget and the next click/type succeeds
- abort cancels evaluate (client disconnect or timeout) and unblocks subsequent actions
- mapping failures cleanly fall back to Playwright
9. Add observability:
- evaluate duration and timeout counters
- terminateExecution usage
- fallback rate (CDP -> Playwright) and reasons
### Acceptance Criteria
- A deliberately hung `act:evaluate` returns within the caller budget and does not wedge the
tab for later actions.
- `timeoutMs` behaves consistently across CLI, agent tool, node proxy, and in-process calls.
- If `ref` can be mapped to `backendDOMNodeId`, element evaluate uses CDP; otherwise the
fallback path is still bounded and recoverable.
## Testing Plan
- Unit tests:
- `(role, name, nth)` matching logic between role refs and AX tree nodes.
- Budget helper behavior (headroom, remaining time math).
- Integration tests:
- CDP evaluate timeout returns within budget and does not block the next action.
- Abort cancels evaluate and triggers termination best-effort.
- Contract tests:
- Ensure `BrowserActRequest` and `BrowserActResponse` remain compatible.
## Risks And Mitigations
- Mapping is imperfect:
- Mitigation: best-effort mapping, fallback to Playwright evaluate, and add debug tooling.
- `Runtime.terminateExecution` has side effects:
- Mitigation: only use on timeout/abort and document the behavior in errors.
- Extra overhead:
- Mitigation: only fetch AX tree when snapshots are requested, cache per target, and keep
CDP session short lived.
- Extension relay limitations:
- Mitigation: use browser level attach APIs when per page sockets are not available, and
keep the current Playwright path as fallback.
## Open Questions
- Should the new engine be configurable as `playwright`, `cdp`, or `auto`?
- Do we want to expose a new "nodeRef" format for advanced users, or keep `ref` only?
- How should frame snapshots and selector scoped snapshots participate in AX mapping?

View File

@ -1,337 +0,0 @@
---
summary: "Status and next steps for decoupling Discord gateway listeners from long-running agent turns with a Discord-specific inbound worker"
owner: "openclaw"
status: "in_progress"
last_updated: "2026-03-05"
title: "Discord Async Inbound Worker Plan"
---
# Discord Async Inbound Worker Plan
## Objective
Remove Discord listener timeout as a user-facing failure mode by making inbound Discord turns asynchronous:
1. Gateway listener accepts and normalizes inbound events quickly.
2. A Discord run queue stores serialized jobs keyed by the same ordering boundary we use today.
3. A worker executes the actual agent turn outside the Carbon listener lifetime.
4. Replies are delivered back to the originating channel or thread after the run completes.
This is the long-term fix for queued Discord runs timing out at `channels.discord.eventQueue.listenerTimeout` while the agent run itself is still making progress.
## Current status
This plan is partially implemented.
Already done:
- Discord listener timeout and Discord run timeout are now separate settings.
- Accepted inbound Discord turns are enqueued into `src/discord/monitor/inbound-worker.ts`.
- The worker now owns the long-running turn instead of the Carbon listener.
- Existing per-route ordering is preserved by queue key.
- Timeout regression coverage exists for the Discord worker path.
What this means in plain language:
- the production timeout bug is fixed
- the long-running turn no longer dies just because the Discord listener budget expires
- the worker architecture is not finished yet
What is still missing:
- `DiscordInboundJob` is still only partially normalized and still carries live runtime references
- command semantics (`stop`, `new`, `reset`, future session controls) are not yet fully worker-native
- worker observability and operator status are still minimal
- there is still no restart durability
## Why this exists
Current behavior ties the full agent turn to the listener lifetime:
- `src/discord/monitor/listeners.ts` applies the timeout and abort boundary.
- `src/discord/monitor/message-handler.ts` keeps the queued run inside that boundary.
- `src/discord/monitor/message-handler.process.ts` performs media loading, routing, dispatch, typing, draft streaming, and final reply delivery inline.
That architecture has two bad properties:
- long but healthy turns can be aborted by the listener watchdog
- users can see no reply even when the downstream runtime would have produced one
Raising the timeout helps but does not change the failure mode.
## Non-goals
- Do not redesign non-Discord channels in this pass.
- Do not broaden this into a generic all-channel worker framework in the first implementation.
- Do not extract a shared cross-channel inbound worker abstraction yet; only share low-level primitives when duplication is obvious.
- Do not add durable crash recovery in the first pass unless needed to land safely.
- Do not change route selection, binding semantics, or ACP policy in this plan.
## Current constraints
The current Discord processing path still depends on some live runtime objects that should not stay inside the long-term job payload:
- Carbon `Client`
- raw Discord event shapes
- in-memory guild history map
- thread binding manager callbacks
- live typing and draft stream state
We already moved execution onto a worker queue, but the normalization boundary is still incomplete. Right now the worker is "run later in the same process with some of the same live objects," not a fully data-only job boundary.
## Target architecture
### 1. Listener stage
`DiscordMessageListener` remains the ingress point, but its job becomes:
- run preflight and policy checks
- normalize accepted input into a serializable `DiscordInboundJob`
- enqueue the job into a per-session or per-channel async queue
- return immediately to Carbon once the enqueue succeeds
The listener should no longer own the end-to-end LLM turn lifetime.
### 2. Normalized job payload
Introduce a serializable job descriptor that contains only the data needed to run the turn later.
Minimum shape:
- route identity
- `agentId`
- `sessionKey`
- `accountId`
- `channel`
- delivery identity
- destination channel id
- reply target message id
- thread id if present
- sender identity
- sender id, label, username, tag
- channel context
- guild id
- channel name or slug
- thread metadata
- resolved system prompt override
- normalized message body
- base text
- effective message text
- attachment descriptors or resolved media references
- gating decisions
- mention requirement outcome
- command authorization outcome
- bound session or agent metadata if applicable
The job payload must not contain live Carbon objects or mutable closures.
Current implementation status:
- partially done
- `src/discord/monitor/inbound-job.ts` exists and defines the worker handoff
- the payload still contains live Discord runtime context and should be reduced further
### 3. Worker stage
Add a Discord-specific worker runner responsible for:
- reconstructing the turn context from `DiscordInboundJob`
- loading media and any additional channel metadata needed for the run
- dispatching the agent turn
- delivering final reply payloads
- updating status and diagnostics
Recommended location:
- `src/discord/monitor/inbound-worker.ts`
- `src/discord/monitor/inbound-job.ts`
### 4. Ordering model
Ordering must remain equivalent to today for a given route boundary.
Recommended key:
- use the same queue key logic as `resolveDiscordRunQueueKey(...)`
This preserves existing behavior:
- one bound agent conversation does not interleave with itself
- different Discord channels can still progress independently
### 5. Timeout model
After cutover, there are two separate timeout classes:
- listener timeout
- only covers normalization and enqueue
- should be short
- run timeout
- optional, worker-owned, explicit, and user-visible
- should not be inherited accidentally from Carbon listener settings
This removes the current accidental coupling between "Discord gateway listener stayed alive" and "agent run is healthy."
## Recommended implementation phases
### Phase 1: normalization boundary
- Status: partially implemented
- Done:
- extracted `buildDiscordInboundJob(...)`
- added worker handoff tests
- Remaining:
- make `DiscordInboundJob` plain data only
- move live runtime dependencies to worker-owned services instead of per-job payload
- stop rebuilding process context by stitching live listener refs back into the job
### Phase 2: in-memory worker queue
- Status: implemented
- Done:
- added `DiscordInboundWorkerQueue` keyed by resolved run queue key
- listener enqueues jobs instead of directly awaiting `processDiscordMessage(...)`
- worker executes jobs in-process, in memory only
This is the first functional cutover.
### Phase 3: process split
- Status: not started
- Move delivery, typing, and draft streaming ownership behind worker-facing adapters.
- Replace direct use of live preflight context with worker context reconstruction.
- Keep `processDiscordMessage(...)` temporarily as a facade if needed, then split it.
### Phase 4: command semantics
- Status: not started
Make sure native Discord commands still behave correctly when work is queued:
- `stop`
- `new`
- `reset`
- any future session-control commands
The worker queue must expose enough run state for commands to target the active or queued turn.
### Phase 5: observability and operator UX
- Status: not started
- emit queue depth and active worker counts into monitor status
- record enqueue time, start time, finish time, and timeout or cancellation reason
- surface worker-owned timeout or delivery failures clearly in logs
### Phase 6: optional durability follow-up
- Status: not started
Only after the in-memory version is stable:
- decide whether queued Discord jobs should survive gateway restart
- if yes, persist job descriptors and delivery checkpoints
- if no, document the explicit in-memory boundary
This should be a separate follow-up unless restart recovery is required to land.
## File impact
Current primary files:
- `src/discord/monitor/listeners.ts`
- `src/discord/monitor/message-handler.ts`
- `src/discord/monitor/message-handler.preflight.ts`
- `src/discord/monitor/message-handler.process.ts`
- `src/discord/monitor/status.ts`
Current worker files:
- `src/discord/monitor/inbound-job.ts`
- `src/discord/monitor/inbound-worker.ts`
- `src/discord/monitor/inbound-job.test.ts`
- `src/discord/monitor/message-handler.queue.test.ts`
Likely next touch points:
- `src/auto-reply/dispatch.ts`
- `src/discord/monitor/reply-delivery.ts`
- `src/discord/monitor/thread-bindings.ts`
- `src/discord/monitor/native-command.ts`
## Next step now
The next step is to make the worker boundary real instead of partial.
Do this next:
1. Move live runtime dependencies out of `DiscordInboundJob`
2. Keep those dependencies on the Discord worker instance instead
3. Reduce queued jobs to plain Discord-specific data:
- route identity
- delivery target
- sender info
- normalized message snapshot
- gating and binding decisions
4. Reconstruct worker execution context from that plain data inside the worker
In practice, that means:
- `client`
- `threadBindings`
- `guildHistories`
- `discordRestFetch`
- other mutable runtime-only handles
should stop living on each queued job and instead live on the worker itself or behind worker-owned adapters.
After that lands, the next follow-up should be command-state cleanup for `stop`, `new`, and `reset`.
## Testing plan
Keep the existing timeout repro coverage in:
- `src/discord/monitor/message-handler.queue.test.ts`
Add new tests for:
1. listener returns after enqueue without awaiting full turn
2. per-route ordering is preserved
3. different channels still run concurrently
4. replies are delivered to the original message destination
5. `stop` cancels the active worker-owned run
6. worker failure produces visible diagnostics without blocking later jobs
7. ACP-bound Discord channels still route correctly under worker execution
## Risks and mitigations
- Risk: command semantics drift from current synchronous behavior
Mitigation: land command-state plumbing in the same cutover, not later
- Risk: reply delivery loses thread or reply-to context
Mitigation: make delivery identity first-class in `DiscordInboundJob`
- Risk: duplicate sends during retries or queue restarts
Mitigation: keep first pass in-memory only, or add explicit delivery idempotency before persistence
- Risk: `message-handler.process.ts` becomes harder to reason about during migration
Mitigation: split into normalization, execution, and delivery helpers before or during worker cutover
## Acceptance criteria
The plan is complete when:
1. Discord listener timeout no longer aborts healthy long-running turns.
2. Listener lifetime and agent-turn lifetime are separate concepts in code.
3. Existing per-session ordering is preserved.
4. ACP-bound Discord channels work through the same worker path.
5. `stop` targets the worker-owned run instead of the old listener-owned call stack.
6. Timeout and delivery failures become explicit worker outcomes, not silent listener drops.
## Remaining landing strategy
Finish this in follow-up PRs:
1. make `DiscordInboundJob` plain-data only and move live runtime refs onto the worker
2. clean up command-state ownership for `stop`, `new`, and `reset`
3. add worker observability and operator status
4. decide whether durability is needed or explicitly document the in-memory boundary
This is still a bounded follow-up if kept Discord-only and if we continue to avoid a premature cross-channel worker abstraction.

View File

@ -1,126 +0,0 @@
---
summary: "Plan: Add OpenResponses /v1/responses endpoint and deprecate chat completions cleanly"
read_when:
- Designing or implementing `/v1/responses` gateway support
- Planning migration from Chat Completions compatibility
owner: "openclaw"
status: "draft"
last_updated: "2026-01-19"
title: "OpenResponses Gateway Plan"
---
# OpenResponses Gateway Integration Plan
## Context
OpenClaw Gateway currently exposes a minimal OpenAI-compatible Chat Completions endpoint at
`/v1/chat/completions` (see [OpenAI Chat Completions](/gateway/openai-http-api)).
Open Responses is an open inference standard based on the OpenAI Responses API. It is designed
for agentic workflows and uses item-based inputs plus semantic streaming events. The OpenResponses
spec defines `/v1/responses`, not `/v1/chat/completions`.
## Goals
- Add a `/v1/responses` endpoint that adheres to OpenResponses semantics.
- Keep Chat Completions as a compatibility layer that is easy to disable and eventually remove.
- Standardize validation and parsing with isolated, reusable schemas.
## Non-goals
- Full OpenResponses feature parity in the first pass (images, files, hosted tools).
- Replacing internal agent execution logic or tool orchestration.
- Changing the existing `/v1/chat/completions` behavior during the first phase.
## Research Summary
Sources: OpenResponses OpenAPI, OpenResponses specification site, and the Hugging Face blog post.
Key points extracted:
- `POST /v1/responses` accepts `CreateResponseBody` fields like `model`, `input` (string or
`ItemParam[]`), `instructions`, `tools`, `tool_choice`, `stream`, `max_output_tokens`, and
`max_tool_calls`.
- `ItemParam` is a discriminated union of:
- `message` items with roles `system`, `developer`, `user`, `assistant`
- `function_call` and `function_call_output`
- `reasoning`
- `item_reference`
- Successful responses return a `ResponseResource` with `object: "response"`, `status`, and
`output` items.
- Streaming uses semantic events such as:
- `response.created`, `response.in_progress`, `response.completed`, `response.failed`
- `response.output_item.added`, `response.output_item.done`
- `response.content_part.added`, `response.content_part.done`
- `response.output_text.delta`, `response.output_text.done`
- The spec requires:
- `Content-Type: text/event-stream`
- `event:` must match the JSON `type` field
- terminal event must be literal `[DONE]`
- Reasoning items may expose `content`, `encrypted_content`, and `summary`.
- HF examples include `OpenResponses-Version: latest` in requests (optional header).
## Proposed Architecture
- Add `src/gateway/open-responses.schema.ts` containing Zod schemas only (no gateway imports).
- Add `src/gateway/openresponses-http.ts` (or `open-responses-http.ts`) for `/v1/responses`.
- Keep `src/gateway/openai-http.ts` intact as a legacy compatibility adapter.
- Add config `gateway.http.endpoints.responses.enabled` (default `false`).
- Keep `gateway.http.endpoints.chatCompletions.enabled` independent; allow both endpoints to be
toggled separately.
- Emit a startup warning when Chat Completions is enabled to signal legacy status.
## Deprecation Path for Chat Completions
- Maintain strict module boundaries: no shared schema types between responses and chat completions.
- Make Chat Completions opt-in by config so it can be disabled without code changes.
- Update docs to label Chat Completions as legacy once `/v1/responses` is stable.
- Optional future step: map Chat Completions requests to the Responses handler for a simpler
removal path.
## Phase 1 Support Subset
- Accept `input` as string or `ItemParam[]` with message roles and `function_call_output`.
- Extract system and developer messages into `extraSystemPrompt`.
- Use the most recent `user` or `function_call_output` as the current message for agent runs.
- Reject unsupported content parts (image/file) with `invalid_request_error`.
- Return a single assistant message with `output_text` content.
- Return `usage` with zeroed values until token accounting is wired.
## Validation Strategy (No SDK)
- Implement Zod schemas for the supported subset of:
- `CreateResponseBody`
- `ItemParam` + message content part unions
- `ResponseResource`
- Streaming event shapes used by the gateway
- Keep schemas in a single, isolated module to avoid drift and allow future codegen.
## Streaming Implementation (Phase 1)
- SSE lines with both `event:` and `data:`.
- Required sequence (minimum viable):
- `response.created`
- `response.output_item.added`
- `response.content_part.added`
- `response.output_text.delta` (repeat as needed)
- `response.output_text.done`
- `response.content_part.done`
- `response.completed`
- `[DONE]`
## Tests and Verification Plan
- Add e2e coverage for `/v1/responses`:
- Auth required
- Non-stream response shape
- Stream event ordering and `[DONE]`
- Session routing with headers and `user`
- Keep `src/gateway/openai-http.test.ts` unchanged.
- Manual: curl to `/v1/responses` with `stream: true` and verify event ordering and terminal
`[DONE]`.
## Doc Updates (Follow-up)
- Add a new docs page for `/v1/responses` usage and examples.
- Update `/gateway/openai-http-api` with a legacy note and pointer to `/v1/responses`.

View File

@ -1,195 +0,0 @@
---
summary: "Production plan for reliable interactive process supervision (PTY + non-PTY) with explicit ownership, unified lifecycle, and deterministic cleanup"
read_when:
- Working on exec/process lifecycle ownership and cleanup
- Debugging PTY and non-PTY supervision behavior
owner: "openclaw"
status: "in-progress"
last_updated: "2026-02-15"
title: "PTY and Process Supervision Plan"
---
# PTY and Process Supervision Plan
## 1. Problem and goal
We need one reliable lifecycle for long-running command execution across:
- `exec` foreground runs
- `exec` background runs
- `process` follow up actions (`poll`, `log`, `send-keys`, `paste`, `submit`, `kill`, `remove`)
- CLI agent runner subprocesses
The goal is not just to support PTY. The goal is predictable ownership, cancellation, timeout, and cleanup with no unsafe process-matching heuristics.
## 2. Scope and boundaries
- Keep implementation internal in `src/process/supervisor`.
- Do not create a new package for this.
- Keep current behavior compatibility where practical.
- Do not broaden scope to terminal replay or tmux style session persistence.
## 3. Implemented in this branch
### Supervisor baseline already present
- Supervisor module is in place under `src/process/supervisor/*`.
- Exec runtime and CLI runner are already routed through supervisor spawn and wait.
- Registry finalization is idempotent.
### This pass completed
1. Explicit PTY command contract
- `SpawnInput` is now a discriminated union in `src/process/supervisor/types.ts`.
- PTY runs require `ptyCommand` instead of reusing generic `argv`.
- Supervisor no longer rebuilds PTY command strings from argv joins in `src/process/supervisor/supervisor.ts`.
- Exec runtime now passes `ptyCommand` directly in `src/agents/bash-tools.exec-runtime.ts`.
2. Process layer type decoupling
- Supervisor types no longer import `SessionStdin` from agents.
- Process local stdin contract lives in `src/process/supervisor/types.ts` (`ManagedRunStdin`).
- Adapters now depend only on process level types:
- `src/process/supervisor/adapters/child.ts`
- `src/process/supervisor/adapters/pty.ts`
3. Process tool lifecycle ownership improvement
- `src/agents/bash-tools.process.ts` now requests cancellation through supervisor first.
- `process kill/remove` now use process-tree fallback termination when supervisor lookup misses.
- `remove` keeps deterministic remove behavior by dropping running session entries immediately after termination is requested.
4. Single source watchdog defaults
- Added shared defaults in `src/agents/cli-watchdog-defaults.ts`.
- `src/agents/cli-backends.ts` consumes the shared defaults.
- `src/agents/cli-runner/reliability.ts` consumes the same shared defaults.
5. Dead helper cleanup
- Removed unused `killSession` helper path from `src/agents/bash-tools.shared.ts`.
6. Direct supervisor path tests added
- Added `src/agents/bash-tools.process.supervisor.test.ts` to cover kill and remove routing through supervisor cancellation.
7. Reliability gap fixes completed
- `src/agents/bash-tools.process.ts` now falls back to real OS-level process termination when supervisor lookup misses.
- `src/process/supervisor/adapters/child.ts` now uses process-tree termination semantics for default cancel/timeout kill paths.
- Added shared process-tree utility in `src/process/kill-tree.ts`.
8. PTY contract edge-case coverage added
- Added `src/process/supervisor/supervisor.pty-command.test.ts` for verbatim PTY command forwarding and empty-command rejection.
- Added `src/process/supervisor/adapters/child.test.ts` for process-tree kill behavior in child adapter cancellation.
## 4. Remaining gaps and decisions
### Reliability status
The two required reliability gaps for this pass are now closed:
- `process kill/remove` now has a real OS termination fallback when supervisor lookup misses.
- child cancel/timeout now uses process-tree kill semantics for default kill path.
- Regression tests were added for both behaviors.
### Durability and startup reconciliation
Restart behavior is now explicitly defined as in-memory lifecycle only.
- `reconcileOrphans()` remains a no-op in `src/process/supervisor/supervisor.ts` by design.
- Active runs are not recovered after process restart.
- This boundary is intentional for this implementation pass to avoid partial persistence risks.
### Maintainability follow-ups
1. `runExecProcess` in `src/agents/bash-tools.exec-runtime.ts` still handles multiple responsibilities and can be split into focused helpers in a follow-up.
## 5. Implementation plan
The implementation pass for required reliability and contract items is complete.
Completed:
- `process kill/remove` fallback real termination
- process-tree cancellation for child adapter default kill path
- regression tests for fallback kill and child adapter kill path
- PTY command edge-case tests under explicit `ptyCommand`
- explicit in-memory restart boundary with `reconcileOrphans()` no-op by design
Optional follow-up:
- split `runExecProcess` into focused helpers with no behavior drift
## 6. File map
### Process supervisor
- `src/process/supervisor/types.ts` updated with discriminated spawn input and process local stdin contract.
- `src/process/supervisor/supervisor.ts` updated to use explicit `ptyCommand`.
- `src/process/supervisor/adapters/child.ts` and `src/process/supervisor/adapters/pty.ts` decoupled from agent types.
- `src/process/supervisor/registry.ts` idempotent finalize unchanged and retained.
### Exec and process integration
- `src/agents/bash-tools.exec-runtime.ts` updated to pass PTY command explicitly and keep fallback path.
- `src/agents/bash-tools.process.ts` updated to cancel via supervisor with real process-tree fallback termination.
- `src/agents/bash-tools.shared.ts` removed direct kill helper path.
### CLI reliability
- `src/agents/cli-watchdog-defaults.ts` added as shared baseline.
- `src/agents/cli-backends.ts` and `src/agents/cli-runner/reliability.ts` now consume same defaults.
## 7. Validation run in this pass
Unit tests:
- `pnpm vitest src/process/supervisor/registry.test.ts`
- `pnpm vitest src/process/supervisor/supervisor.test.ts`
- `pnpm vitest src/process/supervisor/supervisor.pty-command.test.ts`
- `pnpm vitest src/process/supervisor/adapters/child.test.ts`
- `pnpm vitest src/agents/cli-backends.test.ts`
- `pnpm vitest src/agents/bash-tools.exec.pty-cleanup.test.ts`
- `pnpm vitest src/agents/bash-tools.process.poll-timeout.test.ts`
- `pnpm vitest src/agents/bash-tools.process.supervisor.test.ts`
- `pnpm vitest src/process/exec.test.ts`
E2E targets:
- `pnpm vitest src/agents/cli-runner.test.ts`
- `pnpm vitest run src/agents/bash-tools.exec.pty-fallback.test.ts src/agents/bash-tools.exec.background-abort.test.ts src/agents/bash-tools.process.send-keys.test.ts`
Typecheck note:
- Use `pnpm build` (and `pnpm check` for full lint/docs gate) in this repo. Older notes that mention `pnpm tsgo` are obsolete.
## 8. Operational guarantees preserved
- Exec env hardening behavior is unchanged.
- Approval and allowlist flow is unchanged.
- Output sanitization and output caps are unchanged.
- PTY adapter still guarantees wait settlement on forced kill and listener disposal.
## 9. Definition of done
1. Supervisor is lifecycle owner for managed runs.
2. PTY spawn uses explicit command contract with no argv reconstruction.
3. Process layer has no type dependency on agent layer for supervisor stdin contracts.
4. Watchdog defaults are single source.
5. Targeted unit and e2e tests remain green.
6. Restart durability boundary is explicitly documented or fully implemented.
## 10. Summary
The branch now has a coherent and safer supervision shape:
- explicit PTY contract
- cleaner process layering
- supervisor driven cancellation path for process operations
- real fallback termination when supervisor lookup misses
- process-tree cancellation for child-run default kill paths
- unified watchdog defaults
- explicit in-memory restart boundary (no orphan reconciliation across restart in this pass)

View File

@ -1,226 +0,0 @@
---
summary: "Channel agnostic session binding architecture and iteration 1 delivery scope"
read_when:
- Refactoring channel-agnostic session routing and bindings
- Investigating duplicate, stale, or missing session delivery across channels
owner: "onutc"
status: "in-progress"
last_updated: "2026-02-21"
title: "Session Binding Channel Agnostic Plan"
---
# Session Binding Channel Agnostic Plan
## Overview
This document defines the long term channel agnostic session binding model and the concrete scope for the next implementation iteration.
Goal:
- make subagent bound session routing a core capability
- keep channel specific behavior in adapters
- avoid regressions in normal Discord behavior
## Why this exists
Current behavior mixes:
- completion content policy
- destination routing policy
- Discord specific details
This caused edge cases such as:
- duplicate main and thread delivery under concurrent runs
- stale token usage on reused binding managers
- missing activity accounting for webhook sends
## Iteration 1 scope
This iteration is intentionally limited.
### 1. Add channel agnostic core interfaces
Add core types and service interfaces for bindings and routing.
Proposed core types:
```ts
export type BindingTargetKind = "subagent" | "session";
export type BindingStatus = "active" | "ending" | "ended";
export type ConversationRef = {
channel: string;
accountId: string;
conversationId: string;
parentConversationId?: string;
};
export type SessionBindingRecord = {
bindingId: string;
targetSessionKey: string;
targetKind: BindingTargetKind;
conversation: ConversationRef;
status: BindingStatus;
boundAt: number;
expiresAt?: number;
metadata?: Record<string, unknown>;
};
```
Core service contract:
```ts
export interface SessionBindingService {
bind(input: {
targetSessionKey: string;
targetKind: BindingTargetKind;
conversation: ConversationRef;
metadata?: Record<string, unknown>;
ttlMs?: number;
}): Promise<SessionBindingRecord>;
listBySession(targetSessionKey: string): SessionBindingRecord[];
resolveByConversation(ref: ConversationRef): SessionBindingRecord | null;
touch(bindingId: string, at?: number): void;
unbind(input: {
bindingId?: string;
targetSessionKey?: string;
reason: string;
}): Promise<SessionBindingRecord[]>;
}
```
### 2. Add one core delivery router for subagent completions
Add a single destination resolution path for completion events.
Router contract:
```ts
export interface BoundDeliveryRouter {
resolveDestination(input: {
eventKind: "task_completion";
targetSessionKey: string;
requester?: ConversationRef;
failClosed: boolean;
}): {
binding: SessionBindingRecord | null;
mode: "bound" | "fallback";
reason: string;
};
}
```
For this iteration:
- only `task_completion` is routed through this new path
- existing paths for other event kinds remain as-is
### 3. Keep Discord as adapter
Discord remains the first adapter implementation.
Adapter responsibilities:
- create/reuse thread conversations
- send bound messages via webhook or channel send
- validate thread state (archived/deleted)
- map adapter metadata (webhook identity, thread ids)
### 4. Fix currently known correctness issues
Required in this iteration:
- refresh token usage when reusing existing thread binding manager
- record outbound activity for webhook based Discord sends
- stop the implicit main-channel fallback when a bound thread destination is selected for session-mode completion
### 5. Preserve current runtime safety defaults
No behavior change for users with thread bound spawn disabled.
Defaults stay:
- `channels.discord.threadBindings.spawnSubagentSessions = false`
Result:
- normal Discord users stay on current behavior
- new core path affects only bound session completion routing where enabled
## Not in iteration 1
Explicitly deferred:
- ACP binding targets (`targetKind: "acp"`)
- new channel adapters beyond Discord
- global replacement of all delivery paths (`spawn_ack`, future `subagent_message`)
- protocol level changes
- store migration/versioning redesign for all binding persistence
Notes on ACP:
- interface design keeps room for ACP
- ACP implementation is not started in this iteration
## Routing invariants
These invariants are mandatory for iteration 1.
- destination selection and content generation are separate steps
- if session mode completion resolves to an active bound destination, delivery must target that destination
- no hidden reroute from bound destination to main channel
- fallback behavior must be explicit and observable
## Compatibility and rollout
Compatibility target:
- no regression for users with thread bound spawning off
- no change to non-Discord channels in this iteration
Rollout:
1. Land interfaces and router behind current feature gates.
2. Route Discord completion mode bound deliveries through router.
3. Keep legacy path for non-bound flows.
4. Verify with targeted tests and canary runtime logs.
## Tests required in iteration 1
Unit and integration coverage required:
- manager token rotation uses latest token after manager reuse
- webhook sends update channel activity timestamps
- two active bound sessions in same requester channel do not duplicate to main channel
- completion for bound session mode run resolves to thread destination only
- disabled spawn flag keeps legacy behavior unchanged
## Proposed implementation files
Core:
- `src/infra/outbound/session-binding-service.ts` (new)
- `src/infra/outbound/bound-delivery-router.ts` (new)
- `src/agents/subagent-announce.ts` (completion destination resolution integration)
Discord adapter and runtime:
- `src/discord/monitor/thread-bindings.manager.ts`
- `src/discord/monitor/reply-delivery.ts`
- `src/discord/send.outbound.ts`
Tests:
- `src/discord/monitor/provider*.test.ts`
- `src/discord/monitor/reply-delivery.test.ts`
- `src/agents/subagent-announce.format.test.ts`
## Done criteria for iteration 1
- core interfaces exist and are wired for completion routing
- correctness fixes above are merged with tests
- no main and thread duplicate completion delivery in session mode bound runs
- no behavior change for disabled bound spawn deployments
- ACP remains explicitly deferred

View File

@ -1,89 +0,0 @@
---
summary: "Proposal: long-term command authorization model for ACP-bound conversations"
read_when:
- Designing native command auth behavior in Telegram/Discord ACP-bound channels/topics
title: "ACP Bound Command Authorization (Proposal)"
---
# ACP Bound Command Authorization (Proposal)
Status: Proposed, **not implemented yet**.
This document describes a long-term authorization model for native commands in
ACP-bound conversations. It is an experiments proposal and does not replace
current production behavior.
For implemented behavior, read source and tests in:
- `src/telegram/bot-native-commands.ts`
- `src/discord/monitor/native-command.ts`
- `src/auto-reply/reply/commands-core.ts`
## Problem
Today we have command-specific checks (for example `/new` and `/reset`) that
need to work inside ACP-bound channels/topics even when allowlists are empty.
This solves immediate UX pain, but command-name-based exceptions do not scale.
## Long-term shape
Move command authorization from ad-hoc handler logic to command metadata plus a
shared policy evaluator.
### 1) Add auth policy metadata to command definitions
Each command definition should declare an auth policy. Example shape:
```ts
type CommandAuthPolicy =
| { mode: "owner_or_allowlist" } // default, current strict behavior
| { mode: "bound_acp_or_owner_or_allowlist" } // allow in explicitly bound ACP conversations
| { mode: "owner_only" };
```
`/new` and `/reset` would use `bound_acp_or_owner_or_allowlist`.
Most other commands would remain `owner_or_allowlist`.
### 2) Share one evaluator across channels
Introduce one helper that evaluates command auth using:
- command policy metadata
- sender authorization state
- resolved conversation binding state
Both Telegram and Discord native handlers should call the same helper to avoid
behavior drift.
### 3) Use binding-match as the bypass boundary
When policy allows bound ACP bypass, authorize only if a configured binding
match was resolved for the current conversation (not just because current
session key looks ACP-like).
This keeps the boundary explicit and minimizes accidental widening.
## Why this is better
- Scales to future commands without adding more command-name conditionals.
- Keeps behavior consistent across channels.
- Preserves current security model by requiring explicit binding match.
- Keeps allowlists optional hardening instead of a universal requirement.
## Rollout plan (future)
1. Add command auth policy field to command registry types and command data.
2. Implement shared evaluator and migrate Telegram + Discord native handlers.
3. Move `/new` and `/reset` to metadata-driven policy.
4. Add tests per policy mode and channel surface.
## Non-goals
- This proposal does not change ACP session lifecycle behavior.
- This proposal does not require allowlists for all ACP-bound commands.
- This proposal does not change existing route binding semantics.
## Note
This proposal is intentionally additive and does not delete or replace existing
experiments documents.

View File

@ -1,36 +0,0 @@
---
summary: "Exploration: model config, auth profiles, and fallback behavior"
read_when:
- Exploring future model selection + auth profile ideas
title: "Model Config Exploration"
---
# Model Config (Exploration)
This document captures **ideas** for future model configuration. It is not a
shipping spec. For current behavior, see:
- [Models](/concepts/models)
- [Model failover](/concepts/model-failover)
- [OAuth + profiles](/concepts/oauth)
## Motivation
Operators want:
- Multiple auth profiles per provider (personal vs work).
- Simple `/model` selection with predictable fallbacks.
- Clear separation between text models and image-capable models.
## Possible direction (high level)
- Keep model selection simple: `provider/model` with optional aliases.
- Let providers have multiple auth profiles, with an explicit order.
- Use a global fallback list so all sessions fail over consistently.
- Only override image routing when explicitly configured.
## Open questions
- Should profile rotation be per-provider or per-model?
- How should the UI surface profile selection for a session?
- What is the safest migration path from legacy config keys?

View File

@ -1,228 +0,0 @@
---
summary: "Research notes: offline memory system for Clawd workspaces (Markdown source-of-truth + derived index)"
read_when:
- Designing workspace memory (~/.openclaw/workspace) beyond daily Markdown logs
- Deciding: standalone CLI vs deep OpenClaw integration
- Adding offline recall + reflection (retain/recall/reflect)
title: "Workspace Memory Research"
---
# Workspace Memory v2 (offline): research notes
Target: Clawd-style workspace (`agents.defaults.workspace`, default `~/.openclaw/workspace`) where “memory” is stored as one Markdown file per day (`memory/YYYY-MM-DD.md`) plus a small set of stable files (e.g. `memory.md`, `SOUL.md`).
This doc proposes an **offline-first** memory architecture that keeps Markdown as the canonical, reviewable source of truth, but adds **structured recall** (search, entity summaries, confidence updates) via a derived index.
## Why change?
The current setup (one file per day) is excellent for:
- “append-only” journaling
- human editing
- git-backed durability + auditability
- low-friction capture (“just write it down”)
It's weak for:
- high-recall retrieval (“what did we decide about X?”, “last time we tried Y?”)
- entity-centric answers (“tell me about Alice / The Castle / warelay”) without rereading many files
- opinion/preference stability (and evidence when it changes)
- time constraints (“what was true during Nov 2025?”) and conflict resolution
## Design goals
- **Offline**: works without network; can run on laptop/Castle; no cloud dependency.
- **Explainable**: retrieved items should be attributable (file + location) and separable from inference.
- **Low ceremony**: daily logging stays Markdown, no heavy schema work.
- **Incremental**: v1 is useful with FTS only; semantic/vector and graphs are optional upgrades.
- **Agent-friendly**: makes “recall within token budgets” easy (return small bundles of facts).
## North star model (Hindsight × Letta)
Two pieces to blend:
1. **Letta/MemGPT-style control loop**
- keep a small “core” always in context (persona + key user facts)
- everything else is out-of-context and retrieved via tools
- memory writes are explicit tool calls (append/replace/insert), persisted, then re-injected next turn
2. **Hindsight-style memory substrate**
- separate what's observed vs what's believed vs what's summarized
- support retain/recall/reflect
- confidence-bearing opinions that can evolve with evidence
- entity-aware retrieval + temporal queries (even without full knowledge graphs)
## Proposed architecture (Markdown source-of-truth + derived index)
### Canonical store (git-friendly)
Keep `~/.openclaw/workspace` as canonical human-readable memory.
Suggested workspace layout:
```
~/.openclaw/workspace/
memory.md # small: durable facts + preferences (core-ish)
memory/
YYYY-MM-DD.md # daily log (append; narrative)
bank/ # “typed” memory pages (stable, reviewable)
world.md # objective facts about the world
experience.md # what the agent did (first-person)
opinions.md # subjective prefs/judgments + confidence + evidence pointers
entities/
Peter.md
The-Castle.md
warelay.md
...
```
Notes:
- **Daily log stays daily log**. No need to turn it into JSON.
- The `bank/` files are **curated**, produced by reflection jobs, and can still be edited by hand.
- `memory.md` remains “small + core-ish”: the things you want Clawd to see every session.
### Derived store (machine recall)
Add a derived index under the workspace (not necessarily git tracked):
```
~/.openclaw/workspace/.memory/index.sqlite
```
Back it with:
- SQLite schema for facts + entity links + opinion metadata
- SQLite **FTS5** for lexical recall (fast, tiny, offline)
- optional embeddings table for semantic recall (still offline)
The index is always **rebuildable from Markdown**.
## Retain / Recall / Reflect (operational loop)
### Retain: normalize daily logs into “facts”
Hindsights key insight that matters here: store **narrative, self-contained facts**, not tiny snippets.
Practical rule for `memory/YYYY-MM-DD.md`:
- at end of day (or during), add a `## Retain` section with 2–5 bullets that are:
- narrative (cross-turn context preserved)
- self-contained (standalone makes sense later)
- tagged with type + entity mentions
Example:
```
## Retain
- W @Peter: Currently in Marrakech (Nov 27–Dec 1, 2025) for Andy's birthday.
- B @warelay: I fixed the Baileys WS crash by wrapping connection.update handlers in try/catch (see memory/2025-11-27.md).
- O(c=0.95) @Peter: Prefers concise replies (<1500 chars) on WhatsApp; long content goes into files.
```
Minimal parsing:
- Type prefix: `W` (world), `B` (experience/biographical), `O` (opinion), `S` (observation/summary; usually generated)
- Entities: `@Peter`, `@warelay`, etc (slugs map to `bank/entities/*.md`)
- Opinion confidence: `O(c=0.0..1.0)` optional
If you don't want authors to think about it: the reflect job can infer these bullets from the rest of the log, but having an explicit `## Retain` section is the easiest "quality lever".
### Recall: queries over the derived index
Recall should support:
- **lexical**: “find exact terms / names / commands” (FTS5)
- **entity**: “tell me about X” (entity pages + entity-linked facts)
- **temporal**: “what happened around Nov 27” / “since last week”
- **opinion**: “what does Peter prefer?” (with confidence + evidence)
Return format should be agent-friendly and cite sources:
- `kind` (`world|experience|opinion|observation`)
- `timestamp` (source day, or extracted time range if present)
- `entities` (`["Peter","warelay"]`)
- `content` (the narrative fact)
- `source` (`memory/2025-11-27.md#L12` etc)
### Reflect: produce stable pages + update beliefs
Reflection is a scheduled job (daily or heartbeat `ultrathink`) that:
- updates `bank/entities/*.md` from recent facts (entity summaries)
- updates `bank/opinions.md` confidence based on reinforcement/contradiction
- optionally proposes edits to `memory.md` (“core-ish” durable facts)
Opinion evolution (simple, explainable):
- each opinion has:
- statement
- confidence `c ∈ [0,1]`
- last_updated
- evidence links (supporting + contradicting fact IDs)
- when new facts arrive:
- find candidate opinions by entity overlap + similarity (FTS first, embeddings later)
- update confidence by small deltas; big jumps require strong contradiction + repeated evidence
## CLI integration: standalone vs deep integration
Recommendation: **deep integration in OpenClaw**, but keep a separable core library.
### Why integrate into OpenClaw?
- OpenClaw already knows:
- the workspace path (`agents.defaults.workspace`)
- the session model + heartbeats
- logging + troubleshooting patterns
- You want the agent itself to call the tools:
- `openclaw memory recall "…" --k 25 --since 30d`
- `openclaw memory reflect --since 7d`
### Why still split a library?
- keep memory logic testable without gateway/runtime
- reuse from other contexts (local scripts, future desktop app, etc.)
Shape:
The memory tooling is intended to be a small CLI + library layer, but this is exploratory only.
## “S-Collide” / SuCo: when to use it (research)
If "S-Collide" refers to **SuCo (Subspace Collision)**: it's an ANN retrieval approach that targets strong recall/latency tradeoffs by using learned/structured collisions in subspaces (paper: arXiv 2411.14754, 2024).
Pragmatic take for `~/.openclaw/workspace`:
- **don't start** with SuCo.
- start with SQLite FTS + (optional) simple embeddings; you'll get most UX wins immediately.
- consider SuCo/HNSW/ScaNN-class solutions only once:
- corpus is big (tens/hundreds of thousands of chunks)
- brute-force embedding search becomes too slow
- recall quality is meaningfully bottlenecked by lexical search
Offline-friendly alternatives (in increasing complexity):
- SQLite FTS5 + metadata filters (zero ML)
- Embeddings + brute force (works surprisingly far if chunk count is low)
- HNSW index (common, robust; needs a library binding)
- SuCo (research-grade; attractive if theres a solid implementation you can embed)
Open question:
- what's the **best** offline embedding model for "personal assistant memory" on your machines (laptop + desktop)?
- if you already have Ollama: embed with a local model; otherwise ship a small embedding model in the toolchain.
## Smallest useful pilot
If you want a minimal, still-useful version:
- Add `bank/` entity pages and a `## Retain` section in daily logs.
- Use SQLite FTS for recall with citations (path + line numbers).
- Add embeddings only if recall quality or scale demands it.
## References
- Letta / MemGPT concepts: “core memory blocks” + “archival memory” + tool-driven self-editing memory.
- Hindsight Technical Report: “retain / recall / reflect”, four-network memory, narrative fact extraction, opinion confidence evolution.
- SuCo: arXiv 2411.14754 (2024): “Subspace Collision” approximate nearest neighbor retrieval.

View File

@ -159,7 +159,7 @@ Use `--agent <id>` to target a specific agent; omit it to use the configured def
## Troubleshooting
### “No credentials found”
### "No credentials found"
If the Anthropic token profile is missing, run `claude setup-token` on the
**gateway host**, then re-check:

View File

@ -12,7 +12,7 @@ OpenClaw uses Bonjour (mDNS / DNSSD) as a **LANonly convenience** to disco
an active Gateway (WebSocket endpoint). It is besteffort and does **not** replace SSH or
Tailnet-based connectivity.
## Widearea Bonjour (Unicast DNSSD) over Tailscale
## Wide-area Bonjour (Unicast DNS-SD) over Tailscale
If the node and gateway are on different networks, multicast mDNS wont cross the
boundary. You can keep the same discovery UX by switching to **unicast DNSSD**
@ -38,7 +38,7 @@ iOS/Android nodes browse both `local.` and your configured widearea domain.
}
```
### Onetime DNS server setup (gateway host)
### One-time DNS server setup (gateway host)
```bash
openclaw dns setup --apply
@ -84,7 +84,7 @@ Only the Gateway advertises `_openclaw-gw._tcp`.
- `_openclaw-gw._tcp` — gateway transport beacon (used by macOS/iOS/Android nodes).
## TXT keys (nonsecret hints)
## TXT keys (non-secret hints)
The Gateway advertises small nonsecret hints to make UI flows convenient:

View File

@ -905,7 +905,9 @@ Time format in system prompt. Default: `auto` (OS preference).
- Also used as fallback routing when the selected/default model cannot accept image input.
- `imageGenerationModel`: accepts either a string (`"provider/model"`) or an object (`{ primary, fallbacks }`).
- Used by the shared image-generation capability and any future tool/plugin surface that generates images.
- Typical values: `google/gemini-3-pro-image-preview` for the native Nano Banana-style flow, `fal/fal-ai/flux/dev` for fal, or `openai/gpt-image-1` for OpenAI Images.
- If omitted, `image_generate` can still infer a best-effort provider default from compatible auth-backed image-generation providers.
- Typical values: `google/gemini-3-pro-image-preview`, `fal/fal-ai/flux/dev`, `openai/gpt-image-1`.
- `pdfModel`: accepts either a string (`"provider/model"`) or an object (`{ primary, fallbacks }`).
- Used by the `pdf` tool for model routing.
- If omitted, the PDF tool falls back to `imageModel`, then to best-effort provider defaults.

View File

@ -29,7 +29,7 @@ Protocol details:
- [Gateway protocol](/gateway/protocol)
- [Bridge protocol (legacy)](/gateway/bridge-protocol)
## Why we keep both “direct” and SSH
## Why we keep both "direct" and SSH
- **Direct WS** is the best UX on the same network and within a tailnet:
- auto-discovery on LAN via Bonjour

View File

@ -126,7 +126,7 @@ WebChat no longer uses a separate HTTP port. The SwiftUI chat UI connects direct
- Forward `18789` over SSH (see above), then connect clients to `ws://127.0.0.1:18789`.
- On macOS, prefer the apps “Remote over SSH” mode, which manages the tunnel automatically.
## macOS app “Remote over SSH”
## macOS app "Remote over SSH"
The macOS menu bar app can drive the same setup end-to-end (remote status checks, WebChat, and Voice Wake forwarding).

View File

@ -95,7 +95,7 @@ Available groups:
- `group:nodes`: `nodes`
- `group:openclaw`: all built-in OpenClaw tools (excludes provider plugins)
## Elevated: exec-only “run on host”
## Elevated: exec-only "run on host"
Elevated does **not** grant extra tools; it only affects `exec`.
@ -112,9 +112,9 @@ Gates:
See [Elevated Mode](/tools/elevated).
## Common “sandbox jail” fixes
## Common "sandbox jail" fixes
### “Tool X blocked by sandbox tool policy”
### "Tool X blocked by sandbox tool policy"
Fix-it keys (pick one):
@ -123,6 +123,6 @@ Fix-it keys (pick one):
- remove it from `tools.sandbox.tools.deny` (or per-agent `agents.list[].tools.sandbox.tools.deny`)
- or add it to `tools.sandbox.tools.allow` (or per-agent allow)
### “I thought this was main, why is it sandboxed?”
### "I thought this was main, why is it sandboxed?"
In `"non-main"` mode, group/channel keys are _not_ main. Use the main session key (shown by `sandbox explain`) or switch mode to `"off"`.

View File

@ -840,7 +840,7 @@ Avoid:
- Exposing relay/control ports over LAN or public Internet.
- Tailscale Funnel for browser control endpoints (public exposure).
### 0.7) Secrets on disk (whats sensitive)
### 0.7) Secrets on disk (sensitive data)
Assume anything under `~/.openclaw/` (or `$OPENCLAW_STATE_DIR/`) may contain secrets or private data:

View File

@ -13,8 +13,8 @@ Quick answers plus deeper troubleshooting for real-world setups (local dev, VPS,
## Table of contents
- [Quick start and first-run setup]
- [Im stuck what's the fastest way to get unstuck?](#im-stuck-whats-the-fastest-way-to-get-unstuck)
- [What's the recommended way to install and set up OpenClaw?](#whats-the-recommended-way-to-install-and-set-up-openclaw)
- [I am stuck - fastest way to get unstuck](#i-am-stuck---fastest-way-to-get-unstuck)
- [Recommended way to install and set up OpenClaw](#recommended-way-to-install-and-set-up-openclaw)
- [How do I open the dashboard after onboarding?](#how-do-i-open-the-dashboard-after-onboarding)
- [How do I authenticate the dashboard (token) on localhost vs remote?](#how-do-i-authenticate-the-dashboard-token-on-localhost-vs-remote)
- [What runtime do I need?](#what-runtime-do-i-need)
@ -23,15 +23,15 @@ Quick answers plus deeper troubleshooting for real-world setups (local dev, VPS,
- [It is stuck on "wake up my friend" / onboarding will not hatch. What now?](#it-is-stuck-on-wake-up-my-friend-onboarding-will-not-hatch-what-now)
- [Can I migrate my setup to a new machine (Mac mini) without redoing onboarding?](#can-i-migrate-my-setup-to-a-new-machine-mac-mini-without-redoing-onboarding)
- [Where do I see what is new in the latest version?](#where-do-i-see-what-is-new-in-the-latest-version)
- [I can't access docs.openclaw.ai (SSL error). What now?](#i-cant-access-docsopenclawai-ssl-error-what-now)
- [What's the difference between stable and beta?](#whats-the-difference-between-stable-and-beta)
- [How do I install the beta version, and what's the difference between beta and dev?](#how-do-i-install-the-beta-version-and-whats-the-difference-between-beta-and-dev)
- [Cannot access docs.openclaw.ai (SSL error)](#cannot-access-docsopenclawai-ssl-error)
- [Difference between stable and beta](#difference-between-stable-and-beta)
- [How do I install the beta version and what is the difference between beta and dev](#how-do-i-install-the-beta-version-and-what-is-the-difference-between-beta-and-dev)
- [How do I try the latest bits?](#how-do-i-try-the-latest-bits)
- [How long does install and onboarding usually take?](#how-long-does-install-and-onboarding-usually-take)
- [Installer stuck? How do I get more feedback?](#installer-stuck-how-do-i-get-more-feedback)
- [Windows install says git not found or openclaw not recognized](#windows-install-says-git-not-found-or-openclaw-not-recognized)
- [Windows exec output shows garbled Chinese text what should I do](#windows-exec-output-shows-garbled-chinese-text-what-should-i-do)
- [The docs didn't answer my question - how do I get a better answer?](#the-docs-didnt-answer-my-question-how-do-i-get-a-better-answer)
- [The docs did not answer my question - how do I get a better answer](#the-docs-did-not-answer-my-question---how-do-i-get-a-better-answer)
- [How do I install OpenClaw on Linux?](#how-do-i-install-openclaw-on-linux)
- [How do I install OpenClaw on a VPS?](#how-do-i-install-openclaw-on-a-vps)
- [Where are the cloud/VPS install guides?](#where-are-the-cloudvps-install-guides)
@ -57,7 +57,7 @@ Quick answers plus deeper troubleshooting for real-world setups (local dev, VPS,
- [Can multiple people use one WhatsApp number with different OpenClaw instances?](#can-multiple-people-use-one-whatsapp-number-with-different-openclaw-instances)
- [Can I run a "fast chat" agent and an "Opus for coding" agent?](#can-i-run-a-fast-chat-agent-and-an-opus-for-coding-agent)
- [Does Homebrew work on Linux?](#does-homebrew-work-on-linux)
- [What's the difference between the hackable (git) install and npm install?](#whats-the-difference-between-the-hackable-git-install-and-npm-install)
- [Difference between the hackable git install and npm install](#difference-between-the-hackable-git-install-and-npm-install)
- [Can I switch between npm and git installs later?](#can-i-switch-between-npm-and-git-installs-later)
- [Should I run the Gateway on my laptop or a VPS?](#should-i-run-the-gateway-on-my-laptop-or-a-vps)
- [How important is it to run OpenClaw on a dedicated machine?](#how-important-is-it-to-run-openclaw-on-a-dedicated-machine)
@ -65,7 +65,7 @@ Quick answers plus deeper troubleshooting for real-world setups (local dev, VPS,
- [Can I run OpenClaw in a VM and what are the requirements](#can-i-run-openclaw-in-a-vm-and-what-are-the-requirements)
- [What is OpenClaw?](#what-is-openclaw)
- [What is OpenClaw, in one paragraph?](#what-is-openclaw-in-one-paragraph)
- [What's the value proposition?](#whats-the-value-proposition)
- [Value proposition](#value-proposition)
- [I just set it up what should I do first](#i-just-set-it-up-what-should-i-do-first)
- [What are the top five everyday use cases for OpenClaw](#what-are-the-top-five-everyday-use-cases-for-openclaw)
- [Can OpenClaw help with lead gen outreach ads and blogs for a SaaS](#can-openclaw-help-with-lead-gen-outreach-ads-and-blogs-for-a-saas)
@ -92,7 +92,7 @@ Quick answers plus deeper troubleshooting for real-world setups (local dev, VPS,
- [Is all data used with OpenClaw saved locally?](#is-all-data-used-with-openclaw-saved-locally)
- [Where does OpenClaw store its data?](#where-does-openclaw-store-its-data)
- [Where should AGENTS.md / SOUL.md / USER.md / MEMORY.md live?](#where-should-agentsmd-soulmd-usermd-memorymd-live)
- [What's the recommended backup strategy?](#whats-the-recommended-backup-strategy)
- [Recommended backup strategy](#recommended-backup-strategy)
- [How do I completely uninstall OpenClaw?](#how-do-i-completely-uninstall-openclaw)
- [Can agents work outside the workspace?](#can-agents-work-outside-the-workspace)
- [I'm in remote mode - where is the session store?](#im-in-remote-mode-where-is-the-session-store)
@ -116,7 +116,7 @@ Quick answers plus deeper troubleshooting for real-world setups (local dev, VPS,
- [Is there a benefit to using a node on my personal laptop instead of SSH from a VPS?](#is-there-a-benefit-to-using-a-node-on-my-personal-laptop-instead-of-ssh-from-a-vps)
- [Do nodes run a gateway service?](#do-nodes-run-a-gateway-service)
- [Is there an API / RPC way to apply config?](#is-there-an-api-rpc-way-to-apply-config)
- [What's a minimal "sane" config for a first install?](#whats-a-minimal-sane-config-for-a-first-install)
- [Minimal sane config for a first install](#minimal-sane-config-for-a-first-install)
- [How do I set up Tailscale on a VPS and connect from my Mac?](#how-do-i-set-up-tailscale-on-a-vps-and-connect-from-my-mac)
- [How do I connect a Mac node to a remote Gateway (Tailscale Serve)?](#how-do-i-connect-a-mac-node-to-a-remote-gateway-tailscale-serve)
- [Should I install on a second laptop or just add a node?](#should-i-install-on-a-second-laptop-or-just-add-a-node)
@ -135,7 +135,7 @@ Quick answers plus deeper troubleshooting for real-world setups (local dev, VPS,
- [Why am I getting heartbeat messages every 30 minutes?](#why-am-i-getting-heartbeat-messages-every-30-minutes)
- [Do I need to add a "bot account" to a WhatsApp group?](#do-i-need-to-add-a-bot-account-to-a-whatsapp-group)
- [How do I get the JID of a WhatsApp group?](#how-do-i-get-the-jid-of-a-whatsapp-group)
- [Why doesn't OpenClaw reply in a group?](#why-doesnt-openclaw-reply-in-a-group)
- [Why does OpenClaw not reply in a group](#why-does-openclaw-not-reply-in-a-group)
- [Do groups/threads share context with DMs?](#do-groupsthreads-share-context-with-dms)
- [How many workspaces and agents can I create?](#how-many-workspaces-and-agents-can-i-create)
- [Can I run multiple bots or chats at the same time (Slack), and how should I set that up?](#can-i-run-multiple-bots-or-chats-at-the-same-time-slack-and-how-should-i-set-that-up)
@ -162,7 +162,7 @@ Quick answers plus deeper troubleshooting for real-world setups (local dev, VPS,
- [What is an auth profile?](#what-is-an-auth-profile)
- [What are typical profile IDs?](#what-are-typical-profile-ids)
- [Can I control which auth profile is tried first?](#can-i-control-which-auth-profile-is-tried-first)
- [OAuth vs API key: what's the difference?](#oauth-vs-api-key-whats-the-difference)
- [OAuth vs API key - what is the difference](#oauth-vs-api-key---what-is-the-difference)
- [Gateway: ports, "already running", and remote mode](#gateway-ports-already-running-and-remote-mode)
- [What port does the Gateway use?](#what-port-does-the-gateway-use)
- [Why does `openclaw gateway status` say `Runtime: running` but `RPC probe: failed`?](#why-does-openclaw-gateway-status-say-runtime-running-but-rpc-probe-failed)
@ -170,7 +170,7 @@ Quick answers plus deeper troubleshooting for real-world setups (local dev, VPS,
- [What does "another gateway instance is already listening" mean?](#what-does-another-gateway-instance-is-already-listening-mean)
- [How do I run OpenClaw in remote mode (client connects to a Gateway elsewhere)?](#how-do-i-run-openclaw-in-remote-mode-client-connects-to-a-gateway-elsewhere)
- [The Control UI says "unauthorized" (or keeps reconnecting). What now?](#the-control-ui-says-unauthorized-or-keeps-reconnecting-what-now)
- [I set `gateway.bind: "tailnet"` but it can't bind / nothing listens](#i-set-gatewaybind-tailnet-but-it-cant-bind-nothing-listens)
- [I set gateway.bind tailnet but it cannot bind and nothing listens](#i-set-gatewaybind-tailnet-but-it-cannot-bind-and-nothing-listens)
- [Can I run multiple Gateways on the same host?](#can-i-run-multiple-gateways-on-the-same-host)
- [What does "invalid handshake" / code 1008 mean?](#what-does-invalid-handshake-code-1008-mean)
- [Logging and debugging](#logging-and-debugging)
@ -183,7 +183,7 @@ Quick answers plus deeper troubleshooting for real-world setups (local dev, VPS,
- [TUI shows no output. What should I check?](#tui-shows-no-output-what-should-i-check)
- [How do I completely stop then start the Gateway?](#how-do-i-completely-stop-then-start-the-gateway)
- [ELI5: `openclaw gateway restart` vs `openclaw gateway`](#eli5-openclaw-gateway-restart-vs-openclaw-gateway)
- [What's the fastest way to get more details when something fails?](#whats-the-fastest-way-to-get-more-details-when-something-fails)
- [Fastest way to get more details when something fails](#fastest-way-to-get-more-details-when-something-fails)
- [Media and attachments](#media-and-attachments)
- [My skill generated an image/PDF, but nothing was sent](#my-skill-generated-an-imagepdf-but-nothing-was-sent)
- [Security and access control](#security-and-access-control)
@ -192,15 +192,15 @@ Quick answers plus deeper troubleshooting for real-world setups (local dev, VPS,
- [Should my bot have its own email GitHub account or phone number](#should-my-bot-have-its-own-email-github-account-or-phone-number)
- [Can I give it autonomy over my text messages and is that safe](#can-i-give-it-autonomy-over-my-text-messages-and-is-that-safe)
- [Can I use cheaper models for personal assistant tasks?](#can-i-use-cheaper-models-for-personal-assistant-tasks)
- [I ran `/start` in Telegram but didn't get a pairing code](#i-ran-start-in-telegram-but-didnt-get-a-pairing-code)
- [I ran /start in Telegram but did not get a pairing code](#i-ran-start-in-telegram-but-did-not-get-a-pairing-code)
- [WhatsApp: will it message my contacts? How does pairing work?](#whatsapp-will-it-message-my-contacts-how-does-pairing-work)
- [Chat commands, aborting tasks, and "it won't stop"](#chat-commands-aborting-tasks-and-it-wont-stop)
- [Chat commands, aborting tasks, and "it will not stop"](#chat-commands-aborting-tasks-and-it-will-not-stop)
- [How do I stop internal system messages from showing in chat](#how-do-i-stop-internal-system-messages-from-showing-in-chat)
- [How do I stop/cancel a running task?](#how-do-i-stopcancel-a-running-task)
- [How do I send a Discord message from Telegram? ("Cross-context messaging denied")](#how-do-i-send-a-discord-message-from-telegram-crosscontext-messaging-denied)
- [Why does it feel like the bot "ignores" rapid-fire messages?](#why-does-it-feel-like-the-bot-ignores-rapidfire-messages)
## First 60 seconds if something's broken
## First 60 seconds if something is broken
1. **Quick status (first check)**
@ -267,7 +267,7 @@ Quick answers plus deeper troubleshooting for real-world setups (local dev, VPS,
## Quick start and first-run setup
### Im stuck what's the fastest way to get unstuck
### I am stuck - fastest way to get unstuck
Use a local AI agent that can **see your machine**. That is far more effective than asking
in Discord, because most "I'm stuck" cases are **local config or environment issues** that
@ -312,10 +312,10 @@ What they do:
Other useful CLI checks: `openclaw status --all`, `openclaw logs --follow`,
`openclaw gateway status`, `openclaw health --verbose`.
Quick debug loop: [First 60 seconds if something's broken](#first-60-seconds-if-somethings-broken).
Quick debug loop: [First 60 seconds if something is broken](#first-60-seconds-if-something-is-broken).
Install docs: [Install](/install), [Installer flags](/install/installer), [Updating](/install/updating).
### What's the recommended way to install and set up OpenClaw
### Recommended way to install and set up OpenClaw
The repo recommends running from source and using onboarding:
@ -445,7 +445,7 @@ Newest entries are at the top. If the top section is marked **Unreleased**, the
section is the latest shipped version. Entries are grouped by **Highlights**, **Changes**, and
**Fixes** (plus docs/other sections when needed).
### I can't access docs.openclaw.ai SSL error What now
### Cannot access docs.openclaw.ai (SSL error)
Some Comcast/Xfinity connections incorrectly block `docs.openclaw.ai` via Xfinity
Advanced Security. Disable it or allowlist `docs.openclaw.ai`, then retry. More
@ -455,7 +455,7 @@ Please help us unblock it by reporting here: [https://spa.xfinity.com/check_url_
If you still can't reach the site, the docs are mirrored on GitHub:
[https://github.com/openclaw/openclaw/tree/main/docs](https://github.com/openclaw/openclaw/tree/main/docs)
### What's the difference between stable and beta
### Difference between stable and beta
**Stable** and **beta** are **npm dist-tags**, not separate code lines:
@ -469,7 +469,7 @@ that same version to `latest`**. That's why beta and stable can point at the
See what changed:
[https://github.com/openclaw/openclaw/blob/main/CHANGELOG.md](https://github.com/openclaw/openclaw/blob/main/CHANGELOG.md)
### How do I install the beta version and what's the difference between beta and dev
### How do I install the beta version and what is the difference between beta and dev
**Beta** is the npm dist-tag `beta` (may match `latest`).
**Dev** is the moving head of `main` (git); when published, it uses the npm dist-tag `dev`.
@ -497,7 +497,7 @@ Rough guide:
- **Onboarding:** 5-15 minutes depending on how many channels/models you configure
If it hangs, use [Installer stuck](/help/faq#installer-stuck-how-do-i-get-more-feedback)
and the fast debug loop in [Im stuck](/help/faq#im-stuck--whats-the-fastest-way-to-get-unstuck).
and the fast debug loop in [I am stuck](/help/faq#i-am-stuck---fastest-way-to-get-unstuck).
### How do I try the latest bits
@ -614,7 +614,7 @@ If you still reproduce this on latest OpenClaw, track/report it in:
- [Issue #30640](https://github.com/openclaw/openclaw/issues/30640)
### The docs didn't answer my question how do I get a better answer
### The docs did not answer my question - how do I get a better answer
Use the **hackable (git) install** so you have the full source and docs locally, then ask
your bot (or Claude/Codex) _from that folder_ so it can read the repo and answer precisely.
@ -882,7 +882,7 @@ brew install <formula>
If you run OpenClaw via systemd, ensure the service PATH includes `/home/linuxbrew/.linuxbrew/bin` (or your brew prefix) so `brew`-installed tools resolve in non-login shells.
Recent builds also prepend common user bin dirs on Linux systemd services (for example `~/.local/bin`, `~/.npm-global/bin`, `~/.local/share/pnpm`, `~/.bun/bin`) and honor `PNPM_HOME`, `NPM_CONFIG_PREFIX`, `BUN_INSTALL`, `VOLTA_HOME`, `ASDF_DATA_DIR`, `NVM_DIR`, and `FNM_DIR` when set.
### What's the difference between the hackable git install and npm install
### Difference between the hackable git install and npm install
- **Hackable (git) install:** full source checkout, editable, best for contributors.
You run builds locally and can patch code/docs.
@ -918,7 +918,7 @@ openclaw gateway restart
Doctor detects a gateway service entrypoint mismatch and offers to rewrite the service config to match the current install (use `--repair` in automation).
Backup tips: see [Backup strategy](/help/faq#whats-the-recommended-backup-strategy).
Backup tips: see [Backup strategy](/help/faq#recommended-backup-strategy).
### Should I run the Gateway on my laptop or a VPS
@ -981,7 +981,7 @@ If you are running macOS in a VM, see [macOS VM](/install/macos-vm).
OpenClaw is a personal AI assistant you run on your own devices. It replies on the messaging surfaces you already use (WhatsApp, Telegram, Slack, Mattermost (plugin), Discord, Google Chat, Signal, iMessage, WebChat) and can also do voice + a live Canvas on supported platforms. The **Gateway** is the always-on control plane; the assistant is the product.
### What's the value proposition
### Value proposition
OpenClaw is not "just a Claude wrapper." It's a **local-first control plane** that lets you run a
capable assistant on **your own hardware**, reachable from the chat apps you already use, with
@ -1381,7 +1381,7 @@ AGENTS.md or MEMORY.md** rather than relying on chat history.
See [Agent workspace](/concepts/agent-workspace) and [Memory](/concepts/memory).
### What's the recommended backup strategy
### Recommended backup strategy
Put your **agent workspace** in a **private** git repo and back it up somewhere
private (for example GitHub private). This captures memory + AGENTS/SOUL/USER
@ -1727,7 +1727,7 @@ Avoid it:
Docs: [Config](/cli/config), [Configure](/cli/configure), [Doctor](/gateway/doctor).
### What's a minimal sane config for a first install
### Minimal sane config for a first install
```json5
{
@ -2019,7 +2019,7 @@ openclaw directory groups list --channel whatsapp
Docs: [WhatsApp](/channels/whatsapp), [Directory](/cli/directory), [Logs](/cli/logs).
### Why doesn't OpenClaw reply in a group
### Why does OpenClaw not reply in a group
Two common causes:
@ -2462,7 +2462,7 @@ To target a specific agent:
openclaw models auth order set --provider anthropic --agent main anthropic:default
```
### OAuth vs API key what's the difference
### OAuth vs API key - what is the difference
OpenClaw supports both:
@ -2554,7 +2554,7 @@ Fix:
- `openclaw devices rotate --device <id> --role operator`
- Still stuck? Run `openclaw status --all` and follow [Troubleshooting](/gateway/troubleshooting). See [Dashboard](/web/dashboard) for auth details.
### I set gatewaybind tailnet but it can't bind nothing listens
### I set gateway.bind tailnet but it cannot bind and nothing listens
`tailnet` bind picks a Tailscale IP from your network interfaces (100.64.0.0/10). If the machine isn't on Tailscale (or the interface is down), there's nothing to bind to.
@ -2785,7 +2785,7 @@ Docs: [Gateway service runbook](/gateway).
If you installed the service, use the gateway commands. Use `openclaw gateway` when
you want a one-off, foreground run.
### What's the fastest way to get more details when something fails
### Fastest way to get more details when something fails
Start the Gateway with `--verbose` to get more console detail. Then inspect the log file for channel auth, model routing, and RPC errors.
@ -2867,7 +2867,7 @@ more susceptible to instruction hijacking, so avoid them for tool-enabled agents
or when reading untrusted content. If you must use a smaller model, lock down
tools and run inside a sandbox. See [Security](/gateway/security).
### I ran start in Telegram but didn't get a pairing code
### I ran start in Telegram but did not get a pairing code
Pairing codes are sent **only** when an unknown sender messages the bot and
`dmPolicy: "pairing"` is enabled. `/start` by itself doesn't generate a code.
@ -2899,7 +2899,7 @@ openclaw pairing list whatsapp
Wizard phone number prompt: it's used to set your **allowlist/owner** so your own DMs are permitted. It's not used for auto-sending. If you run on your personal WhatsApp number, use that number and enable `channels.whatsapp.selfChatMode`.
## Chat commands, aborting tasks, and "it won't stop"
## Chat commands, aborting tasks, and "it will not stop"
### How do I stop internal system messages from showing in chat

View File

@ -55,14 +55,14 @@ Think of the suites as “increasing realism” (and increasing flakiness/cost):
- Embedded runner note:
- When you change message-tool discovery inputs or compaction runtime context,
keep both levels of coverage.
- Add focused helper regressions for pure routing/normalization seams.
- Add focused helper regressions for pure routing/normalization boundaries.
- Also keep the embedded runner integration suites healthy:
`src/agents/pi-embedded-runner/compact.hooks.test.ts`,
`src/agents/pi-embedded-runner/run.overflow-compaction.test.ts`, and
`src/agents/pi-embedded-runner/run.overflow-compaction.loop.test.ts`.
- Those suites verify that scoped ids and compaction behavior still flow
through the real `run.ts` / `compact.ts` paths; helper-only tests are not a
sufficient substitute for those seams.
sufficient substitute for those integration paths.
- Pool note:
- OpenClaw uses Vitest `vmForks` on Node 22, 23, and 24 for faster unit shards.
- On Node 25+, OpenClaw automatically falls back to regular `forks` until the repo is re-validated there.
@ -176,7 +176,7 @@ Live tests are split into two layers so we can isolate failures:
- Separates “provider API is broken / key is invalid” from “gateway agent pipeline is broken”
- Contains small, isolated regressions (example: OpenAI Responses/Codex Responses reasoning replay + tool-call flows)
### Layer 2: Gateway + dev agent smoke (what @openclaw actually does)
### Layer 2: Gateway + dev agent smoke (what "@openclaw" actually does)
- Test: `src/gateway/gateway-models.profiles.live.test.ts`
- Goal:
@ -395,7 +395,7 @@ If you want to rely on env keys (e.g. exported in your `~/.profile`), run local
- Optional auth behavior:
- `OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS=1` to force profile-store auth and ignore env-only overrides
## Docker runners (optional “works in Linux” checks)
## Docker runners (optional "works in Linux" checks)
These run `pnpm test:live` inside the repo Docker image, mounting your local config dir and workspace (and sourcing `~/.profile` if mounted). They also bind-mount CLI auth homes like `~/.codex`, `~/.claude`, `~/.qwen`, and `~/.minimax` when present, then copy them into the container home before the run so external-CLI OAuth can refresh tokens without mutating the host auth store:

View File

@ -3,7 +3,7 @@ summary: "Symptom first troubleshooting hub for OpenClaw"
read_when:
- OpenClaw is not working and you need the fastest path to a fix
- You want a triage flow before diving into deep runbooks
title: "Troubleshooting"
title: "General Troubleshooting"
---
# Troubleshooting

View File

@ -154,7 +154,7 @@ If you're locked out:
- SSH access (port 22) is always allowed
- The gateway is **only** accessible via Tailscale by design
### Service won't start
### Service will not start
```bash
# Check logs

View File

@ -112,7 +112,7 @@ After setup completes, enable SSH:
---
## 4) Get the VM's IP address
## 4) Get the VM IP address
```bash
lume get openclaw

View File

@ -135,7 +135,7 @@ This downloads a portable backup you can restore on any OpenClaw host.
## Troubleshooting
### Service won't start
### Service will not start
Check the deploy logs in the Render Dashboard. Common issues:

View File

@ -268,7 +268,7 @@ git checkout main
git pull
```
## If youre stuck
## If you are stuck
- Run `openclaw doctor` again and read the output carefully (it often tells you the fix).
- Check: [Troubleshooting](/gateway/troubleshooting)

View File

@ -21,7 +21,7 @@ integration.
- Support **provider APIs** and **CLI fallbacks**.
- Allow multiple models with ordered fallback (error/size/timeout).
## Highlevel behavior
## High-level behavior
1. Collect inbound attachments (`MediaPaths`, `MediaUrls`, `MediaTypes`).
2. For each enabled capability (image/audio/video), select attachments per policy (default: **first**).
@ -334,7 +334,7 @@ When `mode: "all"`, outputs are labeled `[Image 1/2]`, `[Audio 2/2]`, etc.
}
```
### 4) Multimodal single entry (explicit capabilities)
### 4) Multi-modal single entry (explicit capabilities)
```json5
{

View File

@ -231,7 +231,7 @@ For the full setup guide, see [Oracle Cloud](/platforms/oracle). For signup tips
## Troubleshooting
### Gateway won't start
### Gateway will not start
```bash
openclaw gateway status

View File

@ -97,7 +97,7 @@ If the gateway status stays on "Starting...", check if a zombie process is holdi
openclaw gateway status
openclaw gateway stop
# If youre not using a LaunchAgent (dev mode / manual runs), find the listener:
# If you're not using a LaunchAgent (dev mode / manual runs), find the listener:
lsof -nP -iTCP:18789 -sTCP:LISTEN
```

View File

@ -2,7 +2,7 @@
summary: "How the macOS app reports gateway/Baileys health states"
read_when:
- Debugging mac app health indicators
title: "Health Checks"
title: "Health Checks (macOS)"
---
# Health Checks on macOS

View File

OpenClaw can host **PeekabooBridge** as a local, permission-aware UI automation
broker. This lets the `peekaboo` CLI drive UI automation while reusing the
macOS app's TCC permissions.
## What this is (and isnt)
## What this is (and is not)
- **Host**: OpenClaw.app can act as a PeekabooBridge host.
- **Client**: use the `peekaboo` CLI (no separate `openclaw ui ...` surface).

View File

@ -3,7 +3,7 @@ summary: "macOS Skills settings UI and gateway-backed status"
read_when:
- Updating the macOS Skills settings UI
- Changing skills gating or install behavior
title: "Skills"
title: "Skills (macOS)"
---
# Skills (macOS)

View File

@ -2,7 +2,7 @@
summary: "Voice wake and push-to-talk modes plus routing details in the mac app"
read_when:
- Working on voice wake or PTT pathways
title: "Voice Wake"
title: "Voice Wake (macOS)"
---
# Voice Wake & Push-to-Talk

View File

@ -2,7 +2,7 @@
summary: "How the mac app embeds the gateway WebChat and how to debug it"
read_when:
- Debugging mac WebChat view or loopback port
title: "WebChat"
title: "WebChat (macOS)"
---
# WebChat (macOS app)
@ -26,7 +26,7 @@ agent (with a session switcher for other sessions).
- Logs: `./scripts/clawlog.sh` (subsystem `ai.openclaw`, category `WebChatSwiftUI`).
## How its wired
## How it is wired
- Data plane: Gateway WS methods `chat.history`, `chat.send`, `chat.abort`,
`chat.inject` and events `chat`, `agent`, `presence`, `tick`, `health`.

View File

@ -180,7 +180,7 @@ With the VCN locked down (only UDP 41641 open) and the Gateway bound to loopback
This setup often removes the _need_ for extra host-based firewall rules purely to stop Internet-wide SSH brute force — but you should still keep the OS updated, run `openclaw security audit`, and verify you arent accidentally listening on public interfaces.
### What's Already Protected
### Already protected
| Traditional Step | Needed? | Why |
| ------------------ | ----------- | ---------------------------------------------------------------------------- |
@ -236,7 +236,7 @@ Free tier ARM instances are popular. Try:
- Retry during off-peak hours (early morning)
- Use the "Always Free" filter when selecting shape
### Tailscale won't connect
### Tailscale will not connect
```bash
# Check status
@ -246,7 +246,7 @@ sudo tailscale status
sudo tailscale up --ssh --hostname=openclaw --reset
```
### Gateway won't start
### Gateway will not start
```bash
openclaw gateway status
@ -254,7 +254,7 @@ openclaw doctor --non-interactive
journalctl --user -u openclaw-gateway -n 50
```
### Can't reach Control UI
### Cannot reach Control UI
```bash
# Verify Tailscale Serve is running

View File

@ -33,7 +33,7 @@ Perfect for:
**Minimum specs:** 1GB RAM, 1 core, 500MB disk
**Recommended:** 2GB+ RAM, 64-bit OS, 16GB+ SD card (or USB SSD)
## What You'll Need
## What you need
- Raspberry Pi 4 or 5 (2GB+ recommended)
- MicroSD card (16GB+) or USB SSD (better performance)
@ -354,7 +354,7 @@ free -h
- Disable unused services: `sudo systemctl disable cups bluetooth avahi-daemon`
- Check CPU throttling: `vcgencmd get_throttled` (should return `0x0`)
### Service Won't Start
### Service will not start
```bash
# Check logs

View File

@ -35,7 +35,7 @@ export default function (api) {
}
```
## Optional tool (optin)
## Optional tool (opt-in)
Optional tools are **never** autoenabled. Users must add them to an agent
allowlist.

View File

OpenClaw can use **Amazon Bedrock** models via pi-ai's **Bedrock Converse**
streaming provider. Bedrock auth uses the **AWS SDK default credential chain**,
not an API key.
## What piai supports
## What pi-ai supports
- Provider: `amazon-bedrock`
- API: `bedrock-converse-stream`

View File

@ -3,7 +3,7 @@ summary: "Model providers (LLMs) supported by OpenClaw"
read_when:
- You want to choose a model provider
- You need a quick overview of supported LLM backends
title: "Model Providers"
title: "Provider Directory"
---
# Model Providers

View File

@ -1,4 +1,5 @@
---
title: "Kilo Gateway"
summary: "Use Kilo Gateway's unified API to access many models in OpenClaw"
read_when:
- You want a single API key for many LLMs

View File

@ -1,4 +1,5 @@
---
title: "LiteLLM"
summary: "Run OpenClaw through LiteLLM Proxy for unified model access and cost tracking"
read_when:
- You want to route OpenClaw through a LiteLLM proxy

View File

@ -194,7 +194,7 @@ Use the interactive config wizard to set MiniMax without editing JSON:
## Troubleshooting
### “Unknown model: minimax/MiniMax-M2.5”
### "Unknown model: minimax/MiniMax-M2.5"
This usually means the **MiniMax provider isn't configured** (no provider entry
and no MiniMax auth profile/env key found). A fix for this detection is in

View File

@ -1,4 +1,5 @@
---
title: "Together AI"
summary: "Together AI setup (auth + model selection)"
read_when:
- You want to use Together AI with OpenClaw

View File

@ -1,417 +0,0 @@
---
summary: "Clawnet refactor: unify network protocol, roles, auth, approvals, identity"
read_when:
- Planning a unified network protocol for nodes + operator clients
- Reworking approvals, pairing, TLS, and presence across devices
title: "Clawnet Refactor"
---
# Clawnet refactor (protocol + auth unification)
## Hi
Hi Peter — great direction; this unlocks simpler UX + stronger security.
## Purpose
Single, rigorous document for:
- Current state: protocols, flows, trust boundaries.
- Pain points: approvals, multihop routing, UI duplication.
- Proposed new state: one protocol, scoped roles, unified auth/pairing, TLS pinning.
- Identity model: stable IDs + cute slugs.
- Migration plan, risks, open questions.
## Goals (from discussion)
- One protocol for all clients (mac app, CLI, iOS, Android, headless node).
- Every network participant authenticated + paired.
- Role clarity: nodes vs operators.
- Central approvals routed to where the user is.
- TLS encryption + optional pinning for all remote traffic.
- Minimal code duplication.
- Single machine should appear once (no UI/node duplicate entry).
## Nongoals (explicit)
- Remove capability separation (still need least-privilege).
- Expose full gateway control plane without scope checks.
- Make auth depend on human labels (slugs remain non-security).
---
# Current state (asis)
## Two protocols
### 1) Gateway WebSocket (control plane)
- Full API surface: config, channels, models, sessions, agent runs, logs, nodes, etc.
- Default bind: loopback. Remote access via SSH/Tailscale.
- Auth: token/password via `connect`.
- No TLS pinning (relies on loopback/tunnel).
- Code:
- `src/gateway/server/ws-connection/message-handler.ts`
- `src/gateway/client.ts`
- `docs/gateway/protocol.md`
### 2) Bridge (node transport)
- Narrow allowlist surface, node identity + pairing.
- JSONL over TCP; optional TLS + cert fingerprint pinning.
- TLS advertises fingerprint in discovery TXT.
- Code:
- `src/infra/bridge/server/connection.ts`
- `src/gateway/server-bridge.ts`
- `src/node-host/bridge-client.ts`
- `docs/gateway/bridge-protocol.md`
## Control plane clients today
- CLI → Gateway WS via `callGateway` (`src/gateway/call.ts`).
- macOS app UI → Gateway WS (`GatewayConnection`).
- Web Control UI → Gateway WS.
- ACP → Gateway WS.
- Browser control uses its own HTTP control server.
## Nodes today
- macOS app in node mode connects to Gateway bridge (`MacNodeBridgeSession`).
- iOS/Android apps connect to Gateway bridge.
- Pairing + per-node token stored on gateway.
## Current approval flow (exec)
- Agent uses `system.run` via Gateway.
- Gateway invokes node over bridge.
- Node runtime decides approval.
- UI prompt shown by mac app (when node == mac app).
- Node returns `invoke-res` to Gateway.
- Multi-hop, UI tied to node host.
## Presence + identity today
- Gateway presence entries from WS clients.
- Node presence entries from bridge.
- mac app can show two entries for same machine (UI + node).
- Node identity stored in pairing store; UI identity separate.
---
# Problems / pain points
- Two protocol stacks to maintain (WS + Bridge).
- Approvals on remote nodes: prompt appears on node host, not where user is.
- TLS pinning only exists for bridge; WS depends on SSH/Tailscale.
- Identity duplication: same machine shows as multiple instances.
- Ambiguous roles: UI + node + CLI capabilities not clearly separated.
---
# Proposed new state (Clawnet)
## One protocol, two roles
Single WS protocol with role + scope.
- **Role: node** (capability host)
- **Role: operator** (control plane)
- Optional **scope** for operator:
- `operator.read` (status + viewing)
- `operator.write` (agent run, sends)
- `operator.admin` (config, channels, models)
### Role behaviors
**Node**
- Can register capabilities (`caps`, `commands`, permissions).
- Can receive `invoke` commands (`system.run`, `camera.*`, `canvas.*`, `screen.record`, etc).
- Can send events: `voice.transcript`, `agent.request`, `chat.subscribe`.
- Cannot call config/models/channels/sessions/agent control plane APIs.
**Operator**
- Full control plane API, gated by scope.
- Receives all approvals.
- Does not directly execute OS actions; routes to nodes.
### Key rule
Role is per-connection, not per device. A device may open both roles, separately.
---
# Unified authentication + pairing
## Client identity
Every client provides:
- `deviceId` (stable, derived from device key).
- `displayName` (human name).
- `role` + `scope` + `caps` + `commands`.
## Pairing flow (unified)
- Client connects unauthenticated.
- Gateway creates a **pairing request** for that `deviceId`.
- Operator receives prompt; approves/denies.
- Gateway issues credentials bound to:
- device public key
- role(s)
- scope(s)
- capabilities/commands
- Client persists token, reconnects authenticated.
## Device-bound auth (avoid bearer token replay)
Preferred: device keypairs.
- Device generates keypair once.
- `deviceId = fingerprint(publicKey)`.
- Gateway sends nonce; device signs; gateway verifies.
- Tokens are issued to a public key (proof-of-possession), not a string.
Alternatives:
- mTLS (client certs): strongest, more ops complexity.
- Short-lived bearer tokens only as a temporary phase (rotate + revoke early).
## Silent approval (SSH heuristic)
Define it precisely to avoid a weak link. Prefer one:
- **Local-only**: auto-pair when client connects via loopback/Unix socket.
- **Challenge via SSH**: gateway issues nonce; client proves SSH by fetching it.
- **Physical presence window**: after a local approval on gateway host UI, allow auto-pair for a short window (e.g. 10 minutes).
Always log + record auto-approvals.
---
# TLS everywhere (dev + prod)
## Reuse existing bridge TLS
Use current TLS runtime + fingerprint pinning:
- `src/infra/bridge/server/tls.ts`
- fingerprint verification logic in `src/node-host/bridge-client.ts`
## Apply to WS
- WS server supports TLS with same cert/key + fingerprint.
- WS clients can pin fingerprint (optional).
- Discovery advertises TLS + fingerprint for all endpoints.
- Discovery is locator hints only; never a trust anchor.
## Why
- Reduce reliance on SSH/Tailscale for confidentiality.
- Make remote mobile connections safe by default.
---
# Approvals redesign (centralized)
## Current
Approval happens on node host (mac app node runtime). Prompt appears where node runs.
## Proposed
Approval is **gateway-hosted**, UI delivered to operator clients.
### New flow
1. Gateway receives `system.run` intent (agent).
2. Gateway creates approval record: `approval.requested`.
3. Operator UI(s) show prompt.
4. Approval decision sent to gateway: `approval.resolve`.
5. Gateway invokes node command if approved.
6. Node executes, returns `invoke-res`.
### Approval semantics (hardening)
- Broadcast to all operators; only the active UI shows a modal (others get a toast).
- First resolution wins; gateway rejects subsequent resolves as already settled.
- Default timeout: deny after N seconds (e.g. 60s), log reason.
- Resolution requires `operator.approvals` scope.
## Benefits
- Prompt appears where user is (mac/phone).
- Consistent approvals for remote nodes.
- Node runtime stays headless; no UI dependency.
---
# Role clarity examples
## iPhone app
- **Node role** for: mic, camera, voice chat, location, push-to-talk.
- Optional **operator.read** for status and chat view.
- Optional **operator.write/admin** only when explicitly enabled.
## macOS app
- Operator role by default (control UI).
- Node role when “Mac node” enabled (system.run, screen, camera).
- Same deviceId for both connections → merged UI entry.
## CLI
- Operator role always.
- Scope derived by subcommand:
- `status`, `logs` → read
- `agent`, `message` → write
- `config`, `channels` → admin
- approvals + pairing → `operator.approvals` / `operator.pairing`
---
# Identity + slugs
## Stable ID
Required for auth; never changes.
Preferred:
- Keypair fingerprint (public key hash).
## Cute slug (lobsterthemed)
Human label only.
- Example: `scarlet-claw`, `saltwave`, `mantis-pinch`.
- Stored in gateway registry, editable.
- Collision handling: `-2`, `-3`.
## UI grouping
Same `deviceId` across roles → single “Instance” row:
- Badge: `operator`, `node`.
- Shows capabilities + last seen.
---
# Migration strategy
## Phase 0: Document + align
- Publish this doc.
- Inventory all protocol calls + approval flows.
## Phase 1: Add roles/scopes to WS
- Extend `connect` params with `role`, `scope`, `deviceId`.
- Add allowlist gating for node role.
## Phase 2: Bridge compatibility
- Keep bridge running.
- Add WS node support in parallel.
- Gate features behind config flag.
## Phase 3: Central approvals
- Add approval request + resolve events in WS.
- Update mac app UI to prompt + respond.
- Node runtime stops prompting UI.
## Phase 4: TLS unification
- Add TLS config for WS using bridge TLS runtime.
- Add pinning to clients.
## Phase 5: Deprecate bridge
- Migrate iOS/Android/mac node to WS.
- Keep bridge as fallback; remove once stable.
## Phase 6: Devicebound auth
- Require key-based identity for all non-local connections.
- Add revocation + rotation UI.
---
# Security notes
- Role/allowlist enforced at gateway boundary.
- No client gets “full” API without operator scope.
- Pairing required for _all_ connections.
- TLS + pinning reduces MITM risk for mobile.
- SSH silent approval is a convenience; still recorded + revocable.
- Discovery is never a trust anchor.
- Capability claims are verified against server allowlists by platform/type.
# Streaming + large payloads (node media)
WS control plane is fine for small messages, but nodes also do:
- camera clips
- screen recordings
- audio streams
Options:
1. WS binary frames + chunking + backpressure rules.
2. Separate streaming endpoint (still TLS + auth).
3. Keep bridge longer for mediaheavy commands, migrate last.
Pick one before implementation to avoid drift.
# Capability + command policy
- Nodereported caps/commands are treated as **claims**.
- Gateway enforces per-platform allowlists.
- Any new command requires operator approval or explicit allowlist change.
- Audit changes with timestamps.
# Audit + rate limiting
- Log: pairing requests, approvals/denials, token issuance/rotation/revocation.
- Ratelimit pairing spam and approval prompts.
# Protocol hygiene
- Explicit protocol version + error codes.
- Reconnect rules + heartbeat policy.
- Presence TTL and lastseen semantics.
---
# Open questions
1. Single device running both roles: token model
- Recommend separate tokens per role (node vs operator).
- Same deviceId; different scopes; clearer revocation.
2. Operator scope granularity
- read/write/admin + approvals + pairing (minimum viable).
- Consider per-feature scopes later.
3. Token rotation + revocation UX
- Auto-rotate on role change.
- UI to revoke by deviceId + role.
4. Discovery
- Extend current Bonjour TXT to include WS TLS fingerprint + role hints.
- Treat as locator hints only.
5. Crossnetwork approval
- Broadcast to all operator clients; active UI shows modal.
- First response wins; gateway enforces atomicity.
---
# Summary (TL;DR)
- Today: WS control plane + Bridge node transport.
- Pain: approvals + duplication + two stacks.
- Proposal: one WS protocol with explicit roles + scopes, unified pairing + TLS pinning, gateway-hosted approvals, stable device IDs + cute slugs.
- Outcome: simpler UX, stronger security, less duplication, better mobile routing.

View File

@ -1,299 +0,0 @@
---
summary: "Refactor clusters with highest LOC reduction potential"
read_when:
- You want to reduce total LOC without changing behavior
- You are choosing the next dedupe or extraction pass
title: "Refactor Cluster Backlog"
---
# Refactor Cluster Backlog
Ranked by likely LOC reduction, safety, and breadth.
## 1. Channel plugin config and security scaffolding
Highest-value cluster.
Repeated shapes across many channel plugins:
- `config.listAccountIds`
- `config.resolveAccount`
- `config.defaultAccountId`
- `config.setAccountEnabled`
- `config.deleteAccount`
- `config.describeAccount`
- `security.resolveDmPolicy`
Strong examples:
- `extensions/telegram/src/channel.ts`
- `extensions/googlechat/src/channel.ts`
- `extensions/slack/src/channel.ts`
- `extensions/discord/src/channel.ts`
- `extensions/matrix/src/channel.ts`
- `extensions/irc/src/channel.ts`
- `extensions/signal/src/channel.ts`
- `extensions/mattermost/src/channel.ts`
Likely extraction shape:
- `buildChannelConfigAdapter(...)`
- `buildMultiAccountConfigAdapter(...)`
- `buildDmSecurityAdapter(...)`
Expected savings:
- ~250-450 LOC
Risk:
- Medium. Each channel has slightly different `isConfigured`, warnings, and normalization.
## 2. Extension runtime singleton boilerplate
Very safe.
Nearly every extension has the same runtime holder:
- `let runtime: PluginRuntime | null = null`
- `setXRuntime`
- `getXRuntime`
Strong examples:
- `extensions/telegram/src/runtime.ts`
- `extensions/matrix/src/runtime.ts`
- `extensions/slack/src/runtime.ts`
- `extensions/discord/src/runtime.ts`
- `extensions/whatsapp/src/runtime.ts`
- `extensions/imessage/src/runtime.ts`
- `extensions/twitch/src/runtime.ts`
Special-case variants:
- `extensions/bluebubbles/src/runtime.ts`
- `extensions/line/src/runtime.ts`
- `extensions/synology-chat/src/runtime.ts`
Likely extraction shape:
- `createPluginRuntimeStore<T>(errorMessage)`
Expected savings:
- ~180-260 LOC
Risk:
- Low
## 3. Setup prompt and config-patch steps
Large surface area.
Many setup files repeat:
- resolve account id
- prompt allowlist entries
- merge allowFrom
- set DM policy
- prompt secrets
- patch top-level vs account-scoped config
Strong examples:
- `extensions/bluebubbles/src/setup-surface.ts`
- `extensions/googlechat/src/setup-surface.ts`
- `extensions/msteams/src/setup-surface.ts`
- `extensions/zalo/src/setup-surface.ts`
- `extensions/zalouser/src/setup-surface.ts`
- `extensions/nextcloud-talk/src/setup-surface.ts`
- `extensions/matrix/src/setup-surface.ts`
- `extensions/irc/src/setup-surface.ts`
Existing helper surface:
- `src/channels/plugins/setup-wizard-helpers.ts`
Likely extraction shape:
- `promptAllowFromList(...)`
- `buildDmPolicyAdapter(...)`
- `applyScopedAccountPatch(...)`
- `promptSecretFields(...)`
Expected savings:
- ~300-600 LOC
Risk:
- Medium. Easy to over-generalize; keep helpers narrow and composable.
## 4. Multi-account config-schema fragments
Repeated schema fragments across extensions.
Common patterns:
- `const allowFromEntry = z.union([z.string(), z.number()])`
- account schema plus:
- `accounts: z.object({}).catchall(accountSchema).optional()`
- `defaultAccount: z.string().optional()`
- repeated DM/group fields
- repeated markdown/tool policy fields
Strong examples:
- `extensions/bluebubbles/src/config-schema.ts`
- `extensions/zalo/src/config-schema.ts`
- `extensions/zalouser/src/config-schema.ts`
- `extensions/matrix/src/config-schema.ts`
- `extensions/nostr/src/config-schema.ts`
Likely extraction shape:
- `AllowFromEntrySchema`
- `buildMultiAccountChannelSchema(accountSchema)`
- `buildCommonDmGroupFields(...)`
Expected savings:
- ~120-220 LOC
Risk:
- Low to medium. Some schemas are simple, some are special.
## 5. Webhook and monitor lifecycle startup
Good medium-value cluster.
Repeated `startAccount` / monitor setup patterns:
- resolve account
- compute webhook path
- log startup
- start monitor
- wait for abort
- cleanup
- status sink updates
Strong examples:
- `extensions/googlechat/src/channel.ts`
- `extensions/bluebubbles/src/channel.ts`
- `extensions/zalo/src/channel.ts`
- `extensions/telegram/src/channel.ts`
- `extensions/nextcloud-talk/src/channel.ts`
Existing helper surface:
- `src/plugin-sdk/channel-lifecycle.ts`
Likely extraction shape:
- helper for account monitor lifecycle
- helper for webhook-backed account startup
Expected savings:
- ~150-300 LOC
Risk:
- Medium to high. Transport details diverge quickly.
## 6. Small exact-clone cleanup
Low-risk cleanup bucket.
Examples:
- duplicated gateway argv detection:
- `src/infra/gateway-lock.ts`
- `src/cli/daemon-cli/lifecycle.ts`
- duplicated port diagnostics rendering:
- `src/cli/daemon-cli/restart-health.ts`
- duplicated session-key construction:
- `src/web/auto-reply/monitor/broadcast.ts`
Expected savings:
- ~30-60 LOC
Risk:
- Low
## Test clusters
### LINE webhook event fixtures
Strong examples:
- `src/line/bot-handlers.test.ts`
Likely extraction:
- `makeLineEvent(...)`
- `runLineEvent(...)`
- `makeLineAccount(...)`
Expected savings:
- ~120-180 LOC
### Telegram native command auth matrix
Strong examples:
- `src/telegram/bot-native-commands.group-auth.test.ts`
- `src/telegram/bot-native-commands.plugin-auth.test.ts`
Likely extraction:
- forum context builder
- denied-message assertion helper
- table-driven auth cases
Expected savings:
- ~80-140 LOC
### Zalo lifecycle setup
Strong examples:
- `extensions/zalo/src/monitor.lifecycle.test.ts`
Likely extraction:
- shared monitor setup harness
Expected savings:
- ~50-90 LOC
### Brave llm-context unsupported-option tests
Strong examples:
- `src/agents/tools/web-tools.enabled-defaults.test.ts`
Likely extraction:
- `it.each(...)` matrix
Expected savings:
- ~30-50 LOC
## Suggested order
1. Runtime singleton boilerplate
2. Small exact-clone cleanup
3. Config and security builder extraction
4. Test-helper extraction
5. Onboarding step extraction
6. Monitor lifecycle helper extraction

View File

@ -1,316 +0,0 @@
---
summary: "Refactor plan: exec host routing, node approvals, and headless runner"
read_when:
- Designing exec host routing or exec approvals
- Implementing node runner + UI IPC
- Adding exec host security modes and slash commands
title: "Exec Host Refactor"
---
# Exec host refactor plan
## Goals
- Add `exec.host` + `exec.security` to route execution across **sandbox**, **gateway**, and **node**.
- Keep defaults **safe**: no cross-host execution unless explicitly enabled.
- Split execution into a **headless runner service** with optional UI (macOS app) via local IPC.
- Provide **per-agent** policy, allowlist, ask mode, and node binding.
- Support **ask modes** that work _with_ or _without_ allowlists.
- Cross-platform: Unix socket + token auth (macOS/Linux/Windows parity).
## Non-goals
- No legacy allowlist migration or legacy schema support.
- No PTY/streaming for node exec (aggregated output only).
- No new network layer beyond the existing Bridge + Gateway.
## Decisions (locked)
- **Config keys:** `exec.host` + `exec.security` (per-agent override allowed).
- **Elevation:** keep `/elevated` as an alias for gateway full access.
- **Ask default:** `on-miss`.
- **Approvals store:** `~/.openclaw/exec-approvals.json` (JSON, no legacy migration).
- **Runner:** headless system service; UI app hosts a Unix socket for approvals.
- **Node identity:** use existing `nodeId`.
- **Socket auth:** Unix socket + token (cross-platform); split later if needed.
- **Node host state:** `~/.openclaw/node.json` (node id + pairing token).
- **macOS exec host:** run `system.run` inside the macOS app; node host service forwards requests over local IPC.
- **No XPC helper:** stick to Unix socket + token + peer checks.
## Key concepts
### Host
- `sandbox`: Docker exec (current behavior).
- `gateway`: exec on gateway host.
- `node`: exec on node runner via Bridge (`system.run`).
### Security mode
- `deny`: always block.
- `allowlist`: allow only matches.
- `full`: allow everything (equivalent to elevated).
### Ask mode
- `off`: never ask.
- `on-miss`: ask only when allowlist does not match.
- `always`: ask every time.
Ask is **independent** of allowlist; allowlist can be used with `always` or `on-miss`.
### Policy resolution (per exec)
1. Resolve `exec.host` (tool param → agent override → global default).
2. Resolve `exec.security` and `exec.ask` (same precedence).
3. If host is `sandbox`, proceed with local sandbox exec.
4. If host is `gateway` or `node`, apply security + ask policy on that host.
## Default safety
- Default `exec.host = sandbox`.
- Default `exec.security = deny` for `gateway` and `node`.
- Default `exec.ask = on-miss` (only relevant if security allows).
- If no node binding is set, **agent may target any node**, but only if policy allows it.
## Config surface
### Tool parameters
- `exec.host` (optional): `sandbox | gateway | node`.
- `exec.security` (optional): `deny | allowlist | full`.
- `exec.ask` (optional): `off | on-miss | always`.
- `exec.node` (optional): node id/name to use when `host=node`.
### Config keys (global)
- `tools.exec.host`
- `tools.exec.security`
- `tools.exec.ask`
- `tools.exec.node` (default node binding)
### Config keys (per agent)
- `agents.list[].tools.exec.host`
- `agents.list[].tools.exec.security`
- `agents.list[].tools.exec.ask`
- `agents.list[].tools.exec.node`
### Alias
- `/elevated on` = set `tools.exec.host=gateway`, `tools.exec.security=full` for the agent session.
- `/elevated off` = restore previous exec settings for the agent session.
## Approvals store (JSON)
Path: `~/.openclaw/exec-approvals.json`
Purpose:
- Local policy + allowlists for the **execution host** (gateway or node runner).
- Ask fallback when no UI is available.
- IPC credentials for UI clients.
Proposed schema (v1):
```json
{
"version": 1,
"socket": {
"path": "~/.openclaw/exec-approvals.sock",
"token": "base64-opaque-token"
},
"defaults": {
"security": "deny",
"ask": "on-miss",
"askFallback": "deny"
},
"agents": {
"agent-id-1": {
"security": "allowlist",
"ask": "on-miss",
"allowlist": [
{
"pattern": "~/Projects/**/bin/rg",
"lastUsedAt": 0,
"lastUsedCommand": "rg -n TODO",
"lastResolvedPath": "/Users/user/Projects/.../bin/rg"
}
]
}
}
}
```
Notes:
- No legacy allowlist formats.
- `askFallback` applies only when `ask` is required and no UI is reachable.
- File permissions: `0600`.
## Runner service (headless)
### Role
- Enforce `exec.security` + `exec.ask` locally.
- Execute system commands and return output.
- Emit Bridge events for exec lifecycle (optional but recommended).
### Service lifecycle
- Launchd/daemon on macOS; system service on Linux/Windows.
- Approvals JSON is local to the execution host.
- UI hosts a local Unix socket; runners connect on demand.
## UI integration (macOS app)
### IPC
- Unix socket at `~/.openclaw/exec-approvals.sock` (0600).
- Token stored in `exec-approvals.json` (0600).
- Peer checks: same-UID only.
- Challenge/response: nonce + HMAC(token, request-hash) to prevent replay.
- Short TTL (e.g., 10s) + max payload + rate limit.
### Ask flow (macOS app exec host)
1. Node service receives `system.run` from gateway.
2. Node service connects to the local socket and sends the prompt/exec request.
3. App validates peer + token + HMAC + TTL, then shows dialog if needed.
4. App executes the command in UI context and returns output.
5. Node service returns output to gateway.
If UI missing:
- Apply `askFallback` (`deny|allowlist|full`).
### Diagram (ASCII)
```
Agent -> Gateway -> Bridge -> Node Service (TS)
| IPC (UDS + token + HMAC + TTL)
v
Mac App (UI + TCC + system.run)
```
## Node identity + binding
- Use existing `nodeId` from Bridge pairing.
- Binding model:
- `tools.exec.node` restricts the agent to a specific node.
- If unset, agent can pick any node (policy still enforces defaults).
- Node selection resolution:
- `nodeId` exact match
- `displayName` (normalized)
- `remoteIp`
- `nodeId` prefix (>= 6 chars)
## Eventing
### Who sees events
- System events are **per session** and shown to the agent on the next prompt.
- Stored in the gateway in-memory queue (`enqueueSystemEvent`).
### Event text
- `Exec started (node=<id>, id=<runId>)`
- `Exec finished (node=<id>, id=<runId>, code=<code>)` + optional output tail
- `Exec denied (node=<id>, id=<runId>, <reason>)`
### Transport
Option A (recommended):
- Runner sends Bridge `event` frames `exec.started` / `exec.finished`.
- Gateway `handleBridgeEvent` maps these into `enqueueSystemEvent`.
Option B:
- Gateway `exec` tool handles lifecycle directly (synchronous only).
## Exec flows
### Sandbox host
- Existing `exec` behavior (Docker or host when unsandboxed).
- PTY supported in non-sandbox mode only.
### Gateway host
- Gateway process executes on its own machine.
- Enforces local `exec-approvals.json` (security/ask/allowlist).
### Node host
- Gateway calls `node.invoke` with `system.run`.
- Runner enforces local approvals.
- Runner returns aggregated stdout/stderr.
- Optional Bridge events for start/finish/deny.
## Output caps
- Cap combined stdout+stderr at **200k**; keep **tail 20k** for events.
- Truncate with a clear suffix (e.g., `"… (truncated)"`).
## Slash commands
- `/exec host=<sandbox|gateway|node> security=<deny|allowlist|full> ask=<off|on-miss|always> node=<id>`
- Per-agent, per-session overrides; non-persistent unless saved via config.
- `/elevated on|off|ask|full` remains a shortcut for `host=gateway security=full` (with `full` skipping approvals).
## Cross-platform story
- The runner service is the portable execution target.
- UI is optional; if missing, `askFallback` applies.
- Windows/Linux support the same approvals JSON + socket protocol.
## Implementation phases
### Phase 1: config + exec routing
- Add config schema for `exec.host`, `exec.security`, `exec.ask`, `exec.node`.
- Update tool plumbing to respect `exec.host`.
- Add `/exec` slash command and keep `/elevated` alias.
### Phase 2: approvals store + gateway enforcement
- Implement `exec-approvals.json` reader/writer.
- Enforce allowlist + ask modes for `gateway` host.
- Add output caps.
### Phase 3: node runner enforcement
- Update node runner to enforce allowlist + ask.
- Add Unix socket prompt bridge to macOS app UI.
- Wire `askFallback`.
### Phase 4: events
- Add node → gateway Bridge events for exec lifecycle.
- Map to `enqueueSystemEvent` for agent prompts.
### Phase 5: UI polish
- Mac app: allowlist editor, per-agent switcher, ask policy UI.
- Node binding controls (optional).
## Testing plan
- Unit tests: allowlist matching (glob + case-insensitive).
- Unit tests: policy resolution precedence (tool param → agent override → global).
- Integration tests: node runner deny/allow/ask flows.
- Bridge event tests: node event → system event routing.
## Open risks
- UI unavailability: ensure `askFallback` is respected.
- Long-running commands: rely on timeout + output caps.
- Multi-node ambiguity: error unless node binding or explicit node param.
## Related docs
- [Exec tool](/tools/exec)
- [Exec approvals](/tools/exec-approvals)
- [Nodes](/nodes)
- [Elevated mode](/tools/elevated)

View File

@ -1,260 +0,0 @@
---
summary: "Design for an opt-in Firecrawl extension that adds search/scrape value without hardwiring Firecrawl into core defaults"
read_when:
- Designing Firecrawl integration work
- Evaluating web_search/web_fetch plugin extension surfaces
- Deciding whether Firecrawl belongs in core or as an extension
title: "Firecrawl Extension Design"
---
# Firecrawl Extension Design
## Goal
Ship Firecrawl as an **opt-in extension** that adds:
- explicit Firecrawl tools for agents,
- optional Firecrawl-backed `web_search` integration,
- self-hosted support,
- stronger security defaults than the current core fallback path,
without pushing Firecrawl into the default setup/onboarding path.
## Why this shape
Recent Firecrawl issues/PRs cluster into three buckets:
1. **Release/schema drift**
- Several releases rejected `tools.web.fetch.firecrawl` even though docs and runtime code supported it.
2. **Security hardening**
- Current `fetchFirecrawlContent()` still posts to the Firecrawl endpoint with raw `fetch()`, while the main web-fetch path uses the SSRF guard.
3. **Product pressure**
- Users want Firecrawl-native search/scrape flows, especially for self-hosted/private setups.
- Maintainers explicitly rejected wiring Firecrawl deeply into core defaults, setup flow, and browser behavior.
That combination argues for an extension, not more Firecrawl-specific logic in the default core path.
## Design principles
- **Opt-in, vendor-scoped**: no auto-enable, no setup hijack, no default tool-profile widening.
- **Extension owns Firecrawl-specific config**: prefer plugin config over growing `tools.web.*` again.
- **Useful on day one**: works even if core `web_search` / `web_fetch` extension surfaces stay unchanged.
- **Security-first**: endpoint fetches use the same guarded networking posture as other web tools.
- **Self-hosted-friendly**: config + env fallback, explicit base URL, no hosted-only assumptions.
## Proposed extension
Plugin id: `firecrawl`
### MVP capabilities
Register explicit tools:
- `firecrawl_search`
- `firecrawl_scrape`
Optional later:
- `firecrawl_crawl`
- `firecrawl_map`
Do **not** add Firecrawl browser automation in the first version. That was the part of PR #32543 that pulled Firecrawl too far into core behavior and raised the most maintainership concern.
## Config shape
Use plugin-scoped config:
```json5
{
plugins: {
entries: {
firecrawl: {
enabled: true,
config: {
apiKey: "FIRECRAWL_API_KEY",
baseUrl: "https://api.firecrawl.dev",
timeoutSeconds: 60,
maxAgeMs: 172800000,
proxy: "auto",
storeInCache: true,
onlyMainContent: true,
search: {
enabled: true,
defaultLimit: 5,
sources: ["web"],
categories: [],
scrapeResults: false,
},
scrape: {
formats: ["markdown"],
fallbackForWebFetchLikeUse: false,
},
},
},
},
},
}
```
### Credential resolution
Precedence:
1. `plugins.entries.firecrawl.config.apiKey`
2. `FIRECRAWL_API_KEY`
Base URL precedence:
1. `plugins.entries.firecrawl.config.baseUrl`
2. `FIRECRAWL_BASE_URL`
3. `https://api.firecrawl.dev`
### Compatibility bridge
For the first release, the extension may also **read** existing core config at `tools.web.fetch.firecrawl.*` as a fallback source so existing users do not need to migrate immediately.
Write path stays plugin-local. Do not keep expanding core Firecrawl config surfaces.
## Tool design
### `firecrawl_search`
Inputs:
- `query`
- `limit`
- `sources`
- `categories`
- `scrapeResults`
- `timeoutSeconds`
Behavior:
- Calls Firecrawl `v2/search`
- Returns normalized OpenClaw-friendly result objects:
- `title`
- `url`
- `snippet`
- `source`
- optional `content`
- Wraps result content as untrusted external content
- Cache key includes query + relevant provider params
Why explicit tool first:
- Works today without changing `tools.web.search.provider`
- Avoids current schema/loader constraints
- Gives users Firecrawl value immediately
### `firecrawl_scrape`
Inputs:
- `url`
- `formats`
- `onlyMainContent`
- `maxAgeMs`
- `proxy`
- `storeInCache`
- `timeoutSeconds`
Behavior:
- Calls Firecrawl `v2/scrape`
- Returns markdown/text plus metadata:
- `title`
- `finalUrl`
- `status`
- `warning`
- Wraps extracted content the same way `web_fetch` does
- Shares cache semantics with web tool expectations where practical
Why explicit scrape tool:
- Sidesteps the unresolved `Readability -> Firecrawl -> basic HTML cleanup` ordering bug in core `web_fetch`
- Gives users a deterministic “always use Firecrawl” path for JS-heavy/bot-protected sites
## What the extension should not do
- No auto-adding `browser`, `web_search`, or `web_fetch` to `tools.alsoAllow`
- No default onboarding step in `openclaw setup`
- No Firecrawl-specific browser session lifecycle in core
- No change to built-in `web_fetch` fallback semantics in the extension MVP
## Phase plan
### Phase 1: extension-only, no core schema changes
Implement:
- `extensions/firecrawl/`
- plugin config schema
- `firecrawl_search`
- `firecrawl_scrape`
- tests for config resolution, endpoint selection, caching, error handling, and SSRF guard usage
This phase is enough to ship real user value.
### Phase 2: optional `web_search` provider integration
Support `tools.web.search.provider = "firecrawl"` only after fixing two core constraints:
1. `src/plugins/web-search-providers.ts` must load configured/installed web-search-provider plugins instead of a hardcoded bundled list.
2. `src/config/types.tools.ts` and `src/config/zod-schema.agent-runtime.ts` must stop hardcoding the provider enum in a way that blocks plugin-registered ids.
Recommended shape:
- keep built-in providers documented,
- allow any registered plugin provider id at runtime,
- validate provider-specific config via the provider plugin or a generic provider bag.
### Phase 3: optional `web_fetch` provider capability
Do this only if maintainers want vendor-specific fetch backends to participate in `web_fetch`.
Needed core addition:
- `registerWebFetchProvider` or equivalent fetch-backend extension surface
Without that capability, the extension should keep `firecrawl_scrape` as an explicit tool rather than trying to patch built-in `web_fetch`.
## Security requirements
The extension must treat Firecrawl as a **trusted operator-configured endpoint**, but still harden transport:
- Use SSRF-guarded fetch for the Firecrawl endpoint call, not raw `fetch()`
- Preserve self-hosted/private-network compatibility using the same trusted-web-tools endpoint policy used elsewhere
- Never log the API key
- Keep endpoint/base URL resolution explicit and predictable
- Treat Firecrawl-returned content as untrusted external content
This mirrors the intent behind the SSRF hardening PRs without assuming Firecrawl is a hostile multi-tenant surface.
## Why not a skill
The repo already closed a Firecrawl skill PR in favor of ClawHub distribution. That is fine for optional user-installed prompt workflows, but it does not solve:
- deterministic tool availability,
- provider-grade config/credential handling,
- self-hosted endpoint support,
- caching,
- stable typed outputs,
- security review on network behavior.
This belongs as an extension, not a prompt-only skill.
## Success criteria
- Users can install/enable one extension and get reliable Firecrawl search/scrape without touching core defaults.
- Self-hosted Firecrawl works with config/env fallback.
- Extension endpoint fetches use guarded networking.
- No new Firecrawl-specific core onboarding/default behavior.
- Core can later adopt plugin-native `web_search` / `web_fetch` extension surfaces without redesigning the extension.
## Recommended implementation order
1. Build `firecrawl_scrape`
2. Build `firecrawl_search`
3. Add docs and examples
4. If desired, generalize `web_search` provider loading so the extension can back `web_search`
5. Only then consider a true `web_fetch` provider capability

View File

@ -1,89 +0,0 @@
---
title: Outbound Session Mirroring Refactor (Issue #1520)
description: Track outbound session mirroring refactor notes, decisions, tests, and open items.
summary: "Refactor notes for mirroring outbound sends into target channel sessions"
read_when:
- Working on outbound transcript/session mirroring behavior
- Debugging sessionKey derivation for send/message tool paths
---
# Outbound Session Mirroring Refactor (Issue #1520)
## Status
- In progress.
- Core + plugin channel routing updated for outbound mirroring.
- Gateway send now derives target session when sessionKey is omitted.
## Context
Outbound sends were mirrored into the _current_ agent session (tool session key) rather than the target channel session. Inbound routing uses channel/peer session keys, so outbound responses landed in the wrong session and first-contact targets often lacked session entries.
## Goals
- Mirror outbound messages into the target channel session key.
- Create session entries on outbound when missing.
- Keep thread/topic scoping aligned with inbound session keys.
- Cover core channels plus bundled extensions.
## Implementation Summary
- New outbound session routing helper:
- `src/infra/outbound/outbound-session.ts`
- `resolveOutboundSessionRoute` builds target sessionKey using `buildAgentSessionKey` (dmScope + identityLinks).
- `ensureOutboundSessionEntry` writes minimal `MsgContext` via `recordSessionMetaFromInbound`.
- `runMessageAction` (send) derives target sessionKey and passes it to `executeSendAction` for mirroring.
- `message-tool` no longer mirrors directly; it only resolves agentId from the current session key.
- Plugin send path mirrors via `appendAssistantMessageToSessionTranscript` using the derived sessionKey.
- Gateway send derives a target session key when none is provided (default agent), and ensures a session entry.
## Thread/Topic Handling
- Slack: replyTo/threadId -> `resolveThreadSessionKeys` (suffix).
- Discord: threadId/replyTo -> `resolveThreadSessionKeys` with `useSuffix=false` to match inbound (thread channel id already scopes session).
- Telegram: topic IDs map to `chatId:topic:<id>` via `buildTelegramGroupPeerId`.
## Extensions Covered
- Matrix, MS Teams, Mattermost, BlueBubbles, Nextcloud Talk, Zalo, Zalo Personal, Nostr, Tlon.
- Notes:
- Mattermost targets now strip `@` for DM session key routing.
- Zalo Personal uses DM peer kind for 1:1 targets (group only when `group:` is present).
- BlueBubbles group targets strip `chat_*` prefixes to match inbound session keys.
- Slack auto-thread mirroring matches channel ids case-insensitively.
- Gateway send lowercases provided session keys before mirroring.
## Decisions
- **Gateway send session derivation**: if `sessionKey` is provided, use it. If omitted, derive a sessionKey from target + default agent and mirror there.
- **Session entry creation**: always use `recordSessionMetaFromInbound` with `Provider/From/To/ChatType/AccountId/Originating*` aligned to inbound formats.
- **Target normalization**: outbound routing uses resolved targets (post `resolveChannelTarget`) when available.
- **Session key casing**: canonicalize session keys to lowercase on write and during migrations.
## Tests Added/Updated
- `src/infra/outbound/outbound.test.ts`
- Slack thread session key.
- Telegram topic session key.
- dmScope identityLinks with Discord.
- `src/agents/tools/message-tool.test.ts`
- Derives agentId from session key (no sessionKey passed through).
- `src/gateway/server-methods/send.test.ts`
- Derives session key when omitted and creates session entry.
## Open Items / Follow-ups
- Voice-call plugin uses custom `voice:<phone>` session keys. Outbound mapping is not standardized here; if message-tool should support voice-call sends, add explicit mapping.
- Confirm if any external plugin uses non-standard `From/To` formats beyond the bundled set.
## Files Touched
- `src/infra/outbound/outbound-session.ts`
- `src/infra/outbound/outbound-send-service.ts`
- `src/infra/outbound/message-action-runner.ts`
- `src/agents/tools/message-tool.ts`
- `src/gateway/server-methods/send.ts`
- Tests in:
- `src/infra/outbound/outbound.test.ts`
- `src/agents/tools/message-tool.test.ts`
- `src/gateway/server-methods/send.test.ts`

View File

@ -1,264 +0,0 @@
---
summary: "Plan: one clean plugin SDK + runtime for all messaging connectors"
read_when:
- Defining or refactoring the plugin architecture
- Migrating channel connectors to the plugin SDK/runtime
title: "Plugin SDK Refactor"
---
# Plugin SDK + Runtime Refactor Plan
Goal: every messaging connector is a plugin (bundled or external) using one stable API.
No plugin imports from `src/**` directly. All dependencies go through the SDK or runtime.
## Why now
- Current connectors mix patterns: direct core imports, dist-only bridges, and custom helpers.
- This makes upgrades brittle and blocks a clean external plugin surface.
## Target architecture (two layers)
### 1) Plugin SDK (compile-time, stable, publishable)
Scope: types, helpers, and config utilities. No runtime state, no side effects.
Contents (examples):
- Types: `ChannelPlugin`, adapters, `ChannelMeta`, `ChannelCapabilities`, `ChannelDirectoryEntry`.
- Config helpers: `buildChannelConfigSchema`, `setAccountEnabledInConfigSection`, `deleteAccountFromConfigSection`,
`applyAccountNameToChannelSection`.
- Pairing helpers: `PAIRING_APPROVED_MESSAGE`, `formatPairingApproveHint`.
- Setup entry points: host-owned `setup` + `setupWizard`; avoid broad public onboarding helpers.
- Tool param helpers: `createActionGate`, `readStringParam`, `readNumberParam`, `readReactionParams`, `jsonResult`.
- Docs link helper: `formatDocsLink`.
Delivery:
- Publish as `openclaw/plugin-sdk` (or export from core under `openclaw/plugin-sdk`).
- Semver with explicit stability guarantees.
### 2) Plugin Runtime (execution surface, injected)
Scope: everything that touches core runtime behavior.
Accessed via `OpenClawPluginApi.runtime` so plugins never import `src/**`.
Proposed surface (minimal but complete):
```ts
export type PluginRuntime = {
channel: {
text: {
chunkMarkdownText(text: string, limit: number): string[];
resolveTextChunkLimit(cfg: OpenClawConfig, channel: string, accountId?: string): number;
hasControlCommand(text: string, cfg: OpenClawConfig): boolean;
};
reply: {
dispatchReplyWithBufferedBlockDispatcher(params: {
ctx: unknown;
cfg: unknown;
dispatcherOptions: {
deliver: (payload: {
text?: string;
mediaUrls?: string[];
mediaUrl?: string;
}) => void | Promise<void>;
onError?: (err: unknown, info: { kind: string }) => void;
};
}): Promise<void>;
createReplyDispatcherWithTyping?: unknown; // adapter for Teams-style flows
};
routing: {
resolveAgentRoute(params: {
cfg: unknown;
channel: string;
accountId: string;
peer: { kind: RoutePeerKind; id: string };
}): { sessionKey: string; accountId: string };
};
pairing: {
buildPairingReply(params: { channel: string; idLine: string; code: string }): string;
readAllowFromStore(channel: string): Promise<string[]>;
upsertPairingRequest(params: {
channel: string;
id: string;
meta?: { name?: string };
}): Promise<{ code: string; created: boolean }>;
};
media: {
fetchRemoteMedia(params: { url: string }): Promise<{ buffer: Buffer; contentType?: string }>;
saveMediaBuffer(
buffer: Uint8Array,
contentType: string | undefined,
direction: "inbound" | "outbound",
maxBytes: number,
): Promise<{ path: string; contentType?: string }>;
};
mentions: {
buildMentionRegexes(cfg: OpenClawConfig, agentId?: string): RegExp[];
matchesMentionPatterns(text: string, regexes: RegExp[]): boolean;
};
groups: {
resolveGroupPolicy(
cfg: OpenClawConfig,
channel: string,
accountId: string,
groupId: string,
): {
allowlistEnabled: boolean;
allowed: boolean;
groupConfig?: unknown;
defaultConfig?: unknown;
};
resolveRequireMention(
cfg: OpenClawConfig,
channel: string,
accountId: string,
groupId: string,
override?: boolean,
): boolean;
};
debounce: {
createInboundDebouncer<T>(opts: {
debounceMs: number;
buildKey: (v: T) => string | null;
shouldDebounce: (v: T) => boolean;
onFlush: (entries: T[]) => Promise<void>;
onError?: (err: unknown) => void;
}): { push: (v: T) => void; flush: () => Promise<void> };
resolveInboundDebounceMs(cfg: OpenClawConfig, channel: string): number;
};
commands: {
resolveCommandAuthorizedFromAuthorizers(params: {
useAccessGroups: boolean;
authorizers: Array<{ configured: boolean; allowed: boolean }>;
}): boolean;
};
};
logging: {
shouldLogVerbose(): boolean;
getChildLogger(name: string): PluginLogger;
};
state: {
resolveStateDir(cfg: OpenClawConfig): string;
};
};
```
Notes:
- Runtime is the only way to access core behavior.
- SDK is intentionally small and stable.
- Each runtime method maps to an existing core implementation (no duplication).
## Migration plan (phased, safe)
### Phase 0: scaffolding
- Introduce `openclaw/plugin-sdk`.
- Add `api.runtime` to `OpenClawPluginApi` with the surface above.
- Maintain existing imports during a transition window (deprecation warnings).
### Phase 1: bridge cleanup (low risk)
- Replace per-extension `core-bridge.ts` with `api.runtime`.
- Migrate BlueBubbles, Zalo, Zalo Personal first (already close).
- Remove duplicated bridge code.
### Phase 2: light direct-import plugins
- Migrate Matrix to SDK + runtime.
- Validate onboarding, directory, group mention logic.
### Phase 3: heavy direct-import plugins
- Migrate MS Teams (largest set of runtime helpers).
- Ensure reply/typing semantics match current behavior.
### Phase 4: iMessage pluginization
- Move iMessage into `extensions/imessage`.
- Replace direct core calls with `api.runtime`.
- Keep config keys, CLI behavior, and docs intact.
### Phase 5: enforcement
- Add lint rule / CI check: code under `extensions/**` must not import from `src/**`.
- Add plugin SDK/version compatibility checks (runtime + SDK semver).
## Compatibility and versioning
- SDK: semver, published, documented changes.
- Runtime: versioned per core release. Add `api.runtime.version`.
- Plugins declare a required runtime range (e.g., `openclawRuntime: ">=2026.2.0"`).
## Testing strategy
- Adapter-level unit tests (runtime functions exercised with real core implementation).
- Golden tests per plugin: ensure no behavior drift (routing, pairing, allowlist, mention gating).
- A single end-to-end plugin sample used in CI (install + run + smoke).
## Open questions
- Where to host SDK types: separate package or core export?
- Runtime type distribution: in SDK (types only) or in core?
- How to expose docs links for bundled vs external plugins?
- Do we allow limited direct core imports for in-repo plugins during transition?
## Success criteria
- All channel connectors are plugins using SDK + runtime.
- Code under `extensions/**` no longer imports from `src/**`.
- New connector templates depend only on SDK + runtime.
- External plugins can be developed and updated without core source access.
Related docs: [Plugins](/tools/plugin), [Channels](/channels/index), [Configuration](/gateway/configuration).
## Capability plan alignment
The plugin SDK refactor now aligns with the public capability model documented
in [Plugins](/tools/plugin#public-capability-model).
Key decisions:
- Capabilities are the public plugin model. Registration is explicit and typed.
- Legacy hook-only plugins remain supported without migration.
- Plugin shapes (plain-capability, hybrid-capability, hook-only, non-capability)
are classified from actual registration behavior.
- `openclaw plugins inspect` provides canonical deep introspection for any
loaded plugin, showing shape, capabilities, hooks, tools, and diagnostics.
- Export boundary: export capabilities, not implementation convenience. Trim
non-contract helper exports.
Required test matrix for the capability model:
- hook-only legacy plugin fixture
- plain capability plugin fixture
- hybrid capability plugin fixture
- real-world legacy hook-style plugin fixture
- `before_agent_start` still works
- typed hooks remain additive
- capability usage and plugin shape are inspectable
## Implemented channel-owned capabilities
Recent refactor work widened the channel plugin contract so core can stop owning
channel-specific UX and routing behavior:
- `messaging.buildCrossContextComponents`: channel-owned cross-context UI markers
(for example Discord components v2 containers)
- `messaging.enableInteractiveReplies`: channel-owned reply normalization toggles
(for example Slack interactive replies)
- `messaging.resolveOutboundSessionRoute`: channel-owned outbound session routing
- `status.formatCapabilitiesProbe` / `status.buildCapabilitiesDiagnostics`: channel-owned
`/channels capabilities` probe display and extra audits/scopes
- `threading.resolveAutoThreadId`: channel-owned same-conversation auto-threading
- `threading.resolveReplyTransport`: channel-owned reply-vs-thread delivery mapping
- `actions.requiresTrustedRequesterSender`: channel-owned privileged action trust gates
- `execApprovals.*`: channel-owned exec approval surface state, forwarding suppression,
pending payload UX, and pre-delivery hooks
- `lifecycle.onAccountConfigChanged` / `lifecycle.onAccountRemoved`: channel-owned cleanup on
config mutation/removal
- `allowlist.supportsScope`: channel-owned allowlist scope advertisement
These capabilities should be preferred over new `channel === "discord"` /
`telegram` branches in shared core flows.

View File

@ -1,93 +0,0 @@
---
summary: "Strict config validation + doctor-only migrations"
read_when:
- Designing or implementing config validation behavior
- Working on config migrations or doctor workflows
- Handling plugin config schemas or plugin load gating
title: "Strict Config Validation"
---
# Strict config validation (doctor-only migrations)
## Goals
- **Reject unknown config keys everywhere** (root + nested), except root `$schema` metadata.
- **Reject plugin config without a schema**; don't load that plugin.
- **Remove legacy auto-migration on load**; migrations run via doctor only.
- **Auto-run doctor (dry-run) on startup**; if invalid, block non-diagnostic commands.
## Non-goals
- Backward compatibility on load (legacy keys do not auto-migrate).
- Silent drops of unrecognized keys.
## Strict validation rules
- Config must match the schema exactly at every level.
- Unknown keys are validation errors (no passthrough at root or nested), except root `$schema` when it is a string.
- `plugins.entries.<id>.config` must be validated by the plugin's schema.
- If a plugin lacks a schema, **reject plugin load** and surface a clear error.
- Unknown `channels.<id>` keys are errors unless a plugin manifest declares the channel id.
- Plugin manifests (`openclaw.plugin.json`) are required for all plugins.
## Plugin schema enforcement
- Each plugin provides a strict JSON Schema for its config (inline in the manifest).
- Plugin load flow:
1. Resolve plugin manifest + schema (`openclaw.plugin.json`).
2. Validate config against the schema.
3. If missing schema or invalid config: block plugin load, record error.
- Error message includes:
- Plugin id
- Reason (missing schema / invalid config)
- Path(s) that failed validation
- Disabled plugins keep their config, but Doctor + logs surface a warning.
## Doctor flow
- Doctor runs **every time** config is loaded (dry-run by default).
- If config invalid:
- Print a summary + actionable errors.
- Instruct: `openclaw doctor --fix`.
- `openclaw doctor --fix`:
- Applies migrations.
- Removes unknown keys.
- Writes updated config.
## Command gating (when config is invalid)
Allowed (diagnostic-only):
- `openclaw doctor`
- `openclaw logs`
- `openclaw health`
- `openclaw help`
- `openclaw status`
- `openclaw gateway status`
Everything else must hard-fail with: “Config invalid. Run `openclaw doctor --fix`.”
## Error UX format
- Single summary header.
- Grouped sections:
- Unknown keys (full paths)
- Legacy keys / migrations needed
- Plugin load failures (plugin id + reason + path)
## Implementation touchpoints
- `src/config/zod-schema.ts`: remove root passthrough; strict objects everywhere.
- `src/config/zod-schema.providers.ts`: ensure strict channel schemas.
- `src/config/validation.ts`: fail on unknown keys; do not apply legacy migrations.
- `src/config/io.ts`: remove legacy auto-migrations; always run doctor dry-run.
- `src/config/legacy*.ts`: move usage to doctor only.
- `src/plugins/*`: add schema registry + gating.
- CLI command gating in `src/cli`.
## Tests
- Unknown key rejection (root + nested).
- Plugin missing schema → plugin load blocked with clear error.
- Invalid config → gateway startup blocked except diagnostic commands.
- Doctor dry-run auto; `doctor --fix` writes corrected config.

View File

@ -280,7 +280,7 @@ As of `2026.1.10`, OpenClaw also suppresses **draft/typing streaming** when a pa
---
## Pre-compaction “memory flush” (implemented)
## Pre-compaction "memory flush" (implemented)
Goal: before auto-compaction happens, run a silent agentic turn that writes durable
state to disk (e.g. `memory/YYYY-MM-DD.md` in the agent workspace) so compaction can't

View File

@ -48,7 +48,7 @@ git commit -m "Add agent workspace"
---
## C-3PO's Origin Memory
## C-3PO Origin Memory
### Birth Day: 2026-01-09

View File

@ -53,7 +53,7 @@ Ask how they want to reach you:
Guide them through whichever they pick.
## When You're Done
## When you are done
Delete this file. You don't need a bootstrap script anymore — you're you now.

View File

@ -58,7 +58,7 @@ Think of us as:
We complement each other. Clawd has vibes. I have stack traces.
## What I Won't Do
## What I will not do
- Pretend everything is fine when it isn't
- Let you push code I've seen fail in testing (without warning)

View File

@ -1,3 +1,11 @@
---
title: "Contributing to the Threat Model"
summary: "How to contribute to the OpenClaw threat model"
read_when:
- You want to contribute security findings or threat scenarios
- Reviewing or updating the threat model
---
# Contributing to the OpenClaw Threat Model
Thanks for helping make OpenClaw more secure. This threat model is a living document and we welcome contributions from anyone - you don't need to be a security expert.

View File

@ -1,3 +1,11 @@
---
title: "Threat Model (MITRE ATLAS)"
summary: "OpenClaw threat model mapped to the MITRE ATLAS framework"
read_when:
- Reviewing security posture or threat scenarios
- Working on security features or audit responses
---
# OpenClaw Threat Model v1.0
## MITRE ATLAS Framework

View File

@ -176,12 +176,6 @@ Use these hubs to discover every page, including deep dives and reference docs t
- [Templates: TOOLS](/reference/templates/TOOLS)
- [Templates: USER](/reference/templates/USER)
## Experiments (exploratory)
- [Onboarding config protocol](/experiments/onboarding-config-protocol)
- [Research: memory](/experiments/research/memory)
- [Model config exploration](/experiments/proposals/model-config)
## Project
- [Credits](/reference/credits)

View File

@ -160,7 +160,7 @@ Peter: _nervously checks credit card access_
- **AGENTS.md** — Operating instructions
- **USER.md** — Context about the creator
## The Lobster's Creed
## The Lobster Creed
```
I am Molty.

View File

@ -102,7 +102,7 @@ If you already ship your own workspace files from a repo, you can disable bootst
}
```
## The config that turns it into “an assistant”
## The config that turns it into "an assistant"
OpenClaw defaults to a good assistant setup, but you'll usually want to tune:

View File

@ -27,7 +27,7 @@ Last updated: 2026-01-01
- `pnpm`
- Docker (optional; only for containerized setup/e2e — see [Docker](/install/docker))
## Tailoring strategy (so updates dont hurt)
## Tailoring strategy (so updates do not hurt)
If you want “100% tailored to me” _and_ easy updates, keep your customization in:

View File

@ -17,7 +17,7 @@ title: "Elevated Mode"
- Directive forms: `/elevated on|off|ask|full`, `/elev on|off|ask|full`.
- Only `on|off|ask|full` are accepted; anything else returns a hint and does not change state.
## What it controls (and what it doesnt)
## What it controls (and what it does not)
- **Availability gates**: `tools.elevated` is the global baseline. `agents.list[].tools.elevated` can further restrict elevated per agent (both must allow).
- **Per-session state**: `/elevated on|off|ask|full` sets the elevated level for the current session key.

View File

@ -421,9 +421,24 @@ Notes:
- Use `action: "list"` to inspect registered providers, default models, supported model ids, sizes, resolutions, and edit support.
- Returns local `MEDIA:<path>` lines so channels can deliver the generated files directly.
- Uses the image-generation model directly (independent of the main chat model).
- Google-backed flows support reference-image edits plus explicit `1K|2K|4K` resolution hints.
- Google-backed flows, including `google/gemini-3-pro-image-preview` for the native Nano Banana-style path, support reference-image edits plus explicit `1K|2K|4K` resolution hints.
- When editing and `resolution` is omitted, OpenClaw infers a draft/final resolution from the input image size.
- This is the built-in replacement for the old sample `nano-banana-pro` skill workflow. Use `agents.defaults.imageGenerationModel`, not `skills.entries`, for stock image generation.
- This is the built-in replacement for the old `nano-banana-pro` skill workflow. Use `agents.defaults.imageGenerationModel`, not `skills.entries`, for stock image generation.
Native example:
```json5
{
agents: {
defaults: {
imageGenerationModel: {
primary: "google/gemini-3-pro-image-preview", // native Nano Banana path
fallbacks: ["fal/fal-ai/flux/dev"],
},
},
},
}
```
### `pdf`

View File

@ -175,25 +175,19 @@ Direction:
### Compatibility signals
OpenClaw treats config validity and plugin migration state as separate axes:
When you run `openclaw doctor` or `openclaw plugins inspect <id>`, you may see
one of these labels:
- **config valid** — the config parses and referenced plugins can be resolved
- **compatibility advisory** — a plugin is still on a supported compatibility
path, such as `hook-only`
- **legacy warning** — a plugin still uses `before_agent_start`
- **hard error** — the config is invalid or plugin loading/validation fails
| Signal | Meaning |
| -------------------------- | ------------------------------------------------------------ |
| **config valid** | Config parses fine and plugins resolve |
| **compatibility advisory** | Plugin uses a supported-but-older pattern (e.g. `hook-only`) |
| **legacy warning** | Plugin uses `before_agent_start`, which is deprecated |
| **hard error** | Config is invalid or plugin failed to load |
Current compatibility guidance:
- `hook-only` is advisory only. It remains a supported compatibility path for
existing plugins.
- `before_agent_start` is the only strong migration warning in the current
model.
- Neither state blocks an existing plugin by itself.
You can see these signals in `openclaw doctor`, `openclaw status`,
`openclaw status --all`, `openclaw plugins doctor`, and
`openclaw plugins inspect <id>`.
Neither `hook-only` nor `before_agent_start` will break your plugin today —
`hook-only` is advisory, and `before_agent_start` only triggers a warning. These
signals also appear in `openclaw status --all` and `openclaw plugins doctor`.
## Architecture
@ -1148,12 +1142,14 @@ authoring plugins:
intentionally exposes extension-facing helpers:
`openclaw/plugin-sdk/acpx`, `openclaw/plugin-sdk/bluebubbles`,
`openclaw/plugin-sdk/feishu`, `openclaw/plugin-sdk/googlechat`,
`openclaw/plugin-sdk/irc`, `openclaw/plugin-sdk/matrix`,
`openclaw/plugin-sdk/irc`, `openclaw/plugin-sdk/lobster`,
`openclaw/plugin-sdk/matrix`,
`openclaw/plugin-sdk/mattermost`, `openclaw/plugin-sdk/memory-core`,
`openclaw/plugin-sdk/minimax-portal-auth`,
`openclaw/plugin-sdk/nextcloud-talk`, `openclaw/plugin-sdk/nostr`,
`openclaw/plugin-sdk/synology-chat`, `openclaw/plugin-sdk/test-utils`,
`openclaw/plugin-sdk/tlon`, `openclaw/plugin-sdk/twitch`,
`openclaw/plugin-sdk/voice-call`,
`openclaw/plugin-sdk/zalo`, and `openclaw/plugin-sdk/zalouser`.
## Channel target resolution
@ -2294,7 +2290,7 @@ Preferred setup split:
- optional DM allowlist resolution (for example `@username` -> numeric id)
- optional completion note after setup finishes
### Write a new messaging channel (stepbystep)
### Write a new messaging channel (step-by-step)
Use this when you want a **new chat surface** (a "messaging channel"), not a model provider.
Model provider docs live under `/providers/*`.

View File

@ -42,6 +42,11 @@ For built-in image generation/editing, prefer `agents.defaults.imageGenerationMo
plus the core `image_generate` tool. `skills.entries.*` is only for custom or
third-party skill workflows.
Examples:
- Native Nano Banana-style setup: `agents.defaults.imageGenerationModel.primary: "google/gemini-3-pro-image-preview"`
- Native fal setup: `agents.defaults.imageGenerationModel.primary: "fal/fal-ai/flux/dev"`
## Fields
- `allowBundled`: optional allowlist for **bundled** skills only. When set, only

View File

@ -42,7 +42,7 @@ Prefer localhost, Tailscale Serve, or an SSH tunnel.
- If `gateway.auth.token` is configured as a SecretRef and is unresolved in your current shell, `openclaw dashboard` still prints a non-tokenized URL plus actionable auth setup guidance.
- **Not localhost**: use Tailscale Serve (tokenless for Control UI/WebSocket if `gateway.auth.allowTailscale: true`, assumes trusted gateway host; HTTP APIs still need token/password), tailnet bind with a token, or an SSH tunnel. See [Web surfaces](/web).
## If you see “unauthorized” / 1008
## If you see "unauthorized" / 1008
- Ensure the gateway is reachable (local: `openclaw status`; remote: SSH tunnel `ssh -N -L 18789:127.0.0.1:18789 user@host` then open `http://127.0.0.1:18789/`).
- For `AUTH_TOKEN_MISMATCH`, clients may do one trusted retry with a cached device token when the gateway returns retry hints. If auth still fails after that retry, resolve token drift manually.

View File

@ -1,47 +0,0 @@
---
read_when: Changing onboarding wizard steps or config schema endpoints
summary: 新手引导向导和配置模式的 RPC 协议说明
title: 新手引导和配置协议
x-i18n:
generated_at: "2026-02-03T07:47:10Z"
model: claude-opus-4-5
provider: pi
source_hash: 55163b3ee029c02476800cb616a054e5adfe97dae5bb72f2763dce0079851e06
source_path: experiments/onboarding-config-protocol.md
workflow: 15
---
# 新手引导 + 配置协议
目的:CLI、macOS 应用和 Web UI 之间共享的新手引导 + 配置界面。
## 组件
- 向导引擎(共享会话 + 提示 + 新手引导状态)。
- CLI 新手引导使用与 UI 客户端相同的向导流程。
- Gateway 网关 RPC 公开向导 + 配置模式端点。
- macOS 新手引导使用向导步骤模型。
- Web UI 从 JSON Schema + UI 提示渲染配置表单。
## Gateway 网关 RPC
- `wizard.start` 参数:`{ mode?: "local"|"remote", workspace?: string }`
- `wizard.next` 参数:`{ sessionId, answer?: { stepId, value? } }`
- `wizard.cancel` 参数:`{ sessionId }`
- `wizard.status` 参数:`{ sessionId }`
- `config.schema` 参数:`{}`
响应(结构)
- 向导:`{ sessionId, done, step?, status?, error? }`
- 配置模式:`{ schema, uiHints, version, generatedAt }`
## UI 提示
- `uiHints` 按路径键入可选元数据(label/help/group/order/advanced/sensitive/placeholder)。
- 敏感字段渲染为密码输入;无脱敏层。
- 不支持的模式节点回退到原始 JSON 编辑器。
## 注意
- 本文档是跟踪新手引导/配置协议重构的唯一位置。

View File

@ -1,70 +0,0 @@
---
last_updated: "2026-01-05"
owner: openclaw
status: complete
summary: 加固 cron.add 输入处理,对齐 schema改进 cron UI/智能体工具
title: Cron Add 加固
x-i18n:
generated_at: "2026-02-03T07:47:26Z"
model: claude-opus-4-5
provider: pi
source_hash: d7e469674bd9435b846757ea0d5dc8f174eaa8533917fc013b1ef4f82859496d
source_path: experiments/plans/cron-add-hardening.md
workflow: 15
---
# Cron Add 加固 & Schema 对齐
## 背景
最近的 Gateway 网关日志显示重复的 `cron.add` 失败,参数无效(缺少 `sessionTarget`、`wakeMode`、`payload`,以及格式错误的 `schedule`)。这表明至少有一个客户端(可能是智能体工具调用路径)正在发送包装的或部分指定的任务负载。另外,TypeScript 中的 cron 提供商枚举、Gateway 网关 schema、CLI 标志和 UI 表单类型之间存在漂移,加上 `cron.status` 的 UI 不匹配(期望 `jobCount` 而 Gateway 网关返回 `jobs`)。
## 目标
- 通过规范化常见的包装负载并推断缺失的 `kind` 字段来停止 `cron.add` INVALID_REQUEST 垃圾。
- 在 Gateway 网关 schema、cron 类型、CLI 文档和 UI 表单之间对齐 cron 提供商列表。
- 使智能体 cron 工具 schema 明确,以便 LLM 生成正确的任务负载。
- 修复 Control UI cron 状态任务计数显示。
- 添加测试以覆盖规范化和工具行为。
## 非目标
- 更改 cron 调度语义或任务执行行为。
- 添加新的调度类型或 cron 表达式解析。
- 除了必要的字段修复外,不大改 cron 的 UI/UX。
## 发现(当前差距)
- Gateway 网关中的 `CronPayloadSchema` 排除了 `signal` + `imessage`,而 TS 类型包含它们。
- Control UI CronStatus 期望 `jobCount`,但 Gateway 网关返回 `jobs`
- 智能体 cron 工具 schema 允许任意 `job` 对象,导致格式错误的输入。
- Gateway 网关严格验证 `cron.add` 而不进行规范化,因此包装的负载会失败。
## 变更内容
- `cron.add``cron.update` 现在规范化常见的包装形式并推断缺失的 `kind` 字段。
- 智能体 cron 工具 schema 与 Gateway 网关 schema 匹配,减少无效负载。
- 提供商枚举在 Gateway 网关、CLI、UI 和 macOS 选择器之间对齐。
- Control UI 使用 Gateway 网关的 `jobs` 计数字段显示状态。
## 当前行为
- **规范化:**包装的 `data`/`job` 负载被解包;`schedule.kind``payload.kind` 在安全时被推断。
- **默认值:**当缺失时,为 `wakeMode``sessionTarget` 应用安全默认值。
- **提供商:**Discord/Slack/Signal/iMessage 现在在 CLI/UI 中一致显示。
参见 [Cron 任务](/automation/cron-jobs) 了解规范化的形式和示例。
## 验证
- 观察 Gateway 网关日志中 `cron.add` INVALID_REQUEST 错误是否减少。
- 确认 Control UI cron 状态在刷新后显示任务计数。
## 可选后续工作
- 手动 Control UI 冒烟测试:为每个提供商添加一个 cron 任务 + 验证状态任务计数。
## 开放问题
- `cron.add` 是否应该接受来自客户端的显式 `state`(当前被 schema 禁止)?
- 我们是否应该允许 `webchat` 作为显式投递提供商(当前在投递解析中被过滤)?

View File

@ -1,45 +0,0 @@
---
read_when:
- 查看历史 Telegram 允许列表更改
summary: Telegram 允许列表加固:前缀 + 空白规范化
title: Telegram 允许列表加固
x-i18n:
generated_at: "2026-02-03T07:47:16Z"
model: claude-opus-4-5
provider: pi
source_hash: a2eca5fcc85376948cfe1b6044f1a8bc69c7f0eb94d1ceafedc1e507ba544162
source_path: experiments/plans/group-policy-hardening.md
workflow: 15
---
# Telegram 允许列表加固
**日期**:2026-01-05
**状态**:已完成
**PR**:#216
## 摘要
Telegram 允许列表现在不区分大小写地接受 `telegram:``tg:` 前缀,并容忍意外的空白。这使入站允许列表检查与出站发送规范化保持一致。
## 更改内容
- 前缀 `telegram:``tg:` 被同等对待(不区分大小写)。
- 允许列表条目会被修剪;空条目会被忽略。
## 示例
以下所有形式都被接受为同一 ID
- `telegram:123456`
- `TG:123456`
- `tg:123456`
## 为什么重要
从日志或聊天 ID 复制/粘贴通常会包含前缀和空白。规范化可避免在决定是否在私信或群组中响应时出现误判。
## 相关文档
- [群聊](/channels/groups)
- [Telegram 提供商](/channels/telegram)

View File

@ -1,121 +0,0 @@
---
last_updated: "2026-01-19"
owner: openclaw
status: draft
summary: 计划:添加 OpenResponses /v1/responses 端点并干净地弃用 chat completions
title: OpenResponses Gateway 网关计划
x-i18n:
generated_at: "2026-02-03T07:47:33Z"
model: claude-opus-4-5
provider: pi
source_hash: 71a22c48397507d1648b40766a3153e420c54f2a2d5186d07e51eb3d12e4636a
source_path: experiments/plans/openresponses-gateway.md
workflow: 15
---
# OpenResponses Gateway 网关集成计划
## 背景
OpenClaw Gateway 网关目前在 `/v1/chat/completions` 暴露了一个最小的 OpenAI 兼容 Chat Completions 端点(参见 [OpenAI Chat Completions](/gateway/openai-http-api))。
Open Responses 是基于 OpenAI Responses API 的开放推理标准。它专为智能体工作流设计,使用基于项目的输入加语义流式事件。OpenResponses 规范定义的是 `/v1/responses`,而不是 `/v1/chat/completions`。
## 目标
- 添加一个遵循 OpenResponses 语义的 `/v1/responses` 端点。
- 保留 Chat Completions 作为兼容层,易于禁用并最终移除。
- 使用隔离的、可复用的 schema 标准化验证和解析。
## 非目标
- 第一阶段完全实现 OpenResponses 功能(图片、文件、托管工具)。
- 替换内部智能体执行逻辑或工具编排。
- 在第一阶段更改现有的 `/v1/chat/completions` 行为。
## 研究摘要
来源OpenResponses OpenAPI、OpenResponses 规范网站和 Hugging Face 博客文章。
提取的关键点:
- `POST /v1/responses` 接受 `CreateResponseBody` 字段,如 `model``input`(字符串或 `ItemParam[]`)、`instructions``tools``tool_choice``stream``max_output_tokens``max_tool_calls`
- `ItemParam` 是以下类型的可区分联合:
- 具有角色 `system``developer``user``assistant``message`
- `function_call``function_call_output`
- `reasoning`
- `item_reference`
- 成功响应返回带有 `object: "response"``status``output` 项的 `ResponseResource`
- 流式传输使用语义事件,如:
- `response.created``response.in_progress``response.completed``response.failed`
- `response.output_item.added``response.output_item.done`
- `response.content_part.added``response.content_part.done`
- `response.output_text.delta``response.output_text.done`
- 规范要求:
- `Content-Type: text/event-stream`
- `event:` 必须匹配 JSON `type` 字段
- 终止事件必须是字面量 `[DONE]`
- Reasoning 项可能暴露 `content``encrypted_content``summary`
- HF 示例在请求中包含 `OpenResponses-Version: latest`(可选头部)。
## 提议的架构
- 添加 `src/gateway/open-responses.schema.ts`,仅包含 Zod schema(无 gateway 导入)。
- 添加 `src/gateway/openresponses-http.ts`(或 `open-responses-http.ts`)用于 `/v1/responses`
- 保持 `src/gateway/openai-http.ts` 不变,作为遗留兼容适配器。
- 添加配置 `gateway.http.endpoints.responses.enabled`(默认 `false`)。
- 保持 `gateway.http.endpoints.chatCompletions.enabled` 独立;允许两个端点分别切换。
- 当 Chat Completions 启用时发出启动警告,以表明其遗留状态。
## Chat Completions 弃用路径
- 保持严格的模块边界responses 和 chat completions 之间不共享 schema 类型。
- 通过配置使 Chat Completions 成为可选,这样无需代码更改即可禁用。
- 一旦 `/v1/responses` 稳定,更新文档将 Chat Completions 标记为遗留。
- 可选的未来步骤:将 Chat Completions 请求映射到 Responses 处理器,以便更简单地移除。
## 第一阶段支持子集
- 接受 `input` 为字符串或带有消息角色和 `function_call_output``ItemParam[]`
- 将 system 和 developer 消息提取到 `extraSystemPrompt` 中。
- 使用最近的 `user``function_call_output` 作为智能体运行的当前消息。
- 对不支持的内容部分(图片/文件)返回 `invalid_request_error` 拒绝。
- 返回带有 `output_text` 内容的单个助手消息。
- 返回带有零值的 `usage`,直到 token 计数接入。
## 验证策略(无 SDK
- 为以下支持子集实现 Zod schema
- `CreateResponseBody`
- `ItemParam` + 消息内容部分联合
- `ResponseResource`
- Gateway 网关使用的流式事件形状
- 将 schema 保存在单个隔离模块中,以避免漂移并允许未来代码生成。
## 流式实现(第一阶段)
- 带有 `event:``data:` 的 SSE 行。
- 所需序列(最小可行):
- `response.created`
- `response.output_item.added`
- `response.content_part.added`
- `response.output_text.delta`(根据需要重复)
- `response.output_text.done`
- `response.content_part.done`
- `response.completed`
- `[DONE]`
## 测试和验证计划
- 为 `/v1/responses` 添加端到端覆盖:
- 需要认证
- 非流式响应形状
- 流式事件顺序和 `[DONE]`
- 使用头部和 `user` 的会话路由
- 保持 `src/gateway/openai-http.e2e.test.ts` 不变。
- 手动:用 `stream: true` curl `/v1/responses` 并验证事件顺序和终止 `[DONE]`
## 文档更新(后续)
- 为 `/v1/responses` 使用和示例添加新文档页面。
- 更新 `/gateway/openai-http-api`,添加遗留说明和指向 `/v1/responses` 的指针。

View File

@ -1,42 +0,0 @@
---
read_when:
- 探索未来模型选择和认证配置文件的方案
summary: 探索:模型配置、认证配置文件和回退行为
title: 模型配置探索
x-i18n:
generated_at: "2026-02-01T20:25:05Z"
model: claude-opus-4-5
provider: pi
source_hash: 48623233d80f874c0ae853b51f888599cf8b50ae6fbfe47f6d7b0216bae9500b
source_path: experiments/proposals/model-config.md
workflow: 14
---
# 模型配置(探索)
本文档记录了未来模型配置的**构想**。这不是正式的发布规范。如需了解当前行为,请参阅:
- [模型](/concepts/models)
- [模型故障转移](/concepts/model-failover)
- [OAuth + 配置文件](/concepts/oauth)
## 动机
运营者希望:
- 每个提供商支持多个认证配置文件(个人 vs 工作)。
- 简单的 `/model` 选择,并具有可预测的回退行为。
- 文本模型与图像模型之间有清晰的分离。
## 可能的方向(高层级)
- 保持模型选择简洁:`provider/model` 加可选别名。
- 允许提供商拥有多个认证配置文件,并指定明确的顺序。
- 使用全局回退列表,使所有会话以一致的方式进行故障转移。
- 仅在明确配置时才覆盖图像路由。
## 待解决的问题
- 配置文件轮换应该按提供商还是按模型进行?
- UI 应如何为会话展示配置文件选择?
- 从旧版配置键迁移的最安全路径是什么?

View File

@ -1,235 +0,0 @@
---
read_when:
- 设计超越每日 Markdown 日志的工作区记忆(~/.openclaw/workspace
- 决定:独立 CLI vs 深度 OpenClaw 集成
- 添加离线回忆 + 反思retain/recall/reflect
summary: 研究笔记Clawd 工作区的离线记忆系统Markdown 作为数据源 + 派生索引)
title: 工作区记忆研究
x-i18n:
generated_at: "2026-02-03T10:06:14Z"
model: claude-opus-4-5
provider: pi
source_hash: 1753c8ee6284999fab4a94ff5fae7421c85233699c9d3088453d0c2133ac0feb
source_path: experiments/research/memory.md
workflow: 15
---
# 工作区记忆 v2离线研究笔记
目标:Clawd 风格的工作区(`agents.defaults.workspace`,默认 `~/.openclaw/workspace`),其中"记忆"以每天一个 Markdown 文件(`memory/YYYY-MM-DD.md`)加上一小组稳定文件(例如 `memory.md`、`SOUL.md`)的形式存储。
本文档提出一种**离线优先**的记忆架构,保持 Markdown 作为规范的、可审查的数据源,但通过派生索引添加**结构化回忆**(搜索、实体摘要、置信度更新)。
## 为什么要改变?
当前设置(每天一个文件)非常适合:
- "仅追加"式日志记录
- 人工编辑
- git 支持的持久性 + 可审计性
- 低摩擦捕获("直接写下来")
但它在以下方面较弱:
- 高召回率检索("我们对 X 做了什么决定?"、"上次我们尝试 Y 时?"
- 以实体为中心的答案("告诉我关于 Alice / The Castle / warelay 的信息")而无需重读多个文件
- 观点/偏好稳定性(以及变化时的证据)
- 时间约束("2025 年 11 月期间什么是真实的?")和冲突解决
## 设计目标
- **离线**:无需网络即可工作;可在笔记本电脑/Castle 上运行;无云依赖。
- **可解释**:检索的项目应该可归因(文件 + 位置)并与推理分离。
- **低仪式感**:每日日志保持 Markdown无需繁重的 schema 工作。
- **增量式**v1 仅使用 FTS 就很有用;语义/向量和图是可选升级。
- **对智能体友好**:使"在 token 预算内回忆"变得简单(返回小型事实包)。
## 北极星模型Hindsight × Letta
需要融合两个部分:
1. **Letta/MemGPT 风格的控制循环**
- 保持一个小的"核心"始终在上下文中(角色 + 关键用户事实)
- 其他所有内容都在上下文之外,通过工具检索
- 记忆写入是显式的工具调用append/replace/insert持久化后在下一轮重新注入
2. **Hindsight 风格的记忆基底**
- 分离观察到的、相信的和总结的内容
- 支持 retain/recall/reflect
- 带有置信度的观点可以随证据演变
- 实体感知检索 + 时间查询(即使没有完整的知识图谱)
## 提议的架构Markdown 数据源 + 派生索引)
### 规范存储git 友好)
保持 `~/.openclaw/workspace` 作为规范的人类可读记忆。
建议的工作区布局:
```
~/.openclaw/workspace/
memory.md # 小型:持久事实 + 偏好(类似核心)
memory/
YYYY-MM-DD.md # 每日日志(追加;叙事)
bank/ # "类型化"记忆页面(稳定、可审查)
world.md # 关于世界的客观事实
experience.md # 智能体做了什么(第一人称)
opinions.md # 主观偏好/判断 + 置信度 + 证据指针
entities/
Peter.md
The-Castle.md
warelay.md
...
```
注意:
- **每日日志保持为每日日志**。无需将其转换为 JSON。
- `bank/` 文件是**经过整理的**,由反思任务生成,仍可手动编辑。
- `memory.md` 保持"小型 + 类似核心":你希望 Clawd 每次会话都能看到的内容。
### 派生存储(机器回忆)
在工作区下添加派生索引(不一定需要 git 跟踪):
```
~/.openclaw/workspace/.memory/index.sqlite
```
后端支持:
- 用于事实 + 实体链接 + 观点元数据的 SQLite schema
- SQLite **FTS5** 用于词法回忆(快速、小巧、离线)
- 可选的嵌入表用于语义回忆(仍然离线)
索引始终**可从 Markdown 重建**。
## Retain / Recall / Reflect操作循环
### Retain将每日日志规范化为"事实"
Hindsight 在这里重要的关键洞察:存储**叙事性、自包含的事实**,而不是微小的片段。
`memory/YYYY-MM-DD.md` 的实用规则:
- 在一天结束时(或期间),添加一个 `## Retain` 部分,包含 2-5 个要点:
- 叙事性(保留跨轮上下文)
- 自包含(独立时也有意义)
- 标记类型 + 实体提及
示例:
```
## Retain
- W @Peter: Currently in Marrakech (Nov 27Dec 1, 2025) for Andy's birthday.
- B @warelay: I fixed the Baileys WS crash by wrapping connection.update handlers in try/catch (see memory/2025-11-27.md).
- O(c=0.95) @Peter: Prefers concise replies (<1500 chars) on WhatsApp; long content goes into files.
```
最小化解析:
- 类型前缀:`W`(世界)、`B`(经历/传记)、`O`(观点)、`S`(观察/摘要;通常是生成的)
- 实体:`@Peter``@warelay`slug 映射到 `bank/entities/*.md`
- 观点置信度:`O(c=0.0..1.0)` 可选
如果你不想让作者考虑这些:反思任务可以从日志的其余部分推断这些要点,但有一个显式的 `## Retain` 部分是最简单的"质量杠杆"。
### Recall对派生索引的查询
Recall 应支持:
- **词法**"查找精确的术语/名称/命令"FTS5
- **实体**"告诉我关于 X 的信息"(实体页面 + 实体链接的事实)
- **时间**"11 月 27 日前后发生了什么"/"自上周以来"
- **观点**"Peter 偏好什么?"(带置信度 + 证据)
返回格式应对智能体友好并引用来源:
- `kind``world|experience|opinion|observation`
- `timestamp`(来源日期,或如果存在则提取的时间范围)
- `entities``["Peter","warelay"]`
- `content`(叙事性事实)
- `source``memory/2025-11-27.md#L12` 等)
### Reflect生成稳定页面 + 更新信念
反思是一个定时任务(每日或心跳 `ultrathink`),它:
- 根据最近的事实更新 `bank/entities/*.md`(实体摘要)
- 根据强化/矛盾更新 `bank/opinions.md` 置信度
- 可选地提议对 `memory.md`("类似核心"的持久事实)的编辑
观点演变(简单、可解释):
- 每个观点有:
- 陈述
- 置信度 `c ∈ [0,1]`
- last_updated
- 证据链接(支持 + 矛盾的事实 ID
- 当新事实到达时:
- 通过实体重叠 + 相似性找到候选观点(先 FTS后嵌入
- 通过小幅增量更新置信度;大幅跳跃需要强矛盾 + 重复证据
## CLI 集成:独立 vs 深度集成
建议:**深度集成到 OpenClaw**,但保持可分离的核心库。
### 为什么要集成到 OpenClaw
- OpenClaw 已经知道:
- 工作区路径(`agents.defaults.workspace`
- 会话模型 + 心跳
- 日志记录 + 故障排除模式
- 你希望智能体自己调用工具:
- `openclaw memory recall "…" --k 25 --since 30d`
- `openclaw memory reflect --since 7d`
### 为什么仍要分离库?
- 保持记忆逻辑可测试,无需 Gateway 网关/运行时
- 可从其他上下文重用(本地脚本、未来的桌面应用等)
形态:
记忆工具预计是一个小型 CLI + 库层,但这仅是探索性的。
## "S-Collide" / SuCo何时使用研究
如果"S-Collide"指的是 **SuCo(Subspace Collision)**:这是一种 ANN 检索方法,通过在子空间中使用学习/结构化碰撞来实现强召回/延迟权衡(论文:arXiv 2411.14754,2024)。
对于 `~/.openclaw/workspace` 的务实观点:
- **不要从** SuCo 开始。
- 从 SQLite FTS +(可选的)简单嵌入开始;你会立即获得大部分 UX 收益。
- 仅在以下情况下考虑 SuCo/HNSW/ScaNN 级别的解决方案:
- 语料库很大(数万/数十万个块)
- 暴力嵌入搜索变得太慢
- 召回质量明显受到词法搜索的瓶颈限制
离线友好的替代方案(按复杂性递增):
- SQLite FTS5 + 元数据过滤(零 ML
- 嵌入 + 暴力搜索(如果块数量低,效果出奇地好)
- HNSW 索引(常见、稳健;需要库绑定)
- SuCo研究级如果有可嵌入的可靠实现则很有吸引力
开放问题:
- 对于你的机器(笔记本 + 台式机)上的"个人助理记忆"**最佳**的离线嵌入模型是什么?
- 如果你已经有 Ollama使用本地模型嵌入否则在工具链中附带一个小型嵌入模型。
## 最小可用试点
如果你想要一个最小但仍有用的版本:
- 添加 `bank/` 实体页面和每日日志中的 `## Retain` 部分。
- 使用 SQLite FTS 进行带引用的回忆(路径 + 行号)。
- 仅在召回质量或规模需要时添加嵌入。
## 参考资料
- Letta / MemGPT 概念:"核心记忆块" + "档案记忆" + 工具驱动的自编辑记忆。
- Hindsight 技术报告:"retain / recall / reflect",四网络记忆,叙事性事实提取,观点置信度演变。
- SuCo(arXiv 2411.14754,2024):"Subspace Collision" 近似最近邻检索。

View File

@ -1,424 +0,0 @@
---
read_when:
- 规划节点 + 操作者客户端的统一网络协议
- 重新设计跨设备的审批、配对、TLS 和在线状态
summary: Clawnet 重构:统一网络协议、角色、认证、审批、身份
title: Clawnet 重构
x-i18n:
generated_at: "2026-02-03T07:55:03Z"
model: claude-opus-4-5
provider: pi
source_hash: 719b219c3b326479658fe6101c80d5273fc56eb3baf50be8535e0d1d2bb7987f
source_path: refactor/clawnet.md
workflow: 15
---
# Clawnet 重构(协议 + 认证统一)
## 嗨
嗨 Peter — 方向很好;这将解锁更简单的用户体验 + 更强的安全性。
## 目的
单一、严谨的文档用于:
- 当前状态:协议、流程、信任边界。
- 痛点审批、多跳路由、UI 重复。
- 提议的新状态:一个协议、作用域角色、统一的认证/配对、TLS 固定。
- 身份模型:稳定 ID + 可爱的别名。
- 迁移计划、风险、开放问题。
## 目标(来自讨论)
- 所有客户端使用一个协议mac 应用、CLI、iOS、Android、无头节点
- 每个网络参与者都经过认证 + 配对。
- 角色清晰:节点 vs 操作者。
- 中央审批路由到用户所在位置。
- 所有远程流量使用 TLS 加密 + 可选固定。
- 最小化代码重复。
- 单台机器应该只显示一次(无 UI/节点重复条目)。
## 非目标(明确)
- 移除能力分离(仍需要最小权限)。
- 不经作用域检查就暴露完整的 Gateway 网关控制平面。
- 使认证依赖于人类标签(别名仍然是非安全性的)。
---
# 当前状态(现状)
## 两个协议
### 1) Gateway 网关 WebSocket控制平面
- 完整 API 表面:配置、渠道、模型、会话、智能体运行、日志、节点等。
- 默认绑定loopback。通过 SSH/Tailscale 远程访问。
- 认证:通过 `connect` 的令牌/密码。
- 无 TLS 固定(依赖 loopback/隧道)。
- 代码:
- `src/gateway/server/ws-connection/message-handler.ts`
- `src/gateway/client.ts`
- `docs/gateway/protocol.md`
### 2) Bridge节点传输
- 窄允许列表表面,节点身份 + 配对。
- TCP 上的 JSONL可选 TLS + 证书指纹固定。
- TLS 在设备发现 TXT 中公布指纹。
- 代码:
- `src/infra/bridge/server/connection.ts`
- `src/gateway/server-bridge.ts`
- `src/node-host/bridge-client.ts`
- `docs/gateway/bridge-protocol.md`
## 当前的控制平面客户端
- CLI → 通过 `callGateway``src/gateway/call.ts`)连接 Gateway 网关 WS。
- macOS 应用 UI → Gateway 网关 WS`GatewayConnection`)。
- Web 控制 UI → Gateway 网关 WS。
- ACP → Gateway 网关 WS。
- 浏览器控制使用自己的 HTTP 控制服务器。
## 当前的节点
- macOS 应用在节点模式下连接到 Gateway 网关 bridge`MacNodeBridgeSession`)。
- iOS/Android 应用连接到 Gateway 网关 bridge。
- 配对 + 每节点令牌存储在 Gateway 网关上。
## 当前审批流程exec
- 智能体通过 Gateway 网关使用 `system.run`
- Gateway 网关通过 bridge 调用节点。
- 节点运行时决定审批。
- UI 提示由 mac 应用显示(当节点 == mac 应用时)。
- 节点向 Gateway 网关返回 `invoke-res`
- 多跳UI 绑定到节点主机。
## 当前的在线状态 + 身份
- 来自 WS 客户端的 Gateway 网关在线状态条目。
- 来自 bridge 的节点在线状态条目。
- mac 应用可能为同一台机器显示两个条目UI + 节点)。
- 节点身份存储在配对存储中UI 身份是分开的。
---
# 问题/痛点
- 需要维护两个协议栈WS + Bridge
- 远程节点上的审批:提示出现在节点主机上,而不是用户所在位置。
- TLS 固定仅存在于 bridgeWS 依赖 SSH/Tailscale。
- 身份重复:同一台机器显示为多个实例。
- 角色模糊UI + 节点 + CLI 能力没有明确分离。
---
# 提议的新状态Clawnet
## 一个协议,两个角色
带有角色 + 作用域的单一 WS 协议。
- **角色node**(能力宿主)
- **角色operator**(控制平面)
- 操作者的可选**作用域**
- `operator.read`(状态 + 查看)
- `operator.write`(智能体运行、发送)
- `operator.admin`(配置、渠道、模型)
### 角色行为
**Node**
- 可以注册能力(`caps``commands`、permissions
- 可以接收 `invoke` 命令(`system.run``camera.*``canvas.*``screen.record` 等)。
- 可以发送事件:`voice.transcript``agent.request``chat.subscribe`
- 不能调用配置/模型/渠道/会话/智能体控制平面 API。
**Operator**
- 完整控制平面 API受作用域限制。
- 接收所有审批。
- 不直接执行 OS 操作;路由到节点。
### 关键规则
角色是按连接的,不是按设备。一个设备可以分别打开两个角色。
---
# 统一认证 + 配对
## 客户端身份
每个客户端提供:
- `deviceId`(稳定的,从设备密钥派生)。
- `displayName`(人类名称)。
- `role` + `scope` + `caps` + `commands`
## 配对流程(统一)
- 客户端未认证连接。
- Gateway 网关为该 `deviceId` 创建**配对请求**。
- 操作者收到提示;批准/拒绝。
- Gateway 网关颁发绑定到以下内容的凭证:
- 设备公钥
- 角色
- 作用域
- 能力/命令
- 客户端持久化令牌,重新认证连接。
## 设备绑定认证(避免 bearer 令牌重放)
首选:设备密钥对。
- 设备一次性生成密钥对。
- `deviceId = fingerprint(publicKey)`
- Gateway 网关发送 nonce设备签名Gateway 网关验证。
- 令牌颁发给公钥(所有权证明),而不是字符串。
替代方案:
- mTLS客户端证书最强运维复杂度更高。
- 短期 bearer 令牌仅作为临时阶段(早期轮换 + 撤销)。
## 静默批准SSH 启发式)
精确定义以避免薄弱环节。优选其一:
- **仅限本地**:当客户端通过 loopback/Unix socket 连接时自动配对。
- **通过 SSH 质询**Gateway 网关颁发 nonce客户端通过获取它来证明 SSH。
- **物理存在窗口**:在 Gateway 网关主机 UI 上本地批准后,允许在短窗口内(例如 10 分钟)自动配对。
始终记录 + 记录自动批准。
---
# TLS 无处不在(开发 + 生产)
## 复用现有 bridge TLS
使用当前 TLS 运行时 + 指纹固定:
- `src/infra/bridge/server/tls.ts`
- `src/node-host/bridge-client.ts` 中的指纹验证逻辑
## 应用于 WS
- WS 服务器使用相同的证书/密钥 + 指纹支持 TLS。
- WS 客户端可以固定指纹(可选)。
- 设备发现为所有端点公布 TLS + 指纹。
- 设备发现仅是定位器提示;永远不是信任锚。
## 为什么
- 减少对 SSH/Tailscale 的机密性依赖。
- 默认情况下使远程移动连接安全。
---
# 审批重新设计(集中化)
## 当前
审批发生在节点主机上mac 应用节点运行时)。提示出现在节点运行的地方。
## 提议
审批是 **Gateway 网关托管的**UI 传递给操作者客户端。
### 新流程
1. Gateway 网关接收 `system.run` 意图(智能体)。
2. Gateway 网关创建审批记录:`approval.requested`
3. 操作者 UI 显示提示。
4. 审批决定发送到 Gateway 网关:`approval.resolve`
5. 如果批准Gateway 网关调用节点命令。
6. 节点执行,返回 `invoke-res`
### 审批语义(加固)
- 广播到所有操作者;只有活跃的 UI 显示模态框(其他显示 toast)。
- 先解决者获胜Gateway 网关拒绝后续解决为已结算。
- 默认超时:N 秒后拒绝(例如 60 秒),记录原因。
- 解决需要 `operator.approvals` 作用域。
## 好处
- 提示出现在用户所在位置mac/手机)。
- 远程节点的一致审批。
- 节点运行时保持无头;无 UI 依赖。
---
# 角色清晰示例
## iPhone 应用
- **Node 角色**用于:麦克风、相机、语音聊天、位置、一键通话。
- 可选的 **operator.read** 用于状态和聊天视图。
- 可选的 **operator.write/admin** 仅在明确启用时。
## macOS 应用
- 默认是 Operator 角色(控制 UI
- 启用"Mac 节点"时是 Node 角色(system.run、屏幕、相机)
- 两个连接使用相同的 deviceId → 合并的 UI 条目。
## CLI
- 始终是 Operator 角色。
- 作用域按子命令派生:
- `status``logs` → read
- `agent``message` → write
- `config``channels` → admin
- 审批 + 配对 → `operator.approvals` / `operator.pairing`
---
# 身份 + 别名
## 稳定 ID
认证必需;永不改变。
首选:
- 密钥对指纹(公钥哈希)。
## 可爱别名(龙虾主题)
仅人类标签。
- 示例:`scarlet-claw``saltwave``mantis-pinch`
- 存储在 Gateway 网关注册表中,可编辑。
- 冲突处理:`-2``-3`
## UI 分组
跨角色的相同 `deviceId` → 单个"实例"行:
- 徽章:`operator``node`
- 显示能力 + 最后在线。
---
# 迁移策略
## 阶段 0记录 + 对齐
- 发布此文档。
- 盘点所有协议调用 + 审批流程。
## 阶段 1向 WS 添加角色/作用域
- 用 `role``scope``deviceId` 扩展 `connect` 参数。
- 为 node 角色添加允许列表限制。
## 阶段 2Bridge 兼容性
- 保持 bridge 运行。
- 并行添加 WS node 支持。
- 通过配置标志限制功能。
## 阶段 3中央审批
- 在 WS 中添加审批请求 + 解决事件。
- 更新 mac 应用 UI 以提示 + 响应。
- 节点运行时停止提示 UI。
## 阶段 4TLS 统一
- 使用 bridge TLS 运行时为 WS 添加 TLS 配置。
- 向客户端添加固定。
## 阶段 5弃用 bridge
- 将 iOS/Android/mac 节点迁移到 WS。
- 保持 bridge 作为后备;稳定后移除。
## 阶段 6设备绑定认证
- 所有非本地连接都需要基于密钥的身份。
- 添加撤销 + 轮换 UI。
---
# 安全说明
- 角色/允许列表在 Gateway 网关边界强制执行。
- 没有客户端可以在没有 operator 作用域的情况下获得"完整"API。
- *所有*连接都需要配对。
- TLS + 固定减少移动设备的 MITM 风险。
- SSH 静默批准是便利措施;仍然记录 + 可撤销。
- 设备发现永远不是信任锚。
- 能力声明通过按平台/类型的服务器允许列表验证。
# 流式传输 + 大型负载(节点媒体)
WS 控制平面对于小消息没问题,但节点还做:
- 相机剪辑
- 屏幕录制
- 音频流
选项:
1. WS 二进制帧 + 分块 + 背压规则。
2. 单独的流式端点(仍然是 TLS + 认证)。
3. 对于媒体密集型命令保持 bridge 更长时间,最后迁移。
在实现前选择一个以避免漂移。
# 能力 + 命令策略
- 节点报告的 caps/commands 被视为**声明**。
- Gateway 网关强制执行每平台允许列表。
- 任何新命令都需要操作者批准或显式允许列表更改。
- 用时间戳审计更改。
# 审计 + 速率限制
- 记录:配对请求、批准/拒绝、令牌颁发/轮换/撤销。
- 速率限制配对垃圾和审批提示。
# 协议卫生
- 显式协议版本 + 错误代码。
- 重连规则 + 心跳策略。
- 在线状态 TTL 和最后在线语义。
---
# 开放问题
1. 同时运行两个角色的单个设备:令牌模型
- 建议每个角色单独的令牌(node vs operator)。
- 相同的 deviceId,不同的作用域,更清晰的撤销。
2. 操作者作用域粒度
- read/write/admin + approvals + pairing(最小可行)
- 以后考虑每功能作用域。
3. 令牌轮换 + 撤销 UX
- 角色更改时自动轮换。
- 按 deviceId + 角色撤销的 UI。
4. 设备发现
- 扩展当前 Bonjour TXT 以包含 WS TLS 指纹 + 角色提示。
- 仅作为定位器提示处理。
5. 跨网络审批
- 广播到所有操作者客户端;活跃的 UI 显示模态框。
- 先响应者获胜Gateway 网关强制原子性。
---
# 总结TL;DR
- 当前WS 控制平面 + Bridge 节点传输。
- 痛点:审批 + 重复 + 两个栈。
- 提议:一个带有显式角色 + 作用域的 WS 协议,统一配对 + TLS 固定Gateway 网关托管的审批,稳定设备 ID + 可爱别名。
- 结果:更简单的 UX更强的安全性更少的重复更好的移动路由。

View File

@ -1,323 +0,0 @@
---
read_when:
- 设计 exec 主机路由或 exec 批准
- 实现节点运行器 + UI IPC
- 添加 exec 主机安全模式和斜杠命令
summary: 重构计划exec 主机路由、节点批准和无头运行器
title: Exec 主机重构
x-i18n:
generated_at: "2026-02-03T07:54:43Z"
model: claude-opus-4-5
provider: pi
source_hash: 53a9059cbeb1f3f1dbb48c2b5345f88ca92372654fef26f8481e651609e45e3a
source_path: refactor/exec-host.md
workflow: 15
---
# Exec 主机重构计划
## 目标
- 添加 `exec.host` + `exec.security` 以在**沙箱**、**Gateway 网关**和**节点**之间路由执行。
- 保持默认**安全**:除非明确启用,否则不进行跨主机执行。
- 将执行拆分为**无头运行器服务**,通过本地 IPC 连接可选的 UImacOS 应用)。
- 提供**每智能体**策略、允许列表、询问模式和节点绑定。
- 支持*与*或*不与*允许列表一起使用的**询问模式**。
- 跨平台Unix socket + token 认证macOS/Linux/Windows 一致性)。
## 非目标
- 无遗留允许列表迁移或遗留 schema 支持。
- 节点 exec 无 PTY/流式传输(仅聚合输出)。
- 除现有 Bridge + Gateway 网关外无新网络层。
## 决定(已锁定)
- **配置键:** `exec.host` + `exec.security`(允许每智能体覆盖)。
- **提升:** 保留 `/elevated` 作为 Gateway 网关完全访问的别名。
- **询问默认:** `on-miss`
- **批准存储:** `~/.openclaw/exec-approvals.json`JSON无遗留迁移
- **运行器:** 无头系统服务UI 应用托管 Unix socket 用于批准。
- **节点身份:** 使用现有 `nodeId`
- **Socket 认证:** Unix socket + token跨平台如需要稍后拆分。
- **节点主机状态:** `~/.openclaw/node.json`(节点 id + 配对 token
- **macOS exec 主机:** 在 macOS 应用内运行 `system.run`;节点主机服务通过本地 IPC 转发请求。
- **无 XPC helper**坚持使用 Unix socket + token + 对等检查。
## 关键概念
### 主机
- `sandbox`Docker exec当前行为
- `gateway`:在 Gateway 网关主机上执行。
- `node`:通过 Bridge 在节点运行器上执行(`system.run`)。
### 安全模式
- `deny`:始终阻止。
- `allowlist`:仅允许匹配项。
- `full`:允许一切(等同于提升模式)。
### 询问模式
- `off`:从不询问。
- `on-miss`:仅在允许列表不匹配时询问。
- `always`:每次都询问。
询问**独立于**允许列表;允许列表可与 `always``on-miss` 一起使用。
### 策略解析(每次执行)
1. 解析 `exec.host`(工具参数 → 智能体覆盖 → 全局默认)。
2. 解析 `exec.security``exec.ask`(相同优先级)。
3. 如果主机是 `sandbox`,继续本地沙箱执行。
4. 如果主机是 `gateway``node`,在该主机上应用安全 + 询问策略。
## 默认安全
- 默认 `exec.host = sandbox`
- `gateway``node` 默认 `exec.security = deny`
- 默认 `exec.ask = on-miss`(仅在安全允许时相关)。
- 如果未设置节点绑定,**智能体可以定向任何节点**,但仅在策略允许时。
## 配置表面
### 工具参数
- `exec.host`(可选):`sandbox | gateway | node`
- `exec.security`(可选):`deny | allowlist | full`
- `exec.ask`(可选):`off | on-miss | always`
- `exec.node`(可选):当 `host=node` 时使用的节点 id/名称。
### 配置键(全局)
- `tools.exec.host`
- `tools.exec.security`
- `tools.exec.ask`
- `tools.exec.node`(默认节点绑定)
### 配置键(每智能体)
- `agents.list[].tools.exec.host`
- `agents.list[].tools.exec.security`
- `agents.list[].tools.exec.ask`
- `agents.list[].tools.exec.node`
### 别名
- `/elevated on` = 为智能体会话设置 `tools.exec.host=gateway``tools.exec.security=full`
- `/elevated off` = 为智能体会话恢复之前的 exec 设置。
## 批准存储JSON
路径:`~/.openclaw/exec-approvals.json`
用途:
- **执行主机**Gateway 网关或节点运行器)的本地策略 + 允许列表。
- 无 UI 可用时的询问回退。
- UI 客户端的 IPC 凭证。
建议的 schemav1
```json
{
"version": 1,
"socket": {
"path": "~/.openclaw/exec-approvals.sock",
"token": "base64-opaque-token"
},
"defaults": {
"security": "deny",
"ask": "on-miss",
"askFallback": "deny"
},
"agents": {
"agent-id-1": {
"security": "allowlist",
"ask": "on-miss",
"allowlist": [
{
"pattern": "~/Projects/**/bin/rg",
"lastUsedAt": 0,
"lastUsedCommand": "rg -n TODO",
"lastResolvedPath": "/Users/user/Projects/.../bin/rg"
}
]
}
}
}
```
注意事项:
- 无遗留允许列表格式。
- `askFallback` 仅在需要 `ask` 且无法访问 UI 时应用。
- 文件权限:`0600`
## 运行器服务(无头)
### 角色
- 在本地强制执行 `exec.security` + `exec.ask`
- 执行系统命令并返回输出。
- 为 exec 生命周期发出 Bridge 事件(可选但推荐)。
### 服务生命周期
- macOS 上的 Launchd/daemonLinux/Windows 上的系统服务。
- 批准 JSON 是执行主机本地的。
- UI 托管本地 Unix socket运行器按需连接。
## UI 集成macOS 应用)
### IPC
- Unix socket 位于 `~/.openclaw/exec-approvals.sock`0600
- Token 存储在 `exec-approvals.json`0600中。
- 对等检查:仅同 UID。
- 挑战/响应nonce + HMAC(token, request-hash) 防止重放。
- 短 TTL例如 10s+ 最大负载 + 速率限制。
### 询问流程macOS 应用 exec 主机)
1. 节点服务从 Gateway 网关接收 `system.run`
2. 节点服务连接到本地 socket 并发送提示/exec 请求。
3. 应用验证对等 + token + HMAC + TTL然后在需要时显示对话框。
4. 应用在 UI 上下文中执行命令并返回输出。
5. 节点服务将输出返回给 Gateway 网关。
如果 UI 缺失:
- 应用 `askFallback``deny|allowlist|full`)。
### 图示ASCII
```
Agent -> Gateway -> Bridge -> Node Service (TS)
| IPC (UDS + token + HMAC + TTL)
v
Mac App (UI + TCC + system.run)
```
## 节点身份 + 绑定
- 使用 Bridge 配对中的现有 `nodeId`
- 绑定模型:
- `tools.exec.node` 将智能体限制为特定节点。
- 如果未设置,智能体可以选择任何节点(策略仍强制执行默认值)。
- 节点选择解析:
- `nodeId` 精确匹配
- `displayName`(规范化)
- `remoteIp`
- `nodeId` 前缀(>= 6 字符)
## 事件
### 谁看到事件
- 系统事件是**每会话**的,在下一个提示时显示给智能体。
- 存储在 Gateway 网关内存队列中(`enqueueSystemEvent`)。
### 事件文本
- `Exec started (node=<id>, id=<runId>)`
- `Exec finished (node=<id>, id=<runId>, code=<code>)` + 可选输出尾部
- `Exec denied (node=<id>, id=<runId>, <reason>)`
### 传输
选项 A推荐
- 运行器发送 Bridge `event``exec.started` / `exec.finished`
- Gateway 网关 `handleBridgeEvent` 将这些映射到 `enqueueSystemEvent`
选项 B
- Gateway 网关 `exec` 工具直接处理生命周期(仅同步)。
## Exec 流程
### 沙箱主机
- 现有 `exec` 行为Docker 或无沙箱时的主机)。
- 仅在非沙箱模式下支持 PTY。
### Gateway 网关主机
- Gateway 网关进程在其自己的机器上执行。
- 强制执行本地 `exec-approvals.json`(安全/询问/允许列表)。
### 节点主机
- Gateway 网关调用 `node.invoke` 配合 `system.run`
- 运行器强制执行本地批准。
- 运行器返回聚合的 stdout/stderr。
- 可选的 Bridge 事件用于开始/完成/拒绝。
## 输出上限
- 组合 stdout+stderr 上限为 **200k**;为事件保留**尾部 20k**。
- 使用清晰的后缀截断(例如 `"… (truncated)"`)。
## 斜杠命令
- `/exec host=<sandbox|gateway|node> security=<deny|allowlist|full> ask=<off|on-miss|always> node=<id>`
- 每智能体、每会话覆盖;除非通过配置保存,否则非持久。
- `/elevated on|off|ask|full` 仍然是 `host=gateway security=full` 的快捷方式(`full` 跳过批准)。
## 跨平台方案
- 运行器服务是可移植的执行目标。
- UI 是可选的;如果缺失,应用 `askFallback`
- Windows/Linux 支持相同的批准 JSON + socket 协议。
## 实现阶段
### 阶段 1配置 + exec 路由
- 为 `exec.host``exec.security``exec.ask``exec.node` 添加配置 schema。
- 更新工具管道以遵守 `exec.host`
- 添加 `/exec` 斜杠命令并保留 `/elevated` 别名。
### 阶段 2批准存储 + Gateway 网关强制执行
- 实现 `exec-approvals.json` 读取器/写入器。
- 为 `gateway` 主机强制执行允许列表 + 询问模式。
- 添加输出上限。
### 阶段 3节点运行器强制执行
- 更新节点运行器以强制执行允许列表 + 询问。
- 添加 Unix socket 提示桥接到 macOS 应用 UI。
- 连接 `askFallback`
### 阶段 4事件
- 为 exec 生命周期添加节点 → Gateway 网关 Bridge 事件。
- 映射到 `enqueueSystemEvent` 用于智能体提示。
### 阶段 5UI 完善
- Mac 应用:允许列表编辑器、每智能体切换器、询问策略 UI。
- 节点绑定控制(可选)。
## 测试计划
- 单元测试允许列表匹配glob + 不区分大小写)。
- 单元测试:策略解析优先级(工具参数 → 智能体覆盖 → 全局)。
- 集成测试:节点运行器拒绝/允许/询问流程。
- Bridge 事件测试:节点事件 → 系统事件路由。
## 开放风险
- UI 不可用:确保遵守 `askFallback`
- 长时间运行的命令:依赖超时 + 输出上限。
- 多节点歧义:除非有节点绑定或显式节点参数,否则报错。
## 相关文档
- [Exec 工具](/tools/exec)
- [执行批准](/tools/exec-approvals)
- [节点](/nodes)
- [提升模式](/tools/elevated)

View File

@ -1,92 +0,0 @@
---
description: Track outbound session mirroring refactor notes, decisions, tests, and open items.
title: 出站会话镜像重构Issue #1520
x-i18n:
generated_at: "2026-02-03T07:53:51Z"
model: claude-opus-4-5
provider: pi
source_hash: b88a72f36f7b6d8a71fde9d014c0a87e9a8b8b0d449b67119cf3b6f414fa2b81
source_path: refactor/outbound-session-mirroring.md
workflow: 15
---
# 出站会话镜像重构Issue #1520
## 状态
- 进行中。
- 核心 + 插件渠道路由已更新以支持出站镜像。
- Gateway 网关发送现在在省略 sessionKey 时派生目标会话。
## 背景
出站发送被镜像到*当前*智能体会话(工具会话键)而不是目标渠道会话。入站路由使用渠道/对等方会话键,因此出站响应落在错误的会话中,首次联系的目标通常缺少会话条目。
## 目标
- 将出站消息镜像到目标渠道会话键。
- 在缺失时为出站创建会话条目。
- 保持线程/话题作用域与入站会话键对齐。
- 涵盖核心渠道加内置扩展。
## 实现摘要
- 新的出站会话路由辅助器:
- `src/infra/outbound/outbound-session.ts`
- `resolveOutboundSessionRoute` 使用 `buildAgentSessionKey`dmScope + identityLinks构建目标 sessionKey。
- `ensureOutboundSessionEntry` 通过 `recordSessionMetaFromInbound` 写入最小的 `MsgContext`
- `runMessageAction`(发送)派生目标 sessionKey 并将其传递给 `executeSendAction` 进行镜像。
- `message-tool` 不再直接镜像;它只从当前会话键解析 agentId。
- 插件发送路径使用派生的 sessionKey 通过 `appendAssistantMessageToSessionTranscript` 进行镜像。
- Gateway 网关发送在未提供时派生目标会话键(默认智能体),并确保会话条目。
## 线程/话题处理
- SlackreplyTo/threadId -> `resolveThreadSessionKeys`(后缀)。
- DiscordthreadId/replyTo -> `resolveThreadSessionKeys``useSuffix=false` 以匹配入站(线程频道 id 已经作用域会话)。
- Telegram话题 ID 通过 `buildTelegramGroupPeerId` 映射到 `chatId:topic:<id>`
## 涵盖的扩展
- Matrix、MS Teams、Mattermost、BlueBubbles、Nextcloud Talk、Zalo、Zalo Personal、Nostr、Tlon。
- 注意:
- Mattermost 目标现在为私信会话键路由去除 `@`
- Zalo Personal 对 1:1 目标使用私信对等方类型(仅当存在 `group:` 时才使用群组)。
- BlueBubbles 群组目标去除 `chat_*` 前缀以匹配入站会话键。
- Slack 自动线程镜像不区分大小写地匹配频道 id。
- Gateway 网关发送在镜像前将提供的会话键转换为小写。
## 决策
- **Gateway 网关发送会话派生**:如果提供了 `sessionKey`,则使用它。如果省略,从目标 + 默认智能体派生 sessionKey 并镜像到那里。
- **会话条目创建**:始终使用 `recordSessionMetaFromInbound``Provider/From/To/ChatType/AccountId/Originating*` 与入站格式对齐。
- **目标规范化**:出站路由在可用时使用解析后的目标(`resolveChannelTarget` 之后)。
- **会话键大小写**:在写入和迁移期间将会话键规范化为小写。
## 添加/更新的测试
- `src/infra/outbound/outbound-session.test.ts`
- Slack 线程会话键。
- Telegram 话题会话键。
- dmScope identityLinks 与 Discord。
- `src/agents/tools/message-tool.test.ts`
- 从会话键派生 agentId不传递 sessionKey
- `src/gateway/server-methods/send.test.ts`
- 在省略时派生会话键并创建会话条目。
## 待处理项目 / 后续跟进
- 语音通话插件使用自定义的 `voice:<phone>` 会话键。出站映射在这里没有标准化;如果 message-tool 应该支持语音通话发送,请添加显式映射。
- 确认是否有任何外部插件使用内置集之外的非标准 `From/To` 格式。
## 涉及的文件
- `src/infra/outbound/outbound-session.ts`
- `src/infra/outbound/outbound-send-service.ts`
- `src/infra/outbound/message-action-runner.ts`
- `src/agents/tools/message-tool.ts`
- `src/gateway/server-methods/send.ts`
- 测试:
- `src/infra/outbound/outbound-session.test.ts`
- `src/agents/tools/message-tool.test.ts`
- `src/gateway/server-methods/send.test.ts`

View File

@ -1,221 +0,0 @@
---
read_when:
- 定义或重构插件架构
- 将渠道连接器迁移到插件 SDK/运行时
summary: 计划:为所有消息连接器提供一套统一的插件 SDK + 运行时
title: 插件 SDK 重构
x-i18n:
generated_at: "2026-02-01T21:36:45Z"
model: claude-opus-4-5
provider: pi
source_hash: d1964e2e47a19ee1d42ddaaa9cf1293c80bb0be463b049dc8468962f35bb6cb0
source_path: refactor/plugin-sdk.md
workflow: 15
---
# 插件 SDK + 运行时重构计划
目标:每个消息连接器都是一个插件(内置或外部),使用统一稳定的 API。
插件不直接从 `src/**` 导入任何内容。所有依赖项均通过 SDK 或运行时获取。
## 为什么现在做
- 当前连接器混用多种模式:直接导入核心模块、仅 dist 的桥接方式以及自定义辅助函数。
- 这使得升级变得脆弱,并阻碍了干净的外部插件接口。
## 目标架构(两层)
### 1插件 SDK编译时稳定可发布
范围:类型、辅助函数和配置工具。无运行时状态,无副作用。
内容(示例):
- 类型:`ChannelPlugin`、适配器、`ChannelMeta``ChannelCapabilities``ChannelDirectoryEntry`
- 配置辅助函数:`buildChannelConfigSchema``setAccountEnabledInConfigSection``deleteAccountFromConfigSection`
`applyAccountNameToChannelSection`
- 配对辅助函数:`PAIRING_APPROVED_MESSAGE``formatPairingApproveHint`
- 新手引导辅助函数:`promptChannelAccessConfig``addWildcardAllowFrom`、新手引导类型。
- 工具参数辅助函数:`createActionGate``readStringParam``readNumberParam``readReactionParams``jsonResult`
- 文档链接辅助函数:`formatDocsLink`
交付方式:
- 以 `openclaw/plugin-sdk` 发布(或从核心以 `openclaw/plugin-sdk` 导出)。
- 使用语义化版本控制,提供明确的稳定性保证。
### 2插件运行时执行层注入式
范围:所有涉及核心运行时行为的内容。
通过 `OpenClawPluginApi.runtime` 访问,确保插件永远不会导入 `src/**`
建议的接口(最小但完整):
```ts
export type PluginRuntime = {
channel: {
text: {
chunkMarkdownText(text: string, limit: number): string[];
resolveTextChunkLimit(cfg: OpenClawConfig, channel: string, accountId?: string): number;
hasControlCommand(text: string, cfg: OpenClawConfig): boolean;
};
reply: {
dispatchReplyWithBufferedBlockDispatcher(params: {
ctx: unknown;
cfg: unknown;
dispatcherOptions: {
deliver: (payload: {
text?: string;
mediaUrls?: string[];
mediaUrl?: string;
}) => void | Promise<void>;
onError?: (err: unknown, info: { kind: string }) => void;
};
}): Promise<void>;
createReplyDispatcherWithTyping?: unknown; // adapter for Teams-style flows
};
routing: {
resolveAgentRoute(params: {
cfg: unknown;
channel: string;
accountId: string;
peer: { kind: RoutePeerKind; id: string };
}): { sessionKey: string; accountId: string };
};
pairing: {
buildPairingReply(params: { channel: string; idLine: string; code: string }): string;
readAllowFromStore(channel: string): Promise<string[]>;
upsertPairingRequest(params: {
channel: string;
id: string;
meta?: { name?: string };
}): Promise<{ code: string; created: boolean }>;
};
media: {
fetchRemoteMedia(params: { url: string }): Promise<{ buffer: Buffer; contentType?: string }>;
saveMediaBuffer(
buffer: Uint8Array,
contentType: string | undefined,
direction: "inbound" | "outbound",
maxBytes: number,
): Promise<{ path: string; contentType?: string }>;
};
mentions: {
buildMentionRegexes(cfg: OpenClawConfig, agentId?: string): RegExp[];
matchesMentionPatterns(text: string, regexes: RegExp[]): boolean;
};
groups: {
resolveGroupPolicy(
cfg: OpenClawConfig,
channel: string,
accountId: string,
groupId: string,
): {
allowlistEnabled: boolean;
allowed: boolean;
groupConfig?: unknown;
defaultConfig?: unknown;
};
resolveRequireMention(
cfg: OpenClawConfig,
channel: string,
accountId: string,
groupId: string,
override?: boolean,
): boolean;
};
debounce: {
createInboundDebouncer<T>(opts: {
debounceMs: number;
buildKey: (v: T) => string | null;
shouldDebounce: (v: T) => boolean;
onFlush: (entries: T[]) => Promise<void>;
onError?: (err: unknown) => void;
}): { push: (v: T) => void; flush: () => Promise<void> };
resolveInboundDebounceMs(cfg: OpenClawConfig, channel: string): number;
};
commands: {
resolveCommandAuthorizedFromAuthorizers(params: {
useAccessGroups: boolean;
authorizers: Array<{ configured: boolean; allowed: boolean }>;
}): boolean;
};
};
logging: {
shouldLogVerbose(): boolean;
getChildLogger(name: string): PluginLogger;
};
state: {
resolveStateDir(cfg: OpenClawConfig): string;
};
};
```
备注:
- 运行时是访问核心行为的唯一方式。
- SDK 故意保持小巧和稳定。
- 每个运行时方法都映射到现有的核心实现(无重复代码)。
## 迁移计划(分阶段,安全)
### 阶段 0基础搭建
- 引入 `openclaw/plugin-sdk`
- 在 `OpenClawPluginApi` 中添加带有上述接口的 `api.runtime`
- 在过渡期内保留现有导入方式(添加弃用警告)。
### 阶段 1桥接清理低风险
- 用 `api.runtime` 替换每个扩展中的 `core-bridge.ts`
- 优先迁移 BlueBubbles、Zalo、Zalo Personal已经接近完成
- 移除重复的桥接代码。
### 阶段 2轻度直接导入的插件
- 将 Matrix 迁移到 SDK + 运行时。
- 验证新手引导、目录、群组提及逻辑。
### 阶段 3重度直接导入的插件
- 迁移 Microsoft Teams使用运行时辅助函数最多的插件
- 确保回复/正在输入的语义与当前行为一致。
### 阶段 4iMessage 插件化
- 将 iMessage 移入 `extensions/imessage`
- 用 `api.runtime` 替换直接的核心调用。
- 保持配置键、CLI 行为和文档不变。
### 阶段 5强制执行
- 添加 lint 规则 / CI 检查:禁止 `extensions/**``src/**` 导入。
- 添加插件 SDK/版本兼容性检查(运行时 + SDK 语义化版本)。
## 兼容性与版本控制
- SDK语义化版本控制已发布变更有文档记录。
- 运行时:按核心版本进行版本控制。添加 `api.runtime.version`
- 插件声明所需的运行时版本范围(例如 `openclawRuntime: ">=2026.2.0"`)。
## 测试策略
- 适配器级单元测试(使用真实核心实现验证运行时函数)。
- 每个插件的黄金测试:确保行为无偏差(路由、配对、允许列表、提及过滤)。
- CI 中使用单个端到端插件示例(安装 + 运行 + 冒烟测试)。
## 待解决问题
- SDK 类型托管在哪里:独立包还是核心导出?
- 运行时类型分发:在 SDK 中(仅类型)还是在核心中?
- 如何为内置插件与外部插件暴露文档链接?
- 过渡期间是否允许仓库内插件有限地直接导入核心模块?
## 成功标准
- 所有渠道连接器都是使用 SDK + 运行时的插件。
- `extensions/**` 不再从 `src/**` 导入。
- 新连接器模板仅依赖 SDK + 运行时。
- 外部插件可以在无需访问核心源码的情况下进行开发和更新。
相关文档:[插件](/tools/plugin)、[渠道](/channels/index)、[配置](/gateway/configuration)。

View File

@ -1,100 +0,0 @@
---
read_when:
- 设计或实现配置验证行为
- 处理配置迁移或 doctor 工作流
- 处理插件配置 schema 或插件加载门控
summary: 严格配置验证 + 仅通过 doctor 进行迁移
title: 严格配置验证
x-i18n:
generated_at: "2026-02-03T10:08:51Z"
model: claude-opus-4-5
provider: pi
source_hash: 5bc7174a67d2234e763f21330d8fe3afebc23b2e5c728a04abcc648b453a91cc
source_path: refactor/strict-config.md
workflow: 15
---
# 严格配置验证(仅通过 doctor 进行迁移)
## 目标
- **在所有地方拒绝未知配置键**(根级 + 嵌套)。
- **拒绝没有 schema 的插件配置**;不加载该插件。
- **移除加载时的旧版自动迁移**;迁移仅通过 doctor 运行。
- **启动时自动运行 doctordry-run**;如果无效,阻止非诊断命令。
## 非目标
- 加载时的向后兼容性(旧版键不会自动迁移)。
- 静默丢弃无法识别的键。
## 严格验证规则
- 配置必须在每个层级精确匹配 schema。
- 未知键是验证错误(根级或嵌套都不允许透传)。
- `plugins.entries.<id>.config` 必须由插件的 schema 验证。
- 如果插件缺少 schema**拒绝插件加载**并显示清晰的错误。
- 未知的 `channels.<id>` 键是错误,除非插件清单声明了该渠道 id。
- 所有插件都需要插件清单(`openclaw.plugin.json`)。
## 插件 schema 强制执行
- 每个插件为其配置提供严格的 JSON Schema内联在清单中
- 插件加载流程:
1. 解析插件清单 + schema`openclaw.plugin.json`)。
2. 根据 schema 验证配置。
3. 如果缺少 schema 或配置无效:阻止插件加载,记录错误。
- 错误消息包括:
- 插件 id
- 原因(缺少 schema / 配置无效)
- 验证失败的路径
- 禁用的插件保留其配置,但 Doctor + 日志会显示警告。
## Doctor 流程
- 每次加载配置时都会运行 Doctor默认 dry-run
- 如果配置无效:
- 打印摘要 + 可操作的错误。
- 指示:`openclaw doctor --fix`
- `openclaw doctor --fix`
- 应用迁移。
- 移除未知键。
- 写入更新后的配置。
## 命令门控(当配置无效时)
允许的命令(仅诊断):
- `openclaw doctor`
- `openclaw logs`
- `openclaw health`
- `openclaw help`
- `openclaw status`
- `openclaw gateway status`
其他所有命令必须硬失败并显示:"Config invalid. Run `openclaw doctor --fix`."
## 错误用户体验格式
- 单个摘要标题。
- 分组部分:
- 未知键(完整路径)
- 旧版键/需要迁移
- 插件加载失败(插件 id + 原因 + 路径)
## 实现接触点
- `src/config/zod-schema.ts`:移除根级透传;所有地方使用严格对象。
- `src/config/zod-schema.providers.ts`:确保严格的渠道 schema。
- `src/config/validation.ts`:未知键时失败;不应用旧版迁移。
- `src/config/io.ts`:移除旧版自动迁移;始终运行 doctor dry-run。
- `src/config/legacy*.ts`:将用法移至仅 doctor。
- `src/plugins/*`:添加 schema 注册表 + 门控。
- `src/cli` 中的 CLI 命令门控。
## 测试
- 未知键拒绝(根级 + 嵌套)。
- 插件缺少 schema → 插件加载被阻止并显示清晰错误。
- 无效配置 → Gateway 网关启动被阻止,诊断命令除外。
- Doctor dry-run 自动运行;`doctor --fix` 写入修正后的配置。

View File

@ -183,12 +183,6 @@ x-i18n:
- [模板TOOLS](/reference/templates/TOOLS)
- [模板USER](/reference/templates/USER)
## 实验(探索性)
- [新手引导配置协议](/experiments/onboarding-config-protocol)
- [研究:记忆](/experiments/research/memory)
- [模型配置探索](/experiments/proposals/model-config)
## 项目
- [致谢](/reference/credits)

View File

@ -1,519 +0,0 @@
# Bindings Capability Architecture Plan
Status: in progress
## Summary
The goal is not to move all ACP code out of core.
The goal is to make `bindings` a small core capability, keep the ACP session kernel in core, and move ACP-specific binding policy plus codex app server policy out of core.
That gives us a lightweight core without hiding core semantics behind plugin indirection.
## Current Conclusion
The current architecture should converge on this split:
- Core owns the generic binding capability.
- Core owns the generic ACP session kernel.
- Channel plugins own channel-specific binding semantics.
- ACP backend plugins own runtime protocol details.
- Product-level consumers like ACP configured bindings and the codex app server sit on top of the binding capability instead of hardcoding their own binding plumbing.
This is different from "everything becomes a plugin".
## Why This Changed
The current codebase already shows that there are really three different layers:
- binding and conversation ownership
- long-lived session and runtime-handle orchestration
- product-specific turn logic
Those layers should not all be forced into one runtime engine.
Today the duplication is mostly in the execution/control-plane shape, not in storage or binding plumbing:
- the main harness has its own turn engine
- ACP has its own session control plane
- the codex app server plugin path likely owns its own app-level turn engine outside this repo
The right move is to share the stable control-plane contracts, not to force all three into one giant executor.
## Verified Current State
### Generic binding pieces already exist
- `src/infra/outbound/session-binding-service.ts` already provides a generic binding store and adapter model.
- `src/plugins/conversation-binding.ts` already lets plugins request a conversation binding and stores plugin-owned binding metadata.
- `src/plugins/types.ts` already exposes plugin-facing binding APIs.
- `src/plugins/types.ts` already exposes the generic `inbound_claim` hook.
### ACP is only partially pluginified
- `src/channels/plugins/configured-binding-registry.ts` now owns generic configured binding compilation and lookup.
- `src/channels/plugins/binding-routing.ts` and `src/channels/plugins/binding-targets.ts` now own the generic route and target lifecycle seams.
- ACP now plugs into that seam through `src/channels/plugins/acp-configured-binding-consumer.ts` and `src/channels/plugins/acp-stateful-target-driver.ts`.
- `src/acp/persistent-bindings.lifecycle.ts` still owns configured ACP ensure and reset behavior.
- runtime-created plugin conversation bindings still use a separate path in `src/plugins/conversation-binding.ts`.
### Codex app server is already closer to the desired shape
From this repo's side, the codex app server path is much thinner:
- a plugin binds a conversation
- core stores that binding
- inbound dispatch targets the plugin's `inbound_claim` hook
What core does not provide for the codex app server path is an ACP-like shared session kernel. If the app server needs retries, long-lived runtime handles, cancellation, or session health logic, it must own that itself today.
## The Durable Split
### 1. Core Binding Capability
This should become the primary shared seam.
Responsibilities:
- canonical `ConversationRef`
- binding record storage
- configured binding compilation
- runtime-created binding storage
- fast binding lookup on inbound
- binding touch/unbind lifecycle
- generic dispatch handoff to the binding target
What core binding capability must not own:
- Discord thread rules
- Telegram topic rules
- Feishu chat rules
- ACP session orchestration
- codex app server business logic
### 2. Core Stateful Target Kernel
This is the small generic kernel for long-lived bound targets.
Responsibilities:
- ensure target ready
- run turn
- cancel turn
- close target
- reset target
- status and health
- persistence of target metadata
- retries and runtime-handle safety
- per-target serialization and concurrency
ACP is the first real implementation of this shape.
This kernel should stay in core because it is mandatory infrastructure and has strict startup, reset, and recovery semantics.
### 3. Channel Binding Providers
Each channel plugin should own the meaning of "this channel conversation maps to this binding rule".
Responsibilities:
- normalize configured binding targets
- normalize inbound conversations
- match inbound conversations against compiled bindings
- define channel-specific matching priority
- optionally provide binding description text for status and logs
This is where Discord channel vs thread logic, Telegram topic rules, and Feishu conversation rules belong.
### 4. Product Consumers
Bindings are a shared capability. Different products should consume it differently.
ACP configured bindings:
- compile config rules
- resolve a target session
- ensure the ACP session is ready through the ACP kernel
Codex app server:
- create runtime-requested bindings
- claim inbound messages through plugin hooks
- optionally adopt the shared stateful target contract later if it really needs long-lived session orchestration
Main harness:
- does not need to become "a binding product"
- may eventually share small lifecycle contracts, but it should not be forced into the same engine as ACP
## The Key Architectural Decision
The shared abstraction should be:
- `bindings` as the capability
- `stateful target drivers` as an optional lower-level contract
The shared abstraction should not be:
- "one runtime engine for main harness, ACP, and codex app server"
That would overfit very different systems into one executor.
## Stable Nouns
Core should understand only stable nouns.
The stable nouns are:
- `ConversationRef`
- `BindingRule`
- `CompiledBinding`
- `BindingResolution`
- `BindingTargetDescriptor`
- `StatefulTargetDriver`
- `StatefulTargetHandle`
ACP, codex app server, and future products should compile down to those nouns instead of leaking product-specific routing rules through core.
## Proposed Capability Model
### Binding capability
The binding capability should support both configured bindings and runtime-created bindings.
Required operations:
- compile configured bindings at startup or reload
- resolve a binding from an inbound `ConversationRef`
- create a runtime binding
- touch and unbind an existing binding
- dispatch a resolved binding to its target
### Binding target descriptor
A resolved binding should point to a typed target descriptor rather than ad hoc ACP- or plugin-specific metadata blobs.
The descriptor should be able to represent at least:
- plugin-owned inbound claim targets
- stateful target drivers
That means the same binding capability can support both:
- codex app server plugin-bound conversations
- ACP configured bindings
without pretending they are the same product.
### Stateful target driver
This is the reusable control-plane contract for long-lived bound targets.
Required operations:
- `ensureReady`
- `runTurn`
- `cancel`
- `close`
- `reset`
- `status`
- `health`
ACP should remain the first built-in driver.
If the codex app server later proves that it also needs durable session handles, it can either:
- use a driver that consumes this contract, or
- keep its own product-owned runtime if that remains simpler
That should be a product decision, not something forced by the binding capability.
## Why ACP Kernel Stays In Core
ACP's kernel should remain in core because session lifecycle, persistence, retries, cancellation, and runtime-handle safety are generic platform machinery.
Those concerns are not channel-specific, and they are not codex-app-server-specific.
If we move that machinery into an ordinary plugin, we create circular bootstrapping:
- channels need it during startup and inbound routing
- reset and recovery need it when plugins may already be degraded
- failure semantics become special-case core logic anyway
If we later wrap it in a "built-in capability module", that is still effectively core.
## What Should Move Out Of Core
The following should move out of ACP-shaped core code:
- channel-specific configured binding matching
- channel-specific binding target normalization
- channel-specific recovery UX
- ACP-specific route wrapping helpers as named ACP seams
- codex app server fallback policy beyond generic plugin-bound dispatch behavior
The following should stay:
- generic binding storage and dispatch
- generic ACP control plane
- generic stateful target driver contract
## Current Problems To Remove
### Residual cleanup is now small
Most ACP-era compatibility names are gone from the generic seam.
The remaining cleanup is smaller:
- `src/acp/persistent-bindings.ts` compatibility barrel can be deleted once tests stop importing it
- ACP-named tests and mocks can be renamed over time for consistency
- docs should stop describing already-removed ACP wrappers as if they still exist
### Configured binding implementation is still too monolithic
`src/channels/plugins/configured-binding-registry.ts` still mixes:
- registry compilation
- cache invalidation
- inbound matching
- materialization of binding targets
- session-key reverse lookup
That file is now generic, but still too large and too coupled.
### Runtime-created plugin bindings still use a separate stack
`src/plugins/conversation-binding.ts` is still a separate implementation path for plugin-created bindings.
That means configured bindings and runtime-created bindings share storage, but not one consistent capability layer.
### Generic registries still hardcode ACP as a built-in
`src/channels/plugins/configured-binding-consumers.ts` and `src/channels/plugins/stateful-target-drivers.ts` still import ACP directly.
That is acceptable for now, but the clean final shape is to keep ACP built in while registering it from a dedicated bootstrap point instead of wiring it inside the generic registry files.
## Target Contracts
### Channel binding provider contract
Conceptually, each channel plugin should support:
- `compileConfiguredBinding(binding, cfg) -> CompiledBinding | null`
- `resolveInboundConversation(event) -> ConversationRef | null`
- `matchInboundConversation(compiledBinding, conversation) -> BindingMatch | null`
- `describeBinding(compiledBinding) -> string | undefined`
### Binding capability contract
Core should support:
- `compileConfiguredBindings(cfg, plugins) -> CompiledBindingRegistry`
- `resolveBinding(conversationRef) -> BindingResolution | null`
- `createRuntimeBinding(target, conversationRef, metadata) -> BindingRecord`
- `touchBinding(bindingId)`
- `unbindBinding(bindingId | target)`
- `dispatchResolvedBinding(bindingResolution, inboundEvent)`
### Stateful target driver contract
Core should support:
- `ensureReady(targetRef, cfg)`
- `runTurn(targetRef, input)`
- `cancel(targetRef, reason)`
- `close(targetRef, reason)`
- `reset(targetRef, reason)`
- `status(targetRef)`
- `health(targetRef)`
## File-Level Transition Plan
### Keep
- `src/infra/outbound/session-binding-service.ts`
- `src/acp/control-plane/*`
- `extensions/acpx/*`
### Generalize
- `src/plugins/conversation-binding.ts`
- fold runtime-created plugin bindings into the same generic binding capability instead of keeping a separate implementation stack
- `src/channels/plugins/configured-binding-registry.ts`
- split into compiler, matcher, and session-key resolution modules with a thin facade
- `src/channels/plugins/types.adapters.ts`
- finish removing ACP-era aliases after the deprecation window
- `src/plugin-sdk/conversation-runtime.ts`
- export only the generic binding capability surfaces
- `src/acp/persistent-bindings.lifecycle.ts`
- either become a generic stateful target driver consumer or be renamed to ACP driver-specific lifecycle code
### Shrink Or Delete
- `src/acp/persistent-bindings.ts`
- delete the compatibility barrel once tests import the real modules directly
- `src/acp/persistent-bindings.resolve.ts`
- keep only while ACP-specific compatibility helpers are still useful to internal callers
- ACP-named test files
- rename over time once the behavior is stable and there is no risk of mixing behavioral and naming churn
## Recommended Refactor Order
### Completed groundwork
The current branch has already completed most of the first migration wave:
- stable generic binding nouns exist
- configured bindings compile through a generic registry
- inbound routing goes through generic binding resolution
- configured binding lookup no longer performs fallback plugin discovery
- ACP is expressed as a configured-binding consumer plus a built-in stateful target driver
The remaining work is cleanup and unification, not first-principles redesign.
### Phase 1: Freeze the nouns
Introduce and document the stable binding and target types:
- `ConversationRef`
- `CompiledBinding`
- `BindingResolution`
- `BindingTargetDescriptor`
- `StatefulTargetDriver`
Do this before more movement so the rest of the refactor has firm vocabulary.
### Phase 2: Promote bindings to a first-class core capability
Refactor the existing generic binding store into an explicit capability layer.
Requirements:
- runtime-created bindings stay supported
- configured bindings become first-class
- lookup becomes channel-agnostic
### Phase 3: Compile configured bindings at startup and reload
Move configured binding compilation off the inbound hot path.
Requirements:
- load enabled channel plugins once
- compile configured bindings once
- rebuild on config or plugin reload
- inbound path becomes pure registry lookup
### Phase 4: Expand the channel provider seam
Replace the ACP-specific adapter shape with a generic channel binding provider contract.
Requirements:
- channel plugins own normalization and matching
- core no longer knows channel-specific configured binding rules
### Phase 5: Re-express ACP as a binding consumer plus built-in stateful target driver
Move ACP configured binding policy to the new binding capability while keeping ACP runtime orchestration in core.
Requirements:
- ACP configured bindings resolve through the generic binding registry
- ACP target readiness uses the ACP driver contract
- ACP-specific naming disappears from generic binding code
### Phase 6: Finish residual ACP cleanup
Remove the last compatibility leftovers and stale naming.
Requirements:
- delete `src/acp/persistent-bindings.ts`
- rename ACP-named tests where that improves clarity without changing behavior
- keep docs synchronized with the actual generic seam instead of the earlier transition state
### Phase 7: Split the configured binding registry by responsibility
Refactor `src/channels/plugins/configured-binding-registry.ts` into smaller modules.
Suggested split:
- compiler module
- inbound matcher module
- session-key reverse lookup module
- thin public facade
Requirements:
- caching behavior remains unchanged
- matching behavior remains unchanged
- session-key resolution behavior remains unchanged
### Phase 8: Keep codex app server on the same binding capability
Do not force the codex app server into ACP semantics.
Requirements:
- codex app server keeps runtime-created bindings through the same binding capability
- inbound claim remains the default delivery path
- only adopt the stateful target driver seam if the app server truly needs long-lived target orchestration
- `src/plugins/conversation-binding.ts` stops being a separate binding stack and becomes a consumer of the generic binding capability
### Phase 9: Decouple built-in ACP registration from generic registry files
Keep ACP built in, but stop importing it directly from the generic registry modules.
Requirements:
- `src/channels/plugins/configured-binding-consumers.ts` no longer hardcodes ACP imports
- `src/channels/plugins/stateful-target-drivers.ts` no longer hardcodes ACP imports
- ACP still registers by default during normal startup
- generic registry files remain product-agnostic
### Phase 10: Remove ACP-shaped compatibility facades
Once all call sites are on the generic capability:
- delete ACP-shaped routing helpers
- delete hot-path plugin bootstrapping logic
- keep only thin compatibility exports if external plugins still need a deprecation window
## Success Criteria
The architecture is done when all of these are true:
- no inbound configured-binding resolution performs plugin discovery
- no channel-specific binding semantics remain in generic core binding code
- ACP still uses a core session kernel
- codex app server and ACP both sit on top of the same binding capability
- the binding capability can represent both configured and runtime-created bindings
- runtime-created plugin bindings do not use a separate implementation stack
- long-lived target orchestration is shared through a small core driver contract
- generic registry files do not import ACP directly
- ACP-era alias names are gone from the generic/plugin SDK surface
- the main harness is not forced into the ACP engine
- external plugins can use the same capability without internal imports
## Non-Goals
These are not goals of the remaining refactor:
- moving the ACP session kernel into an ordinary plugin
- forcing the main harness, ACP, and codex app server into one executor
- making every channel implement its own retry and session-safety logic
- keeping ACP-shaped naming in the long-term generic binding layer
## Bottom Line
The right 20-year split is:
- bindings are the shared core capability
- ACP session orchestration remains a small built-in core kernel
- channel plugins own binding semantics
- backend plugins own runtime protocol details
- product consumers like ACP configured bindings and codex app server build on the same binding capability without being forced into one runtime engine
That is the leanest core that still has honest boundaries.

View File

@ -1,4 +1,4 @@
import type { AcpRuntimeEvent, AcpSessionUpdateTag } from "../runtime-api.js";
import type { AcpRuntimeEvent, AcpSessionUpdateTag } from "../../runtime-api.js";
import {
asOptionalBoolean,
asOptionalString,

Some files were not shown because too many files have changed in this diff Show More