Merge remote-tracking branch 'upstream/main'

Co-authored-by: Cursor <cursoragent@cursor.com>

# Conflicts:
#	src/telegram/bot.create-telegram-bot.installs-grammy-throttler.test.ts

commit 17bc484489

.github/workflows/formal-conformance.yml (vendored, 1 changed line)
@@ -108,6 +108,7 @@ jobs:

      - name: Comment on PR (informational)
        if: steps.drift.outputs.drift == 'true'
        continue-on-error: true
        uses: actions/github-script@v7
        with:
          script: |
.github/workflows/install-smoke.yml (vendored, 24 changed lines)

@@ -33,19 +33,17 @@ jobs:
      - name: Checkout CLI
        uses: actions/checkout@v4

      - name: Setup pnpm (corepack retry)
        run: |
          set -euo pipefail
          corepack enable
          for attempt in 1 2 3; do
            if corepack prepare pnpm@10.23.0 --activate; then
              pnpm -v
              exit 0
            fi
            echo "corepack prepare failed (attempt $attempt/3). Retrying..."
            sleep $((attempt * 10))
          done
          exit 1
      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 22.x
          check-latest: true

      - name: Setup pnpm + cache store
        uses: ./.github/actions/setup-pnpm-store-cache
        with:
          pnpm-version: "10.23.0"
          cache-key-suffix: "node22"

      - name: Install pnpm deps (minimal)
        run: pnpm install --ignore-scripts --frozen-lockfile
AGENTS.md (13 changed lines)

@@ -119,6 +119,19 @@
- Never commit or publish real phone numbers, videos, or live configuration values. Use obviously fake placeholders in docs, tests, and examples.
- Release flow: always read `docs/reference/RELEASING.md` and `docs/platforms/mac/release.md` before any release work; do not ask routine questions once those docs answer them.

## GHSA (Repo Advisory) Patch/Publish

- Fetch: `gh api /repos/openclaw/openclaw/security-advisories/<GHSA>`
- Latest npm: `npm view openclaw version --userconfig "$(mktemp)"`
- Private fork PRs must be closed:
  `fork=$(gh api /repos/openclaw/openclaw/security-advisories/<GHSA> | jq -r .private_fork.full_name)`
  `gh pr list -R "$fork" --state open` (must be empty)
- Description newline footgun: write Markdown via heredoc to `/tmp/ghsa.desc.md` (no `"\n"` strings)
- Build patch JSON via jq: `jq -n --rawfile desc /tmp/ghsa.desc.md '{summary,severity,description:$desc,vulnerabilities:[...]}' > /tmp/ghsa.patch.json`
- Patch + publish: `gh api -X PATCH /repos/openclaw/openclaw/security-advisories/<GHSA> --input /tmp/ghsa.patch.json` (publish = include `"state":"published"`; no `/publish` endpoint)
- If publish fails (HTTP 422): missing `severity`/`description`/`vulnerabilities[]`, or private fork has open PRs
- Verify: re-fetch; ensure `state=published`, `published_at` set; `jq -r .description | rg '\\n'` returns nothing
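The newline footgun can be checked locally before patching — a minimal sketch (the description text is a placeholder, not real advisory content):

```shell
# Write the advisory description via a quoted heredoc so newlines are real,
# then fail fast if any literal "\n" two-character sequence slipped in.
cat > /tmp/ghsa.desc.md <<'EOF'
## Impact
Example impact text (placeholder).

## Patches
Upgrade to the latest release.
EOF

if grep -qF '\n' /tmp/ghsa.desc.md; then
  echo 'literal \n found in description' >&2
  exit 1
fi
echo 'description OK'
```

Running this check before building `/tmp/ghsa.patch.json` catches the escaped-newline mistake without a round trip through the API.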
## Troubleshooting

- Rebrand/migration issues or legacy config/service warnings: run `openclaw doctor` (see `docs/gateway/doctor.md`).
CHANGELOG.md (12 changed lines)

@@ -6,6 +6,8 @@ Docs: https://docs.openclaw.ai
### Changes

- Cron/Gateway: add finished-run webhook delivery toggle (`notify`) and dedicated webhook auth token support (`cron.webhookToken`) for outbound cron webhook posts. (#14535) Thanks @advaitpaliwal.
- Plugins: expose `llm_input` and `llm_output` hook payloads so extensions can observe prompt/input context and model output usage details. (#16724) Thanks @SecondThread.
- Subagents: nested sub-agents (sub-sub-agents) with configurable depth. Set `agents.defaults.subagents.maxSpawnDepth: 2` to allow sub-agents to spawn their own children. Includes `maxChildrenPerAgent` limit (default 5), depth-aware tool policy, and proper announce chain routing. (#14447) Thanks @tyler6204.
- Discord: components v2 UI + embeds passthrough + exec approval UX refinements (CV2 containers, button layout, Discord-forwarding skip). Thanks @thewilloftheshadow.
- Slack/Discord/Telegram: add per-channel ack reaction overrides (account/channel-level) to support platform-specific emoji formats. (#17092) Thanks @zerone0x.

@@ -13,6 +15,8 @@ Docs: https://docs.openclaw.ai

### Fixes

- Sandbox/Security: block dangerous sandbox Docker config (bind mounts, host networking, unconfined seccomp/apparmor) to prevent container escape via config injection. Thanks @aether-ai-agent.
- Control UI: prevent stored XSS via assistant name/avatar by removing inline script injection, serving bootstrap config as JSON, and enforcing `script-src 'self'`. Thanks @Adam55A-code.
- Web UI/Agents: hide `BOOTSTRAP.md` in the Agents Files list after onboarding is completed, avoiding confusing missing-file warnings for completed workspaces. (#17491) Thanks @gumadeiras.
- Telegram: omit `message_thread_id` for DM sends/draft previews and keep forum-topic handling (`id=1` general omitted, non-general kept), preventing DM failures with `400 Bad Request: message thread not found`. (#10942) Thanks @garnetlyx.
- Subagents/Models: preserve `agents.defaults.model.fallbacks` when subagent sessions carry a model override, so subagent runs fail over to configured fallback models instead of retrying only the overridden primary model.

@@ -21,13 +25,17 @@ Docs: https://docs.openclaw.ai

- TUI: make searchable-select filtering and highlight rendering ANSI-aware so queries ignore hidden escape codes and no longer corrupt ANSI styling sequences during match highlighting. (#4519) Thanks @bee4come.
- TUI/Windows: coalesce rapid single-line submit bursts in Git Bash into one multiline message as a fallback when bracketed paste is unavailable, preventing pasted multiline text from being split into multiple sends. (#4986) Thanks @adamkane.
- TUI: suppress false `(no output)` placeholders for non-local empty final events during concurrent runs, preventing external-channel replies from showing empty assistant bubbles while a local run is still streaming. (#5782) Thanks @LagWizard and @vignesh07.
- TUI: preserve copy-sensitive long tokens (URLs/paths/file-like identifiers) during wrapping and overflow sanitization so wrapped output no longer inserts spaces that corrupt copy/paste values. (#17515, #17466, #17505) Thanks @abe238, @trevorpan, and @JasonCry.
- Auto-reply/WhatsApp/TUI/Web: when a final assistant message is `NO_REPLY` and a messaging tool send succeeded, mirror the delivered messaging-tool text into session-visible assistant output so TUI/Web no longer show `NO_REPLY` placeholders. (#7010) Thanks @Morrowind-Xie.
- Gateway/Chat: harden `chat.send` inbound message handling by rejecting null bytes, stripping unsafe control characters, and normalizing Unicode to NFC before dispatch. (#8593) Thanks @fr33d3m0n.
- Gateway/Send: return an actionable error when `send` targets internal-only `webchat`, guiding callers to use `chat.send` or a deliverable channel. (#15703) Thanks @rodrigouroz.
- Gateway/Agent: reject malformed `agent:`-prefixed session keys (for example, `agent:main`) in `agent` and `agent.identity.get` instead of silently resolving them to the default agent, preventing accidental cross-session routing. (#15707) Thanks @rodrigouroz.
- Gateway/Security: redact sensitive session/path details from `status` responses for non-admin clients; full details remain available to `operator.admin`. (#8590) Thanks @fr33d3m0n.
- Web Fetch/Security: cap downloaded response body size before HTML parsing to prevent memory exhaustion from oversized or deeply nested pages. Thanks @xuemian168.
- Agents: return an explicit timeout error reply when an embedded run times out before producing any payloads, preventing silent dropped turns during slow cache-refresh transitions. (#16659) Thanks @liaosvcaf and @vignesh07.
- Agents/OpenAI: force `store=true` for direct OpenAI Responses/Codex runs to preserve multi-turn server-side conversation state, while leaving proxy/non-OpenAI endpoints unchanged. (#16803) Thanks @mark9232 and @vignesh07.
- Agents/Security: sanitize workspace paths before embedding into LLM prompts (strip Unicode control/format chars) to prevent instruction injection via malicious directory names. Thanks @aether-ai-agent.
- Agents/Context: apply configured model `contextWindow` overrides after provider discovery so `lookupContextTokens()` honors operator config values (including discovery-failure paths). (#17404) Thanks @michaelbship and @vignesh07.
- CLI/Build: make legacy daemon CLI compatibility shim generation tolerant of minimal tsdown daemon export sets, while preserving restart/register compatibility aliases and surfacing explicit errors for unavailable legacy daemon commands. Thanks @vignesh07.
- Telegram: replace inbound `<media:audio>` placeholder with successful preflight voice transcript in message body context, preventing placeholder-only prompt bodies for mention-gated voice messages. (#16789) Thanks @Limitless2023.
- Telegram: retry inbound media `getFile` calls (3 attempts with backoff) and gracefully fall back to placeholder-only processing when retries fail, preventing dropped voice/media messages on transient Telegram network errors. (#16154) Thanks @yinghaosang.

@@ -91,6 +99,7 @@ Docs: https://docs.openclaw.ai

- Gateway/Sessions: abort active embedded runs and clear queued session work before `sessions.reset`, returning unavailable if the run does not stop in time. (#16576) Thanks @Grynn.
- Sessions/Agents: harden transcript path resolution for mismatched agent context by preserving explicit store roots and adding safe absolute-path fallback to the correct agent sessions directory. (#16288) Thanks @robbyczgw-cla.
- Agents: add a safety timeout around embedded `session.compact()` to ensure stalled compaction runs settle and release blocked session lanes. (#16331) Thanks @BinHPdev.
- Agents/Tools: make required-parameter validation errors list missing fields and instruct: "Supply correct parameters before retrying," reducing repeated invalid tool-call loops (for example `read({})`). (#14729)
- Agents: keep unresolved mutating tool failures visible until the same action retry succeeds, scope mutation-error surfacing to mutating calls (including `session_status` model changes), and dedupe duplicate failure warnings in outbound replies. (#16131) Thanks @Swader.
- Agents/Process/Bootstrap: preserve unbounded `process log` offset-only pagination (default tail applies only when both `offset` and `limit` are omitted) and enforce strict `bootstrapTotalMaxChars` budgeting across injected bootstrap content (including markers), skipping additional injection when remaining budget is too small. (#16539) Thanks @CharlieGreenman.
- Agents/Workspace: persist bootstrap onboarding state so partially initialized workspaces recover missing `BOOTSTRAP.md` once, while completed onboarding keeps BOOTSTRAP deleted even if runtime files are later recreated. Thanks @gumadeiras.

@@ -102,6 +111,7 @@ Docs: https://docs.openclaw.ai

- Tools/Write/Edit: normalize structured text-block arguments for `content`/`oldText`/`newText` before filesystem edits, preventing JSON-like file corruption and false “exact text not found” misses from block-form params. (#16778) Thanks @danielpipernz.
- Ollama/Agents: avoid forcing `<final>` tag enforcement for Ollama models, which could suppress all output as `(no output)`. (#16191) Thanks @Glucksberg.
- Plugins: suppress false duplicate plugin id warnings when the same extension is discovered via multiple paths (config/workspace/global vs bundled), while still warning on genuine duplicates. (#16222) Thanks @shadril238.
- Agents/Process: supervise PTY/child process lifecycles with explicit ownership, cancellation, timeouts, and deterministic cleanup, preventing Codex/Pi PTY sessions from dying or stalling on resume. (#14257) Thanks @onutc.
- Skills: watch `SKILL.md` only when refreshing skills snapshot to avoid file-descriptor exhaustion in large data trees. (#11325) Thanks @household-bard.
- Memory/QMD: make `memory status` read-only by skipping QMD boot update/embed side effects for status-only manager checks.
- Memory/QMD: keep original QMD failures when builtin fallback initialization fails (for example missing embedding API keys), instead of replacing them with fallback init errors.

@@ -203,6 +213,7 @@ Docs: https://docs.openclaw.ai

- Docs/Hooks: update hooks documentation URLs to the new `/automation/hooks` location. (#16165) Thanks @nicholascyh.
- Security/Audit: warn when `gateway.tools.allow` re-enables default-denied tools over HTTP `POST /tools/invoke`, since this can increase RCE blast radius if the gateway is reachable.
- Security/Plugins/Hooks: harden npm-based installs by restricting specs to registry packages only, passing `--ignore-scripts` to `npm pack`, and cleaning up temp install directories.
- Security/Sessions: preserve inter-session input provenance for routed prompts so delegated/internal sessions are not treated as direct external user instructions. Thanks @anbecker.
- Feishu: stop persistent Typing reaction on NO_REPLY/suppressed runs by wiring reply-dispatcher cleanup to remove typing indicators. (#15464) Thanks @arosstale.
- Agents: strip leading empty lines from `sanitizeUserFacingText` output and normalize whitespace-only outputs to empty text. (#16158) Thanks @mcinteerj.
- BlueBubbles: gracefully degrade when Private API is disabled by filtering private-only actions, skipping private-only reactions/reply effects, and avoiding private reply markers so non-private flows remain usable. (#16002) Thanks @L-U-C-K-Y.

@@ -333,6 +344,7 @@ Docs: https://docs.openclaw.ai

- Configure/Gateway: reject literal `"undefined"`/`"null"` token input and validate gateway password prompt values to avoid invalid password-mode configs. (#13767) Thanks @omair445.
- Gateway: handle async `EPIPE` on stdout/stderr during shutdown. (#13414) Thanks @keshav55.
- Gateway/Control UI: resolve missing dashboard assets when `openclaw` is installed globally via symlink-based Node managers (nvm/fnm/n/Homebrew). (#14919) Thanks @aynorica.
- Gateway/Control UI: keep partial assistant output visible when runs are aborted, and persist aborted partials to session transcripts for follow-up context.
- Cron: use requested `agentId` for isolated job auth resolution. (#13983) Thanks @0xRaini.
- Cron: prevent cron jobs from skipping execution when `nextRunAtMs` advances. (#14068) Thanks @WalterSumbon.
- Cron: pass `agentId` to `runHeartbeatOnce` for main-session jobs. (#14140) Thanks @ishikawa-pro.
@@ -2087,6 +2087,7 @@ public struct CronJob: Codable, Sendable {
    public let name: String
    public let description: String?
    public let enabled: Bool
    public let notify: Bool?
    public let deleteafterrun: Bool?
    public let createdatms: Int
    public let updatedatms: Int

@@ -2103,6 +2104,7 @@ public struct CronJob: Codable, Sendable {
        name: String,
        description: String?,
        enabled: Bool,
        notify: Bool?,
        deleteafterrun: Bool?,
        createdatms: Int,
        updatedatms: Int,

@@ -2118,6 +2120,7 @@ public struct CronJob: Codable, Sendable {
        self.name = name
        self.description = description
        self.enabled = enabled
        self.notify = notify
        self.deleteafterrun = deleteafterrun
        self.createdatms = createdatms
        self.updatedatms = updatedatms

@@ -2134,6 +2137,7 @@ public struct CronJob: Codable, Sendable {
        case name
        case description
        case enabled
        case notify
        case deleteafterrun = "deleteAfterRun"
        case createdatms = "createdAtMs"
        case updatedatms = "updatedAtMs"

@@ -2167,6 +2171,7 @@ public struct CronAddParams: Codable, Sendable {
    public let agentid: AnyCodable?
    public let description: String?
    public let enabled: Bool?
    public let notify: Bool?
    public let deleteafterrun: Bool?
    public let schedule: AnyCodable
    public let sessiontarget: AnyCodable

@@ -2179,6 +2184,7 @@ public struct CronAddParams: Codable, Sendable {
        agentid: AnyCodable?,
        description: String?,
        enabled: Bool?,
        notify: Bool?,
        deleteafterrun: Bool?,
        schedule: AnyCodable,
        sessiontarget: AnyCodable,

@@ -2190,6 +2196,7 @@ public struct CronAddParams: Codable, Sendable {
        self.agentid = agentid
        self.description = description
        self.enabled = enabled
        self.notify = notify
        self.deleteafterrun = deleteafterrun
        self.schedule = schedule
        self.sessiontarget = sessiontarget

@@ -2202,6 +2209,7 @@ public struct CronAddParams: Codable, Sendable {
        case agentid = "agentId"
        case description
        case enabled
        case notify
        case deleteafterrun = "deleteAfterRun"
        case schedule
        case sessiontarget = "sessionTarget"
@@ -27,6 +27,7 @@ Troubleshooting: [/automation/troubleshooting](/automation/troubleshooting)

- **Main session**: enqueue a system event, then run on the next heartbeat.
- **Isolated**: run a dedicated agent turn in `cron:<jobId>`, with delivery (announce by default or none).
- Wakeups are first-class: a job can request “wake now” vs “next heartbeat”.
- Webhook posting is opt-in per job: set `notify: true` and configure `cron.webhook`.

## Quick start (actionable)
@@ -288,7 +289,7 @@ Notes:

- `schedule.at` accepts ISO 8601 (timezone optional; treated as UTC when omitted).
- `everyMs` is milliseconds.
- `sessionTarget` must be `"main"` or `"isolated"` and must match `payload.kind`.
-- Optional fields: `agentId`, `description`, `enabled`, `deleteAfterRun` (defaults to true for `at`),
+- Optional fields: `agentId`, `description`, `enabled`, `notify`, `deleteAfterRun` (defaults to true for `at`),
  `delivery`.
- `wakeMode` defaults to `"now"` when omitted.
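Pulling these notes together, a minimal illustrative params sketch (values are placeholders, not from the source; the `payload` field is omitted here, and its shape must match `sessionTarget` as described above):

```json5
{
  name: "one-shot-reminder",                // illustrative name
  schedule: { at: "2026-03-01T09:00:00Z" }, // ISO 8601; treated as UTC when tz omitted
  sessionTarget: "isolated",                // must match payload.kind
  notify: true,                             // opt in to the finished-run webhook
  deleteAfterRun: true,                     // default true for `at` schedules
  wakeMode: "now",                          // default when omitted
}
```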
@@ -333,10 +334,19 @@ Notes:

```json5
    enabled: true, // default true
    store: "~/.openclaw/cron/jobs.json",
    maxConcurrentRuns: 1, // default 1
    webhook: "https://example.invalid/cron-finished", // optional finished-run webhook endpoint
    webhookToken: "replace-with-dedicated-webhook-token", // optional, do not reuse gateway auth token
  },
}
```

Webhook behavior:

- The Gateway posts finished run events to `cron.webhook` only when the job has `notify: true`.
- The payload is the cron finished event JSON.
- If `cron.webhookToken` is set, the auth header is `Authorization: Bearer <cron.webhookToken>`.
- If `cron.webhookToken` is not set, no `Authorization` header is sent.
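The header rule can be mirrored when exercising a webhook endpoint by hand — a sketch that only assembles the request and prints it (nothing is sent; the token value, URL, and JSON body are placeholders):

```shell
token="replace-with-dedicated-webhook-token"   # stands in for cron.webhookToken (may be empty)
url="https://example.invalid/cron-finished"    # stands in for cron.webhook

# Build the curl argument list; the body here is a placeholder, not the real event shape.
args=(-X POST -H 'Content-Type: application/json' --data '{"event":"placeholder"}')

# Mirror the Gateway rule: attach Authorization only when a token is configured.
if [ -n "${token}" ]; then
  args+=(-H "Authorization: Bearer ${token}")
fi

# Dry run: print the command instead of executing it.
printf '%s\n' curl "${args[@]}" "${url}"
```

With `token` set to an empty string, the printed command carries no `Authorization` header, matching the documented behavior.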
Disable cron entirely:

- `cron.enabled: false` (config)
@@ -105,7 +105,7 @@ Want “groups can only see folder X” instead of “no host access”? Keep `w

  docker: {
    binds: [
      // hostPath:containerPath:mode
-      "~/FriendsShared:/data:ro",
+      "/home/user/FriendsShared:/data:ro",
    ],
  },
},
docs/experiments/plans/pty-process-supervision.md (new file, 192 lines)

@@ -0,0 +1,192 @@
---
summary: "Production plan for reliable interactive process supervision (PTY + non-PTY) with explicit ownership, unified lifecycle, and deterministic cleanup"
owner: "openclaw"
status: "in-progress"
last_updated: "2026-02-15"
title: "PTY and Process Supervision Plan"
---

# PTY and Process Supervision Plan

## 1. Problem and goal

We need one reliable lifecycle for long-running command execution across:

- `exec` foreground runs
- `exec` background runs
- `process` follow-up actions (`poll`, `log`, `send-keys`, `paste`, `submit`, `kill`, `remove`)
- CLI agent runner subprocesses

The goal is not just to support PTY. The goal is predictable ownership, cancellation, timeout, and cleanup with no unsafe process-matching heuristics.

## 2. Scope and boundaries

- Keep the implementation internal in `src/process/supervisor`.
- Do not create a new package for this.
- Keep current behavior compatibility where practical.
- Do not broaden scope to terminal replay or tmux-style session persistence.

## 3. Implemented in this branch

### Supervisor baseline already present

- Supervisor module is in place under `src/process/supervisor/*`.
- Exec runtime and CLI runner are already routed through supervisor spawn and wait.
- Registry finalization is idempotent.

### This pass completed

1. Explicit PTY command contract

- `SpawnInput` is now a discriminated union in `src/process/supervisor/types.ts`.
- PTY runs require `ptyCommand` instead of reusing generic `argv`.
- Supervisor no longer rebuilds PTY command strings from argv joins in `src/process/supervisor/supervisor.ts`.
- Exec runtime now passes `ptyCommand` directly in `src/agents/bash-tools.exec-runtime.ts`.

2. Process layer type decoupling

- Supervisor types no longer import `SessionStdin` from agents.
- The process-local stdin contract lives in `src/process/supervisor/types.ts` (`ManagedRunStdin`).
- Adapters now depend only on process-level types:
  - `src/process/supervisor/adapters/child.ts`
  - `src/process/supervisor/adapters/pty.ts`

3. Process tool lifecycle ownership improvement

- `src/agents/bash-tools.process.ts` now requests cancellation through the supervisor first.
- `process kill/remove` now use process-tree fallback termination when the supervisor lookup misses.
- `remove` keeps deterministic remove behavior by dropping running session entries immediately after termination is requested.

4. Single-source watchdog defaults

- Added shared defaults in `src/agents/cli-watchdog-defaults.ts`.
- `src/agents/cli-backends.ts` consumes the shared defaults.
- `src/agents/cli-runner/reliability.ts` consumes the same shared defaults.

5. Dead helper cleanup

- Removed the unused `killSession` helper path from `src/agents/bash-tools.shared.ts`.

6. Direct supervisor path tests added

- Added `src/agents/bash-tools.process.supervisor.test.ts` to cover kill and remove routing through supervisor cancellation.

7. Reliability gap fixes completed

- `src/agents/bash-tools.process.ts` now falls back to real OS-level process termination when the supervisor lookup misses.
- `src/process/supervisor/adapters/child.ts` now uses process-tree termination semantics for default cancel/timeout kill paths.
- Added a shared process-tree utility in `src/process/kill-tree.ts`.

8. PTY contract edge-case coverage added

- Added `src/process/supervisor/supervisor.pty-command.test.ts` for verbatim PTY command forwarding and empty-command rejection.
- Added `src/process/supervisor/adapters/child.test.ts` for process-tree kill behavior in child adapter cancellation.
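The process-tree semantics behind `src/process/kill-tree.ts` can be illustrated in shell — a sketch of why group-targeted signals matter, not the utility's actual implementation (assumes Linux with `setsid` available):

```shell
set -euo pipefail

# Start a two-process tree in its own session/process group:
# a leader shell plus one child sleeper.
setsid sh -c 'sleep 30 & wait' &
pgid=$!
sleep 0.2

# kill "$pgid" would signal only the leader and could orphan the sleeper;
# a negative PID delivers the signal to every member of the group.
kill -TERM -- "-${pgid}"
wait "${pgid}" 2>/dev/null || true
echo "process group ${pgid} terminated"
```

This is the same reason the child adapter's default cancel/timeout path uses tree termination: signaling only the direct child leaves grandchildren (for example, a shell's spawned commands) running.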
## 4. Remaining gaps and decisions

### Reliability status

The two required reliability gaps for this pass are now closed:

- `process kill/remove` now has a real OS termination fallback when the supervisor lookup misses.
- Child cancel/timeout now uses process-tree kill semantics for the default kill path.
- Regression tests were added for both behaviors.

### Durability and startup reconciliation

Restart behavior is now explicitly defined as in-memory lifecycle only.

- `reconcileOrphans()` remains a no-op in `src/process/supervisor/supervisor.ts` by design.
- Active runs are not recovered after process restart.
- This boundary is intentional for this implementation pass to avoid partial persistence risks.

### Maintainability follow-ups

1. `runExecProcess` in `src/agents/bash-tools.exec-runtime.ts` still handles multiple responsibilities and can be split into focused helpers in a follow-up.

## 5. Implementation plan

The implementation pass for required reliability and contract items is complete.

Completed:

- `process kill/remove` fallback real termination
- process-tree cancellation for the child adapter default kill path
- regression tests for fallback kill and the child adapter kill path
- PTY command edge-case tests under explicit `ptyCommand`
- explicit in-memory restart boundary with `reconcileOrphans()` as a no-op by design

Optional follow-up:

- split `runExecProcess` into focused helpers with no behavior drift

## 6. File map

### Process supervisor

- `src/process/supervisor/types.ts`: updated with discriminated spawn input and the process-local stdin contract.
- `src/process/supervisor/supervisor.ts`: updated to use explicit `ptyCommand`.
- `src/process/supervisor/adapters/child.ts` and `src/process/supervisor/adapters/pty.ts`: decoupled from agent types.
- `src/process/supervisor/registry.ts`: idempotent finalize unchanged and retained.

### Exec and process integration

- `src/agents/bash-tools.exec-runtime.ts`: updated to pass the PTY command explicitly and keep the fallback path.
- `src/agents/bash-tools.process.ts`: updated to cancel via the supervisor with real process-tree fallback termination.
- `src/agents/bash-tools.shared.ts`: removed the direct kill helper path.

### CLI reliability

- `src/agents/cli-watchdog-defaults.ts`: added as the shared baseline.
- `src/agents/cli-backends.ts` and `src/agents/cli-runner/reliability.ts`: now consume the same defaults.

## 7. Validation run in this pass

Unit tests:

- `pnpm vitest src/process/supervisor/registry.test.ts`
- `pnpm vitest src/process/supervisor/supervisor.test.ts`
- `pnpm vitest src/process/supervisor/supervisor.pty-command.test.ts`
- `pnpm vitest src/process/supervisor/adapters/child.test.ts`
- `pnpm vitest src/agents/cli-backends.test.ts`
- `pnpm vitest src/agents/bash-tools.exec.pty-cleanup.test.ts`
- `pnpm vitest src/agents/bash-tools.process.poll-timeout.test.ts`
- `pnpm vitest src/agents/bash-tools.process.supervisor.test.ts`
- `pnpm vitest src/process/exec.test.ts`

E2E targets:

- `pnpm test:e2e src/agents/cli-runner.e2e.test.ts`
- `pnpm test:e2e src/agents/bash-tools.exec.pty-fallback.e2e.test.ts src/agents/bash-tools.exec.background-abort.e2e.test.ts src/agents/bash-tools.process.send-keys.e2e.test.ts`

Typecheck note:

- `pnpm tsgo` currently fails in this repo due to a pre-existing UI typing dependency issue (`@vitest/browser-playwright` resolution), unrelated to this process supervision work.

## 8. Operational guarantees preserved

- Exec env hardening behavior is unchanged.
- Approval and allowlist flow is unchanged.
- Output sanitization and output caps are unchanged.
- The PTY adapter still guarantees wait settlement on forced kill and listener disposal.

## 9. Definition of done

1. Supervisor is the lifecycle owner for managed runs.
2. PTY spawn uses an explicit command contract with no argv reconstruction.
3. The process layer has no type dependency on the agent layer for supervisor stdin contracts.
4. Watchdog defaults are single-source.
5. Targeted unit and e2e tests remain green.
6. The restart durability boundary is explicitly documented or fully implemented.

## 10. Summary

The branch now has a coherent and safer supervision shape:

- explicit PTY contract
- cleaner process layering
- supervisor-driven cancellation path for process operations
- real fallback termination when supervisor lookup misses
- process-tree cancellation for child-run default kill paths
- unified watchdog defaults
- explicit in-memory restart boundary (no orphan reconciliation across restart in this pass)
@@ -2295,12 +2295,16 @@ Current builds no longer include the TCP bridge. Nodes connect over the Gateway

  cron: {
    enabled: true,
    maxConcurrentRuns: 2,
    webhook: "https://example.invalid/cron-finished", // optional, must be http:// or https://
    webhookToken: "replace-with-dedicated-token", // optional bearer token for outbound webhook auth
    sessionRetention: "24h", // duration string or false
  },
}
```

- `sessionRetention`: how long to keep completed cron sessions before pruning. Default: `24h`.
- `webhook`: finished-run webhook endpoint; only used when the job has `notify: true`.
- `webhookToken`: dedicated bearer token for webhook auth; if omitted, no auth header is sent.

See [Cron Jobs](/automation/cron-jobs).

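The `webhook`/`webhookToken` pairing above can be sketched as a small request builder. This is an illustrative assumption only: `buildCronWebhookRequest`, the payload shape, and the types are hypothetical, not the Gateway's actual code; only the auth-header semantics come from the docs.

```typescript
type CronWebhookConfig = { webhook?: string; webhookToken?: string };
type WebhookRequest = { url: string; method: "POST"; headers: Record<string, string>; body: string };

// Hypothetical sketch of assembling a finished-run webhook POST from the config above.
function buildCronWebhookRequest(
  cfg: CronWebhookConfig,
  payload: { jobId: string; status: "ok" | "error" },
): WebhookRequest | null {
  // Posting requires a configured endpoint (and notify: true on the job).
  if (!cfg.webhook) return null;
  const headers: Record<string, string> = { "Content-Type": "application/json" };
  // The bearer token is optional; when omitted, no auth header is sent.
  if (cfg.webhookToken) headers.Authorization = `Bearer ${cfg.webhookToken}`;
  return { url: cfg.webhook, method: "POST", headers, body: JSON.stringify(payload) };
}
```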
@@ -76,7 +76,7 @@ Global and per-agent binds are **merged** (not replaced). Under `scope: "shared"

- When set (including `[]`), it replaces `agents.defaults.sandbox.docker.binds` for the browser container.
- When omitted, the browser container falls back to `agents.defaults.sandbox.docker.binds` (backwards compatible).

Example (read-only source + docker socket):
Example (read-only source + an extra data directory):

```json5
{
@@ -84,7 +84,7 @@ Example (read-only source + docker socket):
  defaults: {
    sandbox: {
      docker: {
        binds: ["/home/user/source:/source:ro", "/var/run/docker.sock:/var/run/docker.sock"],
        binds: ["/home/user/source:/source:ro", "/var/data/myapp:/data:ro"],
      },
    },
  },
@@ -105,7 +105,8 @@ Example (read-only source + docker socket):

Security notes:

- Binds bypass the sandbox filesystem: they expose host paths with whatever mode you set (`:ro` or `:rw`).
- Sensitive mounts (e.g., `docker.sock`, secrets, SSH keys) should be `:ro` unless absolutely required.
- OpenClaw blocks dangerous bind sources (for example: `docker.sock`, `/etc`, `/proc`, `/sys`, `/dev`, and parent mounts that would expose them).
- Sensitive mounts (secrets, SSH keys, service credentials) should be `:ro` unless absolutely required.
- Combine with `workspaceAccess: "ro"` if you only need read access to the workspace; bind modes stay independent.
- See [Sandbox vs Tool Policy vs Elevated](/gateway/sandbox-vs-tool-policy-vs-elevated) for how binds interact with tool policy and elevated exec.

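The "dangerous bind sources, including parent mounts that would expose them" rule above amounts to a prefix check in both directions: a bind is blocked if its host path sits inside a blocked path, or if a blocked path sits inside it. The helper names and the exact list here are illustrative assumptions, not OpenClaw's implementation.

```typescript
// Hypothetical blocklist; the real list lives in OpenClaw's sandbox code.
const BLOCKED_BIND_SOURCES = ["/var/run/docker.sock", "/etc", "/proc", "/sys", "/dev"];

// True when path `b` equals `a` or lives under directory `a`.
function covers(a: string, b: string): boolean {
  return b === a || b.startsWith(a === "/" ? "/" : a + "/");
}

// A bind is "host:container[:mode]"; only the host path matters for blocking.
function isDangerousBindSource(bind: string): boolean {
  const source = bind.split(":")[0];
  // Block the source itself, anything inside a blocked path, and any parent
  // mount that would expose a blocked path.
  return BLOCKED_BIND_SOURCES.some((blocked) => covers(blocked, source) || covers(source, blocked));
}
```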
@@ -224,6 +224,7 @@ Fetch a URL and extract readable content.

    enabled: true,
    maxChars: 50000,
    maxCharsCap: 50000,
    maxResponseBytes: 2000000,
    timeoutSeconds: 30,
    cacheTtlMinutes: 15,
    maxRedirects: 3,
@@ -256,6 +257,7 @@ Notes:

- `web_fetch` sends a Chrome-like User-Agent and `Accept-Language` by default; override `userAgent` if needed.
- `web_fetch` blocks private/internal hostnames and re-checks redirects (limit with `maxRedirects`).
- `maxChars` is clamped to `tools.web.fetch.maxCharsCap`.
- `web_fetch` caps the downloaded response body size to `tools.web.fetch.maxResponseBytes` before parsing; oversized responses are truncated and include a warning.
- `web_fetch` is best-effort extraction; some sites will need the browser tool.
- See [Firecrawl](/tools/firecrawl) for key setup and service details.
- Responses are cached (default 15 minutes) to reduce repeated fetches.

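The two caps in the notes above (char clamp, byte cap before parsing) can be sketched as one small function. This is an assumption-labeled illustration: only the clamping semantics come from the docs; the function and its shape are hypothetical.

```typescript
// Illustrative sketch of the web_fetch caps; not the actual implementation.
function applyFetchCaps(
  requestedMaxChars: number,
  cfg: { maxCharsCap: number; maxResponseBytes: number },
  body: Buffer,
): { maxChars: number; body: Buffer; truncated: boolean } {
  // maxChars is clamped to tools.web.fetch.maxCharsCap.
  const maxChars = Math.min(requestedMaxChars, cfg.maxCharsCap);
  // The downloaded body is capped to maxResponseBytes before parsing;
  // oversized responses are truncated (and, per the docs, get a warning).
  const truncated = body.length > cfg.maxResponseBytes;
  return { maxChars, body: body.subarray(0, cfg.maxResponseBytes), truncated };
}
```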
@@ -83,6 +83,9 @@ Cron jobs panel notes:

- For isolated jobs, delivery defaults to announce summary. You can switch to none if you want internal-only runs.
- Channel/target fields appear when announce is selected.
- New job form includes a **Notify webhook** toggle (`notify` on the job).
- Gateway webhook posting requires both `notify: true` on the job and `cron.webhook` in config.
- Set `cron.webhookToken` to send a dedicated bearer token; if omitted, the webhook is sent without an auth header.

## Chat behavior

@@ -93,6 +96,10 @@ Cron jobs panel notes:

- Click **Stop** (calls `chat.abort`)
- Type `/stop` (or `stop|esc|abort|wait|exit|interrupt`) to abort out-of-band
- `chat.abort` supports `{ sessionKey }` (no `runId`) to abort all active runs for that session
- Abort partial retention:
  - When a run is aborted, partial assistant text can still be shown in the UI
  - Gateway persists aborted partial assistant text into transcript history when buffered output exists
  - Persisted entries include abort metadata so transcript consumers can tell abort partials from normal completion output

## Tailnet access (recommended)

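The `chat.abort` semantics above (one run by `runId`, or every active run for a `sessionKey`) can be sketched with a small run registry. The registry shape is an assumption for illustration; only the abort-by-session behavior comes from the notes.

```typescript
// Hypothetical registry illustrating chat.abort's { runId } vs { sessionKey } semantics.
class RunRegistry {
  private runs = new Map<string, { sessionKey: string; controller: AbortController }>();

  register(runId: string, sessionKey: string, controller: AbortController): void {
    this.runs.set(runId, { sessionKey, controller });
  }

  // With { runId }: abort one run. With { sessionKey }: abort all active runs
  // for that session. Returns how many runs were aborted.
  abort(opts: { runId?: string; sessionKey?: string }): number {
    let aborted = 0;
    for (const [runId, run] of this.runs) {
      if ((opts.runId && runId === opts.runId) || (opts.sessionKey && run.sessionKey === opts.sessionKey)) {
        run.controller.abort();
        this.runs.delete(runId);
        aborted += 1;
      }
    }
    return aborted;
  }
}
```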
@@ -25,6 +25,8 @@ Status: the macOS/iOS SwiftUI chat UI talks directly to the Gateway WebSocket.

- The UI connects to the Gateway WebSocket and uses `chat.history`, `chat.send`, and `chat.inject`.
- `chat.inject` appends an assistant note directly to the transcript and broadcasts it to the UI (no agent run).
- Aborted runs can keep partial assistant output visible in the UI.
- Gateway persists aborted partial assistant text into transcript history when buffered output exists, and marks those entries with abort metadata.
- History is always fetched from the gateway (no local file watching).
- If the gateway is unreachable, WebChat is read-only.

@@ -201,13 +201,13 @@
    "@types/proper-lockfile": "^4.1.4",
    "@types/qrcode-terminal": "^0.12.2",
    "@types/ws": "^8.18.1",
    "@typescript/native-preview": "7.0.0-dev.20260214.1",
    "@typescript/native-preview": "7.0.0-dev.20260215.1",
    "@vitest/coverage-v8": "^4.0.18",
    "lit": "^3.3.2",
    "ollama": "^0.6.3",
    "oxfmt": "0.32.0",
    "oxlint": "^1.47.0",
    "oxlint-tsgolint": "^0.12.2",
    "oxlint-tsgolint": "^0.13.0",
    "rolldown": "1.0.0-rc.4",
    "tsdown": "^0.20.3",
    "tsx": "^4.21.0",

12 pnpm-lock.yaml generated
@@ -262,8 +262,8 @@ importers:
        specifier: ^8.18.1
        version: 8.18.1
      '@typescript/native-preview':
        specifier: 7.0.0-dev.20260214.1
        version: 7.0.0-dev.20260214.1
        specifier: 7.0.0-dev.20260215.1
        version: 7.0.0-dev.20260215.1
      '@vitest/coverage-v8':
        specifier: ^4.0.18
        version: 4.0.18(vitest@4.0.18)
@@ -278,16 +278,16 @@ importers:
        version: 0.32.0
      oxlint:
        specifier: ^1.47.0
        version: 1.47.0(oxlint-tsgolint@0.12.2)
        version: 1.47.0(oxlint-tsgolint@0.13.0)
      oxlint-tsgolint:
        specifier: ^0.12.2
        version: 0.12.2
        specifier: ^0.13.0
        version: 0.13.0
      rolldown:
        specifier: 1.0.0-rc.4
        version: 1.0.0-rc.4
      tsdown:
        specifier: ^0.20.3
        version: 0.20.3(@typescript/native-preview@7.0.0-dev.20260214.1)(typescript@5.9.3)
        version: 0.20.3(@typescript/native-preview@7.0.0-dev.20260215.1)(typescript@5.9.3)
      tsx:
        specifier: ^4.21.0
        version: 4.21.0

@@ -165,7 +165,7 @@ const defaultWorkerBudget =
  unit: Math.max(2, Math.min(8, Math.floor(localWorkers / 2))),
  unitIsolated: 1,
  extensions: Math.max(1, Math.min(4, Math.floor(localWorkers / 4))),
  gateway: 1,
  gateway: 2,
};

// Keep worker counts predictable for local runs; trim macOS CI workers to avoid worker crashes/OOM.

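The worker budget above derives per-suite worker counts from the local worker pool with clamped halving/quartering. This standalone sketch reproduces that arithmetic (the clamp bounds and the new `gateway: 2` value come from the diff; the function wrapper is just for illustration):

```typescript
// Standalone reproduction of the defaultWorkerBudget arithmetic from the diff above.
function workerBudget(localWorkers: number) {
  return {
    unit: Math.max(2, Math.min(8, Math.floor(localWorkers / 2))),       // half the pool, clamped to [2, 8]
    unitIsolated: 1,
    extensions: Math.max(1, Math.min(4, Math.floor(localWorkers / 4))), // quarter of the pool, clamped to [1, 4]
    gateway: 2,
  };
}
```

For example, an 8-worker machine yields 4 unit workers and 2 extension workers, while a 2-worker machine still gets the 2-unit / 1-extension floor.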
@@ -1,6 +1,7 @@
import { describe, expect, it, vi } from "vitest";
import { afterEach, describe, expect, it, vi } from "vitest";
import type { GatewayClient } from "../gateway/client.js";
import { parseSessionMeta, resolveSessionKey } from "./session-mapper.js";
import { createInMemorySessionStore } from "./session.js";

function createGateway(resolveLabelKey = "agent:main:label"): {
  gateway: GatewayClient;
@@ -54,3 +55,26 @@ describe("acp session mapper", () => {
    expect(request).not.toHaveBeenCalled();
  });
});

describe("acp session manager", () => {
  const store = createInMemorySessionStore();

  afterEach(() => {
    store.clearAllSessionsForTest();
  });

  it("tracks active runs and clears on cancel", () => {
    const session = store.createSession({
      sessionKey: "acp:test",
      cwd: "/tmp",
    });
    const controller = new AbortController();
    store.setActiveRun(session.sessionId, "run-1", controller);

    expect(store.getSessionByRunId("run-1")?.sessionId).toBe(session.sessionId);

    const cancelled = store.cancelActiveRun(session.sessionId);
    expect(cancelled).toBe(true);
    expect(store.getSessionByRunId("run-1")).toBeUndefined();
  });
});

@@ -1,25 +0,0 @@
import { describe, expect, it, afterEach } from "vitest";
import { createInMemorySessionStore } from "./session.js";

describe("acp session manager", () => {
  const store = createInMemorySessionStore();

  afterEach(() => {
    store.clearAllSessionsForTest();
  });

  it("tracks active runs and clears on cancel", () => {
    const session = store.createSession({
      sessionKey: "acp:test",
      cwd: "/tmp",
    });
    const controller = new AbortController();
    store.setActiveRun(session.sessionId, "run-1", controller);

    expect(store.getSessionByRunId("run-1")?.sessionId).toBe(session.sessionId);

    const cancelled = store.cancelActiveRun(session.sessionId);
    expect(cancelled).toBe(true);
    expect(store.getSessionByRunId("run-1")).toBeUndefined();
  });
});
@@ -2,12 +2,11 @@ import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, describe, expect, it } from "vitest";
import { captureEnv } from "../test-utils/env.js";
import { resolveOpenClawAgentDir } from "./agent-paths.js";

describe("resolveOpenClawAgentDir", () => {
  const previousStateDir = process.env.OPENCLAW_STATE_DIR;
  const previousAgentDir = process.env.OPENCLAW_AGENT_DIR;
  const previousPiAgentDir = process.env.PI_CODING_AGENT_DIR;
  const env = captureEnv(["OPENCLAW_STATE_DIR", "OPENCLAW_AGENT_DIR", "PI_CODING_AGENT_DIR"]);
  let tempStateDir: string | null = null;

  afterEach(async () => {
@@ -15,21 +14,7 @@ describe("resolveOpenClawAgentDir", () => {
      await fs.rm(tempStateDir, { recursive: true, force: true });
      tempStateDir = null;
    }
    if (previousStateDir === undefined) {
      delete process.env.OPENCLAW_STATE_DIR;
    } else {
      process.env.OPENCLAW_STATE_DIR = previousStateDir;
    }
    if (previousAgentDir === undefined) {
      delete process.env.OPENCLAW_AGENT_DIR;
    } else {
      process.env.OPENCLAW_AGENT_DIR = previousAgentDir;
    }
    if (previousPiAgentDir === undefined) {
      delete process.env.PI_CODING_AGENT_DIR;
    } else {
      process.env.PI_CODING_AGENT_DIR = previousPiAgentDir;
    }
    env.restore();
  });

  it("defaults to the multi-agent path when no overrides are set", async () => {

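The test changes above (here and in the auth-profile suites below) replace hand-rolled `previous*` save/restore blocks with a shared `captureEnv` helper. The real helper lives in `src/test-utils/env.ts`; a minimal sketch of what such a helper could look like, as an assumption rather than the repo's actual code:

```typescript
// Hypothetical minimal captureEnv: snapshot the named env vars now, restore them later.
function captureEnv(keys: string[]): { restore: () => void } {
  const saved = new Map<string, string | undefined>(keys.map((key) => [key, process.env[key]]));
  return {
    restore() {
      for (const [key, value] of saved) {
        // Variables that were unset must be deleted, not set to "undefined".
        if (value === undefined) delete process.env[key];
        else process.env[key] = value;
      }
    },
  };
}
```

This collapses three (or four) repeated if/else restore blocks per suite into one `env.restore()` call in `afterEach`.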
@@ -2,6 +2,7 @@ import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, describe, expect, it, vi } from "vitest";
import { captureEnv } from "../test-utils/env.js";
import {
  type AuthProfileStore,
  ensureAuthProfileStore,
@@ -10,10 +11,7 @@ import {
import { CHUTES_TOKEN_ENDPOINT, type ChutesStoredOAuth } from "./chutes-oauth.js";

describe("auth-profiles (chutes)", () => {
  const previousStateDir = process.env.OPENCLAW_STATE_DIR;
  const previousAgentDir = process.env.OPENCLAW_AGENT_DIR;
  const previousPiAgentDir = process.env.PI_CODING_AGENT_DIR;
  const previousChutesClientId = process.env.CHUTES_CLIENT_ID;
  let envSnapshot: ReturnType<typeof captureEnv> | undefined;
  let tempDir: string | null = null;

  afterEach(async () => {
@@ -22,29 +20,17 @@ describe("auth-profiles (chutes)", () => {
      await fs.rm(tempDir, { recursive: true, force: true });
      tempDir = null;
    }
    if (previousStateDir === undefined) {
      delete process.env.OPENCLAW_STATE_DIR;
    } else {
      process.env.OPENCLAW_STATE_DIR = previousStateDir;
    }
    if (previousAgentDir === undefined) {
      delete process.env.OPENCLAW_AGENT_DIR;
    } else {
      process.env.OPENCLAW_AGENT_DIR = previousAgentDir;
    }
    if (previousPiAgentDir === undefined) {
      delete process.env.PI_CODING_AGENT_DIR;
    } else {
      process.env.PI_CODING_AGENT_DIR = previousPiAgentDir;
    }
    if (previousChutesClientId === undefined) {
      delete process.env.CHUTES_CLIENT_ID;
    } else {
      process.env.CHUTES_CLIENT_ID = previousChutesClientId;
    }
    envSnapshot?.restore();
    envSnapshot = undefined;
  });

  it("refreshes expired Chutes OAuth credentials", async () => {
    envSnapshot = captureEnv([
      "OPENCLAW_STATE_DIR",
      "OPENCLAW_AGENT_DIR",
      "PI_CODING_AGENT_DIR",
      "CHUTES_CLIENT_ID",
    ]);
    tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-chutes-"));
    process.env.OPENCLAW_STATE_DIR = tempDir;
    process.env.OPENCLAW_AGENT_DIR = path.join(tempDir, "agents", "main", "agent");

@@ -1,30 +1,13 @@
import { describe, expect, it } from "vitest";
import { resolveAuthProfileOrder } from "./auth-profiles.js";
import {
  ANTHROPIC_CFG,
  ANTHROPIC_STORE,
} from "./auth-profiles.resolve-auth-profile-order.fixtures.js";

describe("resolveAuthProfileOrder", () => {
  const store: AuthProfileStore = {
    version: 1,
    profiles: {
      "anthropic:default": {
        type: "api_key",
        provider: "anthropic",
        key: "sk-default",
      },
      "anthropic:work": {
        type: "api_key",
        provider: "anthropic",
        key: "sk-work",
      },
    },
  };
  const cfg = {
    auth: {
      profiles: {
        "anthropic:default": { provider: "anthropic", mode: "api_key" },
        "anthropic:work": { provider: "anthropic", mode: "api_key" },
      },
    },
  };
  const store = ANTHROPIC_STORE;
  const cfg = ANTHROPIC_CFG;

  it("does not prioritize lastGood over round-robin ordering", () => {
    const order = resolveAuthProfileOrder({

@@ -0,0 +1,26 @@
import type { AuthProfileStore } from "./auth-profiles.js";

export const ANTHROPIC_STORE: AuthProfileStore = {
  version: 1,
  profiles: {
    "anthropic:default": {
      type: "api_key",
      provider: "anthropic",
      key: "sk-default",
    },
    "anthropic:work": {
      type: "api_key",
      provider: "anthropic",
      key: "sk-work",
    },
  },
};

export const ANTHROPIC_CFG = {
  auth: {
    profiles: {
      "anthropic:default": { provider: "anthropic", mode: "api_key" },
      "anthropic:work": { provider: "anthropic", mode: "api_key" },
    },
  },
};
@@ -2,30 +2,6 @@ import { describe, expect, it } from "vitest";
import { resolveAuthProfileOrder } from "./auth-profiles.js";

describe("resolveAuthProfileOrder", () => {
  const _store: AuthProfileStore = {
    version: 1,
    profiles: {
      "anthropic:default": {
        type: "api_key",
        provider: "anthropic",
        key: "sk-default",
      },
      "anthropic:work": {
        type: "api_key",
        provider: "anthropic",
        key: "sk-work",
      },
    },
  };
  const _cfg = {
    auth: {
      profiles: {
        "anthropic:default": { provider: "anthropic", mode: "api_key" },
        "anthropic:work": { provider: "anthropic", mode: "api_key" },
      },
    },
  };

  it("normalizes z.ai aliases in auth.order", () => {
    const order = resolveAuthProfileOrder({
      cfg: {

@@ -2,30 +2,6 @@ import { describe, expect, it } from "vitest";
import { resolveAuthProfileOrder } from "./auth-profiles.js";

describe("resolveAuthProfileOrder", () => {
  const _store: AuthProfileStore = {
    version: 1,
    profiles: {
      "anthropic:default": {
        type: "api_key",
        provider: "anthropic",
        key: "sk-default",
      },
      "anthropic:work": {
        type: "api_key",
        provider: "anthropic",
        key: "sk-work",
      },
    },
  };
  const _cfg = {
    auth: {
      profiles: {
        "anthropic:default": { provider: "anthropic", mode: "api_key" },
        "anthropic:work": { provider: "anthropic", mode: "api_key" },
      },
    },
  };

  it("orders by lastUsed when no explicit order exists", () => {
    const order = resolveAuthProfileOrder({
      store: {

@@ -1,30 +1,13 @@
import { describe, expect, it } from "vitest";
import { resolveAuthProfileOrder } from "./auth-profiles.js";
import {
  ANTHROPIC_CFG,
  ANTHROPIC_STORE,
} from "./auth-profiles.resolve-auth-profile-order.fixtures.js";

describe("resolveAuthProfileOrder", () => {
  const store: AuthProfileStore = {
    version: 1,
    profiles: {
      "anthropic:default": {
        type: "api_key",
        provider: "anthropic",
        key: "sk-default",
      },
      "anthropic:work": {
        type: "api_key",
        provider: "anthropic",
        key: "sk-work",
      },
    },
  };
  const cfg = {
    auth: {
      profiles: {
        "anthropic:default": { provider: "anthropic", mode: "api_key" },
        "anthropic:work": { provider: "anthropic", mode: "api_key" },
      },
    },
  };
  const store = ANTHROPIC_STORE;
  const cfg = ANTHROPIC_CFG;

  it("uses stored profiles when no config exists", () => {
    const order = resolveAuthProfileOrder({

@@ -3,13 +3,16 @@ import os from "node:os";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { AuthProfileStore } from "./types.js";
import { captureEnv } from "../../test-utils/env.js";
import { resolveApiKeyForProfile } from "./oauth.js";
import { ensureAuthProfileStore } from "./store.js";

describe("resolveApiKeyForProfile fallback to main agent", () => {
  const previousStateDir = process.env.OPENCLAW_STATE_DIR;
  const previousAgentDir = process.env.OPENCLAW_AGENT_DIR;
  const previousPiAgentDir = process.env.PI_CODING_AGENT_DIR;
  const envSnapshot = captureEnv([
    "OPENCLAW_STATE_DIR",
    "OPENCLAW_AGENT_DIR",
    "PI_CODING_AGENT_DIR",
  ]);
  let tmpDir: string;
  let mainAgentDir: string;
  let secondaryAgentDir: string;
@@ -30,22 +33,7 @@ describe("resolveApiKeyForProfile fallback to main agent", () => {
  afterEach(async () => {
    vi.unstubAllGlobals();

    // Restore original environment
    if (previousStateDir === undefined) {
      delete process.env.OPENCLAW_STATE_DIR;
    } else {
      process.env.OPENCLAW_STATE_DIR = previousStateDir;
    }
    if (previousAgentDir === undefined) {
      delete process.env.OPENCLAW_AGENT_DIR;
    } else {
      process.env.OPENCLAW_AGENT_DIR = previousAgentDir;
    }
    if (previousPiAgentDir === undefined) {
      delete process.env.PI_CODING_AGENT_DIR;
    } else {
      process.env.PI_CODING_AGENT_DIR = previousPiAgentDir;
    }
    envSnapshot.restore();

    await fs.rm(tmpDir, { recursive: true, force: true });
  });

@@ -1,17 +1,17 @@
import type { AgentToolResult } from "@mariozechner/pi-agent-core";
import type { ChildProcessWithoutNullStreams } from "node:child_process";
import { Type } from "@sinclair/typebox";
import path from "node:path";
import type { ExecAsk, ExecHost, ExecSecurity } from "../infra/exec-approvals.js";
import type { ProcessSession, SessionStdin } from "./bash-process-registry.js";
import type { ProcessSession } from "./bash-process-registry.js";
import type { ExecToolDetails } from "./bash-tools.exec.js";
import type { BashSandboxConfig } from "./bash-tools.shared.js";
import { requestHeartbeatNow } from "../infra/heartbeat-wake.js";
import { mergePathPrepend } from "../infra/path-prepend.js";
import { enqueueSystemEvent } from "../infra/system-events.js";
export { applyPathPrepend, normalizePathPrepend } from "../infra/path-prepend.js";
import type { ManagedRun } from "../process/supervisor/index.js";
import { logWarn } from "../logger.js";
import { formatSpawnError, spawnWithFallback } from "../process/spawn-utils.js";
import { getProcessSupervisor } from "../process/supervisor/index.js";
import {
  addSession,
  appendOutput,
@@ -23,7 +23,6 @@ import {
  buildDockerExecArgs,
  chunkString,
  clampWithDefault,
  killSession,
  readEnvInt,
} from "./bash-tools.shared.js";
import { buildCursorPositionResponse, stripDsrRequests } from "./pty-dsr.js";
@@ -147,26 +146,6 @@ export const execSchema = Type.Object({
  ),
});

type PtyExitEvent = { exitCode: number; signal?: number };
type PtyListener<T> = (event: T) => void;
type PtyHandle = {
  pid: number;
  write: (data: string | Buffer) => void;
  onData: (listener: PtyListener<string>) => void;
  onExit: (listener: PtyListener<PtyExitEvent>) => void;
};
type PtySpawn = (
  file: string,
  args: string[] | string,
  options: {
    name?: string;
    cols?: number;
    rows?: number;
    cwd?: string;
    env?: Record<string, string>;
  },
) => PtyHandle;

export type ExecProcessOutcome = {
  status: "completed" | "failed";
  exitCode: number | null;

@@ -319,138 +298,10 @@ export async function runExecProcess(opts: {
}): Promise<ExecProcessHandle> {
  const startedAt = Date.now();
  const sessionId = createSessionSlug();
  let child: ChildProcessWithoutNullStreams | null = null;
  let pty: PtyHandle | null = null;
  let stdin: SessionStdin | undefined;
  const execCommand = opts.execCommand ?? opts.command;
  const supervisor = getProcessSupervisor();

  const spawnFallbacks = [
    {
      label: "no-detach",
      options: { detached: false },
    },
  ];

  const handleSpawnFallback = (err: unknown, fallback: { label: string }) => {
    const errText = formatSpawnError(err);
    const warning = `Warning: spawn failed (${errText}); retrying with ${fallback.label}.`;
    logWarn(`exec: spawn failed (${errText}); retrying with ${fallback.label}.`);
    opts.warnings.push(warning);
  };

  const spawnShellChild = async (
    shell: string,
    shellArgs: string[],
  ): Promise<ChildProcessWithoutNullStreams> => {
    const { child: spawned } = await spawnWithFallback({
      argv: [shell, ...shellArgs, execCommand],
      options: {
        cwd: opts.workdir,
        env: opts.env,
        detached: process.platform !== "win32",
        stdio: ["pipe", "pipe", "pipe"],
        windowsHide: true,
      },
      fallbacks: spawnFallbacks,
      onFallback: handleSpawnFallback,
    });
    return spawned as ChildProcessWithoutNullStreams;
  };

  // `exec` does not currently accept tool-provided stdin content. For non-PTY runs,
  // keeping stdin open can cause commands like `wc -l` (or safeBins-hardened segments)
  // to block forever waiting for input, leading to accidental backgrounding.
  // For interactive flows, callers should use `pty: true` (stdin kept open).
  const maybeCloseNonPtyStdin = () => {
    if (opts.usePty) {
      return;
    }
    try {
      // Signal EOF immediately so stdin-only commands can terminate.
      child?.stdin?.end();
    } catch {
      // ignore stdin close errors
    }
  };

  if (opts.sandbox) {
    const { child: spawned } = await spawnWithFallback({
      argv: [
        "docker",
        ...buildDockerExecArgs({
          containerName: opts.sandbox.containerName,
          command: execCommand,
          workdir: opts.containerWorkdir ?? opts.sandbox.containerWorkdir,
          env: opts.env,
          tty: opts.usePty,
        }),
      ],
      options: {
        cwd: opts.workdir,
        env: process.env,
        detached: process.platform !== "win32",
        stdio: ["pipe", "pipe", "pipe"],
        windowsHide: true,
      },
      fallbacks: spawnFallbacks,
      onFallback: handleSpawnFallback,
    });
    child = spawned as ChildProcessWithoutNullStreams;
    stdin = child.stdin;
    maybeCloseNonPtyStdin();
  } else if (opts.usePty) {
    const { shell, args: shellArgs } = getShellConfig();
    try {
      const ptyModule = (await import("@lydell/node-pty")) as unknown as {
        spawn?: PtySpawn;
        default?: { spawn?: PtySpawn };
      };
      const spawnPty = ptyModule.spawn ?? ptyModule.default?.spawn;
      if (!spawnPty) {
        throw new Error("PTY support is unavailable (node-pty spawn not found).");
      }
      pty = spawnPty(shell, [...shellArgs, execCommand], {
        cwd: opts.workdir,
        env: opts.env,
        name: process.env.TERM ?? "xterm-256color",
        cols: 120,
        rows: 30,
      });
      stdin = {
        destroyed: false,
        write: (data, cb) => {
          try {
            pty?.write(data);
            cb?.(null);
          } catch (err) {
            cb?.(err as Error);
          }
        },
        end: () => {
          try {
            const eof = process.platform === "win32" ? "\x1a" : "\x04";
            pty?.write(eof);
          } catch {
            // ignore EOF errors
          }
        },
      };
    } catch (err) {
      const errText = String(err);
      const warning = `Warning: PTY spawn failed (${errText}); retrying without PTY for \`${opts.command}\`.`;
      logWarn(`exec: PTY spawn failed (${errText}); retrying without PTY for "${opts.command}".`);
      opts.warnings.push(warning);
      child = await spawnShellChild(shell, shellArgs);
      stdin = child.stdin;
    }
  } else {
    const { shell, args: shellArgs } = getShellConfig();
    child = await spawnShellChild(shell, shellArgs);
    stdin = child.stdin;
    maybeCloseNonPtyStdin();
  }

  const session = {
  const session: ProcessSession = {
    id: sessionId,
    command: opts.command,
    scopeKey: opts.scopeKey,
@@ -458,9 +309,9 @@ export async function runExecProcess(opts: {
    notifyOnExit: opts.notifyOnExit,
    notifyOnExitEmptySuccess: opts.notifyOnExitEmptySuccess === true,
    exitNotified: false,
    child: child ?? undefined,
    stdin,
    pid: child?.pid ?? pty?.pid,
    child: undefined,
    stdin: undefined,
    pid: undefined,
    startedAt,
    cwd: opts.workdir,
    maxOutputChars: opts.maxOutput,
@@ -477,59 +328,9 @@ export async function runExecProcess(opts: {
    exitSignal: undefined as NodeJS.Signals | number | null | undefined,
    truncated: false,
    backgrounded: false,
  } satisfies ProcessSession;
  };
  addSession(session);

  let settled = false;
  let timeoutTimer: NodeJS.Timeout | null = null;
  let timeoutFinalizeTimer: NodeJS.Timeout | null = null;
  let timedOut = false;
  const timeoutFinalizeMs = 1000;
  let resolveFn: ((outcome: ExecProcessOutcome) => void) | null = null;

  const settle = (outcome: ExecProcessOutcome) => {
    if (settled) {
      return;
    }
    settled = true;
    resolveFn?.(outcome);
  };

  const finalizeTimeout = () => {
    if (session.exited) {
      return;
    }
    markExited(session, null, "SIGKILL", "failed");
    maybeNotifyOnExit(session, "failed");
    const aggregated = session.aggregated.trim();
    const reason = `Command timed out after ${opts.timeoutSec} seconds`;
    settle({
      status: "failed",
      exitCode: null,
      exitSignal: "SIGKILL",
      durationMs: Date.now() - startedAt,
      aggregated,
      timedOut: true,
      reason: aggregated ? `${aggregated}\n\n${reason}` : reason,
    });
  };

  const onTimeout = () => {
    timedOut = true;
    killSession(session);
    if (!timeoutFinalizeTimer) {
      timeoutFinalizeTimer = setTimeout(() => {
        finalizeTimeout();
      }, timeoutFinalizeMs);
    }
  };

  if (opts.timeoutSec > 0) {
    timeoutTimer = setTimeout(() => {
      onTimeout();
    }, opts.timeoutSec * 1000);
  }

  const emitUpdate = () => {
    if (!opts.onUpdate) {
      return;
@@ -565,116 +366,208 @@ export async function runExecProcess(opts: {
    }
  };

if (pty) {
|
||||
const cursorResponse = buildCursorPositionResponse();
|
||||
pty.onData((data) => {
|
||||
const raw = data.toString();
|
||||
const { cleaned, requests } = stripDsrRequests(raw);
|
||||
if (requests > 0) {
|
||||
const timeoutMs =
|
||||
typeof opts.timeoutSec === "number" && opts.timeoutSec > 0
|
||||
? Math.floor(opts.timeoutSec * 1000)
|
||||
: undefined;
|
||||
|
||||
const spawnSpec:
|
||||
| {
|
||||
mode: "child";
|
||||
argv: string[];
|
||||
env: NodeJS.ProcessEnv;
|
||||
stdinMode: "pipe-open" | "pipe-closed";
|
||||
}
|
||||
| {
|
||||
mode: "pty";
|
||||
ptyCommand: string;
|
||||
childFallbackArgv: string[];
|
||||
env: NodeJS.ProcessEnv;
|
||||
stdinMode: "pipe-open";
|
||||
} = (() => {
|
||||
if (opts.sandbox) {
|
||||
return {
|
||||
mode: "child" as const,
|
||||
argv: [
|
||||
"docker",
|
||||
...buildDockerExecArgs({
|
||||
containerName: opts.sandbox.containerName,
|
||||
command: execCommand,
|
||||
workdir: opts.containerWorkdir ?? opts.sandbox.containerWorkdir,
|
||||
env: opts.env,
|
||||
tty: opts.usePty,
|
||||
}),
|
||||
],
|
||||
env: process.env,
|
||||
stdinMode: opts.usePty ? ("pipe-open" as const) : ("pipe-closed" as const),
|
||||
};
|
||||
}
|
||||
const { shell, args: shellArgs } = getShellConfig();
|
||||
const childArgv = [shell, ...shellArgs, execCommand];
|
||||
if (opts.usePty) {
|
||||
return {
|
||||
mode: "pty" as const,
|
||||
ptyCommand: execCommand,
|
||||
childFallbackArgv: childArgv,
|
||||
env: opts.env,
|
||||
stdinMode: "pipe-open" as const,
|
||||
};
|
||||
}
|
||||
return {
|
||||
mode: "child" as const,
|
||||
argv: childArgv,
|
||||
env: opts.env,
|
||||
stdinMode: "pipe-closed" as const,
|
||||
};
|
||||
})();
|
||||
|
||||
let managedRun: ManagedRun | null = null;
|
||||
let usingPty = spawnSpec.mode === "pty";
|
||||
const cursorResponse = buildCursorPositionResponse();
|
||||
|
||||
const onSupervisorStdout = (chunk: string) => {
|
||||
if (usingPty) {
|
||||
const { cleaned, requests } = stripDsrRequests(chunk);
|
||||
if (requests > 0 && managedRun?.stdin) {
|
||||
for (let i = 0; i < requests; i += 1) {
|
||||
pty.write(cursorResponse);
|
||||
managedRun.stdin.write(cursorResponse);
|
||||
}
|
||||
}
|
||||
handleStdout(cleaned);
|
||||
});
|
||||
} else if (child) {
|
||||
child.stdout.on("data", handleStdout);
|
||||
child.stderr.on("data", handleStderr);
|
||||
}
|
||||
return;
|
||||
}
|
||||
handleStdout(chunk);
|
||||
};
|
const promise = new Promise<ExecProcessOutcome>((resolve) => {
  resolveFn = resolve;
  const handleExit = (code: number | null, exitSignal: NodeJS.Signals | number | null) => {
    if (timeoutTimer) {
      clearTimeout(timeoutTimer);
    }
    if (timeoutFinalizeTimer) {
      clearTimeout(timeoutFinalizeTimer);
try {
  const spawnBase = {
    runId: sessionId,
    sessionId: opts.sessionKey?.trim() || sessionId,
    backendId: opts.sandbox ? "exec-sandbox" : "exec-host",
    scopeKey: opts.scopeKey,
    cwd: opts.workdir,
    env: spawnSpec.env,
    timeoutMs,
    captureOutput: false,
    onStdout: onSupervisorStdout,
    onStderr: handleStderr,
  };
  managedRun =
    spawnSpec.mode === "pty"
      ? await supervisor.spawn({
          ...spawnBase,
          mode: "pty",
          ptyCommand: spawnSpec.ptyCommand,
        })
      : await supervisor.spawn({
          ...spawnBase,
          mode: "child",
          argv: spawnSpec.argv,
          stdinMode: spawnSpec.stdinMode,
        });
} catch (err) {
  if (spawnSpec.mode === "pty") {
    const warning = `Warning: PTY spawn failed (${String(err)}); retrying without PTY for \`${opts.command}\`.`;
    logWarn(
      `exec: PTY spawn failed (${String(err)}); retrying without PTY for "${opts.command}".`,
    );
    opts.warnings.push(warning);
    usingPty = false;
    try {
      managedRun = await supervisor.spawn({
        runId: sessionId,
        sessionId: opts.sessionKey?.trim() || sessionId,
        backendId: "exec-host",
        scopeKey: opts.scopeKey,
        mode: "child",
        argv: spawnSpec.childFallbackArgv,
        cwd: opts.workdir,
        env: spawnSpec.env,
        stdinMode: "pipe-open",
        timeoutMs,
        captureOutput: false,
        onStdout: handleStdout,
        onStderr: handleStderr,
      });
    } catch (retryErr) {
      markExited(session, null, null, "failed");
      maybeNotifyOnExit(session, "failed");
      throw retryErr;
    }
  } else {
    markExited(session, null, null, "failed");
    maybeNotifyOnExit(session, "failed");
    throw err;
  }
}
session.stdin = managedRun.stdin;
session.pid = managedRun.pid;

const promise = managedRun
  .wait()
  .then((exit): ExecProcessOutcome => {
    const durationMs = Date.now() - startedAt;
    const wasSignal = exitSignal != null;
    const isSuccess = code === 0 && !wasSignal && !timedOut;
    const status: "completed" | "failed" = isSuccess ? "completed" : "failed";
    markExited(session, code, exitSignal, status);
    const status: "completed" | "failed" =
      exit.exitCode === 0 && exit.reason === "exit" ? "completed" : "failed";
    markExited(session, exit.exitCode, exit.exitSignal, status);
    maybeNotifyOnExit(session, status);
    if (!session.child && session.stdin) {
      session.stdin.destroyed = true;
    }

    if (settled) {
      return;
    }
    const aggregated = session.aggregated.trim();
    if (!isSuccess) {
      const reason = timedOut
        ? `Command timed out after ${opts.timeoutSec} seconds`
        : wasSignal && exitSignal
          ? `Command aborted by signal ${exitSignal}`
          : code === null
            ? "Command aborted before exit code was captured"
            : `Command exited with code ${code}`;
      const message = aggregated ? `${aggregated}\n\n${reason}` : reason;
      settle({
        status: "failed",
        exitCode: code ?? null,
        exitSignal: exitSignal ?? null,
    if (status === "completed") {
      return {
        status: "completed",
        exitCode: exit.exitCode ?? 0,
        exitSignal: exit.exitSignal,
        durationMs,
        aggregated,
        timedOut,
        reason: message,
      });
      return;
        timedOut: false,
      };
    }
      settle({
        status: "completed",
        exitCode: code ?? 0,
        exitSignal: exitSignal ?? null,
    const reason =
      exit.reason === "overall-timeout"
        ? `Command timed out after ${opts.timeoutSec} seconds`
        : exit.reason === "no-output-timeout"
          ? "Command timed out waiting for output"
          : exit.exitSignal != null
            ? `Command aborted by signal ${exit.exitSignal}`
            : exit.exitCode == null
              ? "Command aborted before exit code was captured"
              : `Command exited with code ${exit.exitCode}`;
    return {
      status: "failed",
      exitCode: exit.exitCode,
      exitSignal: exit.exitSignal,
      durationMs,
      aggregated,
      timedOut: exit.timedOut,
      reason: aggregated ? `${aggregated}\n\n${reason}` : reason,
    };
  })
  .catch((err): ExecProcessOutcome => {
    markExited(session, null, null, "failed");
    maybeNotifyOnExit(session, "failed");
    const aggregated = session.aggregated.trim();
    const message = aggregated ? `${aggregated}\n\n${String(err)}` : String(err);
    return {
      status: "failed",
      exitCode: null,
      exitSignal: null,
      durationMs: Date.now() - startedAt,
      aggregated,
      timedOut: false,
    });
  };

  if (pty) {
    pty.onExit((event) => {
      const rawSignal = event.signal ?? null;
      const normalizedSignal = rawSignal === 0 ? null : rawSignal;
      handleExit(event.exitCode ?? null, normalizedSignal);
    });
  } else if (child) {
    child.once("close", (code, exitSignal) => {
      handleExit(code, exitSignal);
    });

    child.once("error", (err) => {
      if (timeoutTimer) {
        clearTimeout(timeoutTimer);
      }
      if (timeoutFinalizeTimer) {
        clearTimeout(timeoutFinalizeTimer);
      }
      markExited(session, null, null, "failed");
      maybeNotifyOnExit(session, "failed");
      const aggregated = session.aggregated.trim();
      const message = aggregated ? `${aggregated}\n\n${String(err)}` : String(err);
      settle({
        status: "failed",
        exitCode: null,
        exitSignal: null,
        durationMs: Date.now() - startedAt,
        aggregated,
        timedOut,
        reason: message,
      });
    });
  }
});
      reason: message,
    };
  });

return {
  session,
  startedAt,
  pid: session.pid ?? undefined,
  promise,
  kill: () => killSession(session),
  kill: () => {
    managedRun?.cancel("manual-cancel");
  },
};
}

73 src/agents/bash-tools.exec.pty-cleanup.test.ts Normal file
@@ -0,0 +1,73 @@
import { afterEach, expect, test, vi } from "vitest";
import { resetProcessRegistryForTests } from "./bash-process-registry";

afterEach(() => {
  resetProcessRegistryForTests();
  vi.resetModules();
  vi.clearAllMocks();
});

test("exec disposes PTY listeners after normal exit", async () => {
  const disposeData = vi.fn();
  const disposeExit = vi.fn();

  vi.doMock("@lydell/node-pty", () => ({
    spawn: () => {
      return {
        pid: 0,
        write: vi.fn(),
        onData: (listener: (value: string) => void) => {
          setTimeout(() => listener("ok"), 0);
          return { dispose: disposeData };
        },
        onExit: (listener: (event: { exitCode: number; signal?: number }) => void) => {
          setTimeout(() => listener({ exitCode: 0 }), 0);
          return { dispose: disposeExit };
        },
        kill: vi.fn(),
      };
    },
  }));

  const { createExecTool } = await import("./bash-tools.exec");
  const tool = createExecTool({ allowBackground: false });
  const result = await tool.execute("toolcall", {
    command: "echo ok",
    pty: true,
  });

  expect(result.details.status).toBe("completed");
  expect(disposeData).toHaveBeenCalledTimes(1);
  expect(disposeExit).toHaveBeenCalledTimes(1);
});

test("exec tears down PTY resources on timeout", async () => {
  const disposeData = vi.fn();
  const disposeExit = vi.fn();
  const kill = vi.fn();

  vi.doMock("@lydell/node-pty", () => ({
    spawn: () => {
      return {
        pid: 0,
        write: vi.fn(),
        onData: () => ({ dispose: disposeData }),
        onExit: () => ({ dispose: disposeExit }),
        kill,
      };
    },
  }));

  const { createExecTool } = await import("./bash-tools.exec");
  const tool = createExecTool({ allowBackground: false });
  await expect(
    tool.execute("toolcall", {
      command: "sleep 5",
      pty: true,
      timeout: 0.01,
    }),
  ).rejects.toThrow("Command timed out");
  expect(kill).toHaveBeenCalledTimes(1);
  expect(disposeData).toHaveBeenCalledTimes(1);
  expect(disposeExit).toHaveBeenCalledTimes(1);
});

40 src/agents/bash-tools.exec.pty-fallback-failure.test.ts Normal file
@@ -0,0 +1,40 @@
import { afterEach, expect, test, vi } from "vitest";
import { listRunningSessions, resetProcessRegistryForTests } from "./bash-process-registry";

const { supervisorSpawnMock } = vi.hoisted(() => ({
  supervisorSpawnMock: vi.fn(),
}));

vi.mock("../process/supervisor/index.js", () => ({
  getProcessSupervisor: () => ({
    spawn: (...args: unknown[]) => supervisorSpawnMock(...args),
    cancel: vi.fn(),
    cancelScope: vi.fn(),
    reconcileOrphans: vi.fn(),
    getRecord: vi.fn(),
  }),
}));

afterEach(() => {
  resetProcessRegistryForTests();
  vi.resetModules();
  vi.clearAllMocks();
});

test("exec cleans session state when PTY fallback spawn also fails", async () => {
  supervisorSpawnMock
    .mockRejectedValueOnce(new Error("pty spawn failed"))
    .mockRejectedValueOnce(new Error("child fallback failed"));

  const { createExecTool } = await import("./bash-tools.exec");
  const tool = createExecTool({ allowBackground: false });

  await expect(
    tool.execute("toolcall", {
      command: "echo ok",
      pty: true,
    }),
  ).rejects.toThrow("child fallback failed");

  expect(listRunningSessions()).toHaveLength(0);
});

152 src/agents/bash-tools.process.supervisor.test.ts Normal file
@@ -0,0 +1,152 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { ProcessSession } from "./bash-process-registry.js";
import {
  addSession,
  getFinishedSession,
  getSession,
  resetProcessRegistryForTests,
} from "./bash-process-registry.js";
import { createProcessTool } from "./bash-tools.process.js";

const { supervisorMock } = vi.hoisted(() => ({
  supervisorMock: {
    spawn: vi.fn(),
    cancel: vi.fn(),
    cancelScope: vi.fn(),
    reconcileOrphans: vi.fn(),
    getRecord: vi.fn(),
  },
}));

const { killProcessTreeMock } = vi.hoisted(() => ({
  killProcessTreeMock: vi.fn(),
}));

vi.mock("../process/supervisor/index.js", () => ({
  getProcessSupervisor: () => supervisorMock,
}));

vi.mock("../process/kill-tree.js", () => ({
  killProcessTree: (...args: unknown[]) => killProcessTreeMock(...args),
}));

function createBackgroundSession(id: string, pid?: number): ProcessSession {
  return {
    id,
    command: "sleep 999",
    startedAt: Date.now(),
    cwd: "/tmp",
    maxOutputChars: 10_000,
    pendingMaxOutputChars: 30_000,
    totalOutputChars: 0,
    pendingStdout: [],
    pendingStderr: [],
    pendingStdoutChars: 0,
    pendingStderrChars: 0,
    aggregated: "",
    tail: "",
    pid,
    exited: false,
    exitCode: undefined,
    exitSignal: undefined,
    truncated: false,
    backgrounded: true,
  };
}

describe("process tool supervisor cancellation", () => {
  beforeEach(() => {
    supervisorMock.spawn.mockReset();
    supervisorMock.cancel.mockReset();
    supervisorMock.cancelScope.mockReset();
    supervisorMock.reconcileOrphans.mockReset();
    supervisorMock.getRecord.mockReset();
    killProcessTreeMock.mockReset();
  });

  afterEach(() => {
    resetProcessRegistryForTests();
  });

  it("routes kill through supervisor when run is managed", async () => {
    supervisorMock.getRecord.mockReturnValue({
      runId: "sess",
      state: "running",
    });
    addSession(createBackgroundSession("sess"));
    const processTool = createProcessTool();

    const result = await processTool.execute("toolcall", {
      action: "kill",
      sessionId: "sess",
    });

    expect(supervisorMock.cancel).toHaveBeenCalledWith("sess", "manual-cancel");
    expect(getSession("sess")).toBeDefined();
    expect(getSession("sess")?.exited).toBe(false);
    expect(result.content[0]).toMatchObject({
      type: "text",
      text: "Termination requested for session sess.",
    });
  });

  it("remove drops running session immediately when cancellation is requested", async () => {
    supervisorMock.getRecord.mockReturnValue({
      runId: "sess",
      state: "running",
    });
    addSession(createBackgroundSession("sess"));
    const processTool = createProcessTool();

    const result = await processTool.execute("toolcall", {
      action: "remove",
      sessionId: "sess",
    });

    expect(supervisorMock.cancel).toHaveBeenCalledWith("sess", "manual-cancel");
    expect(getSession("sess")).toBeUndefined();
    expect(getFinishedSession("sess")).toBeUndefined();
    expect(result.content[0]).toMatchObject({
      type: "text",
      text: "Removed session sess (termination requested).",
    });
  });

  it("falls back to process-tree kill when supervisor record is missing", async () => {
    supervisorMock.getRecord.mockReturnValue(undefined);
    addSession(createBackgroundSession("sess-fallback", 4242));
    const processTool = createProcessTool();

    const result = await processTool.execute("toolcall", {
      action: "kill",
      sessionId: "sess-fallback",
    });

    expect(killProcessTreeMock).toHaveBeenCalledWith(4242);
    expect(getSession("sess-fallback")).toBeUndefined();
    expect(getFinishedSession("sess-fallback")).toBeDefined();
    expect(result.content[0]).toMatchObject({
      type: "text",
      text: "Killed session sess-fallback.",
    });
  });

  it("fails remove when no supervisor record and no pid is available", async () => {
    supervisorMock.getRecord.mockReturnValue(undefined);
    addSession(createBackgroundSession("sess-no-pid"));
    const processTool = createProcessTool();

    const result = await processTool.execute("toolcall", {
      action: "remove",
      sessionId: "sess-no-pid",
    });

    expect(killProcessTreeMock).not.toHaveBeenCalled();
    expect(getSession("sess-no-pid")).toBeDefined();
    expect(result.details).toMatchObject({ status: "failed" });
    expect(result.content[0]).toMatchObject({
      type: "text",
      text: "Unable to remove session sess-no-pid: no active supervisor run or process id.",
    });
  });
});

@@ -1,7 +1,10 @@
import type { AgentTool, AgentToolResult } from "@mariozechner/pi-agent-core";
import { Type } from "@sinclair/typebox";
import { formatDurationCompact } from "../infra/format-time/format-duration.ts";
import { killProcessTree } from "../process/kill-tree.js";
import { getProcessSupervisor } from "../process/supervisor/index.js";
import {
  type ProcessSession,
  deleteSession,
  drainSession,
  getFinishedSession,
@@ -11,13 +14,7 @@ import {
  markExited,
  setJobTtlMs,
} from "./bash-process-registry.js";
import {
  deriveSessionName,
  killSession,
  pad,
  sliceLogLines,
  truncateMiddle,
} from "./bash-tools.shared.js";
import { deriveSessionName, pad, sliceLogLines, truncateMiddle } from "./bash-tools.shared.js";
import { encodeKeySequence, encodePaste } from "./pty-keys.js";

export type ProcessToolDefaults = {
@@ -65,8 +62,9 @@ const processSchema = Type.Object({
  offset: Type.Optional(Type.Number({ description: "Log offset" })),
  limit: Type.Optional(Type.Number({ description: "Log length" })),
  timeout: Type.Optional(
    Type.Union([Type.Number(), Type.String()], {
    Type.Number({
      description: "For poll: wait up to this many milliseconds before returning",
      minimum: 0,
    }),
  ),
});

@@ -106,9 +104,28 @@ export function createProcessTool(
    setJobTtlMs(defaults.cleanupMs);
  }
  const scopeKey = defaults?.scopeKey;
  const supervisor = getProcessSupervisor();
  const isInScope = (session?: { scopeKey?: string } | null) =>
    !scopeKey || session?.scopeKey === scopeKey;

  const cancelManagedSession = (sessionId: string) => {
    const record = supervisor.getRecord(sessionId);
    if (!record || record.state === "exited") {
      return false;
    }
    supervisor.cancel(sessionId, "manual-cancel");
    return true;
  };

  const terminateSessionFallback = (session: ProcessSession) => {
    const pid = session.pid ?? session.child?.pid;
    if (typeof pid !== "number" || !Number.isFinite(pid) || pid <= 0) {
      return false;
    }
    killProcessTree(pid);
    return true;
  };

  return {
    name: "process",
    label: "process",
@@ -138,7 +155,7 @@ export function createProcessTool(
        eof?: boolean;
        offset?: number;
        limit?: number;
        timeout?: number | string;
        timeout?: unknown;
      };

      if (params.action === "list") {
@@ -522,10 +539,25 @@ export function createProcessTool(
          if (!scopedSession.backgrounded) {
            return failText(`Session ${params.sessionId} is not backgrounded.`);
          }
          killSession(scopedSession);
          markExited(scopedSession, null, "SIGKILL", "failed");
          const canceled = cancelManagedSession(scopedSession.id);
          if (!canceled) {
            const terminated = terminateSessionFallback(scopedSession);
            if (!terminated) {
              return failText(
                `Unable to terminate session ${params.sessionId}: no active supervisor run or process id.`,
              );
            }
            markExited(scopedSession, null, "SIGKILL", "failed");
          }
          return {
            content: [{ type: "text", text: `Killed session ${params.sessionId}.` }],
            content: [
              {
                type: "text",
                text: canceled
                  ? `Termination requested for session ${params.sessionId}.`
                  : `Killed session ${params.sessionId}.`,
              },
            ],
            details: {
              status: "failed",
              name: scopedSession ? deriveSessionName(scopedSession.command) : undefined,
@@ -554,10 +586,30 @@ export function createProcessTool(

        case "remove": {
          if (scopedSession) {
            killSession(scopedSession);
            markExited(scopedSession, null, "SIGKILL", "failed");
            const canceled = cancelManagedSession(scopedSession.id);
            if (canceled) {
              // Keep remove semantics deterministic: drop from process registry now.
              scopedSession.backgrounded = false;
              deleteSession(params.sessionId);
            } else {
              const terminated = terminateSessionFallback(scopedSession);
              if (!terminated) {
                return failText(
                  `Unable to remove session ${params.sessionId}: no active supervisor run or process id.`,
                );
              }
              markExited(scopedSession, null, "SIGKILL", "failed");
              deleteSession(params.sessionId);
            }
            return {
              content: [{ type: "text", text: `Removed session ${params.sessionId}.` }],
              content: [
                {
                  type: "text",
                  text: canceled
                    ? `Removed session ${params.sessionId} (termination requested).`
                    : `Removed session ${params.sessionId}.`,
                },
              ],
              details: {
                status: "failed",
                name: scopedSession ? deriveSessionName(scopedSession.command) : undefined,

@@ -1,11 +1,9 @@
import type { ChildProcessWithoutNullStreams } from "node:child_process";
import { existsSync, statSync } from "node:fs";
import fs from "node:fs/promises";
import { homedir } from "node:os";
import path from "node:path";
import { sliceUtf16Safe } from "../utils.js";
import { assertSandboxPath } from "./sandbox-paths.js";
import { killProcessTree } from "./shell-utils.js";

const CHUNK_LIMIT = 8 * 1024;

@@ -115,13 +113,6 @@ export async function resolveSandboxWorkdir(params: {
  }
}

export function killSession(session: { pid?: number; child?: ChildProcessWithoutNullStreams }) {
  const pid = session.pid ?? session.child?.pid;
  if (pid) {
    killProcessTree(pid);
  }
}

export function resolveWorkdir(workdir: string, warnings: string[]) {
  const current = safeCwd();
  const fallback = current ?? homedir();

36 src/agents/cli-backends.test.ts Normal file
@@ -0,0 +1,36 @@
import { describe, expect, it } from "vitest";
import type { OpenClawConfig } from "../config/config.js";
import { resolveCliBackendConfig } from "./cli-backends.js";

describe("resolveCliBackendConfig reliability merge", () => {
  it("deep-merges reliability watchdog overrides for codex", () => {
    const cfg = {
      agents: {
        defaults: {
          cliBackends: {
            "codex-cli": {
              command: "codex",
              reliability: {
                watchdog: {
                  resume: {
                    noOutputTimeoutMs: 42_000,
                  },
                },
              },
            },
          },
        },
      },
    } satisfies OpenClawConfig;

    const resolved = resolveCliBackendConfig("codex-cli", cfg);

    expect(resolved).not.toBeNull();
    expect(resolved?.config.reliability?.watchdog?.resume?.noOutputTimeoutMs).toBe(42_000);
    // Ensure defaults are retained when only one field is overridden.
    expect(resolved?.config.reliability?.watchdog?.resume?.noOutputTimeoutRatio).toBe(0.3);
    expect(resolved?.config.reliability?.watchdog?.resume?.minMs).toBe(60_000);
    expect(resolved?.config.reliability?.watchdog?.resume?.maxMs).toBe(180_000);
    expect(resolved?.config.reliability?.watchdog?.fresh?.noOutputTimeoutRatio).toBe(0.8);
  });
});

@@ -1,5 +1,9 @@
import type { OpenClawConfig } from "../config/config.js";
import type { CliBackendConfig } from "../config/types.js";
import {
  CLI_FRESH_WATCHDOG_DEFAULTS,
  CLI_RESUME_WATCHDOG_DEFAULTS,
} from "./cli-watchdog-defaults.js";
import { normalizeProviderId } from "./model-selection.js";

export type ResolvedCliBackend = {
@@ -49,6 +53,12 @@ const DEFAULT_CLAUDE_BACKEND: CliBackendConfig = {
  systemPromptMode: "append",
  systemPromptWhen: "first",
  clearEnv: ["ANTHROPIC_API_KEY", "ANTHROPIC_API_KEY_OLD"],
  reliability: {
    watchdog: {
      fresh: { ...CLI_FRESH_WATCHDOG_DEFAULTS },
      resume: { ...CLI_RESUME_WATCHDOG_DEFAULTS },
    },
  },
  serialize: true,
};

@@ -73,6 +83,12 @@ const DEFAULT_CODEX_BACKEND: CliBackendConfig = {
  sessionMode: "existing",
  imageArg: "--image",
  imageMode: "repeat",
  reliability: {
    watchdog: {
      fresh: { ...CLI_FRESH_WATCHDOG_DEFAULTS },
      resume: { ...CLI_RESUME_WATCHDOG_DEFAULTS },
    },
  },
  serialize: true,
};

@@ -96,6 +112,10 @@ function mergeBackendConfig(base: CliBackendConfig, override?: CliBackendConfig)
  if (!override) {
    return { ...base };
  }
  const baseFresh = base.reliability?.watchdog?.fresh ?? {};
  const baseResume = base.reliability?.watchdog?.resume ?? {};
  const overrideFresh = override.reliability?.watchdog?.fresh ?? {};
  const overrideResume = override.reliability?.watchdog?.resume ?? {};
  return {
    ...base,
    ...override,
@@ -106,6 +126,22 @@ function mergeBackendConfig(base: CliBackendConfig, override?: CliBackendConfig)
    sessionIdFields: override.sessionIdFields ?? base.sessionIdFields,
    sessionArgs: override.sessionArgs ?? base.sessionArgs,
    resumeArgs: override.resumeArgs ?? base.resumeArgs,
    reliability: {
      ...base.reliability,
      ...override.reliability,
      watchdog: {
        ...base.reliability?.watchdog,
        ...override.reliability?.watchdog,
        fresh: {
          ...baseFresh,
          ...overrideFresh,
        },
        resume: {
          ...baseResume,
          ...overrideResume,
        },
      },
    },
  };
}

@@ -3,50 +3,69 @@ import os from "node:os";
import path from "node:path";
import { beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../config/config.js";
import type { CliBackendConfig } from "../config/types.js";
import { runCliAgent } from "./cli-runner.js";
import { cleanupResumeProcesses, cleanupSuspendedCliProcesses } from "./cli-runner/helpers.js";
import { resolveCliNoOutputTimeoutMs } from "./cli-runner/helpers.js";

const runCommandWithTimeoutMock = vi.fn();
const runExecMock = vi.fn();
const supervisorSpawnMock = vi.fn();

vi.mock("../process/exec.js", () => ({
  runCommandWithTimeout: (...args: unknown[]) => runCommandWithTimeoutMock(...args),
  runExec: (...args: unknown[]) => runExecMock(...args),
vi.mock("../process/supervisor/index.js", () => ({
  getProcessSupervisor: () => ({
    spawn: (...args: unknown[]) => supervisorSpawnMock(...args),
    cancel: vi.fn(),
    cancelScope: vi.fn(),
    reconcileOrphans: vi.fn(),
    getRecord: vi.fn(),
  }),
}));

describe("runCliAgent resume cleanup", () => {
type MockRunExit = {
  reason:
    | "manual-cancel"
    | "overall-timeout"
    | "no-output-timeout"
    | "spawn-error"
    | "signal"
    | "exit";
  exitCode: number | null;
  exitSignal: NodeJS.Signals | number | null;
  durationMs: number;
  stdout: string;
  stderr: string;
  timedOut: boolean;
  noOutputTimedOut: boolean;
};

function createManagedRun(exit: MockRunExit, pid = 1234) {
  return {
    runId: "run-supervisor",
    pid,
    startedAtMs: Date.now(),
    stdin: undefined,
    wait: vi.fn().mockResolvedValue(exit),
    cancel: vi.fn(),
  };
}

describe("runCliAgent with process supervisor", () => {
  beforeEach(() => {
    runCommandWithTimeoutMock.mockReset();
    runExecMock.mockReset();
    supervisorSpawnMock.mockReset();
  });

  it("kills stale resume processes for codex sessions", async () => {
    const selfPid = process.pid;

    runExecMock
      .mockResolvedValueOnce({
        stdout: "  1 999 S /bin/launchd\n",
  it("runs CLI through supervisor and returns payload", async () => {
    supervisorSpawnMock.mockResolvedValueOnce(
      createManagedRun({
        reason: "exit",
        exitCode: 0,
        exitSignal: null,
        durationMs: 50,
        stdout: "ok",
        stderr: "",
      }) // cleanupSuspendedCliProcesses (ps) — ppid 999 != selfPid, no match
      .mockResolvedValueOnce({
        stdout: [
          ` ${selfPid + 1} ${selfPid} codex exec resume thread-123 --color never --sandbox read-only --skip-git-repo-check`,
          ` ${selfPid + 2} 999 codex exec resume thread-123 --color never --sandbox read-only --skip-git-repo-check`,
        ].join("\n"),
        stderr: "",
      }) // cleanupResumeProcesses (ps)
      .mockResolvedValueOnce({ stdout: "", stderr: "" }) // cleanupResumeProcesses (kill -TERM)
      .mockResolvedValueOnce({ stdout: "", stderr: "" }); // cleanupResumeProcesses (kill -9)
    runCommandWithTimeoutMock.mockResolvedValueOnce({
      stdout: "ok",
      stderr: "",
      code: 0,
      signal: null,
      killed: false,
    });
        timedOut: false,
        noOutputTimedOut: false,
      }),
    );

    await runCliAgent({
    const result = await runCliAgent({
      sessionId: "s1",
      sessionFile: "/tmp/session.jsonl",
      workspaceDir: "/tmp",
@@ -58,28 +77,80 @@ describe("runCliAgent resume cleanup", () => {
      cliSessionId: "thread-123",
    });

    if (process.platform === "win32") {
      expect(runExecMock).not.toHaveBeenCalled();
      return;
    }
    expect(result.payloads?.[0]?.text).toBe("ok");
    expect(supervisorSpawnMock).toHaveBeenCalledTimes(1);
    const input = supervisorSpawnMock.mock.calls[0]?.[0] as {
      argv?: string[];
      mode?: string;
      timeoutMs?: number;
      noOutputTimeoutMs?: number;
      replaceExistingScope?: boolean;
      scopeKey?: string;
    };
    expect(input.mode).toBe("child");
    expect(input.argv?.[0]).toBe("codex");
    expect(input.timeoutMs).toBe(1_000);
    expect(input.noOutputTimeoutMs).toBeGreaterThanOrEqual(1_000);
    expect(input.replaceExistingScope).toBe(true);
    expect(input.scopeKey).toContain("thread-123");
  });

    expect(runExecMock).toHaveBeenCalledTimes(4);
  it("fails with timeout when no-output watchdog trips", async () => {
    supervisorSpawnMock.mockResolvedValueOnce(
      createManagedRun({
        reason: "no-output-timeout",
        exitCode: null,
        exitSignal: "SIGKILL",
        durationMs: 200,
        stdout: "",
        stderr: "",
        timedOut: true,
        noOutputTimedOut: true,
      }),
    );

    // Second call: cleanupResumeProcesses ps
    const psCall = runExecMock.mock.calls[1] ?? [];
    expect(psCall[0]).toBe("ps");
    await expect(
      runCliAgent({
        sessionId: "s1",
        sessionFile: "/tmp/session.jsonl",
        workspaceDir: "/tmp",
        prompt: "hi",
        provider: "codex-cli",
        model: "gpt-5.2-codex",
        timeoutMs: 1_000,
        runId: "run-2",
        cliSessionId: "thread-123",
      }),
    ).rejects.toThrow("produced no output");
  });

    // Third call: TERM, only the child PID
    const termCall = runExecMock.mock.calls[2] ?? [];
    expect(termCall[0]).toBe("kill");
    const termArgs = termCall[1] as string[];
    expect(termArgs).toEqual(["-TERM", String(selfPid + 1)]);
  it("fails with timeout when overall timeout trips", async () => {
    supervisorSpawnMock.mockResolvedValueOnce(
      createManagedRun({
        reason: "overall-timeout",
        exitCode: null,
        exitSignal: "SIGKILL",
        durationMs: 200,
        stdout: "",
        stderr: "",
        timedOut: true,
        noOutputTimedOut: false,
      }),
    );

    // Fourth call: KILL, only the child PID
    const killCall = runExecMock.mock.calls[3] ?? [];
    expect(killCall[0]).toBe("kill");
    const killArgs = killCall[1] as string[];
    expect(killArgs).toEqual(["-9", String(selfPid + 1)]);
    await expect(
      runCliAgent({
        sessionId: "s1",
        sessionFile: "/tmp/session.jsonl",
        workspaceDir: "/tmp",
        prompt: "hi",
        provider: "codex-cli",
        model: "gpt-5.2-codex",
        timeoutMs: 1_000,
        runId: "run-3",
        cliSessionId: "thread-123",
      }),
    ).rejects.toThrow("exceeded timeout");
  });

  it("falls back to per-agent workspace when workspaceDir is missing", async () => {
@@ -94,14 +165,18 @@ describe("runCliAgent resume cleanup", () => {
      },
    } satisfies OpenClawConfig;

    runExecMock.mockResolvedValue({ stdout: "", stderr: "" });
    runCommandWithTimeoutMock.mockResolvedValueOnce({
      stdout: "ok",
      stderr: "",
      code: 0,
      signal: null,
      killed: false,
    });
    supervisorSpawnMock.mockResolvedValueOnce(
      createManagedRun({
        reason: "exit",
        exitCode: 0,
        exitSignal: null,
        durationMs: 25,
        stdout: "ok",
        stderr: "",
        timedOut: false,
        noOutputTimedOut: false,
      }),
    );

    try {
      await runCliAgent({
@@ -114,264 +189,33 @@ describe("runCliAgent resume cleanup", () => {
        provider: "codex-cli",
        model: "gpt-5.2-codex",
        timeoutMs: 1_000,
        runId: "run-1",
        runId: "run-4",
      });
    } finally {
      await fs.rm(tempDir, { recursive: true, force: true });
    }

    const options = runCommandWithTimeoutMock.mock.calls[0]?.[1] as { cwd?: string };
    expect(options.cwd).toBe(path.resolve(fallbackWorkspace));
    const input = supervisorSpawnMock.mock.calls[0]?.[0] as { cwd?: string };
    expect(input.cwd).toBe(path.resolve(fallbackWorkspace));
  });
});

  it("throws when sessionKey is malformed", async () => {
    const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-cli-runner-"));
    const mainWorkspace = path.join(tempDir, "workspace-main");
|
||||
const researchWorkspace = path.join(tempDir, "workspace-research");
|
||||
await fs.mkdir(mainWorkspace, { recursive: true });
|
||||
await fs.mkdir(researchWorkspace, { recursive: true });
|
||||
const cfg = {
|
||||
agents: {
|
||||
defaults: {
|
||||
workspace: mainWorkspace,
|
||||
describe("resolveCliNoOutputTimeoutMs", () => {
|
||||
it("uses backend-configured resume watchdog override", () => {
|
||||
const timeoutMs = resolveCliNoOutputTimeoutMs({
|
||||
backend: {
|
||||
command: "codex",
|
||||
reliability: {
|
||||
watchdog: {
|
||||
resume: {
|
||||
noOutputTimeoutMs: 42_000,
|
||||
},
|
||||
},
|
||||
},
|
||||
list: [{ id: "research", workspace: researchWorkspace }],
|
||||
},
|
||||
} satisfies OpenClawConfig;
|
||||
|
||||
try {
|
||||
await expect(
|
||||
runCliAgent({
|
||||
sessionId: "s1",
|
||||
sessionKey: "agent::broken",
|
||||
agentId: "research",
|
||||
sessionFile: "/tmp/session.jsonl",
|
||||
workspaceDir: undefined as unknown as string,
|
||||
config: cfg,
|
||||
prompt: "hi",
|
||||
provider: "codex-cli",
|
||||
model: "gpt-5.2-codex",
|
||||
timeoutMs: 1_000,
|
||||
runId: "run-2",
|
||||
}),
|
||||
).rejects.toThrow("Malformed agent session key");
|
||||
} finally {
|
||||
await fs.rm(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
expect(runCommandWithTimeoutMock).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe("cleanupSuspendedCliProcesses", () => {
beforeEach(() => {
runExecMock.mockReset();
});

it("skips when no session tokens are configured", async () => {
await cleanupSuspendedCliProcesses(
{
command: "tool",
} as CliBackendConfig,
0,
);

if (process.platform === "win32") {
expect(runExecMock).not.toHaveBeenCalled();
return;
}

expect(runExecMock).not.toHaveBeenCalled();
});

it("matches sessionArg-based commands", async () => {
const selfPid = process.pid;
runExecMock
.mockResolvedValueOnce({
stdout: [
` 40 ${selfPid} T+ claude --session-id thread-1 -p`,
` 41 ${selfPid} S claude --session-id thread-2 -p`,
].join("\n"),
stderr: "",
})
.mockResolvedValueOnce({ stdout: "", stderr: "" });

await cleanupSuspendedCliProcesses(
{
command: "claude",
sessionArg: "--session-id",
} as CliBackendConfig,
0,
);

if (process.platform === "win32") {
expect(runExecMock).not.toHaveBeenCalled();
return;
}

expect(runExecMock).toHaveBeenCalledTimes(2);
const killCall = runExecMock.mock.calls[1] ?? [];
expect(killCall[0]).toBe("kill");
expect(killCall[1]).toEqual(["-9", "40"]);
});

it("matches resumeArgs with positional session id", async () => {
const selfPid = process.pid;
runExecMock
.mockResolvedValueOnce({
stdout: [
` 50 ${selfPid} T codex exec resume thread-99 --color never --sandbox read-only`,
` 51 ${selfPid} T codex exec resume other --color never --sandbox read-only`,
].join("\n"),
stderr: "",
})
.mockResolvedValueOnce({ stdout: "", stderr: "" });

await cleanupSuspendedCliProcesses(
{
command: "codex",
resumeArgs: ["exec", "resume", "{sessionId}", "--color", "never", "--sandbox", "read-only"],
} as CliBackendConfig,
1,
);

if (process.platform === "win32") {
expect(runExecMock).not.toHaveBeenCalled();
return;
}

expect(runExecMock).toHaveBeenCalledTimes(2);
const killCall = runExecMock.mock.calls[1] ?? [];
expect(killCall[0]).toBe("kill");
expect(killCall[1]).toEqual(["-9", "50", "51"]);
});

it("only kills child processes of current process (ppid validation)", async () => {
const selfPid = process.pid;
const childPid = selfPid + 1;
const unrelatedPid = 9999;

runExecMock
.mockResolvedValueOnce({
stdout: [
` ${childPid} ${selfPid} T claude --session-id thread-1 -p`,
` ${unrelatedPid} 100 T claude --session-id thread-2 -p`,
].join("\n"),
stderr: "",
})
.mockResolvedValueOnce({ stdout: "", stderr: "" });

await cleanupSuspendedCliProcesses(
{
command: "claude",
sessionArg: "--session-id",
} as CliBackendConfig,
0,
);

if (process.platform === "win32") {
expect(runExecMock).not.toHaveBeenCalled();
return;
}

expect(runExecMock).toHaveBeenCalledTimes(2);
const killCall = runExecMock.mock.calls[1] ?? [];
expect(killCall[0]).toBe("kill");
// Only childPid killed; unrelatedPid (ppid=100) excluded
expect(killCall[1]).toEqual(["-9", String(childPid)]);
});

it("skips all processes when none are children of current process", async () => {
runExecMock.mockResolvedValueOnce({
stdout: [
" 200 100 T claude --session-id thread-1 -p",
" 201 100 T claude --session-id thread-2 -p",
].join("\n"),
stderr: "",
timeoutMs: 120_000,
useResume: true,
});

await cleanupSuspendedCliProcesses(
{
command: "claude",
sessionArg: "--session-id",
} as CliBackendConfig,
0,
);

if (process.platform === "win32") {
expect(runExecMock).not.toHaveBeenCalled();
return;
}

// Only ps called — no kill because no matching ppid
expect(runExecMock).toHaveBeenCalledTimes(1);
});
});

describe("cleanupResumeProcesses", () => {
beforeEach(() => {
runExecMock.mockReset();
});

it("only kills resume processes owned by current process", async () => {
const selfPid = process.pid;

runExecMock
.mockResolvedValueOnce({
stdout: [
` ${selfPid + 1} ${selfPid} codex exec resume abc-123`,
` ${selfPid + 2} 999 codex exec resume abc-123`,
].join("\n"),
stderr: "",
})
.mockResolvedValueOnce({ stdout: "", stderr: "" })
.mockResolvedValueOnce({ stdout: "", stderr: "" });

await cleanupResumeProcesses(
{
command: "codex",
resumeArgs: ["exec", "resume", "{sessionId}"],
} as CliBackendConfig,
"abc-123",
);

if (process.platform === "win32") {
expect(runExecMock).not.toHaveBeenCalled();
return;
}

expect(runExecMock).toHaveBeenCalledTimes(3);

const termCall = runExecMock.mock.calls[1] ?? [];
expect(termCall[0]).toBe("kill");
expect(termCall[1]).toEqual(["-TERM", String(selfPid + 1)]);

const killCall = runExecMock.mock.calls[2] ?? [];
expect(killCall[0]).toBe("kill");
expect(killCall[1]).toEqual(["-9", String(selfPid + 1)]);
});

it("skips kill when no resume processes match ppid", async () => {
runExecMock.mockResolvedValueOnce({
stdout: [" 300 100 codex exec resume abc-123", " 301 200 codex exec resume abc-123"].join(
"\n",
),
stderr: "",
});

await cleanupResumeProcesses(
{
command: "codex",
resumeArgs: ["exec", "resume", "{sessionId}"],
} as CliBackendConfig,
"abc-123",
);

if (process.platform === "win32") {
expect(runExecMock).not.toHaveBeenCalled();
return;
}

// Only ps called — no kill because no matching ppid
expect(runExecMock).toHaveBeenCalledTimes(1);
expect(timeoutMs).toBe(42_000);
});
});

@@ -6,20 +6,20 @@ import { resolveHeartbeatPrompt } from "../auto-reply/heartbeat.js";
import { shouldLogVerbose } from "../globals.js";
import { isTruthyEnvValue } from "../infra/env.js";
import { createSubsystemLogger } from "../logging/subsystem.js";
import { runCommandWithTimeout } from "../process/exec.js";
import { getProcessSupervisor } from "../process/supervisor/index.js";
import { resolveSessionAgentIds } from "./agent-scope.js";
import { makeBootstrapWarn, resolveBootstrapContextForRun } from "./bootstrap-files.js";
import { resolveCliBackendConfig } from "./cli-backends.js";
import {
appendImagePathsToPrompt,
buildCliSupervisorScopeKey,
buildCliArgs,
buildSystemPrompt,
cleanupResumeProcesses,
cleanupSuspendedCliProcesses,
enqueueCliRun,
normalizeCliModel,
parseCliJson,
parseCliJsonl,
resolveCliNoOutputTimeoutMs,
resolvePromptInput,
resolveSessionIdToSend,
resolveSystemPromptUsage,
@@ -226,19 +226,32 @@ export async function runCliAgent(params: {
}
return next;
})();

// Cleanup suspended processes that have accumulated (regardless of sessionId)
await cleanupSuspendedCliProcesses(backend);
if (useResume && cliSessionIdToSend) {
await cleanupResumeProcesses(backend, cliSessionIdToSend);
}

const result = await runCommandWithTimeout([backend.command, ...args], {
const noOutputTimeoutMs = resolveCliNoOutputTimeoutMs({
backend,
timeoutMs: params.timeoutMs,
useResume,
});
const supervisor = getProcessSupervisor();
const scopeKey = buildCliSupervisorScopeKey({
backend,
backendId: backendResolved.id,
cliSessionId: useResume ? cliSessionIdToSend : undefined,
});

const managedRun = await supervisor.spawn({
sessionId: params.sessionId,
backendId: backendResolved.id,
scopeKey,
replaceExistingScope: Boolean(useResume && scopeKey),
mode: "child",
argv: [backend.command, ...args],
timeoutMs: params.timeoutMs,
noOutputTimeoutMs,
cwd: workspaceDir,
env,
input: stdinPayload,
});
const result = await managedRun.wait();

const stdout = result.stdout.trim();
const stderr = result.stderr.trim();
@@ -259,7 +272,28 @@ export async function runCliAgent(params: {
}
}

if (result.code !== 0) {
if (result.exitCode !== 0 || result.reason !== "exit") {
if (result.reason === "no-output-timeout" || result.noOutputTimedOut) {
const timeoutReason = `CLI produced no output for ${Math.round(noOutputTimeoutMs / 1000)}s and was terminated.`;
log.warn(
`cli watchdog timeout: provider=${params.provider} model=${modelId} session=${cliSessionIdToSend ?? params.sessionId} noOutputTimeoutMs=${noOutputTimeoutMs} pid=${managedRun.pid ?? "unknown"}`,
);
throw new FailoverError(timeoutReason, {
reason: "timeout",
provider: params.provider,
model: modelId,
status: resolveFailoverStatus("timeout"),
});
}
if (result.reason === "overall-timeout") {
const timeoutReason = `CLI exceeded timeout (${Math.round(params.timeoutMs / 1000)}s) and was terminated.`;
throw new FailoverError(timeoutReason, {
reason: "timeout",
provider: params.provider,
model: modelId,
status: resolveFailoverStatus("timeout"),
});
}
const err = stderr || stdout || "CLI failed.";
const reason = classifyFailoverReason(err) ?? "unknown";
const status = resolveFailoverStatus(reason);

@@ -11,230 +11,26 @@ import type { EmbeddedContextFile } from "../pi-embedded-helpers.js";
import { resolveCliName } from "../../cli/cli-name.js";
import { runExec } from "../../process/exec.js";
import { buildTtsSystemPromptHint } from "../../tts/tts.js";
import { escapeRegExp, isRecord } from "../../utils.js";
import { isRecord } from "../../utils.js";
import { buildModelAliasLines } from "../model-alias-lines.js";
import { resolveDefaultModelForAgent } from "../model-selection.js";
import { detectRuntimeShell } from "../shell-utils.js";
import { buildSystemPromptParams } from "../system-prompt-params.js";
import { buildAgentSystemPrompt } from "../system-prompt.js";
export { buildCliSupervisorScopeKey, resolveCliNoOutputTimeoutMs } from "./reliability.js";

const CLI_RUN_QUEUE = new Map<string, Promise<unknown>>();

function buildLooseArgOrderRegex(tokens: string[]): RegExp {
// Scan `ps` output lines. Keep matching flexible, but require whitespace arg boundaries
// to avoid substring matches like `codexx` or `/path/to/codexx`.
const [head, ...rest] = tokens.map((t) => String(t ?? "").trim()).filter(Boolean);
if (!head) {
return /$^/;
}

const headEscaped = escapeRegExp(head);
const headFragment = `(?:^|\\s)(?:${headEscaped}|\\S+\\/${headEscaped})(?=\\s|$)`;
const restFragments = rest.map((t) => `(?:^|\\s)${escapeRegExp(t)}(?=\\s|$)`);
return new RegExp([headFragment, ...restFragments].join(".*"));
}
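The loose-arg-order matcher deserves a worked example. This is a standalone sketch of the same construction, with a local `escapeRegExp` standing in for the helper imported from `../../utils.js` in the real module:

```typescript
// Local stand-in for the escapeRegExp imported from ../../utils.js.
function escapeRegExp(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

// Each token must sit at whitespace boundaries; the head token may also be
// matched as the basename of an absolute path (e.g. /usr/local/bin/codex).
function buildLooseArgOrderRegex(tokens: string[]): RegExp {
  const [head, ...rest] = tokens.map((t) => String(t ?? "").trim()).filter(Boolean);
  if (!head) {
    return /$^/; // matches nothing
  }
  const headEscaped = escapeRegExp(head);
  const headFragment = `(?:^|\\s)(?:${headEscaped}|\\S+\\/${headEscaped})(?=\\s|$)`;
  const restFragments = rest.map((t) => `(?:^|\\s)${escapeRegExp(t)}(?=\\s|$)`);
  return new RegExp([headFragment, ...restFragments].join(".*"));
}

const re = buildLooseArgOrderRegex(["codex", "resume", "abc-123"]);
console.log(re.test("/usr/local/bin/codex exec resume abc-123")); // true
console.log(re.test("codexx exec resume abc-123")); // false: no substring match
```

The whitespace lookaheads are what reject `codexx` while still accepting extra arguments interleaved between the expected tokens.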

async function psWithFallback(argsA: string[], argsB: string[]): Promise<string> {
try {
const { stdout } = await runExec("ps", argsA);
return stdout;
} catch {
// fallthrough
}
const { stdout } = await runExec("ps", argsB);
return stdout;
}

export async function cleanupResumeProcesses(
backend: CliBackendConfig,
sessionId: string,
): Promise<void> {
if (process.platform === "win32") {
return;
}
const resumeArgs = backend.resumeArgs ?? [];
if (resumeArgs.length === 0) {
return;
}
if (!resumeArgs.some((arg) => arg.includes("{sessionId}"))) {
return;
}
const commandToken = path.basename(backend.command ?? "").trim();
if (!commandToken) {
return;
}

const resumeTokens = resumeArgs.map((arg) => arg.replaceAll("{sessionId}", sessionId));
const pattern = [commandToken, ...resumeTokens]
.filter(Boolean)
.map((token) => escapeRegExp(token))
.join(".*");
if (!pattern) {
return;
}

try {
const stdout = await psWithFallback(
["-axww", "-o", "pid=,ppid=,command="],
["-ax", "-o", "pid=,ppid=,command="],
);
const patternRegex = buildLooseArgOrderRegex([commandToken, ...resumeTokens]);
const toKill: number[] = [];

for (const line of stdout.split("\n")) {
const trimmed = line.trim();
if (!trimmed) {
continue;
}
const match = /^(\d+)\s+(\d+)\s+(.*)$/.exec(trimmed);
if (!match) {
continue;
}
const pid = Number(match[1]);
const ppid = Number(match[2]);
const cmd = match[3] ?? "";
if (!Number.isFinite(pid)) {
continue;
}
if (ppid !== process.pid) {
continue;
}
if (!patternRegex.test(cmd)) {
continue;
}
toKill.push(pid);
}

if (toKill.length > 0) {
const pidArgs = toKill.map((pid) => String(pid));
try {
await runExec("kill", ["-TERM", ...pidArgs]);
} catch {
// ignore
}
await new Promise((resolve) => setTimeout(resolve, 250));
try {
await runExec("kill", ["-9", ...pidArgs]);
} catch {
// ignore
}
}
} catch {
// ignore errors - best effort cleanup
}
}

function buildSessionMatchers(backend: CliBackendConfig): RegExp[] {
const commandToken = path.basename(backend.command ?? "").trim();
if (!commandToken) {
return [];
}
const matchers: RegExp[] = [];
const sessionArg = backend.sessionArg?.trim();
const sessionArgs = backend.sessionArgs ?? [];
const resumeArgs = backend.resumeArgs ?? [];

const addMatcher = (args: string[]) => {
if (args.length === 0) {
return;
}
const tokens = [commandToken, ...args];
const pattern = tokens
.map((token, index) => {
const tokenPattern = tokenToRegex(token);
return index === 0 ? `(?:^|\\s)${tokenPattern}` : `\\s+${tokenPattern}`;
})
.join("");
matchers.push(new RegExp(pattern));
};

if (sessionArgs.some((arg) => arg.includes("{sessionId}"))) {
addMatcher(sessionArgs);
} else if (sessionArg) {
addMatcher([sessionArg, "{sessionId}"]);
}

if (resumeArgs.some((arg) => arg.includes("{sessionId}"))) {
addMatcher(resumeArgs);
}

return matchers;
}

function tokenToRegex(token: string): string {
if (!token.includes("{sessionId}")) {
return escapeRegExp(token);
}
const parts = token.split("{sessionId}").map((part) => escapeRegExp(part));
return parts.join("\\S+");
}
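The `{sessionId}` placeholder handling is easiest to see in isolation. A minimal sketch of the same token translation, again with a local `escapeRegExp` standing in for the imported helper:

```typescript
// Local stand-in for the escapeRegExp imported from ../../utils.js.
function escapeRegExp(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

// Literal tokens are escaped verbatim; each {sessionId} placeholder becomes
// a \S+ wildcard so any concrete session id matches.
function tokenToRegex(token: string): string {
  if (!token.includes("{sessionId}")) {
    return escapeRegExp(token);
  }
  return token.split("{sessionId}").map(escapeRegExp).join("\\S+");
}

console.log(tokenToRegex("--session-id")); // "--session-id" (nothing to escape)
console.log(new RegExp(tokenToRegex("resume={sessionId}")).test("resume=abc-123")); // true
```

Splitting on the placeholder first and escaping the fragments afterwards means a token like `sess.{sessionId}` still escapes its literal dot correctly.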

/**
* Cleanup suspended OpenClaw CLI processes that have accumulated.
* Only cleans up if there are more than the threshold (default: 10).
*/
export async function cleanupSuspendedCliProcesses(
backend: CliBackendConfig,
threshold = 10,
): Promise<void> {
if (process.platform === "win32") {
return;
}
const matchers = buildSessionMatchers(backend);
if (matchers.length === 0) {
return;
}

try {
const stdout = await psWithFallback(
["-axww", "-o", "pid=,ppid=,stat=,command="],
["-ax", "-o", "pid=,ppid=,stat=,command="],
);
const suspended: number[] = [];
for (const line of stdout.split("\n")) {
const trimmed = line.trim();
if (!trimmed) {
continue;
}
const match = /^(\d+)\s+(\d+)\s+(\S+)\s+(.*)$/.exec(trimmed);
if (!match) {
continue;
}
const pid = Number(match[1]);
const ppid = Number(match[2]);
const stat = match[3] ?? "";
const command = match[4] ?? "";
if (!Number.isFinite(pid)) {
continue;
}
if (ppid !== process.pid) {
continue;
}
if (!stat.includes("T")) {
continue;
}
if (!matchers.some((matcher) => matcher.test(command))) {
continue;
}
suspended.push(pid);
}

if (suspended.length > threshold) {
// Verified locally: stopped (T) processes ignore SIGTERM, so use SIGKILL.
await runExec("kill", ["-9", ...suspended.map((pid) => String(pid))]);
}
} catch {
// ignore errors - best effort cleanup
}
}
export function enqueueCliRun<T>(key: string, task: () => Promise<T>): Promise<T> {
const prior = CLI_RUN_QUEUE.get(key) ?? Promise.resolve();
const chained = prior.catch(() => undefined).then(task);
const tracked = chained.finally(() => {
if (CLI_RUN_QUEUE.get(key) === tracked) {
CLI_RUN_QUEUE.delete(key);
}
});
// Keep queue continuity even when a run rejects, without emitting unhandled rejections.
const tracked = chained
.catch(() => undefined)
.finally(() => {
if (CLI_RUN_QUEUE.get(key) === tracked) {
CLI_RUN_QUEUE.delete(key);
}
});
CLI_RUN_QUEUE.set(key, tracked);
return chained;
}
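The per-key queue is a small but subtle pattern: tasks sharing a key run strictly one after another, and a rejected task must not poison the chain for its successors. A standalone sketch with a demo helper (the `demo` function and its key name are illustrative, not part of the real module):

```typescript
// Minimal standalone version of the per-key run queue.
const QUEUE = new Map<string, Promise<unknown>>();

function enqueue<T>(key: string, task: () => Promise<T>): Promise<T> {
  const prior = QUEUE.get(key) ?? Promise.resolve();
  // prior.catch(...) swallows an earlier rejection so this task still runs.
  const chained = prior.catch(() => undefined).then(task);
  // tracked never rejects, so storing it in the map cannot leak an
  // unhandled rejection; it also cleans itself out of the map when done.
  const tracked = chained
    .catch(() => undefined)
    .finally(() => {
      if (QUEUE.get(key) === tracked) {
        QUEUE.delete(key);
      }
    });
  QUEUE.set(key, tracked);
  return chained; // callers still observe the task's own rejection
}

async function demo(): Promise<string[]> {
  const order: string[] = [];
  await Promise.all([
    enqueue("backend", async () => { order.push("first"); }),
    enqueue("backend", async () => { order.push("second"); }),
    enqueue("backend", async () => { order.push("third"); }),
  ]);
  return order; // tasks ran serially, in submission order
}
```

Returning `chained` rather than `tracked` is the key design choice: the caller sees real failures, while the map only ever holds a settled-safe promise.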

88
src/agents/cli-runner/reliability.ts
Normal file
@@ -0,0 +1,88 @@
import path from "node:path";
import type { CliBackendConfig } from "../../config/types.js";
import {
CLI_FRESH_WATCHDOG_DEFAULTS,
CLI_RESUME_WATCHDOG_DEFAULTS,
CLI_WATCHDOG_MIN_TIMEOUT_MS,
} from "../cli-watchdog-defaults.js";

function pickWatchdogProfile(
backend: CliBackendConfig,
useResume: boolean,
): {
noOutputTimeoutMs?: number;
noOutputTimeoutRatio: number;
minMs: number;
maxMs: number;
} {
const defaults = useResume ? CLI_RESUME_WATCHDOG_DEFAULTS : CLI_FRESH_WATCHDOG_DEFAULTS;
const configured = useResume
? backend.reliability?.watchdog?.resume
: backend.reliability?.watchdog?.fresh;

const ratio = (() => {
const value = configured?.noOutputTimeoutRatio;
if (typeof value !== "number" || !Number.isFinite(value)) {
return defaults.noOutputTimeoutRatio;
}
return Math.max(0.05, Math.min(0.95, value));
})();
const minMs = (() => {
const value = configured?.minMs;
if (typeof value !== "number" || !Number.isFinite(value)) {
return defaults.minMs;
}
return Math.max(CLI_WATCHDOG_MIN_TIMEOUT_MS, Math.floor(value));
})();
const maxMs = (() => {
const value = configured?.maxMs;
if (typeof value !== "number" || !Number.isFinite(value)) {
return defaults.maxMs;
}
return Math.max(CLI_WATCHDOG_MIN_TIMEOUT_MS, Math.floor(value));
})();

return {
noOutputTimeoutMs:
typeof configured?.noOutputTimeoutMs === "number" &&
Number.isFinite(configured.noOutputTimeoutMs)
? Math.max(CLI_WATCHDOG_MIN_TIMEOUT_MS, Math.floor(configured.noOutputTimeoutMs))
: undefined,
noOutputTimeoutRatio: ratio,
minMs: Math.min(minMs, maxMs),
maxMs: Math.max(minMs, maxMs),
};
}

export function resolveCliNoOutputTimeoutMs(params: {
backend: CliBackendConfig;
timeoutMs: number;
useResume: boolean;
}): number {
const profile = pickWatchdogProfile(params.backend, params.useResume);
// Keep watchdog below global timeout in normal cases.
const cap = Math.max(CLI_WATCHDOG_MIN_TIMEOUT_MS, params.timeoutMs - 1_000);
if (profile.noOutputTimeoutMs !== undefined) {
return Math.min(profile.noOutputTimeoutMs, cap);
}
const computed = Math.floor(params.timeoutMs * profile.noOutputTimeoutRatio);
const bounded = Math.min(profile.maxMs, Math.max(profile.minMs, computed));
return Math.min(bounded, cap);
}
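Worked numbers make the clamping order clearer. A standalone sketch of the same arithmetic, hard-coding the resume defaults (ratio 0.3, min 60s, max 180s, 1s floor) instead of reading a backend config:

```typescript
// Sketch of the watchdog arithmetic using the resume defaults only.
const MIN_TIMEOUT_MS = 1_000;
const RESUME_DEFAULTS = { noOutputTimeoutRatio: 0.3, minMs: 60_000, maxMs: 180_000 };

function resolveNoOutputTimeoutMs(timeoutMs: number, override?: number): number {
  // The watchdog always stays at least 1s below the overall timeout.
  const cap = Math.max(MIN_TIMEOUT_MS, timeoutMs - 1_000);
  if (override !== undefined) {
    return Math.min(override, cap); // explicit config wins, but is still capped
  }
  const computed = Math.floor(timeoutMs * RESUME_DEFAULTS.noOutputTimeoutRatio);
  const bounded = Math.min(RESUME_DEFAULTS.maxMs, Math.max(RESUME_DEFAULTS.minMs, computed));
  return Math.min(bounded, cap);
}

console.log(resolveNoOutputTimeoutMs(600_000)); // 180000: 0.3 * 600s hits maxMs
console.log(resolveNoOutputTimeoutMs(60_000)); // 59000: minMs wins, then capped at timeout - 1s
console.log(resolveNoOutputTimeoutMs(600_000, 42_000)); // 42000: override applies
```

The second case shows why the cap comes last: a short overall timeout must always beat the profile minimum, or the watchdog would never fire before the run itself times out.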

export function buildCliSupervisorScopeKey(params: {
backend: CliBackendConfig;
backendId: string;
cliSessionId?: string;
}): string | undefined {
const commandToken = path
.basename(params.backend.command ?? "")
.trim()
.toLowerCase();
const backendToken = params.backendId.trim().toLowerCase();
const sessionToken = params.cliSessionId?.trim();
if (!sessionToken) {
return undefined;
}
return `cli:${backendToken}:${commandToken}:${sessionToken}`;
}
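The scope key ties a resumed CLI session to a single supervisor slot; fresh runs get no key and are never deduplicated. A simplified sketch (using a `split("/")` in place of `path.basename`, so it stays dependency-free; the input values below are illustrative):

```typescript
// Simplified buildCliSupervisorScopeKey: no session id means no scope key.
function buildScopeKey(
  backendId: string,
  command: string,
  cliSessionId?: string,
): string | undefined {
  // Stand-in for path.basename: take the last path segment of the command.
  const commandToken = (command.split("/").pop() ?? "").trim().toLowerCase();
  const sessionToken = cliSessionId?.trim();
  if (!sessionToken) {
    return undefined;
  }
  return `cli:${backendId.trim().toLowerCase()}:${commandToken}:${sessionToken}`;
}

console.log(buildScopeKey("codex-cli", "/usr/local/bin/codex", "thread-123"));
// "cli:codex-cli:codex:thread-123"
console.log(buildScopeKey("codex-cli", "codex")); // undefined: fresh run
```

Combined with `replaceExistingScope` in the spawn call above, two resumes of the same session collapse onto one supervised process instead of racing each other.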
13
src/agents/cli-watchdog-defaults.ts
Normal file
@@ -0,0 +1,13 @@
export const CLI_WATCHDOG_MIN_TIMEOUT_MS = 1_000;

export const CLI_FRESH_WATCHDOG_DEFAULTS = {
noOutputTimeoutRatio: 0.8,
minMs: 180_000,
maxMs: 600_000,
} as const;

export const CLI_RESUME_WATCHDOG_DEFAULTS = {
noOutputTimeoutRatio: 0.3,
minMs: 60_000,
maxMs: 180_000,
} as const;
62
src/agents/context.test.ts
Normal file
@@ -0,0 +1,62 @@
import { describe, expect, it } from "vitest";
import { applyConfiguredContextWindows } from "./context.js";
import { createSessionManagerRuntimeRegistry } from "./pi-extensions/session-manager-runtime-registry.js";

describe("applyConfiguredContextWindows", () => {
it("overrides discovered cache values with explicit models.providers contextWindow", () => {
const cache = new Map<string, number>([["anthropic/claude-opus-4-6", 1_000_000]]);
applyConfiguredContextWindows({
cache,
modelsConfig: {
providers: {
openrouter: {
models: [{ id: "anthropic/claude-opus-4-6", contextWindow: 200_000 }],
},
},
},
});

expect(cache.get("anthropic/claude-opus-4-6")).toBe(200_000);
});

it("adds config-only model context windows and ignores invalid entries", () => {
const cache = new Map<string, number>();
applyConfiguredContextWindows({
cache,
modelsConfig: {
providers: {
openrouter: {
models: [
{ id: "custom/model", contextWindow: 150_000 },
{ id: "bad/model", contextWindow: 0 },
{ id: "", contextWindow: 300_000 },
],
},
},
},
});

expect(cache.get("custom/model")).toBe(150_000);
expect(cache.has("bad/model")).toBe(false);
});
});

describe("createSessionManagerRuntimeRegistry", () => {
it("stores, reads, and clears values by object identity", () => {
const registry = createSessionManagerRuntimeRegistry<{ value: number }>();
const key = {};
expect(registry.get(key)).toBeNull();
registry.set(key, { value: 1 });
expect(registry.get(key)).toEqual({ value: 1 });
registry.set(key, null);
expect(registry.get(key)).toBeNull();
});

it("ignores non-object keys", () => {
const registry = createSessionManagerRuntimeRegistry<{ value: number }>();
registry.set(null, { value: 1 });
registry.set(123, { value: 1 });
expect(registry.get(null)).toBeNull();
expect(registry.get(123)).toBeNull();
});
});
@@ -6,13 +6,52 @@ import { resolveOpenClawAgentDir } from "./agent-paths.js";
import { ensureOpenClawModelsJson } from "./models-config.js";

type ModelEntry = { id: string; contextWindow?: number };
type ConfigModelEntry = { id?: string; contextWindow?: number };
type ProviderConfigEntry = { models?: ConfigModelEntry[] };
type ModelsConfig = { providers?: Record<string, ProviderConfigEntry | undefined> };

export function applyConfiguredContextWindows(params: {
cache: Map<string, number>;
modelsConfig: ModelsConfig | undefined;
}) {
const providers = params.modelsConfig?.providers;
if (!providers || typeof providers !== "object") {
return;
}
for (const provider of Object.values(providers)) {
if (!Array.isArray(provider?.models)) {
continue;
}
for (const model of provider.models) {
const modelId = typeof model?.id === "string" ? model.id : undefined;
const contextWindow =
typeof model?.contextWindow === "number" ? model.contextWindow : undefined;
if (!modelId || !contextWindow || contextWindow <= 0) {
continue;
}
params.cache.set(modelId, contextWindow);
}
}
}
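The override semantics are simple but worth seeing end to end: an explicit `contextWindow` in config replaces whatever discovery put in the cache, and malformed entries are skipped. A minimal standalone version (the provider and model names here are made-up examples, not values from the real config schema):

```typescript
type ConfigModelEntry = { id?: string; contextWindow?: number };
type ModelsConfig = {
  providers?: Record<string, { models?: ConfigModelEntry[] } | undefined>;
};

// Same shape as applyConfiguredContextWindows: explicit config wins, junk is skipped.
function applyOverrides(cache: Map<string, number>, modelsConfig?: ModelsConfig): void {
  const providers = modelsConfig?.providers;
  if (!providers || typeof providers !== "object") return;
  for (const provider of Object.values(providers)) {
    if (!Array.isArray(provider?.models)) continue;
    for (const model of provider.models) {
      const id = typeof model?.id === "string" ? model.id : undefined;
      const cw = typeof model?.contextWindow === "number" ? model.contextWindow : undefined;
      if (!id || !cw || cw <= 0) continue;
      cache.set(id, cw);
    }
  }
}

const cache = new Map([["example/model-a", 1_000_000]]); // discovered value
applyOverrides(cache, {
  providers: {
    example: {
      models: [
        { id: "example/model-a", contextWindow: 200_000 }, // override
        { id: "example/model-b", contextWindow: 0 }, // invalid, skipped
      ],
    },
  },
});
console.log(cache.get("example/model-a")); // 200000
```

Because `!cw` also rejects `0` and `NaN`, a zero or missing window can never shadow a discovered value.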

const MODEL_CACHE = new Map<string, number>();
const loadPromise = (async () => {
let cfg: ReturnType<typeof loadConfig> | undefined;
try {
cfg = loadConfig();
} catch {
// If config can't be loaded, leave cache empty.
return;
}

try {
await ensureOpenClawModelsJson(cfg);
} catch {
// Continue with best-effort discovery/overrides.
}

try {
const { discoverAuthStorage, discoverModels } = await import("./pi-model-discovery.js");
const cfg = loadConfig();
await ensureOpenClawModelsJson(cfg);
const agentDir = resolveOpenClawAgentDir();
const authStorage = discoverAuthStorage(agentDir);
const modelRegistry = discoverModels(authStorage, agentDir);
@@ -26,9 +65,16 @@ const loadPromise = (async () => {
}
}
} catch {
// If pi-ai isn't available, leave cache empty; lookup will fall back.
// If model discovery fails, continue with config overrides only.
}
})();

applyConfiguredContextWindows({
cache: MODEL_CACHE,
modelsConfig: cfg.models as ModelsConfig | undefined,
});
})().catch(() => {
// Keep lookup best-effort.
});

export function lookupContextTokens(modelId?: string): number | undefined {
if (!modelId) {

@@ -3,6 +3,7 @@ import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it } from "vitest";
import { captureEnv } from "../test-utils/env.js";
import { ensureAuthProfileStore } from "./auth-profiles.js";
import { getApiKeyForModel, resolveApiKeyForProvider, resolveEnvApiKey } from "./model-auth.js";

@@ -15,9 +16,11 @@ const oauthFixture = {

describe("getApiKeyForModel", () => {
it("migrates legacy oauth.json into auth-profiles.json", async () => {
const previousStateDir = process.env.OPENCLAW_STATE_DIR;
const previousAgentDir = process.env.OPENCLAW_AGENT_DIR;
const previousPiAgentDir = process.env.PI_CODING_AGENT_DIR;
const envSnapshot = captureEnv([
"OPENCLAW_STATE_DIR",
"OPENCLAW_AGENT_DIR",
"PI_CODING_AGENT_DIR",
]);
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-oauth-"));

try {
@@ -73,30 +76,18 @@ describe("getApiKeyForModel", () => {
},
});
} finally {
if (previousStateDir === undefined) {
delete process.env.OPENCLAW_STATE_DIR;
} else {
process.env.OPENCLAW_STATE_DIR = previousStateDir;
}
if (previousAgentDir === undefined) {
delete process.env.OPENCLAW_AGENT_DIR;
} else {
process.env.OPENCLAW_AGENT_DIR = previousAgentDir;
}
if (previousPiAgentDir === undefined) {
delete process.env.PI_CODING_AGENT_DIR;
} else {
process.env.PI_CODING_AGENT_DIR = previousPiAgentDir;
}
envSnapshot.restore();
await fs.rm(tempDir, { recursive: true, force: true });
}
});

it("suggests openai-codex when only Codex OAuth is configured", async () => {
const previousStateDir = process.env.OPENCLAW_STATE_DIR;
const previousAgentDir = process.env.OPENCLAW_AGENT_DIR;
const previousPiAgentDir = process.env.PI_CODING_AGENT_DIR;
const previousOpenAiKey = process.env.OPENAI_API_KEY;
const envSnapshot = captureEnv([
"OPENAI_API_KEY",
"OPENCLAW_STATE_DIR",
"OPENCLAW_AGENT_DIR",
"PI_CODING_AGENT_DIR",
]);
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-auth-"));

try {
@@ -137,26 +128,7 @@ describe("getApiKeyForModel", () => {
}
expect(String(error)).toContain("openai-codex/gpt-5.3-codex");
} finally {
if (previousOpenAiKey === undefined) {
delete process.env.OPENAI_API_KEY;
} else {
process.env.OPENAI_API_KEY = previousOpenAiKey;
}
if (previousStateDir === undefined) {
delete process.env.OPENCLAW_STATE_DIR;
} else {
process.env.OPENCLAW_STATE_DIR = previousStateDir;
}
if (previousAgentDir === undefined) {
delete process.env.OPENCLAW_AGENT_DIR;
} else {
process.env.OPENCLAW_AGENT_DIR = previousAgentDir;
}
if (previousPiAgentDir === undefined) {
delete process.env.PI_CODING_AGENT_DIR;
} else {
process.env.PI_CODING_AGENT_DIR = previousPiAgentDir;
}
envSnapshot.restore();
await fs.rm(tempDir, { recursive: true, force: true });
}
});

@@ -1,4 +1,5 @@
import { describe, expect, it } from "vitest";
import { captureEnv } from "../test-utils/env.js";
import { scanOpenRouterModels } from "./model-scan.js";

function createFetchFixture(payload: unknown): typeof fetch {
@@ -66,7 +67,7 @@ describe("scanOpenRouterModels", () => {

it("requires an API key when probing", async () => {
const fetchImpl = createFetchFixture({ data: [] });
const previousKey = process.env.OPENROUTER_API_KEY;
const envSnapshot = captureEnv(["OPENROUTER_API_KEY"]);
try {
delete process.env.OPENROUTER_API_KEY;
await expect(
@@ -77,11 +78,7 @@ describe("scanOpenRouterModels", () => {
}),
).rejects.toThrow(/Missing OpenRouter API key/);
} finally {
if (previousKey === undefined) {
delete process.env.OPENROUTER_API_KEY;
} else {
process.env.OPENROUTER_API_KEY = previousKey;
}
envSnapshot.restore();
}
});
});

@@ -1,6 +1,7 @@
import fs from "node:fs/promises";
import path from "node:path";
import { describe, expect, it, vi } from "vitest";
import { captureEnv } from "../test-utils/env.js";
import {
installModelsConfigTestHooks,
withModelsTempHome as withTempHome,
@@ -12,7 +13,7 @@ installModelsConfigTestHooks({ restoreFetch: true });
describe("models-config", () => {
it("auto-injects github-copilot provider when token is present", async () => {
await withTempHome(async (home) => {
const previous = process.env.COPILOT_GITHUB_TOKEN;
const envSnapshot = captureEnv(["COPILOT_GITHUB_TOKEN"]);
process.env.COPILOT_GITHUB_TOKEN = "gh-token";
const fetchMock = vi.fn().mockResolvedValue({
ok: true,
@@ -36,20 +37,14 @@ describe("models-config", () => {
expect(parsed.providers["github-copilot"]?.baseUrl).toBe("https://api.copilot.example");
expect(parsed.providers["github-copilot"]?.models?.length ?? 0).toBe(0);
} finally {
if (previous === undefined) {
delete process.env.COPILOT_GITHUB_TOKEN;
} else {
process.env.COPILOT_GITHUB_TOKEN = previous;
}
envSnapshot.restore();
}
});
});

it("prefers COPILOT_GITHUB_TOKEN over GH_TOKEN and GITHUB_TOKEN", async () => {
await withTempHome(async () => {
const previous = process.env.COPILOT_GITHUB_TOKEN;
const previousGh = process.env.GH_TOKEN;
const previousGithub = process.env.GITHUB_TOKEN;
const envSnapshot = captureEnv(["COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"]);
process.env.COPILOT_GITHUB_TOKEN = "copilot-token";
process.env.GH_TOKEN = "gh-token";
process.env.GITHUB_TOKEN = "github-token";
@@ -70,9 +65,7 @@ describe("models-config", () => {
const [, opts] = fetchMock.mock.calls[0] as [string, { headers?: Record<string, string> }];
expect(opts?.headers?.Authorization).toBe("Bearer copilot-token");
} finally {
process.env.COPILOT_GITHUB_TOKEN = previous;
process.env.GH_TOKEN = previousGh;
process.env.GITHUB_TOKEN = previousGithub;
envSnapshot.restore();
}
});
});

@@ -2,6 +2,7 @@ import fs from "node:fs/promises";
import path from "node:path";
import { describe, expect, it, vi } from "vitest";
import { DEFAULT_COPILOT_API_BASE_URL } from "../providers/github-copilot-token.js";
import { captureEnv } from "../test-utils/env.js";
import {
installModelsConfigTestHooks,
withModelsTempHome as withTempHome,
@@ -13,7 +14,7 @@ installModelsConfigTestHooks({ restoreFetch: true });
describe("models-config", () => {
it("falls back to default baseUrl when token exchange fails", async () => {
await withTempHome(async () => {
const previous = process.env.COPILOT_GITHUB_TOKEN;
const envSnapshot = captureEnv(["COPILOT_GITHUB_TOKEN"]);
process.env.COPILOT_GITHUB_TOKEN = "gh-token";
const fetchMock = vi.fn().mockResolvedValue({
ok: false,
@@ -33,20 +34,14 @@ describe("models-config", () => {

expect(parsed.providers["github-copilot"]?.baseUrl).toBe(DEFAULT_COPILOT_API_BASE_URL);
} finally {
if (previous === undefined) {
delete process.env.COPILOT_GITHUB_TOKEN;
} else {
process.env.COPILOT_GITHUB_TOKEN = previous;
}
envSnapshot.restore();
}
});
});

it("uses agentDir override auth profiles for copilot injection", async () => {
await withTempHome(async (home) => {
const previous = process.env.COPILOT_GITHUB_TOKEN;
const previousGh = process.env.GH_TOKEN;
const previousGithub = process.env.GITHUB_TOKEN;
const envSnapshot = captureEnv(["COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"]);
delete process.env.COPILOT_GITHUB_TOKEN;
delete process.env.GH_TOKEN;
delete process.env.GITHUB_TOKEN;
@@ -91,21 +86,7 @@ describe("models-config", () => {

expect(parsed.providers["github-copilot"]?.baseUrl).toBe("https://api.copilot.example");
} finally {
if (previous === undefined) {
delete process.env.COPILOT_GITHUB_TOKEN;
} else {
process.env.COPILOT_GITHUB_TOKEN = previous;
}
if (previousGh === undefined) {
delete process.env.GH_TOKEN;
} else {
process.env.GH_TOKEN = previousGh;
}
if (previousGithub === undefined) {
delete process.env.GITHUB_TOKEN;
} else {
process.env.GITHUB_TOKEN = previousGithub;
}
envSnapshot.restore();
}
});
});

@@ -2,12 +2,13 @@ import { mkdtempSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { describe, expect, it } from "vitest";
import { captureEnv } from "../test-utils/env.js";
import { resolveImplicitProviders } from "./models-config.providers.js";

describe("MiniMax implicit provider (#15275)", () => {
it("should use anthropic-messages API for API-key provider", async () => {
const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-"));
const previous = process.env.MINIMAX_API_KEY;
const envSnapshot = captureEnv(["MINIMAX_API_KEY"]);
process.env.MINIMAX_API_KEY = "test-key";

try {
@@ -16,11 +17,7 @@ describe("MiniMax implicit provider (#15275)", () => {
expect(providers?.minimax?.api).toBe("anthropic-messages");
expect(providers?.minimax?.baseUrl).toBe("https://api.minimax.io/anthropic");
} finally {
if (previous === undefined) {
delete process.env.MINIMAX_API_KEY;
} else {
process.env.MINIMAX_API_KEY = previous;
}
envSnapshot.restore();
}
});
});

@@ -2,13 +2,14 @@ import { mkdtempSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { describe, expect, it } from "vitest";
import { captureEnv } from "../test-utils/env.js";
import { resolveApiKeyForProvider } from "./model-auth.js";
import { buildNvidiaProvider, resolveImplicitProviders } from "./models-config.providers.js";

describe("NVIDIA provider", () => {
it("should include nvidia when NVIDIA_API_KEY is configured", async () => {
const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-"));
const previous = process.env.NVIDIA_API_KEY;
const envSnapshot = captureEnv(["NVIDIA_API_KEY"]);
process.env.NVIDIA_API_KEY = "test-key";

try {
@@ -16,17 +17,13 @@ describe("NVIDIA provider", () => {
expect(providers?.nvidia).toBeDefined();
expect(providers?.nvidia?.models?.length).toBeGreaterThan(0);
} finally {
if (previous === undefined) {
delete process.env.NVIDIA_API_KEY;
} else {
process.env.NVIDIA_API_KEY = previous;
}
envSnapshot.restore();
}
});

it("resolves the nvidia api key value from env", async () => {
const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-"));
const previous = process.env.NVIDIA_API_KEY;
const envSnapshot = captureEnv(["NVIDIA_API_KEY"]);
process.env.NVIDIA_API_KEY = "nvidia-test-api-key";

try {
@@ -39,11 +36,7 @@ describe("NVIDIA provider", () => {
expect(auth.mode).toBe("api-key");
expect(auth.source).toContain("NVIDIA_API_KEY");
} finally {
if (previous === undefined) {
delete process.env.NVIDIA_API_KEY;
} else {
process.env.NVIDIA_API_KEY = previous;
}
envSnapshot.restore();
}
});


@@ -2,12 +2,13 @@ import { mkdtempSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { describe, expect, it } from "vitest";
import { captureEnv } from "../test-utils/env.js";
import { resolveImplicitProviders } from "./models-config.providers.js";

describe("Qianfan provider", () => {
it("should include qianfan when QIANFAN_API_KEY is configured", async () => {
const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-"));
const previous = process.env.QIANFAN_API_KEY;
const envSnapshot = captureEnv(["QIANFAN_API_KEY"]);
process.env.QIANFAN_API_KEY = "test-key";

try {
@@ -15,11 +16,7 @@ describe("Qianfan provider", () => {
expect(providers?.qianfan).toBeDefined();
expect(providers?.qianfan?.apiKey).toBe("QIANFAN_API_KEY");
} finally {
if (previous === undefined) {
delete process.env.QIANFAN_API_KEY;
} else {
process.env.QIANFAN_API_KEY = previous;
}
envSnapshot.restore();
}
});
});

@@ -1,6 +1,7 @@
import fs from "node:fs/promises";
import path from "node:path";
import { describe, expect, it, vi } from "vitest";
import { captureEnv } from "../test-utils/env.js";
import { resolveOpenClawAgentDir } from "./agent-paths.js";
import {
installModelsConfigTestHooks,
@@ -13,9 +14,7 @@ installModelsConfigTestHooks({ restoreFetch: true });
describe("models-config", () => {
it("uses the first github-copilot profile when env tokens are missing", async () => {
await withTempHome(async (home) => {
const previous = process.env.COPILOT_GITHUB_TOKEN;
const previousGh = process.env.GH_TOKEN;
const previousGithub = process.env.GITHUB_TOKEN;
const envSnapshot = captureEnv(["COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"]);
delete process.env.COPILOT_GITHUB_TOKEN;
delete process.env.GH_TOKEN;
delete process.env.GITHUB_TOKEN;
@@ -61,28 +60,14 @@ describe("models-config", () => {
const [, opts] = fetchMock.mock.calls[0] as [string, { headers?: Record<string, string> }];
expect(opts?.headers?.Authorization).toBe("Bearer alpha-token");
} finally {
if (previous === undefined) {
delete process.env.COPILOT_GITHUB_TOKEN;
} else {
process.env.COPILOT_GITHUB_TOKEN = previous;
}
if (previousGh === undefined) {
delete process.env.GH_TOKEN;
} else {
process.env.GH_TOKEN = previousGh;
}
if (previousGithub === undefined) {
delete process.env.GITHUB_TOKEN;
} else {
process.env.GITHUB_TOKEN = previousGithub;
}
envSnapshot.restore();
}
});
});

it("does not override explicit github-copilot provider config", async () => {
await withTempHome(async () => {
const previous = process.env.COPILOT_GITHUB_TOKEN;
const envSnapshot = captureEnv(["COPILOT_GITHUB_TOKEN"]);
process.env.COPILOT_GITHUB_TOKEN = "gh-token";
const fetchMock = vi.fn().mockResolvedValue({
ok: true,
@@ -115,11 +100,7 @@ describe("models-config", () => {

expect(parsed.providers["github-copilot"]?.baseUrl).toBe("https://copilot.local");
} finally {
if (previous === undefined) {
delete process.env.COPILOT_GITHUB_TOKEN;
} else {
process.env.COPILOT_GITHUB_TOKEN = previous;
}
envSnapshot.restore();
}
});
});

@@ -2,6 +2,7 @@ import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it, vi } from "vitest";
import { captureEnv } from "../test-utils/env.js";
import "./test-helpers/fast-core-tools.js";
import { createOpenClawTools } from "./openclaw-tools.js";

@@ -18,8 +19,7 @@ describe("gateway tool", () => {
it("schedules SIGUSR1 restart", async () => {
vi.useFakeTimers();
const kill = vi.spyOn(process, "kill").mockImplementation(() => true);
const previousStateDir = process.env.OPENCLAW_STATE_DIR;
const previousProfile = process.env.OPENCLAW_PROFILE;
const envSnapshot = captureEnv(["OPENCLAW_STATE_DIR", "OPENCLAW_PROFILE"]);
const stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-test-"));
process.env.OPENCLAW_STATE_DIR = stateDir;
process.env.OPENCLAW_PROFILE = "isolated";
@@ -60,16 +60,8 @@ describe("gateway tool", () => {
} finally {
kill.mockRestore();
vi.useRealTimers();
if (previousStateDir === undefined) {
delete process.env.OPENCLAW_STATE_DIR;
} else {
process.env.OPENCLAW_STATE_DIR = previousStateDir;
}
if (previousProfile === undefined) {
delete process.env.OPENCLAW_PROFILE;
} else {
process.env.OPENCLAW_PROFILE = previousProfile;
}
envSnapshot.restore();
await fs.rm(stateDir, { recursive: true, force: true });
}
});


@@ -783,7 +783,7 @@ describe("sessions tools", () => {
text?: string;
};
expect(details.status).toBe("ok");
expect(details.text).toContain("tokens 1k (in 12 / out 1k)");
expect(details.text).toMatch(/tokens 1(\.0)?k \(in 12 \/ out 1(\.0)?k\)/);
expect(details.text).toContain("prompt/cache 197k");
expect(details.text).not.toContain("1.0k io");
} finally {

@@ -33,21 +33,37 @@ vi.mock("../gateway/call.js", () => {
};
});

type GatewayCall = { method: string; params?: Record<string, unknown> };

async function getGatewayCalls(): Promise<GatewayCall[]> {
const { callGateway } = await import("../gateway/call.js");
return (callGateway as unknown as ReturnType<typeof vi.fn>).mock.calls.map(
(call) => call[0] as GatewayCall,
);
}

function findLastCall(calls: GatewayCall[], predicate: (call: GatewayCall) => boolean) {
for (let i = calls.length - 1; i >= 0; i -= 1) {
const call = calls[i];
if (call && predicate(call)) {
return call;
}
}
return undefined;
}

describe("sessions_spawn thinking defaults", () => {
it("applies agents.defaults.subagents.thinking when thinking is omitted", async () => {
const tool = createSessionsSpawnTool({ agentSessionKey: "agent:test:main" });
const result = await tool.execute("call-1", { task: "hello" });
expect(result.details).toMatchObject({ status: "accepted" });

const { callGateway } = await import("../gateway/call.js");
const calls = (callGateway as unknown as ReturnType<typeof vi.fn>).mock.calls;

const agentCall = calls
.map((call) => call[0] as { method: string; params?: Record<string, unknown> })
.findLast((call) => call.method === "agent");
const thinkingPatch = calls
.map((call) => call[0] as { method: string; params?: Record<string, unknown> })
.findLast((call) => call.method === "sessions.patch" && call.params?.thinkingLevel);
const calls = await getGatewayCalls();
const agentCall = findLastCall(calls, (call) => call.method === "agent");
const thinkingPatch = findLastCall(
calls,
(call) => call.method === "sessions.patch" && call.params?.thinkingLevel !== undefined,
);

expect(agentCall?.params?.thinking).toBe("high");
expect(thinkingPatch?.params?.thinkingLevel).toBe("high");
@@ -58,15 +74,12 @@ describe("sessions_spawn thinking defaults", () => {
const result = await tool.execute("call-2", { task: "hello", thinking: "low" });
expect(result.details).toMatchObject({ status: "accepted" });

const { callGateway } = await import("../gateway/call.js");
const calls = (callGateway as unknown as ReturnType<typeof vi.fn>).mock.calls;

const agentCall = calls
.map((call) => call[0] as { method: string; params?: Record<string, unknown> })
.findLast((call) => call.method === "agent");
const thinkingPatch = calls
.map((call) => call[0] as { method: string; params?: Record<string, unknown> })
.findLast((call) => call.method === "sessions.patch" && call.params?.thinkingLevel);
const calls = await getGatewayCalls();
const agentCall = findLastCall(calls, (call) => call.method === "agent");
const thinkingPatch = findLastCall(
calls,
(call) => call.method === "sessions.patch" && call.params?.thinkingLevel !== undefined,
);

expect(agentCall?.params?.thinking).toBe("low");
expect(thinkingPatch?.params?.thinkingLevel).toBe("low");

@@ -1,5 +1,4 @@
import { beforeEach, describe, expect, it } from "vitest";
import { createOpenClawTools } from "./openclaw-tools.js";
import "./test-helpers/fast-core-tools.js";
import {
getCallGatewayMock,
@@ -10,6 +9,19 @@ import { resetSubagentRegistryForTests } from "./subagent-registry.js";

const callGatewayMock = getCallGatewayMock();

type CreateOpenClawTools = (typeof import("./openclaw-tools.js"))["createOpenClawTools"];
type CreateOpenClawToolsOpts = Parameters<CreateOpenClawTools>[0];

async function getSessionsSpawnTool(opts: CreateOpenClawToolsOpts) {
// Dynamic import: ensure harness mocks are installed before tool modules load.
const { createOpenClawTools } = await import("./openclaw-tools.js");
const tool = createOpenClawTools(opts).find((candidate) => candidate.name === "sessions_spawn");
if (!tool) {
throw new Error("missing sessions_spawn tool");
}
return tool;
}

describe("openclaw-tools: subagents (sessions_spawn allowlist)", () => {
beforeEach(() => {
resetSessionsSpawnConfigOverride();
@@ -19,13 +31,10 @@ describe("openclaw-tools: subagents (sessions_spawn allowlist)", () => {
resetSubagentRegistryForTests();
callGatewayMock.mockReset();

const tool = createOpenClawTools({
const tool = await getSessionsSpawnTool({
agentSessionKey: "main",
agentChannel: "whatsapp",
}).find((candidate) => candidate.name === "sessions_spawn");
if (!tool) {
throw new Error("missing sessions_spawn tool");
}
});

const result = await tool.execute("call6", {
task: "do thing",
@@ -57,13 +66,10 @@ describe("openclaw-tools: subagents (sessions_spawn allowlist)", () => {
},
});

const tool = createOpenClawTools({
const tool = await getSessionsSpawnTool({
agentSessionKey: "main",
agentChannel: "whatsapp",
}).find((candidate) => candidate.name === "sessions_spawn");
if (!tool) {
throw new Error("missing sessions_spawn tool");
}
});

const result = await tool.execute("call9", {
task: "do thing",
@@ -78,7 +84,7 @@ describe("openclaw-tools: subagents (sessions_spawn allowlist)", () => {
it("sessions_spawn allows cross-agent spawning when configured", async () => {
resetSubagentRegistryForTests();
callGatewayMock.mockReset();
setConfigOverride({
setSessionsSpawnConfigOverride({
session: {
mainKey: "main",
scope: "per-sender",
@@ -109,13 +115,10 @@ describe("openclaw-tools: subagents (sessions_spawn allowlist)", () => {
return {};
});

const tool = createOpenClawTools({
const tool = await getSessionsSpawnTool({
agentSessionKey: "main",
agentChannel: "whatsapp",
}).find((candidate) => candidate.name === "sessions_spawn");
if (!tool) {
throw new Error("missing sessions_spawn tool");
}
});

const result = await tool.execute("call7", {
task: "do thing",
@@ -132,7 +135,7 @@ describe("openclaw-tools: subagents (sessions_spawn allowlist)", () => {
it("sessions_spawn allows any agent when allowlist is *", async () => {
resetSubagentRegistryForTests();
callGatewayMock.mockReset();
setConfigOverride({
setSessionsSpawnConfigOverride({
session: {
mainKey: "main",
scope: "per-sender",
@@ -163,13 +166,10 @@ describe("openclaw-tools: subagents (sessions_spawn allowlist)", () => {
return {};
});

const tool = createOpenClawTools({
const tool = await getSessionsSpawnTool({
agentSessionKey: "main",
agentChannel: "whatsapp",
}).find((candidate) => candidate.name === "sessions_spawn");
if (!tool) {
throw new Error("missing sessions_spawn tool");
}
});

const result = await tool.execute("call8", {
task: "do thing",
@@ -186,7 +186,7 @@ describe("openclaw-tools: subagents (sessions_spawn allowlist)", () => {
it("sessions_spawn normalizes allowlisted agent ids", async () => {
resetSubagentRegistryForTests();
callGatewayMock.mockReset();
setConfigOverride({
setSessionsSpawnConfigOverride({
session: {
mainKey: "main",
scope: "per-sender",
@@ -217,13 +217,10 @@ describe("openclaw-tools: subagents (sessions_spawn allowlist)", () => {
return {};
});

const tool = createOpenClawTools({
const tool = await getSessionsSpawnTool({
agentSessionKey: "main",
agentChannel: "whatsapp",
}).find((candidate) => candidate.name === "sessions_spawn");
if (!tool) {
throw new Error("missing sessions_spawn tool");
}
});

const result = await tool.execute("call10", {
task: "do thing",

@@ -1,16 +1,140 @@
import { beforeEach, describe, expect, it } from "vitest";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { emitAgentEvent } from "../infra/agent-events.js";
import { sleep } from "../utils.js";
import { createOpenClawTools } from "./openclaw-tools.js";
import "./test-helpers/fast-core-tools.js";
import { sleep } from "../utils.js";
import {
getCallGatewayMock,
resetSessionsSpawnConfigOverride,
} from "./openclaw-tools.subagents.sessions-spawn.test-harness.js";
import { resetSubagentRegistryForTests } from "./subagent-registry.js";

vi.mock("./pi-embedded.js", () => ({
isEmbeddedPiRunActive: () => false,
isEmbeddedPiRunStreaming: () => false,
queueEmbeddedPiMessage: () => false,
waitForEmbeddedPiRunEnd: async () => true,
}));

const callGatewayMock = getCallGatewayMock();

type CreateOpenClawTools = (typeof import("./openclaw-tools.js"))["createOpenClawTools"];
type CreateOpenClawToolsOpts = Parameters<CreateOpenClawTools>[0];

async function getSessionsSpawnTool(opts: CreateOpenClawToolsOpts) {
// Dynamic import: ensure harness mocks are installed before tool modules load.
const { createOpenClawTools } = await import("./openclaw-tools.js");
const tool = createOpenClawTools(opts).find((candidate) => candidate.name === "sessions_spawn");
if (!tool) {
throw new Error("missing sessions_spawn tool");
}
return tool;
}

type GatewayRequest = { method?: string; params?: unknown };
type AgentWaitCall = { runId?: string; timeoutMs?: number };

function setupSessionsSpawnGatewayMock(opts: {
includeSessionsList?: boolean;
includeChatHistory?: boolean;
onAgentSubagentSpawn?: (params: unknown) => void;
onSessionsPatch?: (params: unknown) => void;
onSessionsDelete?: (params: unknown) => void;
agentWaitResult?: { status: "ok" | "timeout"; startedAt: number; endedAt: number };
}): {
calls: Array<GatewayRequest>;
waitCalls: Array<AgentWaitCall>;
getChild: () => { runId?: string; sessionKey?: string };
} {
const calls: Array<GatewayRequest> = [];
const waitCalls: Array<AgentWaitCall> = [];
let agentCallCount = 0;
let childRunId: string | undefined;
let childSessionKey: string | undefined;

callGatewayMock.mockImplementation(async (optsUnknown: unknown) => {
const request = optsUnknown as GatewayRequest;
calls.push(request);

if (request.method === "sessions.list" && opts.includeSessionsList) {
return {
sessions: [
{
key: "main",
lastChannel: "whatsapp",
lastTo: "+123",
},
],
};
}

if (request.method === "agent") {
agentCallCount += 1;
const runId = `run-${agentCallCount}`;
const params = request.params as { lane?: string; sessionKey?: string } | undefined;
// Only capture the first agent call (subagent spawn, not main agent trigger)
if (params?.lane === "subagent") {
childRunId = runId;
childSessionKey = params?.sessionKey ?? "";
opts.onAgentSubagentSpawn?.(params);
}
return {
runId,
status: "accepted",
acceptedAt: 1000 + agentCallCount,
};
}

if (request.method === "agent.wait") {
const params = request.params as AgentWaitCall | undefined;
waitCalls.push(params ?? {});
const res = opts.agentWaitResult ?? { status: "ok", startedAt: 1000, endedAt: 2000 };
return {
runId: params?.runId ?? "run-1",
...res,
};
}

if (request.method === "sessions.patch") {
opts.onSessionsPatch?.(request.params);
return { ok: true };
}

if (request.method === "sessions.delete") {
opts.onSessionsDelete?.(request.params);
return { ok: true };
}

if (request.method === "chat.history" && opts.includeChatHistory) {
return {
messages: [
{
role: "assistant",
content: [{ type: "text", text: "done" }],
},
],
};
}

return {};
});

return {
calls,
waitCalls,
getChild: () => ({ runId: childRunId, sessionKey: childSessionKey }),
};
}

const waitFor = async (predicate: () => boolean, timeoutMs = 2000) => {
const start = Date.now();
while (!predicate()) {
if (Date.now() - start > timeoutMs) {
throw new Error(`timed out waiting for condition (timeoutMs=${timeoutMs})`);
}
await sleep(10);
}
};

describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
beforeEach(() => {
resetSessionsSpawnConfigOverride();
@@ -19,84 +143,21 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
it("sessions_spawn runs cleanup flow after subagent completion", async () => {
resetSubagentRegistryForTests();
callGatewayMock.mockReset();
const calls: Array<{ method?: string; params?: unknown }> = [];
let agentCallCount = 0;
let childRunId: string | undefined;
let childSessionKey: string | undefined;
const waitCalls: Array<{ runId?: string; timeoutMs?: number }> = [];
let patchParams: { key?: string; label?: string } = {};
const patchCalls: Array<{ key?: string; label?: string }> = [];

callGatewayMock.mockImplementation(async (opts: unknown) => {
const request = opts as { method?: string; params?: unknown };
calls.push(request);
if (request.method === "sessions.list") {
return {
sessions: [
{
key: "main",
lastChannel: "whatsapp",
lastTo: "+123",
},
],
};
}
if (request.method === "agent") {
agentCallCount += 1;
const runId = `run-${agentCallCount}`;
const params = request.params as {
message?: string;
sessionKey?: string;
lane?: string;
};
// Only capture the first agent call (subagent spawn, not main agent trigger)
if (params?.lane === "subagent") {
childRunId = runId;
childSessionKey = params?.sessionKey ?? "";
}
return {
runId,
status: "accepted",
acceptedAt: 2000 + agentCallCount,
};
}
if (request.method === "agent.wait") {
const params = request.params as { runId?: string; timeoutMs?: number } | undefined;
waitCalls.push(params ?? {});
return {
runId: params?.runId ?? "run-1",
status: "ok",
startedAt: 1000,
endedAt: 2000,
};
}
if (request.method === "sessions.patch") {
const params = request.params as { key?: string; label?: string } | undefined;
patchParams = { key: params?.key, label: params?.label };
return { ok: true };
}
if (request.method === "chat.history") {
return {
messages: [
{
role: "assistant",
content: [{ type: "text", text: "done" }],
},
],
};
}
if (request.method === "sessions.delete") {
return { ok: true };
}
return {};
const ctx = setupSessionsSpawnGatewayMock({
includeSessionsList: true,
includeChatHistory: true,
onSessionsPatch: (params) => {
const rec = params as { key?: string; label?: string } | undefined;
patchCalls.push({ key: rec?.key, label: rec?.label });
},
});

const tool = createOpenClawTools({
const tool = await getSessionsSpawnTool({
agentSessionKey: "main",
agentChannel: "whatsapp",
}).find((candidate) => candidate.name === "sessions_spawn");
if (!tool) {
throw new Error("missing sessions_spawn tool");
}
});

const result = await tool.execute("call2", {
task: "do thing",
@@ -108,11 +169,12 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
runId: "run-1",
});

if (!childRunId) {
const child = ctx.getChild();
if (!child.runId) {
throw new Error("missing child runId");
}
emitAgentEvent({
runId: childRunId,
runId: child.runId,
stream: "lifecycle",
data: {
phase: "end",
@@ -121,18 +183,19 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
},
});
|
||||
|
||||
await sleep(0);
|
||||
await sleep(0);
|
||||
await sleep(0);
|
||||
await waitFor(() => ctx.waitCalls.some((call) => call.runId === child.runId));
|
||||
await waitFor(() => patchCalls.some((call) => call.label === "my-task"));
|
||||
await waitFor(() => ctx.calls.filter((c) => c.method === "agent").length >= 2);
|
||||
|
||||
const childWait = waitCalls.find((call) => call.runId === childRunId);
|
||||
const childWait = ctx.waitCalls.find((call) => call.runId === child.runId);
|
||||
expect(childWait?.timeoutMs).toBe(1000);
|
||||
// Cleanup should patch the label
|
||||
expect(patchParams.key).toBe(childSessionKey);
|
||||
expect(patchParams.label).toBe("my-task");
|
||||
const labelPatch = patchCalls.find((call) => call.label === "my-task");
|
||||
expect(labelPatch?.key).toBe(child.sessionKey);
|
||||
expect(labelPatch?.label).toBe("my-task");
|
||||
|
||||
// Two agent calls: subagent spawn + main agent trigger
|
||||
const agentCalls = calls.filter((c) => c.method === "agent");
|
||||
const agentCalls = ctx.calls.filter((c) => c.method === "agent");
|
||||
expect(agentCalls).toHaveLength(2);
|
||||
|
||||
// First call: subagent spawn
|
||||
@ -145,71 +208,31 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
|
||||
expect(second?.message).toContain("subagent task");
|
||||
|
||||
// No direct send to external channel (main agent handles delivery)
|
||||
const sendCalls = calls.filter((c) => c.method === "send");
|
||||
const sendCalls = ctx.calls.filter((c) => c.method === "send");
|
||||
expect(sendCalls.length).toBe(0);
|
||||
expect(childSessionKey?.startsWith("agent:main:subagent:")).toBe(true);
|
||||
expect(child.sessionKey?.startsWith("agent:main:subagent:")).toBe(true);
|
||||
});
|
||||
|
||||
it("sessions_spawn runs cleanup via lifecycle events", async () => {
|
||||
resetSubagentRegistryForTests();
|
||||
callGatewayMock.mockReset();
|
||||
const calls: Array<{ method?: string; params?: unknown }> = [];
|
||||
let agentCallCount = 0;
|
||||
let deletedKey: string | undefined;
|
||||
let childRunId: string | undefined;
|
||||
let childSessionKey: string | undefined;
|
||||
const waitCalls: Array<{ runId?: string; timeoutMs?: number }> = [];
|
||||
|
||||
callGatewayMock.mockImplementation(async (opts: unknown) => {
|
||||
const request = opts as { method?: string; params?: unknown };
|
||||
calls.push(request);
|
||||
if (request.method === "agent") {
|
||||
agentCallCount += 1;
|
||||
const runId = `run-${agentCallCount}`;
|
||||
const params = request.params as {
|
||||
message?: string;
|
||||
sessionKey?: string;
|
||||
channel?: string;
|
||||
timeout?: number;
|
||||
lane?: string;
|
||||
};
|
||||
if (params?.lane === "subagent") {
|
||||
childRunId = runId;
|
||||
childSessionKey = params?.sessionKey ?? "";
|
||||
expect(params?.channel).toBe("discord");
|
||||
expect(params?.timeout).toBe(1);
|
||||
}
|
||||
return {
|
||||
runId,
|
||||
status: "accepted",
|
||||
acceptedAt: 1000 + agentCallCount,
|
||||
};
|
||||
}
|
||||
if (request.method === "agent.wait") {
|
||||
const params = request.params as { runId?: string; timeoutMs?: number } | undefined;
|
||||
waitCalls.push(params ?? {});
|
||||
return {
|
||||
runId: params?.runId ?? "run-1",
|
||||
status: "ok",
|
||||
startedAt: 1000,
|
||||
endedAt: 2000,
|
||||
};
|
||||
}
|
||||
if (request.method === "sessions.delete") {
|
||||
const params = request.params as { key?: string } | undefined;
|
||||
deletedKey = params?.key;
|
||||
return { ok: true };
|
||||
}
|
||||
return {};
|
||||
const ctx = setupSessionsSpawnGatewayMock({
|
||||
onAgentSubagentSpawn: (params) => {
|
||||
const rec = params as { channel?: string; timeout?: number } | undefined;
|
||||
expect(rec?.channel).toBe("discord");
|
||||
expect(rec?.timeout).toBe(1);
|
||||
},
|
||||
onSessionsDelete: (params) => {
|
||||
const rec = params as { key?: string } | undefined;
|
||||
deletedKey = rec?.key;
|
||||
},
|
||||
});
|
||||
|
||||
const tool = createOpenClawTools({
|
||||
const tool = await getSessionsSpawnTool({
|
||||
agentSessionKey: "discord:group:req",
|
||||
agentChannel: "discord",
|
||||
}).find((candidate) => candidate.name === "sessions_spawn");
|
||||
if (!tool) {
|
||||
throw new Error("missing sessions_spawn tool");
|
||||
}
|
||||
});
|
||||
|
||||
const result = await tool.execute("call1", {
|
||||
task: "do thing",
|
||||
@ -221,13 +244,14 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
|
||||
runId: "run-1",
|
||||
});
|
||||
|
||||
if (!childRunId) {
|
||||
const child = ctx.getChild();
|
||||
if (!child.runId) {
|
||||
throw new Error("missing child runId");
|
||||
}
|
||||
vi.useFakeTimers();
|
||||
try {
|
||||
emitAgentEvent({
|
||||
runId: childRunId,
|
||||
runId: child.runId,
|
||||
stream: "lifecycle",
|
||||
data: {
|
||||
phase: "end",
|
||||
@ -241,10 +265,10 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
|
||||
vi.useRealTimers();
|
||||
}
|
||||
|
||||
const childWait = waitCalls.find((call) => call.runId === childRunId);
|
||||
const childWait = ctx.waitCalls.find((call) => call.runId === child.runId);
|
||||
expect(childWait?.timeoutMs).toBe(1000);
|
||||
|
||||
const agentCalls = calls.filter((call) => call.method === "agent");
|
||||
const agentCalls = ctx.calls.filter((call) => call.method === "agent");
|
||||
expect(agentCalls).toHaveLength(2);
|
||||
|
||||
const first = agentCalls[0]?.params as
|
||||
@ -259,7 +283,7 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
|
||||
expect(first?.deliver).toBe(false);
|
||||
expect(first?.channel).toBe("discord");
|
||||
expect(first?.sessionKey?.startsWith("agent:main:subagent:")).toBe(true);
|
||||
expect(childSessionKey?.startsWith("agent:main:subagent:")).toBe(true);
|
||||
expect(child.sessionKey?.startsWith("agent:main:subagent:")).toBe(true);
|
||||
|
||||
const second = agentCalls[1]?.params as
|
||||
| {
|
||||
@ -272,7 +296,7 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
|
||||
expect(second?.deliver).toBe(true);
|
||||
expect(second?.message).toContain("subagent task");
|
||||
|
||||
const sendCalls = calls.filter((c) => c.method === "send");
|
||||
const sendCalls = ctx.calls.filter((c) => c.method === "send");
|
||||
expect(sendCalls.length).toBe(0);
|
||||
|
||||
expect(deletedKey?.startsWith("agent:main:subagent:")).toBe(true);
|
||||
@ -281,74 +305,25 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
|
||||
it("sessions_spawn deletes session when cleanup=delete via agent.wait", async () => {
|
||||
resetSubagentRegistryForTests();
|
||||
callGatewayMock.mockReset();
|
||||
const calls: Array<{ method?: string; params?: unknown }> = [];
|
||||
let agentCallCount = 0;
|
||||
let deletedKey: string | undefined;
|
||||
let childRunId: string | undefined;
|
||||
let childSessionKey: string | undefined;
|
||||
const waitCalls: Array<{ runId?: string; timeoutMs?: number }> = [];
|
||||
|
||||
callGatewayMock.mockImplementation(async (opts: unknown) => {
|
||||
const request = opts as { method?: string; params?: unknown };
|
||||
calls.push(request);
|
||||
if (request.method === "agent") {
|
||||
agentCallCount += 1;
|
||||
const runId = `run-${agentCallCount}`;
|
||||
const params = request.params as {
|
||||
message?: string;
|
||||
sessionKey?: string;
|
||||
channel?: string;
|
||||
timeout?: number;
|
||||
lane?: string;
|
||||
};
|
||||
// Only capture the first agent call (subagent spawn, not main agent trigger)
|
||||
if (params?.lane === "subagent") {
|
||||
childRunId = runId;
|
||||
childSessionKey = params?.sessionKey ?? "";
|
||||
expect(params?.channel).toBe("discord");
|
||||
expect(params?.timeout).toBe(1);
|
||||
}
|
||||
return {
|
||||
runId,
|
||||
status: "accepted",
|
||||
acceptedAt: 2000 + agentCallCount,
|
||||
};
|
||||
}
|
||||
if (request.method === "agent.wait") {
|
||||
const params = request.params as { runId?: string; timeoutMs?: number } | undefined;
|
||||
waitCalls.push(params ?? {});
|
||||
return {
|
||||
runId: params?.runId ?? "run-1",
|
||||
status: "ok",
|
||||
startedAt: 3000,
|
||||
endedAt: 4000,
|
||||
};
|
||||
}
|
||||
if (request.method === "chat.history") {
|
||||
return {
|
||||
messages: [
|
||||
{
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "done" }],
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
if (request.method === "sessions.delete") {
|
||||
const params = request.params as { key?: string } | undefined;
|
||||
deletedKey = params?.key;
|
||||
return { ok: true };
|
||||
}
|
||||
return {};
|
||||
const ctx = setupSessionsSpawnGatewayMock({
|
||||
includeChatHistory: true,
|
||||
onAgentSubagentSpawn: (params) => {
|
||||
const rec = params as { channel?: string; timeout?: number } | undefined;
|
||||
expect(rec?.channel).toBe("discord");
|
||||
expect(rec?.timeout).toBe(1);
|
||||
},
|
||||
onSessionsDelete: (params) => {
|
||||
const rec = params as { key?: string } | undefined;
|
||||
deletedKey = rec?.key;
|
||||
},
|
||||
agentWaitResult: { status: "ok", startedAt: 3000, endedAt: 4000 },
|
||||
});
|
||||
|
||||
const tool = createOpenClawTools({
|
||||
const tool = await getSessionsSpawnTool({
|
||||
agentSessionKey: "discord:group:req",
|
||||
agentChannel: "discord",
|
||||
}).find((candidate) => candidate.name === "sessions_spawn");
|
||||
if (!tool) {
|
||||
throw new Error("missing sessions_spawn tool");
|
||||
}
|
||||
});
|
||||
|
||||
const result = await tool.execute("call1b", {
|
||||
task: "do thing",
|
||||
@ -360,16 +335,20 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
|
||||
runId: "run-1",
|
||||
});
|
||||
|
||||
await sleep(0);
|
||||
await sleep(0);
|
||||
await sleep(0);
|
||||
const child = ctx.getChild();
|
||||
if (!child.runId) {
|
||||
throw new Error("missing child runId");
|
||||
}
|
||||
await waitFor(() => ctx.waitCalls.some((call) => call.runId === child.runId));
|
||||
await waitFor(() => ctx.calls.filter((call) => call.method === "agent").length >= 2);
|
||||
await waitFor(() => Boolean(deletedKey));
|
||||
|
||||
const childWait = waitCalls.find((call) => call.runId === childRunId);
|
||||
const childWait = ctx.waitCalls.find((call) => call.runId === child.runId);
|
||||
expect(childWait?.timeoutMs).toBe(1000);
|
||||
expect(childSessionKey?.startsWith("agent:main:subagent:")).toBe(true);
|
||||
expect(child.sessionKey?.startsWith("agent:main:subagent:")).toBe(true);
|
||||
|
||||
// Two agent calls: subagent spawn + main agent trigger
|
||||
const agentCalls = calls.filter((call) => call.method === "agent");
|
||||
const agentCalls = ctx.calls.filter((call) => call.method === "agent");
|
||||
expect(agentCalls).toHaveLength(2);
|
||||
|
||||
// First call: subagent spawn
|
||||
@ -382,7 +361,7 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
|
||||
expect(second?.deliver).toBe(true);
|
||||
|
||||
// No direct send to external channel (main agent handles delivery)
|
||||
const sendCalls = calls.filter((c) => c.method === "send");
|
||||
const sendCalls = ctx.calls.filter((c) => c.method === "send");
|
||||
expect(sendCalls.length).toBe(0);
|
||||
|
||||
// Session should be deleted
|
||||
@ -428,13 +407,10 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
|
||||
return {};
|
||||
});
|
||||
|
||||
const tool = createOpenClawTools({
|
||||
const tool = await getSessionsSpawnTool({
|
||||
agentSessionKey: "discord:group:req",
|
||||
agentChannel: "discord",
|
||||
}).find((candidate) => candidate.name === "sessions_spawn");
|
||||
if (!tool) {
|
||||
throw new Error("missing sessions_spawn tool");
|
||||
}
|
||||
});
|
||||
|
||||
const result = await tool.execute("call-timeout", {
|
||||
task: "do thing",
|
||||
@ -446,9 +422,7 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
|
||||
runId: "run-1",
|
||||
});
|
||||
|
||||
await sleep(0);
|
||||
await sleep(0);
|
||||
await sleep(0);
|
||||
await waitFor(() => calls.filter((call) => call.method === "agent").length >= 2);
|
||||
|
||||
const mainAgentCall = calls
|
||||
.filter((call) => call.method === "agent")
|
||||
@ -500,14 +474,11 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
|
||||
return {};
|
||||
});
|
||||
|
||||
const tool = createOpenClawTools({
|
||||
const tool = await getSessionsSpawnTool({
|
||||
agentSessionKey: "main",
|
||||
agentChannel: "whatsapp",
|
||||
agentAccountId: "kev",
|
||||
}).find((candidate) => candidate.name === "sessions_spawn");
|
||||
if (!tool) {
|
||||
throw new Error("missing sessions_spawn tool");
|
||||
}
|
||||
});
|
||||
|
||||
const result = await tool.execute("call-announce-account", {
|
||||
task: "do thing",
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
import { beforeEach, describe, expect, it } from "vitest";
|
||||
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js";
|
||||
import "./test-helpers/fast-core-tools.js";
|
||||
import { createOpenClawTools } from "./openclaw-tools.js";
|
||||
import {
|
||||
getCallGatewayMock,
|
||||
resetSessionsSpawnConfigOverride,
|
||||
@ -11,6 +10,19 @@ import { resetSubagentRegistryForTests } from "./subagent-registry.js";
|
||||
|
||||
const callGatewayMock = getCallGatewayMock();
|
||||
|
||||
type CreateOpenClawTools = (typeof import("./openclaw-tools.js"))["createOpenClawTools"];
|
||||
type CreateOpenClawToolsOpts = Parameters<CreateOpenClawTools>[0];
|
||||
|
||||
async function getSessionsSpawnTool(opts: CreateOpenClawToolsOpts) {
|
||||
// Dynamic import: ensure harness mocks are installed before tool modules load.
|
||||
const { createOpenClawTools } = await import("./openclaw-tools.js");
|
||||
const tool = createOpenClawTools(opts).find((candidate) => candidate.name === "sessions_spawn");
|
||||
if (!tool) {
|
||||
throw new Error("missing sessions_spawn tool");
|
||||
}
|
||||
return tool;
|
||||
}
|
||||
|
||||
describe("openclaw-tools: subagents (sessions_spawn model + thinking)", () => {
|
||||
beforeEach(() => {
|
||||
resetSessionsSpawnConfigOverride();
|
||||
@ -46,13 +58,10 @@ describe("openclaw-tools: subagents (sessions_spawn model + thinking)", () => {
|
||||
return {};
|
||||
});
|
||||
|
||||
const tool = createOpenClawTools({
|
||||
const tool = await getSessionsSpawnTool({
|
||||
agentSessionKey: "discord:group:req",
|
||||
agentChannel: "discord",
|
||||
}).find((candidate) => candidate.name === "sessions_spawn");
|
||||
if (!tool) {
|
||||
throw new Error("missing sessions_spawn tool");
|
||||
}
|
||||
});
|
||||
|
||||
const result = await tool.execute("call3", {
|
||||
task: "do thing",
|
||||
@ -93,13 +102,10 @@ describe("openclaw-tools: subagents (sessions_spawn model + thinking)", () => {
|
||||
return {};
|
||||
});
|
||||
|
||||
const tool = createOpenClawTools({
|
||||
const tool = await getSessionsSpawnTool({
|
||||
agentSessionKey: "discord:group:req",
|
||||
agentChannel: "discord",
|
||||
}).find((candidate) => candidate.name === "sessions_spawn");
|
||||
if (!tool) {
|
||||
throw new Error("missing sessions_spawn tool");
|
||||
}
|
||||
});
|
||||
|
||||
const result = await tool.execute("call-thinking", {
|
||||
task: "do thing",
|
||||
@ -126,13 +132,10 @@ describe("openclaw-tools: subagents (sessions_spawn model + thinking)", () => {
|
||||
return {};
|
||||
});
|
||||
|
||||
const tool = createOpenClawTools({
|
||||
const tool = await getSessionsSpawnTool({
|
||||
agentSessionKey: "discord:group:req",
|
||||
agentChannel: "discord",
|
||||
}).find((candidate) => candidate.name === "sessions_spawn");
|
||||
if (!tool) {
|
||||
throw new Error("missing sessions_spawn tool");
|
||||
}
|
||||
});
|
||||
|
||||
const result = await tool.execute("call-thinking-invalid", {
|
||||
task: "do thing",
|
||||
@ -166,13 +169,10 @@ describe("openclaw-tools: subagents (sessions_spawn model + thinking)", () => {
|
||||
return {};
|
||||
});
|
||||
|
||||
const tool = createOpenClawTools({
|
||||
const tool = await getSessionsSpawnTool({
|
||||
agentSessionKey: "agent:main:main",
|
||||
agentChannel: "discord",
|
||||
}).find((candidate) => candidate.name === "sessions_spawn");
|
||||
if (!tool) {
|
||||
throw new Error("missing sessions_spawn tool");
|
||||
}
|
||||
});
|
||||
|
||||
const result = await tool.execute("call-default-model", {
|
||||
task: "do thing",
|
||||
@ -207,13 +207,10 @@ describe("openclaw-tools: subagents (sessions_spawn model + thinking)", () => {
|
||||
return {};
|
||||
});
|
||||
|
||||
const tool = createOpenClawTools({
|
||||
const tool = await getSessionsSpawnTool({
|
||||
agentSessionKey: "agent:main:main",
|
||||
agentChannel: "discord",
|
||||
}).find((candidate) => candidate.name === "sessions_spawn");
|
||||
if (!tool) {
|
||||
throw new Error("missing sessions_spawn tool");
|
||||
}
|
||||
});
|
||||
|
||||
const result = await tool.execute("call-runtime-default-model", {
|
||||
task: "do thing",
|
||||
@ -255,13 +252,10 @@ describe("openclaw-tools: subagents (sessions_spawn model + thinking)", () => {
|
||||
return {};
|
||||
});
|
||||
|
||||
const tool = createOpenClawTools({
|
||||
const tool = await getSessionsSpawnTool({
|
||||
agentSessionKey: "agent:research:main",
|
||||
agentChannel: "discord",
|
||||
}).find((candidate) => candidate.name === "sessions_spawn");
|
||||
if (!tool) {
|
||||
throw new Error("missing sessions_spawn tool");
|
||||
}
|
||||
});
|
||||
|
||||
const result = await tool.execute("call-agent-model", {
|
||||
task: "do thing",
|
||||
@ -271,7 +265,9 @@ describe("openclaw-tools: subagents (sessions_spawn model + thinking)", () => {
|
||||
modelApplied: true,
|
||||
});
|
||||
|
||||
const patchCall = calls.find((call) => call.method === "sessions.patch");
|
||||
const patchCall = calls.find(
|
||||
(call) => call.method === "sessions.patch" && (call.params as { model?: string })?.model,
|
||||
);
|
||||
expect(patchCall?.params).toMatchObject({
|
||||
model: "opencode/claude",
|
||||
});
|
||||
@ -287,7 +283,11 @@ describe("openclaw-tools: subagents (sessions_spawn model + thinking)", () => {
|
||||
const request = opts as { method?: string; params?: unknown };
|
||||
calls.push(request);
|
||||
if (request.method === "sessions.patch") {
|
||||
throw new Error("invalid model: bad-model");
|
||||
const model = (request.params as { model?: unknown } | undefined)?.model;
|
||||
if (model === "bad-model") {
|
||||
throw new Error("invalid model: bad-model");
|
||||
}
|
||||
return { ok: true };
|
||||
}
|
||||
if (request.method === "agent") {
|
||||
agentCallCount += 1;
|
||||
@ -307,13 +307,10 @@ describe("openclaw-tools: subagents (sessions_spawn model + thinking)", () => {
|
||||
return {};
|
||||
});
|
||||
|
||||
const tool = createOpenClawTools({
|
||||
const tool = await getSessionsSpawnTool({
|
||||
agentSessionKey: "main",
|
||||
agentChannel: "whatsapp",
|
||||
}).find((candidate) => candidate.name === "sessions_spawn");
|
||||
if (!tool) {
|
||||
throw new Error("missing sessions_spawn tool");
|
||||
}
|
||||
});
|
||||
|
||||
const result = await tool.execute("call4", {
|
||||
task: "do thing",
|
||||
@ -345,13 +342,10 @@ describe("openclaw-tools: subagents (sessions_spawn model + thinking)", () => {
|
||||
return {};
|
||||
});
|
||||
|
||||
const tool = createOpenClawTools({
|
||||
const tool = await getSessionsSpawnTool({
|
||||
agentSessionKey: "main",
|
||||
agentChannel: "whatsapp",
|
||||
}).find((candidate) => candidate.name === "sessions_spawn");
|
||||
if (!tool) {
|
||||
throw new Error("missing sessions_spawn tool");
|
||||
}
|
||||
});
|
||||
|
||||
const result = await tool.execute("call5", {
|
||||
task: "do thing",
|
||||
|
||||
@ -5,31 +5,6 @@ vi.mock("../../utils.js", () => ({
|
||||
resolveUserPath: vi.fn((p: string) => p),
|
||||
}));
|
||||
|
||||
vi.mock("../auth-profiles.js", () => ({
|
||||
markAuthProfileFailure: vi.fn(async () => {}),
|
||||
markAuthProfileGood: vi.fn(async () => {}),
|
||||
markAuthProfileUsed: vi.fn(async () => {}),
|
||||
}));
|
||||
|
||||
vi.mock("../usage.js", () => ({
|
||||
normalizeUsage: vi.fn((usage?: unknown) =>
|
||||
usage && typeof usage === "object" ? usage : undefined,
|
||||
),
|
||||
derivePromptTokens: vi.fn(
|
||||
(usage?: { input?: number; cacheRead?: number; cacheWrite?: number }) => {
|
||||
if (!usage) {
|
||||
return undefined;
|
||||
}
|
||||
const input = usage.input ?? 0;
|
||||
const cacheRead = usage.cacheRead ?? 0;
|
||||
const cacheWrite = usage.cacheWrite ?? 0;
|
||||
const sum = input + cacheRead + cacheWrite;
|
||||
return sum > 0 ? sum : undefined;
|
||||
},
|
||||
),
|
||||
hasNonzeroUsage: vi.fn(() => false),
|
||||
}));
|
||||
|
||||
vi.mock("../pi-embedded-helpers.js", async () => {
|
||||
return {
|
||||
isCompactionFailureError: (msg?: string) => {
|
||||
|
||||
@ -1,5 +1,31 @@
|
||||
import { vi } from "vitest";
|
||||
|
||||
vi.mock("../auth-profiles.js", () => ({
|
||||
isProfileInCooldown: vi.fn(() => false),
|
||||
markAuthProfileFailure: vi.fn(async () => {}),
|
||||
markAuthProfileGood: vi.fn(async () => {}),
|
||||
markAuthProfileUsed: vi.fn(async () => {}),
|
||||
}));
|
||||
|
||||
vi.mock("../usage.js", () => ({
|
||||
normalizeUsage: vi.fn((usage?: unknown) =>
|
||||
usage && typeof usage === "object" ? usage : undefined,
|
||||
),
|
||||
derivePromptTokens: vi.fn(
|
||||
(usage?: { input?: number; cacheRead?: number; cacheWrite?: number }) => {
|
||||
if (!usage) {
|
||||
return undefined;
|
||||
}
|
||||
const input = usage.input ?? 0;
|
||||
const cacheRead = usage.cacheRead ?? 0;
|
||||
const cacheWrite = usage.cacheWrite ?? 0;
|
||||
const sum = input + cacheRead + cacheWrite;
|
||||
return sum > 0 ? sum : undefined;
|
||||
},
|
||||
),
|
||||
hasNonzeroUsage: vi.fn(() => false),
|
||||
}));
|
||||
|
||||
vi.mock("./run/attempt.js", () => ({
|
||||
runEmbeddedAttempt: vi.fn(),
|
||||
}));
|
||||
|
||||
@ -1,31 +1,6 @@
|
||||
import "./run.overflow-compaction.mocks.shared.js";
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
vi.mock("../auth-profiles.js", () => ({
|
||||
isProfileInCooldown: vi.fn(() => false),
|
||||
markAuthProfileFailure: vi.fn(async () => {}),
|
||||
markAuthProfileGood: vi.fn(async () => {}),
|
||||
markAuthProfileUsed: vi.fn(async () => {}),
|
||||
}));
|
||||
|
||||
vi.mock("../usage.js", () => ({
|
||||
normalizeUsage: vi.fn((usage?: unknown) =>
|
||||
usage && typeof usage === "object" ? usage : undefined,
|
||||
),
|
||||
derivePromptTokens: vi.fn(
|
||||
(usage?: { input?: number; cacheRead?: number; cacheWrite?: number }) => {
|
||||
if (!usage) {
|
||||
return undefined;
|
||||
}
|
||||
const input = usage.input ?? 0;
|
||||
const cacheRead = usage.cacheRead ?? 0;
|
||||
const cacheWrite = usage.cacheWrite ?? 0;
|
||||
const sum = input + cacheRead + cacheWrite;
|
||||
return sum > 0 ? sum : undefined;
|
||||
},
|
||||
),
|
||||
}));
|
||||
|
||||
vi.mock("../workspace-run.js", () => ({
|
||||
resolveRunWorkspaceDir: vi.fn((params: { workspaceDir: string }) => ({
|
||||
workspaceDir: params.workspaceDir,
|
||||
|
||||
@ -968,6 +968,32 @@ export async function runEmbeddedAttempt(
|
||||
);
|
||||
}
|
||||
|
||||
if (hookRunner?.hasHooks("llm_input")) {
|
||||
hookRunner
|
||||
.runLlmInput(
|
||||
{
|
||||
runId: params.runId,
|
||||
sessionId: params.sessionId,
|
||||
provider: params.provider,
|
||||
model: params.modelId,
|
||||
systemPrompt: systemPromptText,
|
||||
prompt: effectivePrompt,
|
||||
historyMessages: activeSession.messages,
|
||||
imagesCount: imageResult.images.length,
|
||||
},
|
||||
{
|
||||
agentId: hookAgentId,
|
||||
sessionKey: params.sessionKey,
|
||||
sessionId: params.sessionId,
|
||||
workspaceDir: params.workspaceDir,
|
||||
messageProvider: params.messageProvider ?? undefined,
|
||||
},
|
||||
)
|
||||
.catch((err) => {
|
||||
log.warn(`llm_input hook failed: ${String(err)}`);
|
||||
});
|
||||
}
|
||||
|
||||
// Only pass images option if there are actually images to pass
|
||||
// This avoids potential issues with models that don't expect the images parameter
|
||||
if (imageResult.images.length > 0) {
|
||||
@ -1117,6 +1143,31 @@ export async function runEmbeddedAttempt(
|
||||
)
|
||||
.map((entry) => ({ toolName: entry.toolName, meta: entry.meta }));
|
||||
|
||||
if (hookRunner?.hasHooks("llm_output")) {
|
||||
hookRunner
|
||||
.runLlmOutput(
|
||||
{
|
||||
runId: params.runId,
|
||||
sessionId: params.sessionId,
|
||||
provider: params.provider,
|
||||
model: params.modelId,
|
||||
assistantTexts,
|
||||
lastAssistant,
|
||||
usage: getUsageTotals(),
|
||||
},
|
||||
{
|
||||
agentId: hookAgentId,
|
||||
sessionKey: params.sessionKey,
|
||||
sessionId: params.sessionId,
|
||||
workspaceDir: params.workspaceDir,
|
||||
messageProvider: params.messageProvider ?? undefined,
|
||||
},
|
||||
)
|
||||
.catch((err) => {
|
||||
log.warn(`llm_output hook failed: ${String(err)}`);
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
aborted,
|
||||
timedOut,
|
||||
|
||||
28
src/agents/pi-embedded-subscribe.e2e-harness.ts
Normal file
28
src/agents/pi-embedded-subscribe.e2e-harness.ts
Normal file
@ -0,0 +1,28 @@
|
||||
type SubscribeEmbeddedPiSession =
|
||||
typeof import("./pi-embedded-subscribe.js").subscribeEmbeddedPiSession;
|
||||
type PiSession = Parameters<SubscribeEmbeddedPiSession>[0]["session"];
|
||||
|
||||
export function createStubSessionHarness(): {
|
||||
session: PiSession;
|
||||
emit: (evt: unknown) => void;
|
||||
} {
|
||||
let handler: ((evt: unknown) => void) | undefined;
|
||||
const session = {
|
||||
subscribe: (fn: (evt: unknown) => void) => {
|
||||
handler = fn;
|
||||
return () => {};
|
||||
},
|
||||
} as unknown as PiSession;
|
||||
|
||||
return { session, emit: (evt: unknown) => handler?.(evt) };
|
||||
}
|
||||
|
||||
export function extractAgentEventPayloads(calls: Array<unknown[]>): Array<Record<string, unknown>> {
|
||||
return calls
|
||||
.map((call) => {
|
||||
const first = call?.[0] as { data?: unknown } | undefined;
|
||||
const data = first?.data;
|
||||
return data && typeof data === "object" ? (data as Record<string, unknown>) : undefined;
|
||||
})
|
||||
.filter((value): value is Record<string, unknown> => Boolean(value));
|
||||
}
|
||||
@ -8,13 +8,6 @@ type StubSession = {
|
||||
type SessionEventHandler = (evt: unknown) => void;
|
||||
|
||||
describe("subscribeEmbeddedPiSession", () => {
|
||||
const _THINKING_TAG_CASES = [
|
||||
{ tag: "think", open: "<think>", close: "</think>" },
|
||||
{ tag: "thinking", open: "<thinking>", close: "</thinking>" },
|
||||
{ tag: "thought", open: "<thought>", close: "</thought>" },
|
||||
{ tag: "antthinking", open: "<antthinking>", close: "</antthinking>" },
|
||||
] as const;
|
||||
|
||||
it("calls onBlockReplyFlush before tool_execution_start to preserve message boundaries", () => {
|
||||
let handler: SessionEventHandler | undefined;
|
||||
const session: StubSession = {
|
||||
|
||||
@ -6,13 +6,6 @@ type StubSession = {
|
||||
};
|
||||
|
||||
describe("subscribeEmbeddedPiSession", () => {
|
||||
const _THINKING_TAG_CASES = [
|
||||
{ tag: "think", open: "<think>", close: "</think>" },
|
||||
{ tag: "thinking", open: "<thinking>", close: "</thinking>" },
|
||||
{ tag: "thought", open: "<thought>", close: "</thought>" },
|
||||
{ tag: "antthinking", open: "<antthinking>", close: "</antthinking>" },
|
||||
] as const;
|
||||
|
||||
function setupTextEndSubscription() {
|
||||
let handler: ((evt: unknown) => void) | undefined;
|
||||
const session: StubSession = {
|
||||
|
||||
@ -8,13 +8,6 @@ type StubSession = {
|
||||
type SessionEventHandler = (evt: unknown) => void;
|
||||
|
||||
describe("subscribeEmbeddedPiSession", () => {
|
||||
const _THINKING_TAG_CASES = [
|
||||
{ tag: "think", open: "<think>", close: "</think>" },
|
||||
{ tag: "thinking", open: "<thinking>", close: "</thinking>" },
|
||||
{ tag: "thought", open: "<thought>", close: "</thought>" },
|
||||
{ tag: "antthinking", open: "<antthinking>", close: "</antthinking>" },
|
||||
] as const;
|
||||
|
||||
it("does not call onBlockReplyFlush when callback is not provided", () => {
|
||||
let handler: SessionEventHandler | undefined;
|
||||
const session: StubSession = {
|
||||
|
||||
@ -6,13 +6,6 @@ type StubSession = {
|
||||
};
|
||||
|
||||
describe("subscribeEmbeddedPiSession", () => {
|
||||
const _THINKING_TAG_CASES = [
|
||||
{ tag: "think", open: "<think>", close: "</think>" },
|
||||
{ tag: "thinking", open: "<thinking>", close: "</thinking>" },
|
||||
{ tag: "thought", open: "<thought>", close: "</thought>" },
|
||||
{ tag: "antthinking", open: "<antthinking>", close: "</antthinking>" },
|
||||
] as const;
|
||||
|
||||
it("does not duplicate when text_end repeats full content", () => {
|
||||
let handler: ((evt: unknown) => void) | undefined;
|
||||
const session: StubSession = {
|
||||
|
||||
@ -1,40 +1,22 @@
|
||||
import type { AssistantMessage } from "@mariozechner/pi-ai";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { createStubSessionHarness } from "./pi-embedded-subscribe.e2e-harness.js";
|
||||
import { subscribeEmbeddedPiSession } from "./pi-embedded-subscribe.js";
|
||||
|
||||
type StubSession = {
|
||||
subscribe: (fn: (evt: unknown) => void) => () => void;
|
||||
};
|
||||
|
||||
type SessionEventHandler = (evt: unknown) => void;
|
||||
|
||||
describe("subscribeEmbeddedPiSession", () => {
|
||||
const _THINKING_TAG_CASES = [
|
||||
{ tag: "think", open: "<think>", close: "</think>" },
|
||||
{ tag: "thinking", open: "<thinking>", close: "</thinking>" },
|
||||
{ tag: "thought", open: "<thought>", close: "</thought>" },
|
||||
{ tag: "antthinking", open: "<antthinking>", close: "</antthinking>" },
|
||||
] as const;
|
||||
|
||||
it("does not emit duplicate block replies when text_end repeats", () => {
|
||||
let handler: SessionEventHandler | undefined;
|
||||
const session: StubSession = {
|
||||
subscribe: (fn) => {
|
||||
handler = fn;
|
||||
return () => {};
|
||||
},
|
||||
};
|
||||
const { session, emit } = createStubSessionHarness();
|
||||
|
||||
const onBlockReply = vi.fn();
|
||||
|
||||
const subscription = subscribeEmbeddedPiSession({
|
||||
session: session as unknown as Parameters<typeof subscribeEmbeddedPiSession>[0]["session"],
|
||||
session,
|
||||
runId: "run",
|
||||
onBlockReply,
|
||||
blockReplyBreak: "text_end",
|
||||
});
|
||||
|
||||
handler?.({
|
||||
emit({
|
||||
type: "message_update",
|
||||
message: { role: "assistant" },
|
||||
assistantMessageEvent: {
|
||||
@ -43,7 +25,7 @@ describe("subscribeEmbeddedPiSession", () => {
|
||||
},
|
||||
});
|
||||
|
||||
handler?.({
|
||||
emit({
|
||||
type: "message_update",
|
||||
message: { role: "assistant" },
|
||||
assistantMessageEvent: {
|
||||
@ -51,7 +33,7 @@ describe("subscribeEmbeddedPiSession", () => {
|
||||
},
|
||||
});
|
||||
|
||||
handler?.({
|
||||
emit({
|
||||
type: "message_update",
|
||||
message: { role: "assistant" },
|
||||
assistantMessageEvent: {
|
||||
@ -63,16 +45,10 @@ describe("subscribeEmbeddedPiSession", () => {
|
||||
expect(subscription.assistantTexts).toEqual(["Hello block"]);
|
||||
});
|
||||
it("does not duplicate assistantTexts when message_end repeats", () => {
|
||||
let handler: SessionEventHandler | undefined;
|
||||
const session: StubSession = {
|
||||
subscribe: (fn) => {
|
||||
handler = fn;
|
||||
return () => {};
|
||||
},
|
||||
};
|
||||
const { session, emit } = createStubSessionHarness();
|
||||
|
||||
const subscription = subscribeEmbeddedPiSession({
|
||||
session: session as unknown as Parameters<typeof subscribeEmbeddedPiSession>[0]["session"],
|
||||
session,
|
||||
runId: "run",
|
||||
});
|
||||
|
||||
@ -81,22 +57,16 @@ describe("subscribeEmbeddedPiSession", () => {
|
||||
content: [{ type: "text", text: "Hello world" }],
|
||||
} as AssistantMessage;
|
||||
|
||||
handler?.({ type: "message_end", message: assistantMessage });
|
||||
handler?.({ type: "message_end", message: assistantMessage });
|
||||
emit({ type: "message_end", message: assistantMessage });
|
||||
emit({ type: "message_end", message: assistantMessage });
|
||||
|
||||
expect(subscription.assistantTexts).toEqual(["Hello world"]);
|
||||
});
|
||||
it("does not duplicate assistantTexts when message_end repeats with trailing whitespace changes", () => {
|
||||
let handler: SessionEventHandler | undefined;
|
||||
const session: StubSession = {
|
||||
subscribe: (fn) => {
|
||||
handler = fn;
|
||||
return () => {};
|
||||
},
|
||||
};
|
||||
const { session, emit } = createStubSessionHarness();
|
||||
|
||||
const subscription = subscribeEmbeddedPiSession({
|
||||
session: session as unknown as Parameters<typeof subscribeEmbeddedPiSession>[0]["session"],
|
||||
session,
|
||||
runId: "run",
|
||||
});
|
||||
|
||||
@ -110,22 +80,16 @@ describe("subscribeEmbeddedPiSession", () => {
|
||||
content: [{ type: "text", text: "Hello world" }],
|
||||
} as AssistantMessage;
|
||||
|
||||
handler?.({ type: "message_end", message: assistantMessageWithNewline });
|
||||
handler?.({ type: "message_end", message: assistantMessageTrimmed });
|
||||
emit({ type: "message_end", message: assistantMessageWithNewline });
|
||||
emit({ type: "message_end", message: assistantMessageTrimmed });
|
||||
|
||||
expect(subscription.assistantTexts).toEqual(["Hello world"]);
|
||||
});
|
||||
it("does not duplicate assistantTexts when message_end repeats with reasoning blocks", () => {
|
||||
let handler: SessionEventHandler | undefined;
|
||||
const session: StubSession = {
|
||||
subscribe: (fn) => {
|
||||
handler = fn;
|
||||
return () => {};
|
||||
},
|
||||
};
|
||||
const { session, emit } = createStubSessionHarness();
|
||||
|
||||
const subscription = subscribeEmbeddedPiSession({
|
||||
session: session as unknown as Parameters<typeof subscribeEmbeddedPiSession>[0]["session"],
|
||||
session,
|
||||
runId: "run",
|
||||
reasoningMode: "on",
|
||||
});
|
||||
@ -138,37 +102,31 @@ describe("subscribeEmbeddedPiSession", () => {
|
||||
],
|
||||
} as AssistantMessage;
|
||||
|
||||
handler?.({ type: "message_end", message: assistantMessage });
|
||||
handler?.({ type: "message_end", message: assistantMessage });
|
||||
emit({ type: "message_end", message: assistantMessage });
|
||||
emit({ type: "message_end", message: assistantMessage });
|
||||
|
||||
expect(subscription.assistantTexts).toEqual(["Hello world"]);
|
||||
});
|
||||
it("populates assistantTexts for non-streaming models with chunking enabled", () => {
|
||||
// Non-streaming models (e.g. zai/glm-4.7): no text_delta events; message_end
|
||||
// must still populate assistantTexts so providers can deliver a final reply.
|
||||
let handler: SessionEventHandler | undefined;
|
||||
const session: StubSession = {
|
||||
subscribe: (fn) => {
|
||||
handler = fn;
|
||||
return () => {};
|
||||
},
|
||||
};
|
||||
const { session, emit } = createStubSessionHarness();
|
||||
|
||||
const subscription = subscribeEmbeddedPiSession({
|
||||
session: session as unknown as Parameters<typeof subscribeEmbeddedPiSession>[0]["session"],
|
||||
session,
|
||||
runId: "run",
|
||||
blockReplyChunking: { minChars: 50, maxChars: 200 }, // Chunking enabled
|
||||
});
|
||||
|
||||
// Simulate non-streaming model: only message_start and message_end, no text_delta
|
||||
handler?.({ type: "message_start", message: { role: "assistant" } });
|
||||
emit({ type: "message_start", message: { role: "assistant" } });
|
||||
|
||||
const assistantMessage = {
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "Response from non-streaming model" }],
|
||||
} as AssistantMessage;
|
||||
|
||||
handler?.({ type: "message_end", message: assistantMessage });
|
||||
emit({ type: "message_end", message: assistantMessage });
|
||||
|
||||
expect(subscription.assistantTexts).toEqual(["Response from non-streaming model"]);
|
||||
});
|
||||
|
||||
@ -7,13 +7,6 @@ type StubSession = {
|
||||
};
|
||||
|
||||
describe("subscribeEmbeddedPiSession", () => {
|
||||
const _THINKING_TAG_CASES = [
|
||||
{ tag: "think", open: "<think>", close: "</think>" },
|
||||
{ tag: "thinking", open: "<thinking>", close: "</thinking>" },
|
||||
{ tag: "thought", open: "<thought>", close: "</thought>" },
|
||||
{ tag: "antthinking", open: "<antthinking>", close: "</antthinking>" },
|
||||
] as const;
|
||||
|
||||
it("emits block replies on text_end and does not duplicate on message_end", () => {
|
||||
let handler: ((evt: unknown) => void) | undefined;
|
||||
const session: StubSession = {
|
||||
|
||||
@ -1,41 +1,28 @@
|
||||
import type { AssistantMessage } from "@mariozechner/pi-ai";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
createStubSessionHarness,
|
||||
extractAgentEventPayloads,
|
||||
} from "./pi-embedded-subscribe.e2e-harness.js";
|
||||
import { subscribeEmbeddedPiSession } from "./pi-embedded-subscribe.js";
|
||||
|
||||
type StubSession = {
|
||||
subscribe: (fn: (evt: unknown) => void) => () => void;
|
||||
};
|
||||
|
||||
describe("subscribeEmbeddedPiSession", () => {
|
||||
const _THINKING_TAG_CASES = [
|
||||
{ tag: "think", open: "<think>", close: "</think>" },
|
||||
{ tag: "thinking", open: "<thinking>", close: "</thinking>" },
|
||||
{ tag: "thought", open: "<thought>", close: "</thought>" },
|
||||
{ tag: "antthinking", open: "<antthinking>", close: "</antthinking>" },
|
||||
] as const;
|
||||
|
||||
it("filters to <final> and suppresses output without a start tag", () => {
|
||||
let handler: ((evt: unknown) => void) | undefined;
|
||||
const session: StubSession = {
|
||||
subscribe: (fn) => {
|
||||
handler = fn;
|
||||
return () => {};
|
||||
},
|
||||
};
|
||||
const { session, emit } = createStubSessionHarness();
|
||||
|
||||
const onPartialReply = vi.fn();
|
||||
const onAgentEvent = vi.fn();
|
||||
|
||||
subscribeEmbeddedPiSession({
|
||||
session: session as unknown as Parameters<typeof subscribeEmbeddedPiSession>[0]["session"],
|
||||
session,
|
||||
runId: "run",
|
||||
enforceFinalTag: true,
|
||||
onPartialReply,
|
||||
onAgentEvent,
|
||||
});
|
||||
|
||||
handler?.({ type: "message_start", message: { role: "assistant" } });
|
||||
handler?.({
|
||||
emit({ type: "message_start", message: { role: "assistant" } });
|
||||
emit({
|
||||
type: "message_update",
|
||||
message: { role: "assistant" },
|
||||
assistantMessageEvent: {
|
||||
@ -50,8 +37,8 @@ describe("subscribeEmbeddedPiSession", () => {
|
||||
|
||||
onPartialReply.mockReset();
|
||||
|
||||
handler?.({ type: "message_start", message: { role: "assistant" } });
|
||||
handler?.({
|
||||
emit({ type: "message_start", message: { role: "assistant" } });
|
||||
emit({
|
||||
type: "message_update",
|
||||
message: { role: "assistant" },
|
||||
assistantMessageEvent: {
|
||||
@ -63,18 +50,12 @@ describe("subscribeEmbeddedPiSession", () => {
|
||||
expect(onPartialReply).not.toHaveBeenCalled();
|
||||
});
|
||||
it("emits agent events on message_end even without <final> tags", () => {
|
||||
let handler: ((evt: unknown) => void) | undefined;
|
||||
const session: StubSession = {
|
||||
subscribe: (fn) => {
|
||||
handler = fn;
|
||||
return () => {};
|
||||
},
|
||||
};
|
||||
const { session, emit } = createStubSessionHarness();
|
||||
|
||||
const onAgentEvent = vi.fn();
|
||||
|
||||
subscribeEmbeddedPiSession({
|
||||
session: session as unknown as Parameters<typeof subscribeEmbeddedPiSession>[0]["session"],
|
||||
session,
|
||||
runId: "run",
|
||||
enforceFinalTag: true,
|
||||
onAgentEvent,
|
||||
@ -85,12 +66,10 @@ describe("subscribeEmbeddedPiSession", () => {
|
||||
content: [{ type: "text", text: "Hello world" }],
|
||||
} as AssistantMessage;
|
||||
|
||||
handler?.({ type: "message_start", message: assistantMessage });
|
||||
handler?.({ type: "message_end", message: assistantMessage });
|
||||
emit({ type: "message_start", message: assistantMessage });
|
||||
emit({ type: "message_end", message: assistantMessage });
|
||||
|
||||
const payloads = onAgentEvent.mock.calls
|
||||
.map((call) => call[0]?.data as Record<string, unknown> | undefined)
|
||||
.filter((value): value is Record<string, unknown> => Boolean(value));
|
||||
const payloads = extractAgentEventPayloads(onAgentEvent.mock.calls);
|
||||
expect(payloads).toHaveLength(1);
|
||||
expect(payloads[0]?.text).toBe("Hello world");
|
||||
expect(payloads[0]?.delta).toBe("Hello world");
|
||||
|
||||
@ -6,13 +6,6 @@ type StubSession = {
|
||||
};
|
||||
|
||||
describe("subscribeEmbeddedPiSession", () => {
|
||||
const _THINKING_TAG_CASES = [
|
||||
{ tag: "think", open: "<think>", close: "</think>" },
|
||||
{ tag: "thinking", open: "<thinking>", close: "</thinking>" },
|
||||
{ tag: "thought", open: "<thought>", close: "</thought>" },
|
||||
{ tag: "antthinking", open: "<antthinking>", close: "</antthinking>" },
|
||||
] as const;
|
||||
|
||||
it("includes canvas action metadata in tool summaries", async () => {
|
||||
let handler: ((evt: unknown) => void) | undefined;
|
||||
const session: StubSession = {
|
||||
|
||||
@ -7,13 +7,6 @@ type StubSession = {
|
||||
};
|
||||
|
||||
describe("subscribeEmbeddedPiSession", () => {
|
||||
const _THINKING_TAG_CASES = [
|
||||
{ tag: "think", open: "<think>", close: "</think>" },
|
||||
{ tag: "thinking", open: "<thinking>", close: "</thinking>" },
|
||||
{ tag: "thought", open: "<thought>", close: "</thought>" },
|
||||
{ tag: "antthinking", open: "<antthinking>", close: "</antthinking>" },
|
||||
] as const;
|
||||
|
||||
it("keeps assistantTexts to the final answer when block replies are disabled", () => {
|
||||
let handler: ((evt: unknown) => void) | undefined;
|
||||
const session: StubSession = {
|
||||
|
||||
@ -7,13 +7,6 @@ type StubSession = {
|
||||
};
|
||||
|
||||
describe("subscribeEmbeddedPiSession", () => {
|
||||
const _THINKING_TAG_CASES = [
|
||||
{ tag: "think", open: "<think>", close: "</think>" },
|
||||
{ tag: "thinking", open: "<thinking>", close: "</thinking>" },
|
||||
{ tag: "thought", open: "<thought>", close: "</thought>" },
|
||||
{ tag: "antthinking", open: "<antthinking>", close: "</antthinking>" },
|
||||
] as const;
|
||||
|
||||
it("keeps indented fenced blocks intact", () => {
|
||||
let handler: ((evt: unknown) => void) | undefined;
|
||||
const session: StubSession = {
|
||||
|
||||
@ -7,13 +7,6 @@ type StubSession = {
|
||||
};
|
||||
|
||||
describe("subscribeEmbeddedPiSession", () => {
|
||||
const _THINKING_TAG_CASES = [
|
||||
{ tag: "think", open: "<think>", close: "</think>" },
|
||||
{ tag: "thinking", open: "<thinking>", close: "</thinking>" },
|
||||
{ tag: "thought", open: "<thought>", close: "</thought>" },
|
||||
{ tag: "antthinking", open: "<antthinking>", close: "</antthinking>" },
|
||||
] as const;
|
||||
|
||||
it("reopens fenced blocks when splitting inside them", () => {
|
||||
let handler: ((evt: unknown) => void) | undefined;
|
||||
const session: StubSession = {
|
||||
|
||||
@ -7,13 +7,6 @@ type StubSession = {
|
||||
};
|
||||
|
||||
describe("subscribeEmbeddedPiSession", () => {
|
||||
const _THINKING_TAG_CASES = [
|
||||
{ tag: "think", open: "<think>", close: "</think>" },
|
||||
{ tag: "thinking", open: "<thinking>", close: "</thinking>" },
|
||||
{ tag: "thought", open: "<thought>", close: "</thought>" },
|
||||
{ tag: "antthinking", open: "<antthinking>", close: "</antthinking>" },
|
||||
] as const;
|
||||
|
||||
it("splits long single-line fenced blocks with reopen/close", () => {
|
||||
let handler: ((evt: unknown) => void) | undefined;
|
||||
const session: StubSession = {
|
||||
|
||||
@ -1,32 +1,16 @@
|
||||
import type { AssistantMessage } from "@mariozechner/pi-ai";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { createStubSessionHarness } from "./pi-embedded-subscribe.e2e-harness.js";
|
||||
import { subscribeEmbeddedPiSession } from "./pi-embedded-subscribe.js";
|
||||
|
||||
type StubSession = {
|
||||
subscribe: (fn: (evt: unknown) => void) => () => void;
|
||||
};
|
||||
|
||||
describe("subscribeEmbeddedPiSession", () => {
|
||||
const _THINKING_TAG_CASES = [
|
||||
{ tag: "think", open: "<think>", close: "</think>" },
|
||||
{ tag: "thinking", open: "<thinking>", close: "</thinking>" },
|
||||
{ tag: "thought", open: "<thought>", close: "</thought>" },
|
||||
{ tag: "antthinking", open: "<antthinking>", close: "</antthinking>" },
|
||||
] as const;
|
||||
|
||||
it("streams soft chunks with paragraph preference", () => {
|
||||
let handler: ((evt: unknown) => void) | undefined;
|
||||
const session: StubSession = {
|
||||
subscribe: (fn) => {
|
||||
handler = fn;
|
||||
return () => {};
|
||||
},
|
||||
};
|
||||
const { session, emit } = createStubSessionHarness();
|
||||
|
||||
const onBlockReply = vi.fn();
|
||||
|
||||
const subscription = subscribeEmbeddedPiSession({
|
||||
session: session as unknown as Parameters<typeof subscribeEmbeddedPiSession>[0]["session"],
|
||||
session,
|
||||
runId: "run",
|
||||
onBlockReply,
|
||||
blockReplyBreak: "message_end",
|
||||
@ -39,7 +23,7 @@ describe("subscribeEmbeddedPiSession", () => {
|
||||
|
||||
const text = "First block line\n\nSecond block line";
|
||||
|
||||
handler?.({
|
||||
emit({
|
||||
type: "message_update",
|
||||
message: { role: "assistant" },
|
||||
assistantMessageEvent: {
|
||||
@ -53,7 +37,7 @@ describe("subscribeEmbeddedPiSession", () => {
|
||||
content: [{ type: "text", text }],
|
||||
} as AssistantMessage;
|
||||
|
||||
handler?.({ type: "message_end", message: assistantMessage });
|
||||
emit({ type: "message_end", message: assistantMessage });
|
||||
|
||||
expect(onBlockReply).toHaveBeenCalledTimes(2);
|
||||
expect(onBlockReply.mock.calls[0][0].text).toBe("First block line");
|
||||
@ -61,18 +45,12 @@ describe("subscribeEmbeddedPiSession", () => {
|
||||
expect(subscription.assistantTexts).toEqual(["First block line", "Second block line"]);
|
||||
});
|
||||
it("avoids splitting inside fenced code blocks", () => {
|
||||
let handler: ((evt: unknown) => void) | undefined;
|
||||
const session: StubSession = {
|
||||
subscribe: (fn) => {
|
||||
handler = fn;
|
||||
return () => {};
|
||||
},
|
||||
};
|
||||
const { session, emit } = createStubSessionHarness();
|
||||
|
||||
const onBlockReply = vi.fn();
|
||||
|
||||
subscribeEmbeddedPiSession({
|
||||
session: session as unknown as Parameters<typeof subscribeEmbeddedPiSession>[0]["session"],
|
||||
session,
|
||||
runId: "run",
|
||||
onBlockReply,
|
||||
blockReplyBreak: "message_end",
|
||||
@ -85,7 +63,7 @@ describe("subscribeEmbeddedPiSession", () => {
|
||||
|
||||
const text = "Intro\n\n```bash\nline1\nline2\n```\n\nOutro";
|
||||
|
||||
handler?.({
|
||||
emit({
|
||||
type: "message_update",
|
||||
message: { role: "assistant" },
|
||||
assistantMessageEvent: {
|
||||
@ -99,7 +77,7 @@ describe("subscribeEmbeddedPiSession", () => {
|
||||
content: [{ type: "text", text }],
|
||||
} as AssistantMessage;
|
||||
|
||||
handler?.({ type: "message_end", message: assistantMessage });
|
||||
emit({ type: "message_end", message: assistantMessage });
|
||||
|
||||
expect(onBlockReply).toHaveBeenCalledTimes(3);
|
||||
expect(onBlockReply.mock.calls[0][0].text).toBe("Intro");
|
||||
|
||||
@ -1,5 +1,9 @@
|
||||
import type { AssistantMessage } from "@mariozechner/pi-ai";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
createStubSessionHarness,
|
||||
extractAgentEventPayloads,
|
||||
} from "./pi-embedded-subscribe.e2e-harness.js";
|
||||
import { subscribeEmbeddedPiSession } from "./pi-embedded-subscribe.js";
|
||||
|
||||
type StubSession = {
|
||||
@ -186,18 +190,12 @@ describe("subscribeEmbeddedPiSession", () => {
|
||||
});
|
||||
|
||||
it("emits agent events on message_end for non-streaming assistant text", () => {
|
||||
let handler: ((evt: unknown) => void) | undefined;
|
||||
const session: StubSession = {
|
||||
subscribe: (fn) => {
|
||||
handler = fn;
|
||||
return () => {};
|
||||
},
|
||||
};
|
||||
const { session, emit } = createStubSessionHarness();
|
||||
|
||||
const onAgentEvent = vi.fn();
|
||||
|
||||
subscribeEmbeddedPiSession({
|
||||
session: session as unknown as Parameters<typeof subscribeEmbeddedPiSession>[0]["session"],
|
||||
session,
|
||||
runId: "run",
|
||||
onAgentEvent,
|
||||
});
|
||||
@ -207,12 +205,10 @@ describe("subscribeEmbeddedPiSession", () => {
|
||||
content: [{ type: "text", text: "Hello world" }],
|
||||
} as AssistantMessage;
|
||||
|
||||
handler?.({ type: "message_start", message: assistantMessage });
|
||||
handler?.({ type: "message_end", message: assistantMessage });
|
||||
emit({ type: "message_start", message: assistantMessage });
|
||||
emit({ type: "message_end", message: assistantMessage });
|
||||
|
||||
const payloads = onAgentEvent.mock.calls
|
||||
.map((call) => call[0]?.data as Record<string, unknown> | undefined)
|
||||
.filter((value): value is Record<string, unknown> => Boolean(value));
|
||||
const payloads = extractAgentEventPayloads(onAgentEvent.mock.calls);
|
||||
expect(payloads).toHaveLength(1);
|
||||
expect(payloads[0]?.text).toBe("Hello world");
|
||||
expect(payloads[0]?.delta).toBe("Hello world");
|
||||
|
||||
@ -7,13 +7,6 @@ type StubSession = {
|
||||
};
|
||||
|
||||
describe("subscribeEmbeddedPiSession", () => {
|
||||
const _THINKING_TAG_CASES = [
|
||||
{ tag: "think", open: "<think>", close: "</think>" },
|
||||
{ tag: "thinking", open: "<thinking>", close: "</thinking>" },
|
||||
{ tag: "thought", open: "<thought>", close: "</thought>" },
|
||||
{ tag: "antthinking", open: "<antthinking>", close: "</antthinking>" },
|
||||
] as const;
|
||||
|
||||
it("suppresses message_end block replies when the message tool already sent", async () => {
|
||||
let handler: ((evt: unknown) => void) | undefined;
|
||||
const session: StubSession = {
|
||||
|
||||
@ -7,13 +7,6 @@ type StubSession = {
|
||||
};
|
||||
|
||||
describe("subscribeEmbeddedPiSession", () => {
|
||||
const _THINKING_TAG_CASES = [
|
||||
{ tag: "think", open: "<think>", close: "</think>" },
|
||||
{ tag: "thinking", open: "<thinking>", close: "</thinking>" },
|
||||
{ tag: "thought", open: "<thought>", close: "</thought>" },
|
||||
{ tag: "antthinking", open: "<antthinking>", close: "</antthinking>" },
|
||||
] as const;
|
||||
|
||||
it("waits for multiple compaction retries before resolving", async () => {
|
||||
const listeners: SessionEventHandler[] = [];
|
||||
const session = {
|
||||
|
||||
@ -1,22 +0,0 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { createSessionManagerRuntimeRegistry } from "./session-manager-runtime-registry.js";
|
||||
|
||||
describe("createSessionManagerRuntimeRegistry", () => {
|
||||
it("stores, reads, and clears values by object identity", () => {
|
||||
const registry = createSessionManagerRuntimeRegistry<{ value: number }>();
|
||||
const key = {};
|
||||
expect(registry.get(key)).toBeNull();
|
||||
registry.set(key, { value: 1 });
|
||||
expect(registry.get(key)).toEqual({ value: 1 });
|
||||
registry.set(key, null);
|
||||
expect(registry.get(key)).toBeNull();
|
||||
});
|
||||
|
||||
it("ignores non-object keys", () => {
|
||||
const registry = createSessionManagerRuntimeRegistry<{ value: number }>();
|
||||
registry.set(null, { value: 1 });
|
||||
registry.set(123, { value: 1 });
|
||||
expect(registry.get(null)).toBeNull();
|
||||
expect(registry.get(123)).toBeNull();
|
||||
});
|
||||
});
|
||||
@ -102,7 +102,10 @@ describe("createOpenClawCodingTools", () => {
|
||||
execute,
|
||||
};
|
||||
|
||||
const wrapped = __testing.wrapToolParamNormalization(tool, [{ keys: ["path", "file_path"] }]);
|
||||
const wrapped = __testing.wrapToolParamNormalization(tool, [
|
||||
{ keys: ["path", "file_path"], label: "path (path or file_path)" },
|
||||
{ keys: ["content"], label: "content" },
|
||||
]);
|
||||
|
||||
await wrapped.execute("tool-1", { file_path: "foo.txt", content: "x" });
|
||||
expect(execute).toHaveBeenCalledWith(
|
||||
@ -115,9 +118,21 @@ describe("createOpenClawCodingTools", () => {
|
||||
await expect(wrapped.execute("tool-2", { content: "x" })).rejects.toThrow(
|
||||
/Missing required parameter/,
|
||||
);
|
||||
await expect(wrapped.execute("tool-2", { content: "x" })).rejects.toThrow(
|
||||
/Supply correct parameters before retrying\./,
|
||||
);
|
||||
await expect(wrapped.execute("tool-3", { file_path: " ", content: "x" })).rejects.toThrow(
|
||||
/Missing required parameter/,
|
||||
);
|
||||
await expect(wrapped.execute("tool-3", { file_path: " ", content: "x" })).rejects.toThrow(
|
||||
/Supply correct parameters before retrying\./,
|
||||
);
|
||||
await expect(wrapped.execute("tool-4", {})).rejects.toThrow(
|
||||
/Missing required parameters: path \(path or file_path\), content/,
|
||||
);
|
||||
await expect(wrapped.execute("tool-4", {})).rejects.toThrow(
|
||||
/Supply correct parameters before retrying\./,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@ -87,6 +87,12 @@ type RequiredParamGroup = {
|
||||
label?: string;
|
||||
};
|
||||
|
||||
const RETRY_GUIDANCE_SUFFIX = " Supply correct parameters before retrying.";
|
||||
|
||||
function parameterValidationError(message: string): Error {
|
||||
return new Error(`${message}.${RETRY_GUIDANCE_SUFFIX}`);
|
||||
}
|
||||
|
||||
export const CLAUDE_PARAM_GROUPS = {
|
||||
read: [{ keys: ["path", "file_path"], label: "path (path or file_path)" }],
|
||||
write: [
|
||||
@ -245,9 +251,10 @@ export function assertRequiredParams(
|
||||
toolName: string,
|
||||
): void {
|
||||
if (!record || typeof record !== "object") {
|
||||
throw new Error(`Missing parameters for ${toolName}`);
|
||||
throw parameterValidationError(`Missing parameters for ${toolName}`);
|
||||
}
|
||||
|
||||
const missingLabels: string[] = [];
|
||||
for (const group of groups) {
|
||||
const satisfied = group.keys.some((key) => {
|
||||
if (!(key in record)) {
|
||||
@ -265,9 +272,15 @@ export function assertRequiredParams(
|
||||
|
||||
if (!satisfied) {
|
||||
const label = group.label ?? group.keys.join(" or ");
|
||||
throw new Error(`Missing required parameter: ${label}`);
|
||||
missingLabels.push(label);
|
||||
}
|
||||
}
|
||||
|
||||
if (missingLabels.length > 0) {
|
||||
const joined = missingLabels.join(", ");
|
||||
const noun = missingLabels.length === 1 ? "parameter" : "parameters";
|
||||
throw parameterValidationError(`Missing required ${noun}: ${joined}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Generic wrapper to normalize parameters for any tool
|
||||
|
||||
@ -4,8 +4,9 @@ import path from "node:path";
|
||||
import { afterAll, beforeAll, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import type { ExecApprovalsResolved } from "../infra/exec-approvals.js";
|
||||
import { captureEnv } from "../test-utils/env.js";
|
||||
|
||||
const previousBundledPluginsDir = process.env.OPENCLAW_BUNDLED_PLUGINS_DIR;
|
||||
const bundledPluginsDirSnapshot = captureEnv(["OPENCLAW_BUNDLED_PLUGINS_DIR"]);
|
||||
|
||||
beforeAll(() => {
|
||||
process.env.OPENCLAW_BUNDLED_PLUGINS_DIR = path.join(
|
||||
@ -15,32 +16,18 @@ beforeAll(() => {
|
||||
});
|
||||
|
||||
afterAll(() => {
|
||||
if (previousBundledPluginsDir === undefined) {
|
||||
delete process.env.OPENCLAW_BUNDLED_PLUGINS_DIR;
|
||||
} else {
|
||||
process.env.OPENCLAW_BUNDLED_PLUGINS_DIR = previousBundledPluginsDir;
|
||||
}
|
||||
bundledPluginsDirSnapshot.restore();
|
||||
});
|
||||
|
||||
vi.mock("../infra/shell-env.js", async (importOriginal) => {
|
||||
const mod = await importOriginal<typeof import("../infra/shell-env.js")>();
|
||||
return {
|
||||
...mod,
|
||||
getShellPathFromLoginShell: vi.fn(() => "/usr/bin:/bin"),
|
||||
getShellPathFromLoginShell: vi.fn(() => null),
|
||||
resolveShellEnvFallbackTimeoutMs: vi.fn(() => 500),
|
||||
};
|
||||
});
|
||||
|
||||
vi.mock("../plugins/tools.js", () => ({
|
||||
getPluginToolMeta: () => undefined,
|
||||
resolvePluginTools: () => [],
|
||||
}));
|
||||
|
||||
vi.mock("../infra/shell-env.js", async (importOriginal) => {
|
||||
const mod = await importOriginal<typeof import("../infra/shell-env.js")>();
|
||||
return { ...mod, getShellPathFromLoginShell: () => null };
|
||||
});
|
||||
|
||||
vi.mock("../plugins/tools.js", () => ({
|
||||
resolvePluginTools: () => [],
|
||||
getPluginToolMeta: () => undefined,
|
||||
@ -109,20 +96,16 @@ describe("createOpenClawCodingTools safeBins", () => {
|
||||
expect(execTool).toBeDefined();
|
||||
|
||||
const marker = `safe-bins-${Date.now()}`;
|
||||
const prevShellEnvTimeoutMs = process.env.OPENCLAW_SHELL_ENV_TIMEOUT_MS;
|
||||
process.env.OPENCLAW_SHELL_ENV_TIMEOUT_MS = "1000";
|
||||
const envSnapshot = captureEnv(["OPENCLAW_SHELL_ENV_TIMEOUT_MS"]);
|
||||
const result = await (async () => {
|
||||
try {
|
||||
process.env.OPENCLAW_SHELL_ENV_TIMEOUT_MS = "1000";
|
||||
return await execTool!.execute("call1", {
|
||||
command: `echo ${marker}`,
|
||||
workdir: tmpDir,
|
||||
});
|
||||
} finally {
|
||||
if (prevShellEnvTimeoutMs === undefined) {
|
||||
delete process.env.OPENCLAW_SHELL_ENV_TIMEOUT_MS;
|
||||
} else {
|
||||
process.env.OPENCLAW_SHELL_ENV_TIMEOUT_MS = prevShellEnvTimeoutMs;
|
||||
}
|
||||
envSnapshot.restore();
|
||||
}
|
||||
})();
|
||||
const text = result.content.find((content) => content.type === "text")?.text ?? "";
|
||||
|
||||
@ -94,7 +94,7 @@ describe("buildSandboxCreateArgs", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("emits -v flags for custom binds", () => {
|
||||
it("emits -v flags for safe custom binds", () => {
|
||||
const cfg: SandboxDockerConfig = {
|
||||
image: "openclaw-sandbox:bookworm-slim",
|
||||
containerPrefix: "openclaw-sbx-",
|
||||
@ -103,7 +103,7 @@ describe("buildSandboxCreateArgs", () => {
|
||||
tmpfs: [],
|
||||
network: "none",
|
||||
capDrop: [],
|
||||
binds: ["/home/user/source:/source:rw", "/var/run/docker.sock:/var/run/docker.sock"],
|
||||
binds: ["/home/user/source:/source:rw", "/var/data/myapp:/data:ro"],
|
||||
};
|
||||
|
||||
const args = buildSandboxCreateArgs({
|
||||
@ -124,7 +124,116 @@ describe("buildSandboxCreateArgs", () => {
|
||||
}
|
||||
}
|
||||
expect(vFlags).toContain("/home/user/source:/source:rw");
|
||||
expect(vFlags).toContain("/var/run/docker.sock:/var/run/docker.sock");
|
||||
expect(vFlags).toContain("/var/data/myapp:/data:ro");
|
||||
});
|
||||
|
||||
it("throws on dangerous bind mounts (Docker socket)", () => {
|
||||
const cfg: SandboxDockerConfig = {
|
||||
image: "openclaw-sandbox:bookworm-slim",
|
||||
containerPrefix: "openclaw-sbx-",
|
||||
workdir: "/workspace",
|
||||
readOnlyRoot: false,
|
||||
tmpfs: [],
|
||||
network: "none",
|
||||
capDrop: [],
|
||||
binds: ["/var/run/docker.sock:/var/run/docker.sock"],
|
||||
};
|
||||
|
||||
expect(() =>
|
||||
buildSandboxCreateArgs({
|
||||
name: "openclaw-sbx-dangerous",
|
||||
cfg,
|
||||
scopeKey: "main",
|
||||
createdAtMs: 1700000000000,
|
||||
}),
|
||||
).toThrow(/blocked path/);
|
||||
});
|
||||
|
||||
it("throws on dangerous bind mounts (parent path)", () => {
|
||||
const cfg: SandboxDockerConfig = {
|
||||
image: "openclaw-sandbox:bookworm-slim",
|
||||
containerPrefix: "openclaw-sbx-",
|
||||
workdir: "/workspace",
|
||||
readOnlyRoot: false,
|
||||
tmpfs: [],
|
||||
network: "none",
|
||||
capDrop: [],
|
||||
binds: ["/run:/run"],
|
||||
};
|
||||
|
||||
expect(() =>
|
||||
buildSandboxCreateArgs({
|
||||
name: "openclaw-sbx-dangerous-parent",
|
||||
cfg,
|
||||
scopeKey: "main",
|
||||
createdAtMs: 1700000000000,
|
||||
}),
|
||||
).toThrow(/blocked path/);
|
||||
});
|
||||
|
||||
it("throws on network host mode", () => {
|
||||
const cfg: SandboxDockerConfig = {
|
||||
image: "openclaw-sandbox:bookworm-slim",
|
||||
containerPrefix: "openclaw-sbx-",
|
||||
workdir: "/workspace",
|
||||
readOnlyRoot: false,
|
||||
tmpfs: [],
|
||||
network: "host",
|
||||
capDrop: [],
|
||||
};
|
||||
|
||||
expect(() =>
|
||||
      buildSandboxCreateArgs({
        name: "openclaw-sbx-host",
        cfg,
        scopeKey: "main",
        createdAtMs: 1700000000000,
      }),
    ).toThrow(/network mode "host" is blocked/);
  });

  it("throws on seccomp unconfined", () => {
    const cfg: SandboxDockerConfig = {
      image: "openclaw-sandbox:bookworm-slim",
      containerPrefix: "openclaw-sbx-",
      workdir: "/workspace",
      readOnlyRoot: false,
      tmpfs: [],
      network: "none",
      capDrop: [],
      seccompProfile: "unconfined",
    };

    expect(() =>
      buildSandboxCreateArgs({
        name: "openclaw-sbx-seccomp",
        cfg,
        scopeKey: "main",
        createdAtMs: 1700000000000,
      }),
    ).toThrow(/seccomp profile "unconfined" is blocked/);
  });

  it("throws on apparmor unconfined", () => {
    const cfg: SandboxDockerConfig = {
      image: "openclaw-sandbox:bookworm-slim",
      containerPrefix: "openclaw-sbx-",
      workdir: "/workspace",
      readOnlyRoot: false,
      tmpfs: [],
      network: "none",
      capDrop: [],
      apparmorProfile: "unconfined",
    };

    expect(() =>
      buildSandboxCreateArgs({
        name: "openclaw-sbx-apparmor",
        cfg,
        scopeKey: "main",
        createdAtMs: 1700000000000,
      }),
    ).toThrow(/apparmor profile "unconfined" is blocked/);
  });

  it("omits -v flags when binds is empty or undefined", () => {

@@ -3,6 +3,7 @@ import os from "node:os";
 import path from "node:path";
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 import type { OpenClawConfig } from "../config/config.js";
+import { captureFullEnv } from "../test-utils/env.js";
 import { resolveSandboxContext } from "./sandbox.js";

 vi.mock("./sandbox/docker.js", () => ({

@@ -27,30 +28,15 @@ async function writeSkill(params: { dir: string; name: string; description: stri
   );
 }

-function restoreEnv(snapshot: Record<string, string | undefined>) {
-  for (const key of Object.keys(process.env)) {
-    if (!(key in snapshot)) {
-      delete process.env[key];
-    }
-  }
-  for (const [key, value] of Object.entries(snapshot)) {
-    if (value === undefined) {
-      delete process.env[key];
-    } else {
-      process.env[key] = value;
-    }
-  }
-}
-
 describe("sandbox skill mirroring", () => {
-  let envSnapshot: Record<string, string | undefined>;
+  let envSnapshot: ReturnType<typeof captureFullEnv>;

   beforeEach(() => {
-    envSnapshot = { ...process.env };
+    envSnapshot = captureFullEnv();
   });

   afterEach(() => {
-    restoreEnv(envSnapshot);
+    envSnapshot.restore();
   });

   const runContext = async (workspaceAccess: "none" | "ro") => {

@@ -111,6 +111,7 @@ import { computeSandboxConfigHash } from "./config-hash.js";
 import { DEFAULT_SANDBOX_IMAGE, SANDBOX_AGENT_WORKSPACE_MOUNT } from "./constants.js";
 import { readRegistry, updateRegistry } from "./registry.js";
 import { resolveSandboxAgentId, resolveSandboxScopeKey, slugifySessionKey } from "./shared.js";
+import { validateSandboxSecurity } from "./validate-sandbox-security.js";

 const HOT_CONTAINER_WINDOW_MS = 5 * 60 * 1000;

@@ -240,6 +241,9 @@ export function buildSandboxCreateArgs(params: {
   labels?: Record<string, string>;
   configHash?: string;
 }) {
+  // Runtime security validation: blocks dangerous bind mounts, network modes, and profiles.
+  validateSandboxSecurity(params.cfg);
+
   const createdAtMs = params.createdAtMs ?? Date.now();
   const args = ["create", "--name", params.name];
   args.push("--label", "openclaw.sandbox=1");
146 src/agents/sandbox/validate-sandbox-security.test.ts Normal file
@@ -0,0 +1,146 @@
import { mkdtempSync, symlinkSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { describe, expect, it } from "vitest";
import {
  getBlockedBindReasonStringOnly,
  validateBindMounts,
  validateNetworkMode,
  validateSeccompProfile,
  validateApparmorProfile,
  validateSandboxSecurity,
} from "./validate-sandbox-security.js";

describe("getBlockedBindReasonStringOnly", () => {
  it("blocks ancestor mounts that would expose the Docker socket", () => {
    expect(getBlockedBindReasonStringOnly("/run:/run")).toEqual(
      expect.objectContaining({ kind: "covers" }),
    );
    expect(getBlockedBindReasonStringOnly("/var/run:/var/run:ro")).toEqual(
      expect.objectContaining({ kind: "covers" }),
    );
    expect(getBlockedBindReasonStringOnly("/var:/var")).toEqual(
      expect.objectContaining({ kind: "covers" }),
    );
  });
});

describe("validateBindMounts", () => {
  it("allows legitimate project directory mounts", () => {
    expect(() =>
      validateBindMounts([
        "/home/user/source:/source:rw",
        "/home/user/projects:/projects:ro",
        "/var/data/myapp:/data",
        "/opt/myapp/config:/config:ro",
      ]),
    ).not.toThrow();
  });

  it("allows undefined or empty binds", () => {
    expect(() => validateBindMounts(undefined)).not.toThrow();
    expect(() => validateBindMounts([])).not.toThrow();
  });

  it("blocks /etc mount", () => {
    expect(() => validateBindMounts(["/etc/passwd:/mnt/passwd:ro"])).toThrow(
      /blocked path "\/etc"/,
    );
  });

  it("blocks /proc mount", () => {
    expect(() => validateBindMounts(["/proc:/proc:ro"])).toThrow(/blocked path "\/proc"/);
  });

  it("blocks Docker socket mounts (/var/run + /run)", () => {
    expect(() => validateBindMounts(["/var/run/docker.sock:/var/run/docker.sock"])).toThrow(
      /docker\.sock/,
    );
    expect(() => validateBindMounts(["/run/docker.sock:/run/docker.sock"])).toThrow(/docker\.sock/);
  });

  it("blocks parent mounts that would expose the Docker socket", () => {
    expect(() => validateBindMounts(["/run:/run"])).toThrow(/blocked path/);
    expect(() => validateBindMounts(["/var/run:/var/run"])).toThrow(/blocked path/);
    expect(() => validateBindMounts(["/var:/var"])).toThrow(/blocked path/);
  });

  it("blocks paths with .. traversal to dangerous directories", () => {
    expect(() => validateBindMounts(["/home/user/../../etc/shadow:/mnt/shadow"])).toThrow(
      /blocked path "\/etc"/,
    );
  });

  it("blocks paths with double slashes normalizing to dangerous dirs", () => {
    expect(() => validateBindMounts(["//etc//passwd:/mnt/passwd"])).toThrow(/blocked path "\/etc"/);
  });

  it("blocks symlink escapes into blocked directories", () => {
    const dir = mkdtempSync(join(tmpdir(), "openclaw-sbx-"));
    const link = join(dir, "etc-link");
    symlinkSync("/etc", link);
    expect(() => validateBindMounts([`${link}/passwd:/mnt/passwd:ro`])).toThrow(/blocked path/);
  });

  it("rejects non-absolute source paths (relative or named volumes)", () => {
    expect(() => validateBindMounts(["../etc/passwd:/mnt/passwd"])).toThrow(/non-absolute/);
    expect(() => validateBindMounts(["etc/passwd:/mnt/passwd"])).toThrow(/non-absolute/);
    expect(() => validateBindMounts(["myvol:/mnt"])).toThrow(/non-absolute/);
  });
});

describe("validateNetworkMode", () => {
  it("allows bridge/none/custom/undefined", () => {
    expect(() => validateNetworkMode("bridge")).not.toThrow();
    expect(() => validateNetworkMode("none")).not.toThrow();
    expect(() => validateNetworkMode("my-custom-network")).not.toThrow();
    expect(() => validateNetworkMode(undefined)).not.toThrow();
  });

  it("blocks host mode (case-insensitive)", () => {
    expect(() => validateNetworkMode("host")).toThrow(/network mode "host" is blocked/);
    expect(() => validateNetworkMode("HOST")).toThrow(/network mode "HOST" is blocked/);
  });
});

describe("validateSeccompProfile", () => {
  it("allows custom profile paths/undefined", () => {
    expect(() => validateSeccompProfile("/tmp/seccomp.json")).not.toThrow();
    expect(() => validateSeccompProfile(undefined)).not.toThrow();
  });

  it("blocks unconfined (case-insensitive)", () => {
    expect(() => validateSeccompProfile("unconfined")).toThrow(
      /seccomp profile "unconfined" is blocked/,
    );
    expect(() => validateSeccompProfile("Unconfined")).toThrow(
      /seccomp profile "Unconfined" is blocked/,
    );
  });
});

describe("validateApparmorProfile", () => {
  it("allows named profile/undefined", () => {
    expect(() => validateApparmorProfile("openclaw-sandbox")).not.toThrow();
    expect(() => validateApparmorProfile(undefined)).not.toThrow();
  });

  it("blocks unconfined (case-insensitive)", () => {
    expect(() => validateApparmorProfile("unconfined")).toThrow(
      /apparmor profile "unconfined" is blocked/,
    );
  });
});

describe("validateSandboxSecurity", () => {
  it("passes with safe config", () => {
    expect(() =>
      validateSandboxSecurity({
        binds: ["/home/user/src:/src:rw"],
        network: "none",
        seccompProfile: "/tmp/seccomp.json",
        apparmorProfile: "openclaw-sandbox",
      }),
    ).not.toThrow();
  });
});
208 src/agents/sandbox/validate-sandbox-security.ts Normal file
@@ -0,0 +1,208 @@
/**
 * Sandbox security validation — blocks dangerous Docker configurations.
 *
 * Threat model: local-trusted config, but protect against foot-guns and config injection.
 * Enforced at runtime when creating sandbox containers.
 */

import { existsSync, realpathSync } from "node:fs";
import { posix } from "node:path";

// Targeted denylist: host paths that should never be exposed inside sandbox containers.
// Exported for reuse in security audit collectors.
export const BLOCKED_HOST_PATHS = [
  "/etc",
  "/private/etc",
  "/proc",
  "/sys",
  "/dev",
  "/root",
  "/boot",
  "/var/run/docker.sock",
  "/private/var/run/docker.sock",
  "/run/docker.sock",
];

const BLOCKED_NETWORK_MODES = new Set(["host"]);
const BLOCKED_SECCOMP_PROFILES = new Set(["unconfined"]);
const BLOCKED_APPARMOR_PROFILES = new Set(["unconfined"]);

export type BlockedBindReason =
  | { kind: "targets"; blockedPath: string }
  | { kind: "covers"; blockedPath: string }
  | { kind: "non_absolute"; sourcePath: string };

/**
 * Parse the host/source path from a Docker bind mount string.
 * Format: `source:target[:mode]`
 */
export function parseBindSourcePath(bind: string): string {
  const trimmed = bind.trim();
  const firstColon = trimmed.indexOf(":");
  if (firstColon <= 0) {
    // No colon or starts with colon — treat as source.
    return trimmed;
  }
  return trimmed.slice(0, firstColon);
}

/**
 * Normalize a POSIX path: resolve `.`, `..`, collapse `//`, strip trailing `/`.
 */
export function normalizeHostPath(raw: string): string {
  const trimmed = raw.trim();
  return posix.normalize(trimmed).replace(/\/+$/, "") || "/";
}

/**
 * String-only blocked-path check (no filesystem I/O).
 * Blocks:
 * - binds that target blocked paths (equal or under)
 * - binds that cover blocked paths (ancestor mounts like /run or /var)
 * - non-absolute source paths (relative / volume names) because they are hard to validate safely
 */
export function getBlockedBindReasonStringOnly(bind: string): BlockedBindReason | null {
  const sourceRaw = parseBindSourcePath(bind);
  if (!sourceRaw.startsWith("/")) {
    return { kind: "non_absolute", sourcePath: sourceRaw };
  }

  const normalized = normalizeHostPath(sourceRaw);

  for (const blocked of BLOCKED_HOST_PATHS) {
    if (normalized === blocked || normalized.startsWith(blocked + "/")) {
      return { kind: "targets", blockedPath: blocked };
    }
    // Ancestor mounts: mounting /run exposes /run/docker.sock.
    if (normalized === "/") {
      return { kind: "covers", blockedPath: blocked };
    }
    if (blocked.startsWith(normalized + "/")) {
      return { kind: "covers", blockedPath: blocked };
    }
  }

  return null;
}

function tryRealpathAbsolute(path: string): string {
  if (!path.startsWith("/")) {
    return path;
  }
  if (!existsSync(path)) {
    return path;
  }
  try {
    // Use native when available (keeps platform semantics); normalize for prefix checks.
    return normalizeHostPath(realpathSync.native(path));
  } catch {
    return path;
  }
}

function formatBindBlockedError(params: { bind: string; reason: BlockedBindReason }): Error {
  if (params.reason.kind === "non_absolute") {
    return new Error(
      `Sandbox security: bind mount "${params.bind}" uses a non-absolute source path ` +
        `"${params.reason.sourcePath}". Only absolute POSIX paths are supported for sandbox binds.`,
    );
  }
  const verb = params.reason.kind === "covers" ? "covers" : "targets";
  return new Error(
    `Sandbox security: bind mount "${params.bind}" ${verb} blocked path "${params.reason.blockedPath}". ` +
      "Mounting system directories (or Docker socket paths) into sandbox containers is not allowed. " +
      "Use project-specific paths instead (e.g. /home/user/myproject).",
  );
}

/**
 * Validate bind mounts — throws if any source path is dangerous.
 * Includes a symlink/realpath pass when the source path exists.
 */
export function validateBindMounts(binds: string[] | undefined): void {
  if (!binds?.length) {
    return;
  }

  for (const rawBind of binds) {
    const bind = rawBind.trim();
    if (!bind) {
      continue;
    }

    // Fast string-only check (covers .., //, ancestor/descendant logic).
    const blocked = getBlockedBindReasonStringOnly(bind);
    if (blocked) {
      throw formatBindBlockedError({ bind, reason: blocked });
    }

    // Symlink escape hardening: resolve existing absolute paths and re-check.
    const sourceRaw = parseBindSourcePath(bind);
    const sourceNormalized = normalizeHostPath(sourceRaw);
    const sourceReal = tryRealpathAbsolute(sourceNormalized);
    if (sourceReal !== sourceNormalized) {
      for (const blockedPath of BLOCKED_HOST_PATHS) {
        if (sourceReal === blockedPath || sourceReal.startsWith(blockedPath + "/")) {
          throw formatBindBlockedError({
            bind,
            reason: { kind: "targets", blockedPath },
          });
        }
        if (sourceReal === "/") {
          throw formatBindBlockedError({
            bind,
            reason: { kind: "covers", blockedPath },
          });
        }
        if (blockedPath.startsWith(sourceReal + "/")) {
          throw formatBindBlockedError({
            bind,
            reason: { kind: "covers", blockedPath },
          });
        }
      }
    }
  }
}

export function validateNetworkMode(network: string | undefined): void {
  if (network && BLOCKED_NETWORK_MODES.has(network.trim().toLowerCase())) {
    throw new Error(
      `Sandbox security: network mode "${network}" is blocked. ` +
        'Network "host" mode bypasses container network isolation. ' +
        'Use "bridge" or "none" instead.',
    );
  }
}

export function validateSeccompProfile(profile: string | undefined): void {
  if (profile && BLOCKED_SECCOMP_PROFILES.has(profile.trim().toLowerCase())) {
    throw new Error(
      `Sandbox security: seccomp profile "${profile}" is blocked. ` +
        "Disabling seccomp removes syscall filtering and weakens sandbox isolation. " +
        "Use a custom seccomp profile file or omit this setting.",
    );
  }
}

export function validateApparmorProfile(profile: string | undefined): void {
  if (profile && BLOCKED_APPARMOR_PROFILES.has(profile.trim().toLowerCase())) {
    throw new Error(
      `Sandbox security: apparmor profile "${profile}" is blocked. ` +
        "Disabling AppArmor removes mandatory access controls and weakens sandbox isolation. " +
        "Use a named AppArmor profile or omit this setting.",
    );
  }
}

export function validateSandboxSecurity(cfg: {
  binds?: string[];
  network?: string;
  seccompProfile?: string;
  apparmorProfile?: string;
}): void {
  validateBindMounts(cfg.binds);
  validateNetworkMode(cfg.network);
  validateSeccompProfile(cfg.seccompProfile);
  validateApparmorProfile(cfg.apparmorProfile);
}
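The string-only half of the bind check can be exercised on its own. A minimal sketch, assuming Node's `posix.normalize` semantics; the inlined `BLOCKED` list and `blockedReason` helper here are illustrative stand-ins for the module's `BLOCKED_HOST_PATHS` and `getBlockedBindReasonStringOnly`, not its exports:

```typescript
import { posix } from "node:path";

// Illustrative subset of the denylist from validate-sandbox-security.ts.
const BLOCKED = ["/etc", "/proc", "/run/docker.sock", "/var/run/docker.sock"];

function normalize(p: string): string {
  // Same shape as normalizeHostPath: resolve ./.., collapse //, strip trailing /.
  return posix.normalize(p.trim()).replace(/\/+$/, "") || "/";
}

function blockedReason(bind: string): string | null {
  const colon = bind.indexOf(":");
  const source = colon > 0 ? bind.slice(0, colon) : bind;
  if (!source.startsWith("/")) return "non_absolute"; // relative path or named volume
  const n = normalize(source);
  for (const b of BLOCKED) {
    if (n === b || n.startsWith(b + "/")) return "targets"; // mounts a blocked path itself
    if (n === "/" || b.startsWith(n + "/")) return "covers"; // ancestor mount exposes it
  }
  return null;
}

console.log(blockedReason("/home/user/src:/src:rw")); // safe project mount -> null
console.log(blockedReason("//etc//passwd:/mnt")); // double slashes normalize under /etc
console.log(blockedReason("/var/run:/var/run")); // ancestor of the Docker socket path
```

As in the real module, ancestor mounts such as `/var/run` are rejected because they would expose `/var/run/docker.sock` even though the bind string never names the socket.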
53 src/agents/sanitize-for-prompt.test.ts Normal file
@@ -0,0 +1,53 @@
import { describe, expect, it } from "vitest";
import { sanitizeForPromptLiteral } from "./sanitize-for-prompt.js";
import { buildAgentSystemPrompt } from "./system-prompt.js";

describe("sanitizeForPromptLiteral (OC-19 hardening)", () => {
  it("strips ASCII control chars (CR/LF/NUL/tab)", () => {
    expect(sanitizeForPromptLiteral("/tmp/a\nb\rc\x00d\te")).toBe("/tmp/abcde");
  });

  it("strips Unicode line/paragraph separators", () => {
    expect(sanitizeForPromptLiteral(`/tmp/a\u2028b\u2029c`)).toBe("/tmp/abc");
  });

  it("strips Unicode format chars (bidi override)", () => {
    // U+202E RIGHT-TO-LEFT OVERRIDE (Cf) can spoof rendered text.
    expect(sanitizeForPromptLiteral(`/tmp/a\u202Eb`)).toBe("/tmp/ab");
  });

  it("preserves ordinary Unicode + spaces", () => {
    const value = "/tmp/my project/日本語-folder.v2";
    expect(sanitizeForPromptLiteral(value)).toBe(value);
  });
});

describe("buildAgentSystemPrompt uses sanitized workspace/sandbox strings", () => {
  it("sanitizes workspaceDir (no newlines / separators)", () => {
    const prompt = buildAgentSystemPrompt({
      workspaceDir: "/tmp/project\nINJECT\u2028MORE",
    });
    expect(prompt).toContain("Your working directory is: /tmp/projectINJECTMORE");
    expect(prompt).not.toContain("Your working directory is: /tmp/project\n");
    expect(prompt).not.toContain("\u2028");
  });

  it("sanitizes sandbox workspace/mount/url strings", () => {
    const prompt = buildAgentSystemPrompt({
      workspaceDir: "/tmp/test",
      sandboxInfo: {
        enabled: true,
        containerWorkspaceDir: "/work\u2029space",
        workspaceDir: "/host\nspace",
        workspaceAccess: "read-write",
        agentWorkspaceMount: "/mnt\u2028mount",
        browserNoVncUrl: "http://example.test/\nui",
      },
    });
    expect(prompt).toContain("Sandbox container workdir: /workspace");
    expect(prompt).toContain("Sandbox host workspace: /hostspace");
    expect(prompt).toContain("(mounted at /mntmount)");
    expect(prompt).toContain("Sandbox browser observer (noVNC): http://example.test/ui");
    expect(prompt).not.toContain("\nui");
  });
});

18 src/agents/sanitize-for-prompt.ts Normal file
@@ -0,0 +1,18 @@
/**
 * Sanitize untrusted strings before embedding them into an LLM prompt.
 *
 * Threat model (OC-19): attacker-controlled directory names (or other runtime strings)
 * that contain newline/control characters can break prompt structure and inject
 * arbitrary instructions.
 *
 * Strategy (Option 3 hardening):
 * - Strip Unicode "control" (Cc) + "format" (Cf) characters (includes CR/LF/NUL, bidi marks, zero-width chars).
 * - Strip explicit line/paragraph separators (Zl/Zp): U+2028/U+2029.
 *
 * Notes:
 * - This is intentionally lossy; it trades edge-case path fidelity for prompt integrity.
 * - If you need lossless representation, escape instead of stripping.
 */
export function sanitizeForPromptLiteral(value: string): string {
  return value.replace(/[\p{Cc}\p{Cf}\u2028\u2029]/gu, "");
}
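The whole mechanism fits in that one regex; a quick standalone check (the function body is copied verbatim from the file above, only the surrounding module is omitted):

```typescript
// Same regex as sanitize-for-prompt.ts: strip Cc (controls), Cf (format chars), U+2028/U+2029.
const sanitizeForPromptLiteral = (value: string): string =>
  value.replace(/[\p{Cc}\p{Cf}\u2028\u2029]/gu, "");

console.log(sanitizeForPromptLiteral("/tmp/a\nb\u202Ec")); // -> "/tmp/abc"
console.log(sanitizeForPromptLiteral("/tmp/日本語 v2")); // ordinary Unicode and spaces untouched
```

Spaces (Zs) and ordinary letters pass through untouched, which is why the tests assert that `/tmp/my project/日本語-folder.v2` survives intact.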
|
||||
@ -2,30 +2,9 @@ import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { writeSkill } from "./skills.e2e-test-helpers.js";
|
||||
import { buildWorkspaceSkillsPrompt } from "./skills.js";
|
||||
|
||||
async function writeSkill(params: {
|
||||
dir: string;
|
||||
name: string;
|
||||
description: string;
|
||||
metadata?: string;
|
||||
body?: string;
|
||||
}) {
|
||||
const { dir, name, description, metadata, body } = params;
|
||||
await fs.mkdir(dir, { recursive: true });
|
||||
await fs.writeFile(
|
||||
path.join(dir, "SKILL.md"),
|
||||
`---
|
||||
name: ${name}
|
||||
description: ${description}${metadata ? `\nmetadata: ${metadata}` : ""}
|
||||
---
|
||||
|
||||
${body ?? `# ${name}\n`}
|
||||
`,
|
||||
"utf-8",
|
||||
);
|
||||
}
|
||||
|
||||
describe("buildWorkspaceSkillsPrompt", () => {
|
||||
it("applies bundled allowlist without affecting workspace skills", async () => {
|
||||
const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-"));
|
||||
|
||||
@ -2,30 +2,9 @@ import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { writeSkill } from "./skills.e2e-test-helpers.js";
|
||||
import { buildWorkspaceSkillsPrompt } from "./skills.js";
|
||||
|
||||
async function writeSkill(params: {
|
||||
dir: string;
|
||||
name: string;
|
||||
description: string;
|
||||
metadata?: string;
|
||||
body?: string;
|
||||
}) {
|
||||
const { dir, name, description, metadata, body } = params;
|
||||
await fs.mkdir(dir, { recursive: true });
|
||||
await fs.writeFile(
|
||||
path.join(dir, "SKILL.md"),
|
||||
`---
|
||||
name: ${name}
|
||||
description: ${description}${metadata ? `\nmetadata: ${metadata}` : ""}
|
||||
---
|
||||
|
||||
${body ?? `# ${name}\n`}
|
||||
`,
|
||||
"utf-8",
|
||||
);
|
||||
}
|
||||
|
||||
describe("buildWorkspaceSkillsPrompt", () => {
|
||||
it("prefers workspace skills over managed skills", async () => {
|
||||
const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-"));
|
||||
|
||||
24
src/agents/skills.e2e-test-helpers.ts
Normal file
24
src/agents/skills.e2e-test-helpers.ts
Normal file
@ -0,0 +1,24 @@
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
|
||||
export async function writeSkill(params: {
|
||||
dir: string;
|
||||
name: string;
|
||||
description: string;
|
||||
metadata?: string;
|
||||
body?: string;
|
||||
}) {
|
||||
const { dir, name, description, metadata, body } = params;
|
||||
await fs.mkdir(dir, { recursive: true });
|
||||
await fs.writeFile(
|
||||
path.join(dir, "SKILL.md"),
|
||||
`---
|
||||
name: ${name}
|
||||
description: ${description}${metadata ? `\nmetadata: ${metadata}` : ""}
|
||||
---
|
||||
|
||||
${body ?? `# ${name}\n`}
|
||||
`,
|
||||
"utf-8",
|
||||
);
|
||||
}
|
||||
@ -12,6 +12,9 @@ import {
|
||||
normalizeStringList,
|
||||
parseFrontmatterBool,
|
||||
resolveOpenClawManifestBlock,
|
||||
resolveOpenClawManifestInstall,
|
||||
resolveOpenClawManifestOs,
|
||||
resolveOpenClawManifestRequires,
|
||||
} from "../../shared/frontmatter.js";
|
||||
|
||||
export function parseFrontmatter(content: string): ParsedSkillFrontmatter {
|
||||
@ -83,15 +86,9 @@ export function resolveOpenClawMetadata(
|
||||
if (!metadataObj) {
|
||||
return undefined;
|
||||
}
|
||||
const requiresRaw =
|
||||
typeof metadataObj.requires === "object" && metadataObj.requires !== null
|
||||
? (metadataObj.requires as Record<string, unknown>)
|
||||
: undefined;
|
||||
const installRaw = Array.isArray(metadataObj.install) ? (metadataObj.install as unknown[]) : [];
|
||||
const install = installRaw
|
||||
.map((entry) => parseInstallSpec(entry))
|
||||
.filter((entry): entry is SkillInstallSpec => Boolean(entry));
|
||||
const osRaw = normalizeStringList(metadataObj.os);
|
||||
const requires = resolveOpenClawManifestRequires(metadataObj);
|
||||
const install = resolveOpenClawManifestInstall(metadataObj, parseInstallSpec);
|
||||
const osRaw = resolveOpenClawManifestOs(metadataObj);
|
||||
return {
|
||||
always: typeof metadataObj.always === "boolean" ? metadataObj.always : undefined,
|
||||
emoji: typeof metadataObj.emoji === "string" ? metadataObj.emoji : undefined,
|
||||
@ -99,14 +96,7 @@ export function resolveOpenClawMetadata(
|
||||
skillKey: typeof metadataObj.skillKey === "string" ? metadataObj.skillKey : undefined,
|
||||
primaryEnv: typeof metadataObj.primaryEnv === "string" ? metadataObj.primaryEnv : undefined,
|
||||
os: osRaw.length > 0 ? osRaw : undefined,
|
||||
requires: requiresRaw
|
||||
? {
|
||||
bins: normalizeStringList(requiresRaw.bins),
|
||||
anyBins: normalizeStringList(requiresRaw.anyBins),
|
||||
env: normalizeStringList(requiresRaw.env),
|
||||
config: normalizeStringList(requiresRaw.config),
|
||||
}
|
||||
: undefined,
|
||||
requires: requires,
|
||||
install: install.length > 0 ? install : undefined,
|
||||
};
|
||||
}
|
||||
|
||||
@ -2,6 +2,7 @@ import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { captureEnv } from "../test-utils/env.js";
|
||||
import {
|
||||
initSubagentRegistry,
|
||||
registerSubagentRun,
|
||||
@ -29,7 +30,7 @@ vi.mock("./subagent-announce.js", () => ({
|
||||
}));
|
||||
|
||||
describe("subagent registry persistence", () => {
|
||||
const previousStateDir = process.env.OPENCLAW_STATE_DIR;
|
||||
const envSnapshot = captureEnv(["OPENCLAW_STATE_DIR"]);
|
||||
let tempStateDir: string | null = null;
|
||||
|
||||
afterEach(async () => {
|
||||
@ -39,11 +40,7 @@ describe("subagent registry persistence", () => {
|
||||
await fs.rm(tempStateDir, { recursive: true, force: true });
|
||||
tempStateDir = null;
|
||||
}
|
||||
if (previousStateDir === undefined) {
|
||||
delete process.env.OPENCLAW_STATE_DIR;
|
||||
} else {
|
||||
process.env.OPENCLAW_STATE_DIR = previousStateDir;
|
||||
}
|
||||
envSnapshot.restore();
|
||||
});
|
||||
|
||||
it("persists runs to disk and resumes after restart", async () => {
|
||||
|
||||
@ -5,6 +5,7 @@ import type { EmbeddedContextFile } from "./pi-embedded-helpers.js";
|
||||
import { SILENT_REPLY_TOKEN } from "../auto-reply/tokens.js";
|
||||
import { DEFAULT_CLI_NAME } from "../cli/cli-name.js";
|
||||
import { listDeliverableMessageChannels } from "../utils/message-channel.js";
|
||||
import { sanitizeForPromptLiteral } from "./sanitize-for-prompt.js";
|
||||
|
||||
/**
|
||||
* Controls which hardcoded sections are included in the system prompt.
|
||||
@ -364,13 +365,17 @@ export function buildAgentSystemPrompt(params: {
|
||||
const promptMode = params.promptMode ?? "full";
|
||||
const isMinimal = promptMode === "minimal" || promptMode === "none";
|
||||
const sandboxContainerWorkspace = params.sandboxInfo?.containerWorkspaceDir?.trim();
|
||||
const sanitizedWorkspaceDir = sanitizeForPromptLiteral(params.workspaceDir);
|
||||
const sanitizedSandboxContainerWorkspace = sandboxContainerWorkspace
|
||||
? sanitizeForPromptLiteral(sandboxContainerWorkspace)
|
||||
: "";
|
||||
const displayWorkspaceDir =
|
||||
params.sandboxInfo?.enabled && sandboxContainerWorkspace
|
||||
? sandboxContainerWorkspace
|
||||
: params.workspaceDir;
|
||||
params.sandboxInfo?.enabled && sanitizedSandboxContainerWorkspace
|
||||
? sanitizedSandboxContainerWorkspace
|
||||
: sanitizedWorkspaceDir;
|
||||
const workspaceGuidance =
|
||||
params.sandboxInfo?.enabled && sandboxContainerWorkspace
|
||||
? `For read/write/edit/apply_patch, file paths resolve against host workspace: ${params.workspaceDir}. Prefer relative paths so both sandboxed exec and file tools work consistently.`
|
||||
params.sandboxInfo?.enabled && sanitizedSandboxContainerWorkspace
|
||||
? `For read/write/edit/apply_patch, file paths resolve against host workspace: ${sanitizedWorkspaceDir}. Prefer relative paths so both sandboxed exec and file tools work consistently.`
|
||||
: "Treat this directory as the single global workspace for file operations unless explicitly instructed otherwise.";
|
||||
const safetySection = [
|
||||
"## Safety",
|
||||
@ -490,21 +495,21 @@ export function buildAgentSystemPrompt(params: {
|
||||
"Some tools may be unavailable due to sandbox policy.",
|
||||
"Sub-agents stay sandboxed (no elevated/host access). Need outside-sandbox read/write? Don't spawn; ask first.",
|
||||
params.sandboxInfo.containerWorkspaceDir
|
||||
? `Sandbox container workdir: ${params.sandboxInfo.containerWorkspaceDir}`
|
||||
? `Sandbox container workdir: ${sanitizeForPromptLiteral(params.sandboxInfo.containerWorkspaceDir)}`
|
||||
: "",
|
||||
params.sandboxInfo.workspaceDir
|
||||
? `Sandbox host workspace: ${params.sandboxInfo.workspaceDir}`
|
||||
? `Sandbox host workspace: ${sanitizeForPromptLiteral(params.sandboxInfo.workspaceDir)}`
|
||||
: "",
|
||||
params.sandboxInfo.workspaceAccess
|
||||
? `Agent workspace access: ${params.sandboxInfo.workspaceAccess}${
|
||||
params.sandboxInfo.agentWorkspaceMount
|
||||
? ` (mounted at ${params.sandboxInfo.agentWorkspaceMount})`
|
||||
? ` (mounted at ${sanitizeForPromptLiteral(params.sandboxInfo.agentWorkspaceMount)})`
|
||||
: ""
|
||||
}`
|
||||
: "",
|
||||
params.sandboxInfo.browserBridgeUrl ? "Sandbox browser: enabled." : "",
|
||||
params.sandboxInfo.browserNoVncUrl
|
||||
? `Sandbox browser observer (noVNC): ${params.sandboxInfo.browserNoVncUrl}`
|
||||
? `Sandbox browser observer (noVNC): ${sanitizeForPromptLiteral(params.sandboxInfo.browserNoVncUrl)}`
|
||||
: "",
|
||||
params.sandboxInfo.hostBrowserAllowed === true
|
||||
? "Host browser control: allowed."
|
||||
|
||||
@ -219,7 +219,8 @@ JOB SCHEMA (for add action):
|
||||
"payload": { ... }, // Required: what to execute
|
||||
"delivery": { ... }, // Optional: announce summary (isolated only)
|
||||
"sessionTarget": "main" | "isolated", // Required
|
||||
"enabled": true | false // Optional, default true
|
||||
"enabled": true | false, // Optional, default true
|
||||
"notify": true | false // Optional webhook opt-in; set true for user-facing reminders
|
||||
}
|
||||
|
||||
SCHEDULE TYPES (schedule.kind):
|
||||
@ -246,6 +247,7 @@ DELIVERY (isolated-only, top-level):
|
||||
CRITICAL CONSTRAINTS:
|
||||
- sessionTarget="main" REQUIRES payload.kind="systemEvent"
|
||||
- sessionTarget="isolated" REQUIRES payload.kind="agentTurn"
|
||||
- For reminders users should be notified about, set notify=true.
|
||||
Default: prefer isolated agentTurn jobs unless the user explicitly wants a main-session system event.
|
||||
|
||||
WAKE MODES (for wake action):
|
||||
@ -292,6 +294,7 @@ Use jobId as the canonical identifier; id is accepted for compatibility. Use con
|
||||
"payload",
|
||||
"delivery",
|
||||
"enabled",
|
||||
"notify",
|
||||
"description",
|
||||
"deleteAfterRun",
|
||||
"agentId",
|
||||
|
||||
@ -192,9 +192,7 @@ describe("image tool implicit imageModel config", () => {
|
||||
});
|
||||
const tool = createImageTool({ config: cfg, agentDir, modelHasVision: true });
|
||||
expect(tool).not.toBeNull();
|
||||
expect(tool?.description).toContain(
|
||||
"Only use this tool when the image was NOT already provided",
|
||||
);
|
||||
expect(tool?.description).toContain("Only use this tool when images were NOT already provided");
|
||||
});
|
||||
|
||||
it("allows workspace images outside default local media roots", async () => {
|
||||
|
||||
@ -26,6 +26,7 @@ import {
|
||||
const DEFAULT_PROMPT = "Describe the image.";
|
||||
const ANTHROPIC_IMAGE_PRIMARY = "anthropic/claude-opus-4-6";
|
||||
const ANTHROPIC_IMAGE_FALLBACK = "anthropic/claude-opus-4-5";
|
||||
const DEFAULT_MAX_IMAGES = 20;
|
||||
|
||||
export const __testing = {
|
||||
decodeDataUrl,
|
||||
@@ -182,15 +183,21 @@ function pickMaxBytes(cfg?: OpenClawConfig, maxBytesMb?: number): number | undef
  return undefined;
}

function buildImageContext(prompt: string, base64: string, mimeType: string): Context {
function buildImageContext(
  prompt: string,
  images: Array<{ base64: string; mimeType: string }>,
): Context {
  const content: Array<
    { type: "text"; text: string } | { type: "image"; data: string; mimeType: string }
  > = [{ type: "text", text: prompt }];
  for (const img of images) {
    content.push({ type: "image", data: img.base64, mimeType: img.mimeType });
  }
  return {
    messages: [
      {
        role: "user",
        content: [
          { type: "text", text: prompt },
          { type: "image", data: base64, mimeType },
        ],
        content,
        timestamp: Date.now(),
      },
    ],
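The reshaped `buildImageContext` in the hunk above switches from a single `(base64, mimeType)` pair to an image array. A minimal standalone sketch of that shape follows; the `SketchContext` and `ContentPart` types here are simplified stand-ins we invented for illustration, not the project's real `Context` definition.

```typescript
// Simplified stand-ins for the project's message types (assumption).
type ContentPart =
  | { type: "text"; text: string }
  | { type: "image"; data: string; mimeType: string };

interface SketchContext {
  messages: Array<{ role: "user"; content: ContentPart[]; timestamp: number }>;
}

// Mirrors the diff: one leading text part, then one image part per input image.
function buildImageContextSketch(
  prompt: string,
  images: Array<{ base64: string; mimeType: string }>,
): SketchContext {
  const content: ContentPart[] = [{ type: "text", text: prompt }];
  for (const img of images) {
    content.push({ type: "image", data: img.base64, mimeType: img.mimeType });
  }
  return { messages: [{ role: "user", content, timestamp: Date.now() }] };
}

const ctx = buildImageContextSketch("Compare these.", [
  { base64: "aGk=", mimeType: "image/png" },
  { base64: "eW8=", mimeType: "image/jpeg" },
]);
console.log(ctx.messages[0].content.length); // 3: one text part plus two images
```

Because the text part always comes first, a single-image call degenerates to the old two-part shape, which keeps the change backward compatible for providers that only read one image.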
@@ -242,8 +249,7 @@ async function runImagePrompt(params: {
  imageModelConfig: ImageModelConfig;
  modelOverride?: string;
  prompt: string;
  base64: string;
  mimeType: string;
  images: Array<{ base64: string; mimeType: string }>;
}): Promise<{
  text: string;
  provider: string;
@@ -285,9 +291,11 @@ async function runImagePrompt(params: {
  });
  const apiKey = requireApiKey(apiKeyInfo, model.provider);
  authStorage.setRuntimeApiKey(model.provider, apiKey);
  const imageDataUrl = `data:${params.mimeType};base64,${params.base64}`;

  // MiniMax VLM only supports a single image; use the first one.
  if (model.provider === "minimax") {
    const first = params.images[0];
    const imageDataUrl = `data:${first.mimeType};base64,${first.base64}`;
    const text = await minimaxUnderstandImage({
      apiKey,
      prompt: params.prompt,
@@ -297,7 +305,7 @@ async function runImagePrompt(params: {
    return { text, provider: model.provider, model: model.id };
  }

  const context = buildImageContext(params.prompt, params.base64, params.mimeType);
  const context = buildImageContext(params.prompt, params.images);
  const message = await complete(model, context, {
    apiKey,
    maxTokens: resolveImageToolMaxTokens(model.maxTokens),
@@ -350,8 +358,8 @@ export function createImageTool(options?: {
  // If model has native vision, images in the prompt are auto-injected
  // so this tool is only needed when image wasn't provided in the prompt
  const description = options?.modelHasVision
    ? "Analyze an image with a vision model. Only use this tool when the image was NOT already provided in the user's message. Images mentioned in the prompt are automatically visible to you."
    : "Analyze an image with the configured image model (agents.defaults.imageModel). Provide a prompt and image path or URL.";
    ? "Analyze one or more images with a vision model. Pass a single image path/URL or an array of up to 20. Only use this tool when images were NOT already provided in the user's message. Images mentioned in the prompt are automatically visible to you."
    : "Analyze one or more images with the configured image model (agents.defaults.imageModel). Pass a single image path/URL or an array of up to 20. Provide a prompt describing what to analyze.";

  const localRoots = (() => {
    const roots = getDefaultLocalRoots();
@@ -368,44 +376,47 @@ export function createImageTool(options?: {
    description,
    parameters: Type.Object({
      prompt: Type.Optional(Type.String()),
      image: Type.String(),
      image: Type.Union([Type.String(), Type.Array(Type.String())]),
      model: Type.Optional(Type.String()),
      maxBytesMb: Type.Optional(Type.Number()),
      maxImages: Type.Optional(Type.Number()),
    }),
    execute: async (_toolCallId, args) => {
      const record = args && typeof args === "object" ? (args as Record<string, unknown>) : {};
      const imageRawInput = typeof record.image === "string" ? record.image.trim() : "";
      const imageRaw = imageRawInput.startsWith("@")
        ? imageRawInput.slice(1).trim()
        : imageRawInput;
      if (!imageRaw) {

      // MARK: - Normalize image input (string | string[])
      const rawImageInput = record.image;
      const imageInputs: string[] = (() => {
        if (typeof rawImageInput === "string") {
          return [rawImageInput];
        }
        if (Array.isArray(rawImageInput)) {
          return rawImageInput.filter((v): v is string => typeof v === "string");
        }
        return [];
      })();
      if (imageInputs.length === 0) {
        throw new Error("image required");
      }

      // The tool accepts file paths, file/data URLs, or http(s) URLs. In some
      // agent/model contexts, images can be referenced as pseudo-URIs like
      // `image:0` (e.g. "first image in the prompt"). We don't have access to a
      // shared image registry here, so fail gracefully instead of attempting to
      // `fs.readFile("image:0")` and producing a noisy ENOENT.
      const looksLikeWindowsDrivePath = /^[a-zA-Z]:[\\/]/.test(imageRaw);
      const hasScheme = /^[a-z][a-z0-9+.-]*:/i.test(imageRaw);
      const isFileUrl = /^file:/i.test(imageRaw);
      const isHttpUrl = /^https?:\/\//i.test(imageRaw);
      const isDataUrl = /^data:/i.test(imageRaw);
      if (hasScheme && !looksLikeWindowsDrivePath && !isFileUrl && !isHttpUrl && !isDataUrl) {
      // MARK: - Enforce max images cap
      const maxImagesRaw = typeof record.maxImages === "number" ? record.maxImages : undefined;
      const maxImages =
        typeof maxImagesRaw === "number" && Number.isFinite(maxImagesRaw) && maxImagesRaw > 0
          ? Math.floor(maxImagesRaw)
          : DEFAULT_MAX_IMAGES;
      if (imageInputs.length > maxImages) {
        return {
          content: [
            {
              type: "text",
              text: `Unsupported image reference: ${imageRawInput}. Use a file path, a file:// URL, a data: URL, or an http(s) URL.`,
              text: `Too many images: ${imageInputs.length} provided, maximum is ${maxImages}. Please reduce the number of images.`,
            },
          ],
          details: {
            error: "unsupported_image_reference",
            image: imageRawInput,
          },
          details: { error: "too_many_images", count: imageInputs.length, max: maxImages },
        };
      }

      const promptRaw =
        typeof record.prompt === "string" && record.prompt.trim()
          ? record.prompt.trim()
@@ -419,73 +430,136 @@ export function createImageTool(options?: {
        options?.sandbox && options?.sandbox.root.trim()
          ? { root: options.sandbox.root.trim(), bridge: options.sandbox.bridge }
          : null;
      const isUrl = isHttpUrl;
      if (sandboxConfig && isUrl) {
        throw new Error("Sandboxed image tool does not allow remote URLs.");
      }

      const resolvedImage = (() => {
        if (sandboxConfig) {
      // MARK: - Load and resolve each image
      const loadedImages: Array<{
        base64: string;
        mimeType: string;
        resolvedImage: string;
        rewrittenFrom?: string;
      }> = [];

      for (const imageRawInput of imageInputs) {
        const trimmed = imageRawInput.trim();
        const imageRaw = trimmed.startsWith("@") ? trimmed.slice(1).trim() : trimmed;
        if (!imageRaw) {
          throw new Error("image required (empty string in array)");
        }

        // The tool accepts file paths, file/data URLs, or http(s) URLs. In some
        // agent/model contexts, images can be referenced as pseudo-URIs like
        // `image:0` (e.g. "first image in the prompt"). We don't have access to a
        // shared image registry here, so fail gracefully instead of attempting to
        // `fs.readFile("image:0")` and producing a noisy ENOENT.
        const looksLikeWindowsDrivePath = /^[a-zA-Z]:[\\/]/.test(imageRaw);
        const hasScheme = /^[a-z][a-z0-9+.-]*:/i.test(imageRaw);
        const isFileUrl = /^file:/i.test(imageRaw);
        const isHttpUrl = /^https?:\/\//i.test(imageRaw);
        const isDataUrl = /^data:/i.test(imageRaw);
        if (hasScheme && !looksLikeWindowsDrivePath && !isFileUrl && !isHttpUrl && !isDataUrl) {
          return {
            content: [
              {
                type: "text",
                text: `Unsupported image reference: ${imageRawInput}. Use a file path, a file:// URL, a data: URL, or an http(s) URL.`,
              },
            ],
            details: {
              error: "unsupported_image_reference",
              image: imageRawInput,
            },
          };
        }

        if (sandboxConfig && isHttpUrl) {
          throw new Error("Sandboxed image tool does not allow remote URLs.");
        }

        const resolvedImage = (() => {
          if (sandboxConfig) {
            return imageRaw;
          }
          if (imageRaw.startsWith("~")) {
            return resolveUserPath(imageRaw);
          }
          return imageRaw;
        }
        if (imageRaw.startsWith("~")) {
          return resolveUserPath(imageRaw);
        }
        return imageRaw;
        })();
      const resolvedPathInfo: { resolved: string; rewrittenFrom?: string } = isDataUrl
        ? { resolved: "" }
        : sandboxConfig
          ? await resolveSandboxedImagePath({
              sandbox: sandboxConfig,
              imagePath: resolvedImage,
            })
          : {
              resolved: resolvedImage.startsWith("file://")
                ? resolvedImage.slice("file://".length)
                : resolvedImage,
            };
      const resolvedPath = isDataUrl ? null : resolvedPathInfo.resolved;
      })();
        const resolvedPathInfo: { resolved: string; rewrittenFrom?: string } = isDataUrl
          ? { resolved: "" }
          : sandboxConfig
            ? await resolveSandboxedImagePath({
                sandbox: sandboxConfig,
                imagePath: resolvedImage,
              })
            : {
                resolved: resolvedImage.startsWith("file://")
                  ? resolvedImage.slice("file://".length)
                  : resolvedImage,
              };
        const resolvedPath = isDataUrl ? null : resolvedPathInfo.resolved;

      const media = isDataUrl
        ? decodeDataUrl(resolvedImage)
        : sandboxConfig
          ? await loadWebMedia(resolvedPath ?? resolvedImage, {
              maxBytes,
              sandboxValidated: true,
              readFile: (filePath) =>
                sandboxConfig.bridge.readFile({ filePath, cwd: sandboxConfig.root }),
            })
          : await loadWebMedia(resolvedPath ?? resolvedImage, {
              maxBytes,
              localRoots,
            });
      if (media.kind !== "image") {
        throw new Error(`Unsupported media type: ${media.kind}`);
        const media = isDataUrl
          ? decodeDataUrl(resolvedImage)
          : sandboxConfig
            ? await loadWebMedia(resolvedPath ?? resolvedImage, {
                maxBytes,
                sandboxValidated: true,
                readFile: (filePath) =>
                  sandboxConfig.bridge.readFile({ filePath, cwd: sandboxConfig.root }),
              })
            : await loadWebMedia(resolvedPath ?? resolvedImage, {
                maxBytes,
                localRoots,
              });
        if (media.kind !== "image") {
          throw new Error(`Unsupported media type: ${media.kind}`);
        }

        const mimeType =
          ("contentType" in media && media.contentType) ||
          ("mimeType" in media && media.mimeType) ||
          "image/png";
        const base64 = media.buffer.toString("base64");
        loadedImages.push({
          base64,
          mimeType,
          resolvedImage,
          ...(resolvedPathInfo.rewrittenFrom
            ? { rewrittenFrom: resolvedPathInfo.rewrittenFrom }
            : {}),
        });
      }

      const mimeType =
        ("contentType" in media && media.contentType) ||
        ("mimeType" in media && media.mimeType) ||
        "image/png";
      const base64 = media.buffer.toString("base64");
      // MARK: - Run image prompt with all loaded images
      const result = await runImagePrompt({
        cfg: options?.config,
        agentDir,
        imageModelConfig,
        modelOverride,
        prompt: promptRaw,
        base64,
        mimeType,
        images: loadedImages.map((img) => ({ base64: img.base64, mimeType: img.mimeType })),
      });

      const imageDetails =
        loadedImages.length === 1
          ? {
              image: loadedImages[0].resolvedImage,
              ...(loadedImages[0].rewrittenFrom
                ? { rewrittenFrom: loadedImages[0].rewrittenFrom }
                : {}),
            }
          : {
              images: loadedImages.map((img) => ({
                image: img.resolvedImage,
                ...(img.rewrittenFrom ? { rewrittenFrom: img.rewrittenFrom } : {}),
              })),
            };

      return {
        content: [{ type: "text", text: result.text }],
        details: {
          model: `${result.provider}/${result.model}`,
          image: resolvedImage,
          ...(resolvedPathInfo.rewrittenFrom
            ? { rewrittenFrom: resolvedPathInfo.rewrittenFrom }
            : {}),
          ...imageDetails,
          attempts: result.attempts,
        },
      };

@@ -1,65 +0,0 @@
import { describe, expect, it, vi } from "vitest";

vi.mock("../../memory/index.js", () => {
  return {
    getMemorySearchManager: async () => {
      return {
        manager: {
          search: async () => {
            throw new Error("openai embeddings failed: 429 insufficient_quota");
          },
          readFile: async () => {
            throw new Error("path required");
          },
          status: () => ({
            files: 0,
            chunks: 0,
            dirty: true,
            workspaceDir: "/tmp",
            dbPath: "/tmp/index.sqlite",
            provider: "openai",
            model: "text-embedding-3-small",
            requestedProvider: "openai",
          }),
        },
      };
    },
  };
});

import { createMemoryGetTool, createMemorySearchTool } from "./memory-tool.js";

describe("memory tools", () => {
  it("does not throw when memory_search fails (e.g. embeddings 429)", async () => {
    const cfg = { agents: { list: [{ id: "main", default: true }] } };
    const tool = createMemorySearchTool({ config: cfg });
    expect(tool).not.toBeNull();
    if (!tool) {
      throw new Error("tool missing");
    }

    const result = await tool.execute("call_1", { query: "hello" });
    expect(result.details).toEqual({
      results: [],
      disabled: true,
      error: "openai embeddings failed: 429 insufficient_quota",
    });
  });

  it("does not throw when memory_get fails", async () => {
    const cfg = { agents: { list: [{ id: "main", default: true }] } };
    const tool = createMemoryGetTool({ config: cfg });
    expect(tool).not.toBeNull();
    if (!tool) {
      throw new Error("tool missing");
    }

    const result = await tool.execute("call_2", { path: "memory/NOPE.md" });
    expect(result.details).toEqual({
      path: "memory/NOPE.md",
      text: "",
      disabled: true,
      error: "path required",
    });
  });
});
@@ -1,18 +1,21 @@
import { beforeEach, describe, expect, it, vi } from "vitest";

let backend: "builtin" | "qmd" = "builtin";
let searchImpl: () => Promise<unknown[]> = async () => [
  {
    path: "MEMORY.md",
    startLine: 5,
    endLine: 7,
    score: 0.9,
    snippet: "@@ -5,3 @@\nAssistant: noted",
    source: "memory" as const,
  },
];
let readFileImpl: () => Promise<string> = async () => "";

const stubManager = {
  search: vi.fn(async () => [
    {
      path: "MEMORY.md",
      startLine: 5,
      endLine: 7,
      score: 0.9,
      snippet: "@@ -5,3 @@\nAssistant: noted",
      source: "memory" as const,
    },
  ]),
  readFile: vi.fn(),
  search: vi.fn(async () => await searchImpl()),
  readFile: vi.fn(async () => await readFileImpl()),
  status: () => ({
    backend,
    files: 1,
@@ -37,9 +40,21 @@ vi.mock("../../memory/index.js", () => {
  };
});

import { createMemorySearchTool } from "./memory-tool.js";
import { createMemoryGetTool, createMemorySearchTool } from "./memory-tool.js";

beforeEach(() => {
  backend = "builtin";
  searchImpl = async () => [
    {
      path: "MEMORY.md",
      startLine: 5,
      endLine: 7,
      score: 0.9,
      snippet: "@@ -5,3 @@\nAssistant: noted",
      source: "memory" as const,
    },
  ];
  readFileImpl = async () => "";
  vi.clearAllMocks();
});

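The refactor in the hunks above swaps fixed `vi.fn` results for mutable `searchImpl`/`readFileImpl` variables so individual tests can override the stub's behavior. The pattern, stripped of vitest specifics into plain TypeScript with illustrative names:

```typescript
// A stub whose behavior each test can swap by reassigning the impl variable,
// instead of baking one canned result into the mock at module-load time.
let searchImpl: () => Promise<string[]> = async () => ["default result"];

const stub = {
  // Delegates at call time, not definition time, so reassignment takes effect.
  search: async () => await searchImpl(),
};

async function demo(): Promise<readonly [string[], string]> {
  const first = await stub.search(); // uses the default impl
  searchImpl = async () => {
    throw new Error("429 insufficient_quota"); // a test-specific override
  };
  try {
    await stub.search();
  } catch (err) {
    return [first, (err as Error).message] as const;
  }
  return [first, "no error"] as const;
}
```

The key detail is the extra closure (`async () => await searchImpl()`): capturing `searchImpl` directly at definition time would freeze the first implementation forever.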
@@ -121,3 +136,46 @@ describe("memory search citations", () => {
    expect(details.results[0]?.snippet).not.toMatch(/Source:/);
  });
});

describe("memory tools", () => {
  it("does not throw when memory_search fails (e.g. embeddings 429)", async () => {
    searchImpl = async () => {
      throw new Error("openai embeddings failed: 429 insufficient_quota");
    };

    const cfg = { agents: { list: [{ id: "main", default: true }] } };
    const tool = createMemorySearchTool({ config: cfg });
    expect(tool).not.toBeNull();
    if (!tool) {
      throw new Error("tool missing");
    }

    const result = await tool.execute("call_1", { query: "hello" });
    expect(result.details).toEqual({
      results: [],
      disabled: true,
      error: "openai embeddings failed: 429 insufficient_quota",
    });
  });

  it("does not throw when memory_get fails", async () => {
    readFileImpl = async () => {
      throw new Error("path required");
    };

    const cfg = { agents: { list: [{ id: "main", default: true }] } };
    const tool = createMemoryGetTool({ config: cfg });
    expect(tool).not.toBeNull();
    if (!tool) {
      throw new Error("tool missing");
    }

    const result = await tool.execute("call_2", { path: "memory/NOPE.md" });
    expect(result.details).toEqual({
      path: "memory/NOPE.md",
      text: "",
      disabled: true,
      error: "path required",
    });
  });
});
@@ -1,103 +0,0 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import { createTestRegistry } from "../../test-utils/channel-plugins.js";

const callGatewayMock = vi.fn();
vi.mock("../../gateway/call.js", () => ({
  callGateway: (opts: unknown) => callGatewayMock(opts),
}));

const loadResolveAnnounceTarget = async () => await import("./sessions-announce-target.js");

const installRegistry = async () => {
  const { setActivePluginRegistry } = await import("../../plugins/runtime.js");
  setActivePluginRegistry(
    createTestRegistry([
      {
        pluginId: "discord",
        source: "test",
        plugin: {
          id: "discord",
          meta: {
            id: "discord",
            label: "Discord",
            selectionLabel: "Discord",
            docsPath: "/channels/discord",
            blurb: "Discord test stub.",
          },
          capabilities: { chatTypes: ["direct", "channel", "thread"] },
          config: {
            listAccountIds: () => ["default"],
            resolveAccount: () => ({}),
          },
        },
      },
      {
        pluginId: "whatsapp",
        source: "test",
        plugin: {
          id: "whatsapp",
          meta: {
            id: "whatsapp",
            label: "WhatsApp",
            selectionLabel: "WhatsApp",
            docsPath: "/channels/whatsapp",
            blurb: "WhatsApp test stub.",
            preferSessionLookupForAnnounceTarget: true,
          },
          capabilities: { chatTypes: ["direct", "group"] },
          config: {
            listAccountIds: () => ["default"],
            resolveAccount: () => ({}),
          },
        },
      },
    ]),
  );
};

describe("resolveAnnounceTarget", () => {
  beforeEach(async () => {
    callGatewayMock.mockReset();
    await installRegistry();
  });

  it("derives non-WhatsApp announce targets from the session key", async () => {
    const { resolveAnnounceTarget } = await loadResolveAnnounceTarget();
    const target = await resolveAnnounceTarget({
      sessionKey: "agent:main:discord:group:dev",
      displayKey: "agent:main:discord:group:dev",
    });
    expect(target).toEqual({ channel: "discord", to: "channel:dev" });
    expect(callGatewayMock).not.toHaveBeenCalled();
  });

  it("hydrates WhatsApp accountId from sessions.list when available", async () => {
    const { resolveAnnounceTarget } = await loadResolveAnnounceTarget();
    callGatewayMock.mockResolvedValueOnce({
      sessions: [
        {
          key: "agent:main:whatsapp:group:123@g.us",
          deliveryContext: {
            channel: "whatsapp",
            to: "123@g.us",
            accountId: "work",
          },
        },
      ],
    });

    const target = await resolveAnnounceTarget({
      sessionKey: "agent:main:whatsapp:group:123@g.us",
      displayKey: "agent:main:whatsapp:group:123@g.us",
    });
    expect(target).toEqual({
      channel: "whatsapp",
      to: "123@g.us",
      accountId: "work",
    });
    expect(callGatewayMock).toHaveBeenCalledTimes(1);
    const first = callGatewayMock.mock.calls[0]?.[0] as { method?: string } | undefined;
    expect(first).toBeDefined();
    expect(first?.method).toBe("sessions.list");
  });
});
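The non-WhatsApp path in the deleted test above derives the announce target from the session key alone. A hedged sketch of that derivation follows; the `group` → `channel:<id>` mapping is inferred solely from the Discord expectation in the test and the function is our illustration, not the real resolver.

```typescript
// Session keys look like `agent:<agentId>:<channel>:<chatType>:<id>`.
// The group -> "channel:<id>" mapping below is an assumption inferred from the
// Discord test expectation; real chat-type handling may differ per channel.
function deriveAnnounceTargetSketch(
  sessionKey: string,
): { channel: string; to: string } | null {
  const parts = sessionKey.split(":");
  if (parts.length < 5 || parts[0] !== "agent") {
    return null; // not a recognizable session key
  }
  const [, , channel, chatType, id] = parts;
  const to = chatType === "group" ? `channel:${id}` : id;
  return { channel, to };
}

console.log(deriveAnnounceTargetSketch("agent:main:discord:group:dev"));
// → { channel: "discord", to: "channel:dev" }
```

WhatsApp deliberately bypasses this derivation (`preferSessionLookupForAnnounceTarget: true`) and hydrates the target, including `accountId`, from `sessions.list` instead.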
Some files were not shown because too many files have changed in this diff.