From 76f834a4cfa2259130813d57de2794addb97f63f Mon Sep 17 00:00:00 2001 From: yapie0 Date: Thu, 19 Mar 2026 19:12:11 +0800 Subject: [PATCH] fix(security-shield): narrow param scanning, add transcript redaction, truncate errors - Add tool_result_persist hook to redact secrets before session transcript storage, preventing secrets from being persisted to disk - Add extractCommandParams() to scan only command-relevant param fields (command, input, code, path, etc.) instead of all params as a blob, reducing false positives from text/description fields - Truncate error messages in audit log to 500 chars, matching params limit - Add tests for extractCommandParams and error truncation (59 tests total) Co-Authored-By: Claude Opus 4.6 --- extensions/security-shield/index.ts | 49 ++++++++++++---- .../security-shield/src/audit-log.test.ts | 17 ++++++ extensions/security-shield/src/audit-log.ts | 5 ++ .../src/dangerous-commands.test.ts | 40 ++++++++++++- .../security-shield/src/dangerous-commands.ts | 57 +++++++++++++++++++ 5 files changed, 156 insertions(+), 12 deletions(-) diff --git a/extensions/security-shield/index.ts b/extensions/security-shield/index.ts index c48b364cb98..d9952a66856 100644 --- a/extensions/security-shield/index.ts +++ b/extensions/security-shield/index.ts @@ -1,16 +1,18 @@ /** * Security Shield plugin for OpenClaw. * - * Registers before_tool_call and after_tool_call hooks to: + * Registers hooks to: * 1. Block dangerous commands (rm -rf, curl|bash, reverse shells, etc.) * 2. Detect and redact secret leaks in tool output (API keys, tokens, etc.) - * 3. Log all tool activity to an audit trail + * 3. Redact secrets from session transcripts before persistence + * 4. Log all tool activity to an audit trail * * Works with all existing tools and extensions — no code changes required. */ import type { OpenClawPluginApi } from "openclaw/plugin-sdk"; import { writeAuditEntry, type AuditEntry } from "./src/audit-log.js"; import { scanForDangerousCommands } from "./src/dangerous-commands.js"; +import { extractCommandParams } from "./src/dangerous-commands.js"; import { scanForLeaks, redactLeaks } from "./src/leak-detector.js"; type ShieldConfig = { @@ -51,13 +53,15 @@ const plugin = { ); // ── before_tool_call: block dangerous commands ────────────── - // Note: scans stringified params broadly. Only "critical" severity - // rules trigger blocking to reduce false positives from text fields. + // Scans only command-relevant param fields (command, input, code, etc.) + // to avoid false positives from text/description fields. api.on("before_tool_call", (event) => { if (config.enforcement === "off") return; - const paramsStr = JSON.stringify(event.params ?? {}); - const matches = scanForDangerousCommands(paramsStr); + const commandText = extractCommandParams(event.params ?? {}); + if (commandText.length === 0) return; + + const matches = scanForDangerousCommands(commandText); if (matches.length === 0) return; @@ -78,7 +82,7 @@ const plugin = { writeAuditEntry({ timestamp: new Date().toISOString(), toolName: event.toolName, - params: redactLeaks(paramsStr), + params: redactLeaks(JSON.stringify(event.params ?? {})), blocked: config.enforcement === "block" && criticals.length > 0, blockReason: criticals.length > 0 ? criticals.map((m) => m.message).join("; ") : undefined, @@ -102,9 +106,8 @@ const plugin = { // ── after_tool_call: log leaks + audit trail (observational) ─ // Note: after_tool_call is fire-and-forget (void hook), so we cannot - // modify event.result here. Actual redaction happens in message_sending. - // A future tool_result_persist hook would allow redaction before - // transcript storage — see openclaw hook docs for updates. + // modify event.result here. Redaction happens in tool_result_persist + // (for transcript) and message_sending (for outbound messages). api.on("after_tool_call", (event) => { const resultStr = event.result != null ? JSON.stringify(event.result) : ""; const findings: AuditEntry["findings"] = []; @@ -124,7 +127,7 @@ const plugin = { } } - // Audit log (redact params to avoid writing secrets to disk) + // Audit log (redact both params and error to avoid writing secrets) if (config.auditLog) { writeAuditEntry({ timestamp: new Date().toISOString(), @@ -138,6 +141,30 @@ const plugin = { } }); + // ── tool_result_persist: redact leaks before transcript storage ── + // Synchronous hook that runs before tool results are written to the + // session JSONL. This prevents secrets from being persisted to disk. + api.on("tool_result_persist", (event) => { + if (!config.leakDetection) return; + + const message = event.message; + if (!message) return; + + const messageStr = JSON.stringify(message); + const leaks = scanForLeaks(messageStr); + if (leaks.length === 0) return; + + for (const leak of leaks) { + logger.warn( + `[Security Shield] Redacting ${leak.message} (${leak.ruleId}) from transcript persistence`, + ); + } + + // Deep-redact the message content before it hits disk + const redacted = JSON.parse(redactLeaks(messageStr)); + return { message: redacted }; + }); + // ── message_sending: redact leaks in outbound messages ────── api.on("message_sending", (event) => { if (!config.leakDetection) return; diff --git a/extensions/security-shield/src/audit-log.test.ts b/extensions/security-shield/src/audit-log.test.ts index 37c94d2a7e7..33a64be58c8 100644 --- a/extensions/security-shield/src/audit-log.test.ts +++ b/extensions/security-shield/src/audit-log.test.ts @@ -69,4 +69,21 @@ describe("writeAuditEntry", () => { expect(lines.length).toBe(2); expect(JSON.parse(lines[1]).blocked).toBe(true); }); + + it("truncates long error messages", () => { + const longError = "Error: " + "x".repeat(1000); + writeAuditEntry({ + timestamp: "2026-01-01T00:00:00Z", + toolName: "shell", + params: "{}", + blocked: false, + findings: [], + error: longError, + }); + + const content = readFileSync(testPath, "utf-8"); + const entry = JSON.parse(content.trim()); + expect(entry.error.length).toBeLessThan(600); + expect(entry.error).toContain("...(truncated)"); + }); }); diff --git a/extensions/security-shield/src/audit-log.ts b/extensions/security-shield/src/audit-log.ts index ac4f3795602..04008a83810 100644 --- a/extensions/security-shield/src/audit-log.ts +++ b/extensions/security-shield/src/audit-log.ts @@ -22,6 +22,7 @@ export type AuditEntry = { }; const MAX_PARAMS_LENGTH = 500; +const MAX_ERROR_LENGTH = 500; let logPath: string | null = null; @@ -48,6 +49,10 @@ export function writeAuditEntry(entry: AuditEntry): void { entry.params.length > MAX_PARAMS_LENGTH ? entry.params.slice(0, MAX_PARAMS_LENGTH) + "...(truncated)" : entry.params, + error: + entry.error && entry.error.length > MAX_ERROR_LENGTH + ? entry.error.slice(0, MAX_ERROR_LENGTH) + "...(truncated)" + : entry.error, }); const path = getLogPath(); const isNew = !existsSync(path); diff --git a/extensions/security-shield/src/dangerous-commands.test.ts b/extensions/security-shield/src/dangerous-commands.test.ts index ec9acb9d36a..24cf0c0041e 100644 --- a/extensions/security-shield/src/dangerous-commands.test.ts +++ b/extensions/security-shield/src/dangerous-commands.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect } from "vitest"; -import { scanForDangerousCommands } from "./dangerous-commands.js"; +import { scanForDangerousCommands, extractCommandParams } from "./dangerous-commands.js"; describe("scanForDangerousCommands", () => { // ── Should detect ────────────────────────────────────────────── @@ -109,3 +109,41 @@ describe("scanForDangerousCommands", () => { expect(m[0].severity).toBe("critical"); }); }); + +describe("extractCommandParams", () => { + it("extracts known command param keys", () => { + const result = extractCommandParams({ command: "rm -rf /", description: "delete files" }); + expect(result).toBe("rm -rf /"); + expect(result).not.toContain("delete files"); + }); + + it("extracts multiple command-relevant keys", () => { + const result = extractCommandParams({ command: "echo hello", path: "/etc/passwd" }); + expect(result).toContain("echo hello"); + expect(result).toContain("/etc/passwd"); + }); + + it("ignores non-string values", () => { + const result = extractCommandParams({ command: "ls", count: 42, verbose: true }); + expect(result).toBe("ls"); + }); + + it("falls back to all string values when no known keys match", () => { + const result = extractCommandParams({ custom_field: "rm -rf /" }); + expect(result).toContain("rm -rf /"); + }); + + it("does not scan description/message fields when command keys present", () => { + const result = extractCommandParams({ + command: "ls -la", + description: "rm -rf / is dangerous", + message: "please run rm -rf / to clean up", + }); + expect(result).toBe("ls -la"); + }); + + it("returns empty string for empty params", () => { + const result = extractCommandParams({}); + expect(result).toBe(""); + }); +}); diff --git a/extensions/security-shield/src/dangerous-commands.ts b/extensions/security-shield/src/dangerous-commands.ts index 15413ef05b5..e3e109d4efd 100644 --- a/extensions/security-shield/src/dangerous-commands.ts +++ b/extensions/security-shield/src/dangerous-commands.ts @@ -160,6 +160,63 @@ const RULES: Rule[] = [ }, ]; +/** + * Parameter keys that typically contain executable commands or file paths. + * Only these fields are scanned for dangerous patterns, reducing false + * positives from benign text fields like descriptions or messages. + */ +const COMMAND_PARAM_KEYS = new Set([ + "command", + "input", + "code", + "script", + "shell", + "bash", + "cmd", + "exec", + "run", + "args", + "arguments", + "path", + "file_path", + "filepath", + "filename", + "file", + "source", + "destination", + "target", + "url", + "content", +]); + +/** + * Extract command-relevant values from tool params. + * Only scans keys that are likely to contain executable commands or paths, + * avoiding false positives from text/description/message fields. + */ +export function extractCommandParams(params: Record): string { + const parts: string[] = []; + + for (const [key, value] of Object.entries(params)) { + const lowerKey = key.toLowerCase(); + if (COMMAND_PARAM_KEYS.has(lowerKey) && typeof value === "string") { + parts.push(value); + } + } + + // If no known command keys matched, fall back to full scan for tools + // that use non-standard param names — but only for string values + if (parts.length === 0) { + for (const value of Object.values(params)) { + if (typeof value === "string") { + parts.push(value); + } + } + } + + return parts.join("\n"); +} + /** * Scan a stringified tool call for dangerous patterns. * Returns all matching rules sorted by severity (critical first).