fix(security-shield): narrow param scanning, add transcript redaction, truncate errors

- Add tool_result_persist hook to redact secrets before session transcript storage, preventing secrets from being persisted to disk - Add extractCommandParams() to scan only command-relevant param fields (command, input, code, path, etc.) instead of all params as a blob, reducing false positives from text/description fields - Truncate error messages in audit log to 500 chars, matching params limit - Add tests for extractCommandParams and error truncation (59 tests total) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-19 19:12:11 +08:00 · 2026-03-19 19:12:11 +08:00 · 76f834a4cf
commit 76f834a4cf
parent 5012795ba3
5 changed files with 156 additions and 12 deletions
--- a/extensions/security-shield/index.ts
+++ b/extensions/security-shield/index.ts
@ -1,16 +1,18 @@
 /**
 * Security Shield plugin for OpenClaw.
 *
- * Registers before_tool_call and after_tool_call hooks to:
+ * Registers hooks to:
 * 1. Block dangerous commands (rm -rf, curl|bash, reverse shells, etc.)
 * 2. Detect and redact secret leaks in tool output (API keys, tokens, etc.)
- * 3. Log all tool activity to an audit trail
+ * 3. Redact secrets from session transcripts before persistence
+ * 4. Log all tool activity to an audit trail
 *
 * Works with all existing tools and extensions — no code changes required.
 */
 import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
 import { writeAuditEntry, type AuditEntry } from "./src/audit-log.js";
 import { scanForDangerousCommands } from "./src/dangerous-commands.js";
+import { extractCommandParams } from "./src/dangerous-commands.js";
 import { scanForLeaks, redactLeaks } from "./src/leak-detector.js";

 type ShieldConfig = {
@ -51,13 +53,15 @@ const plugin = {
    );

    // ── before_tool_call: block dangerous commands ──────────────
-    // Note: scans stringified params broadly. Only "critical" severity
-    // rules trigger blocking to reduce false positives from text fields.
+    // Scans only command-relevant param fields (command, input, code, etc.)
+    // to avoid false positives from text/description fields.
    api.on("before_tool_call", (event) => {
      if (config.enforcement === "off") return;

-      const paramsStr = JSON.stringify(event.params ?? {});
-      const matches = scanForDangerousCommands(paramsStr);
+      const commandText = extractCommandParams(event.params ?? {});
+      if (commandText.length === 0) return;
+
+      const matches = scanForDangerousCommands(commandText);

      if (matches.length === 0) return;

@ -78,7 +82,7 @@ const plugin = {
        writeAuditEntry({
          timestamp: new Date().toISOString(),
          toolName: event.toolName,
-          params: redactLeaks(paramsStr),
+          params: redactLeaks(JSON.stringify(event.params ?? {})),
          blocked: config.enforcement === "block" && criticals.length > 0,
          blockReason:
            criticals.length > 0 ? criticals.map((m) => m.message).join("; ") : undefined,
@ -102,9 +106,8 @@ const plugin = {

    // ── after_tool_call: log leaks + audit trail (observational) ─
    // Note: after_tool_call is fire-and-forget (void hook), so we cannot
-    // modify event.result here. Actual redaction happens in message_sending.
-    // A future tool_result_persist hook would allow redaction before
-    // transcript storage — see openclaw hook docs for updates.
+    // modify event.result here. Redaction happens in tool_result_persist
+    // (for transcript) and message_sending (for outbound messages).
    api.on("after_tool_call", (event) => {
      const resultStr = event.result != null ? JSON.stringify(event.result) : "";
      const findings: AuditEntry["findings"] = [];
@ -124,7 +127,7 @@ const plugin = {
        }
      }

-      // Audit log (redact params to avoid writing secrets to disk)
+      // Audit log (redact both params and error to avoid writing secrets)
      if (config.auditLog) {
        writeAuditEntry({
          timestamp: new Date().toISOString(),
@ -138,6 +141,30 @@ const plugin = {
      }
    });

+    // ── tool_result_persist: redact leaks before transcript storage ──
+    // Synchronous hook that runs before tool results are written to the
+    // session JSONL. This prevents secrets from being persisted to disk.
+    api.on("tool_result_persist", (event) => {
+      if (!config.leakDetection) return;
+
+      const message = event.message;
+      if (!message) return;
+
+      const messageStr = JSON.stringify(message);
+      const leaks = scanForLeaks(messageStr);
+      if (leaks.length === 0) return;
+
+      for (const leak of leaks) {
+        logger.warn(
+          `[Security Shield] Redacting ${leak.message} (${leak.ruleId}) from transcript persistence`,
+        );
+      }
+
+      // Deep-redact the message content before it hits disk
+      const redacted = JSON.parse(redactLeaks(messageStr));
+      return { message: redacted };
+    });
+
    // ── message_sending: redact leaks in outbound messages ──────
    api.on("message_sending", (event) => {
      if (!config.leakDetection) return;
--- a/extensions/security-shield/src/audit-log.test.ts
+++ b/extensions/security-shield/src/audit-log.test.ts
@ -69,4 +69,21 @@ describe("writeAuditEntry", () => {
    expect(lines.length).toBe(2);
    expect(JSON.parse(lines[1]).blocked).toBe(true);
  });
+
+  it("truncates long error messages", () => {
+    const longError = "Error: " + "x".repeat(1000);
+    writeAuditEntry({
+      timestamp: "2026-01-01T00:00:00Z",
+      toolName: "shell",
+      params: "{}",
+      blocked: false,
+      findings: [],
+      error: longError,
+    });
+
+    const content = readFileSync(testPath, "utf-8");
+    const entry = JSON.parse(content.trim());
+    expect(entry.error.length).toBeLessThan(600);
+    expect(entry.error).toContain("...(truncated)");
+  });
 });
--- a/extensions/security-shield/src/audit-log.ts
+++ b/extensions/security-shield/src/audit-log.ts
@ -22,6 +22,7 @@ export type AuditEntry = {
 };

 const MAX_PARAMS_LENGTH = 500;
+const MAX_ERROR_LENGTH = 500;

 let logPath: string | null = null;

@ -48,6 +49,10 @@ export function writeAuditEntry(entry: AuditEntry): void {
        entry.params.length > MAX_PARAMS_LENGTH
          ? entry.params.slice(0, MAX_PARAMS_LENGTH) + "...(truncated)"
          : entry.params,
+      error:
+        entry.error && entry.error.length > MAX_ERROR_LENGTH
+          ? entry.error.slice(0, MAX_ERROR_LENGTH) + "...(truncated)"
+          : entry.error,
    });
    const path = getLogPath();
    const isNew = !existsSync(path);
--- a/extensions/security-shield/src/dangerous-commands.test.ts
+++ b/extensions/security-shield/src/dangerous-commands.test.ts
@ -1,5 +1,5 @@
 import { describe, it, expect } from "vitest";
-import { scanForDangerousCommands } from "./dangerous-commands.js";
+import { scanForDangerousCommands, extractCommandParams } from "./dangerous-commands.js";

 describe("scanForDangerousCommands", () => {
  // ── Should detect ──────────────────────────────────────────────
@ -109,3 +109,41 @@ describe("scanForDangerousCommands", () => {
    expect(m[0].severity).toBe("critical");
  });
 });
+
+describe("extractCommandParams", () => {
+  it("extracts known command param keys", () => {
+    const result = extractCommandParams({ command: "rm -rf /", description: "delete files" });
+    expect(result).toBe("rm -rf /");
+    expect(result).not.toContain("delete files");
+  });
+
+  it("extracts multiple command-relevant keys", () => {
+    const result = extractCommandParams({ command: "echo hello", path: "/etc/passwd" });
+    expect(result).toContain("echo hello");
+    expect(result).toContain("/etc/passwd");
+  });
+
+  it("ignores non-string values", () => {
+    const result = extractCommandParams({ command: "ls", count: 42, verbose: true });
+    expect(result).toBe("ls");
+  });
+
+  it("falls back to all string values when no known keys match", () => {
+    const result = extractCommandParams({ custom_field: "rm -rf /" });
+    expect(result).toContain("rm -rf /");
+  });
+
+  it("does not scan description/message fields when command keys present", () => {
+    const result = extractCommandParams({
+      command: "ls -la",
+      description: "rm -rf / is dangerous",
+      message: "please run rm -rf / to clean up",
+    });
+    expect(result).toBe("ls -la");
+  });
+
+  it("returns empty string for empty params", () => {
+    const result = extractCommandParams({});
+    expect(result).toBe("");
+  });
+});
--- a/extensions/security-shield/src/dangerous-commands.ts
+++ b/extensions/security-shield/src/dangerous-commands.ts
@ -160,6 +160,63 @@ const RULES: Rule[] = [
  },
 ];

+/**
+ * Parameter keys that typically contain executable commands or file paths.
+ * Only these fields are scanned for dangerous patterns, reducing false
+ * positives from benign text fields like descriptions or messages.
+ */
+const COMMAND_PARAM_KEYS = new Set([
+  "command",
+  "input",
+  "code",
+  "script",
+  "shell",
+  "bash",
+  "cmd",
+  "exec",
+  "run",
+  "args",
+  "arguments",
+  "path",
+  "file_path",
+  "filepath",
+  "filename",
+  "file",
+  "source",
+  "destination",
+  "target",
+  "url",
+  "content",
+]);
+
+/**
+ * Extract command-relevant values from tool params.
+ * Only scans keys that are likely to contain executable commands or paths,
+ * avoiding false positives from text/description/message fields.
+ */
+export function extractCommandParams(params: Record<string, unknown>): string {
+  const parts: string[] = [];
+
+  for (const [key, value] of Object.entries(params)) {
+    const lowerKey = key.toLowerCase();
+    if (COMMAND_PARAM_KEYS.has(lowerKey) && typeof value === "string") {
+      parts.push(value);
+    }
+  }
+
+  // If no known command keys matched, fall back to full scan for tools
+  // that use non-standard param names — but only for string values
+  if (parts.length === 0) {
+    for (const value of Object.values(params)) {
+      if (typeof value === "string") {
+        parts.push(value);
+      }
+    }
+  }
+
+  return parts.join("\n");
+}
+
 /**
 * Scan a stringified tool call for dangerous patterns.
 * Returns all matching rules sorted by severity (critical first).