fix(security-shield): narrow param scanning, add transcript redaction, truncate errors

- Add tool_result_persist hook to redact secrets before session transcript
  storage, preventing secrets from being persisted to disk
- Add extractCommandParams() to scan only command-relevant param fields
  (command, input, code, path, etc.) instead of all params as a blob,
  reducing false positives from text/description fields
- Truncate error messages in audit log to 500 chars, matching params limit
- Add tests for extractCommandParams and error truncation (59 tests total)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
yapie0 2026-03-19 19:12:11 +08:00
parent 5012795ba3
commit 76f834a4cf
5 changed files with 156 additions and 12 deletions

View File

@ -1,16 +1,18 @@
/**
* Security Shield plugin for OpenClaw.
*
* Registers before_tool_call and after_tool_call hooks to:
* Registers hooks to:
* 1. Block dangerous commands (rm -rf, curl|bash, reverse shells, etc.)
* 2. Detect and redact secret leaks in tool output (API keys, tokens, etc.)
* 3. Log all tool activity to an audit trail
* 3. Redact secrets from session transcripts before persistence
* 4. Log all tool activity to an audit trail
*
* Works with all existing tools and extensions no code changes required.
*/
import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
import { writeAuditEntry, type AuditEntry } from "./src/audit-log.js";
import { scanForDangerousCommands } from "./src/dangerous-commands.js";
import { extractCommandParams } from "./src/dangerous-commands.js";
import { scanForLeaks, redactLeaks } from "./src/leak-detector.js";
type ShieldConfig = {
@ -51,13 +53,15 @@ const plugin = {
);
// ── before_tool_call: block dangerous commands ──────────────
// Note: scans stringified params broadly. Only "critical" severity
// rules trigger blocking to reduce false positives from text fields.
// Scans only command-relevant param fields (command, input, code, etc.)
// to avoid false positives from text/description fields.
api.on("before_tool_call", (event) => {
if (config.enforcement === "off") return;
const paramsStr = JSON.stringify(event.params ?? {});
const matches = scanForDangerousCommands(paramsStr);
const commandText = extractCommandParams(event.params ?? {});
if (commandText.length === 0) return;
const matches = scanForDangerousCommands(commandText);
if (matches.length === 0) return;
@ -78,7 +82,7 @@ const plugin = {
writeAuditEntry({
timestamp: new Date().toISOString(),
toolName: event.toolName,
params: redactLeaks(paramsStr),
params: redactLeaks(JSON.stringify(event.params ?? {})),
blocked: config.enforcement === "block" && criticals.length > 0,
blockReason:
criticals.length > 0 ? criticals.map((m) => m.message).join("; ") : undefined,
@ -102,9 +106,8 @@ const plugin = {
// ── after_tool_call: log leaks + audit trail (observational) ─
// Note: after_tool_call is fire-and-forget (void hook), so we cannot
// modify event.result here. Actual redaction happens in message_sending.
// A future tool_result_persist hook would allow redaction before
// transcript storage — see openclaw hook docs for updates.
// modify event.result here. Redaction happens in tool_result_persist
// (for transcript) and message_sending (for outbound messages).
api.on("after_tool_call", (event) => {
const resultStr = event.result != null ? JSON.stringify(event.result) : "";
const findings: AuditEntry["findings"] = [];
@ -124,7 +127,7 @@ const plugin = {
}
}
// Audit log (redact params to avoid writing secrets to disk)
// Audit log (redact both params and error to avoid writing secrets)
if (config.auditLog) {
writeAuditEntry({
timestamp: new Date().toISOString(),
@ -138,6 +141,30 @@ const plugin = {
}
});
// ── tool_result_persist: redact leaks before transcript storage ──
// Synchronous hook that runs before tool results are written to the
// session JSONL. This prevents secrets from being persisted to disk.
api.on("tool_result_persist", (event) => {
if (!config.leakDetection) return;
const message = event.message;
if (!message) return;
const messageStr = JSON.stringify(message);
const leaks = scanForLeaks(messageStr);
if (leaks.length === 0) return;
for (const leak of leaks) {
logger.warn(
`[Security Shield] Redacting ${leak.message} (${leak.ruleId}) from transcript persistence`,
);
}
// Deep-redact the message content before it hits disk
const redacted = JSON.parse(redactLeaks(messageStr));
return { message: redacted };
});
// ── message_sending: redact leaks in outbound messages ──────
api.on("message_sending", (event) => {
if (!config.leakDetection) return;

View File

@ -69,4 +69,21 @@ describe("writeAuditEntry", () => {
expect(lines.length).toBe(2);
expect(JSON.parse(lines[1]).blocked).toBe(true);
});
it("truncates long error messages", () => {
const longError = "Error: " + "x".repeat(1000);
writeAuditEntry({
timestamp: "2026-01-01T00:00:00Z",
toolName: "shell",
params: "{}",
blocked: false,
findings: [],
error: longError,
});
const content = readFileSync(testPath, "utf-8");
const entry = JSON.parse(content.trim());
expect(entry.error.length).toBeLessThan(600);
expect(entry.error).toContain("...(truncated)");
});
});

View File

@ -22,6 +22,7 @@ export type AuditEntry = {
};
const MAX_PARAMS_LENGTH = 500;
const MAX_ERROR_LENGTH = 500;
let logPath: string | null = null;
@ -48,6 +49,10 @@ export function writeAuditEntry(entry: AuditEntry): void {
entry.params.length > MAX_PARAMS_LENGTH
? entry.params.slice(0, MAX_PARAMS_LENGTH) + "...(truncated)"
: entry.params,
error:
entry.error && entry.error.length > MAX_ERROR_LENGTH
? entry.error.slice(0, MAX_ERROR_LENGTH) + "...(truncated)"
: entry.error,
});
const path = getLogPath();
const isNew = !existsSync(path);

View File

@ -1,5 +1,5 @@
import { describe, it, expect } from "vitest";
import { scanForDangerousCommands } from "./dangerous-commands.js";
import { scanForDangerousCommands, extractCommandParams } from "./dangerous-commands.js";
describe("scanForDangerousCommands", () => {
// ── Should detect ──────────────────────────────────────────────
@ -109,3 +109,41 @@ describe("scanForDangerousCommands", () => {
expect(m[0].severity).toBe("critical");
});
});
describe("extractCommandParams", () => {
it("extracts known command param keys", () => {
const result = extractCommandParams({ command: "rm -rf /", description: "delete files" });
expect(result).toBe("rm -rf /");
expect(result).not.toContain("delete files");
});
it("extracts multiple command-relevant keys", () => {
const result = extractCommandParams({ command: "echo hello", path: "/etc/passwd" });
expect(result).toContain("echo hello");
expect(result).toContain("/etc/passwd");
});
it("ignores non-string values", () => {
const result = extractCommandParams({ command: "ls", count: 42, verbose: true });
expect(result).toBe("ls");
});
it("falls back to all string values when no known keys match", () => {
const result = extractCommandParams({ custom_field: "rm -rf /" });
expect(result).toContain("rm -rf /");
});
it("does not scan description/message fields when command keys present", () => {
const result = extractCommandParams({
command: "ls -la",
description: "rm -rf / is dangerous",
message: "please run rm -rf / to clean up",
});
expect(result).toBe("ls -la");
});
it("returns empty string for empty params", () => {
const result = extractCommandParams({});
expect(result).toBe("");
});
});

View File

@ -160,6 +160,63 @@ const RULES: Rule[] = [
},
];
/**
* Parameter keys that typically contain executable commands or file paths.
* Only these fields are scanned for dangerous patterns, reducing false
* positives from benign text fields like descriptions or messages.
*/
const COMMAND_PARAM_KEYS = new Set([
"command",
"input",
"code",
"script",
"shell",
"bash",
"cmd",
"exec",
"run",
"args",
"arguments",
"path",
"file_path",
"filepath",
"filename",
"file",
"source",
"destination",
"target",
"url",
"content",
]);
/**
* Extract command-relevant values from tool params.
* Only scans keys that are likely to contain executable commands or paths,
* avoiding false positives from text/description/message fields.
*/
export function extractCommandParams(params: Record<string, unknown>): string {
const parts: string[] = [];
for (const [key, value] of Object.entries(params)) {
const lowerKey = key.toLowerCase();
if (COMMAND_PARAM_KEYS.has(lowerKey) && typeof value === "string") {
parts.push(value);
}
}
// If no known command keys matched, fall back to full scan for tools
// that use non-standard param names — but only for string values
if (parts.length === 0) {
for (const value of Object.values(params)) {
if (typeof value === "string") {
parts.push(value);
}
}
}
return parts.join("\n");
}
/**
* Scan a stringified tool call for dangerous patterns.
* Returns all matching rules sorted by severity (critical first).