fix: exec tool loop detection bypassed by volatile details fields

hashToolOutcome() hashed the entire `details` object for exec tool
results, which includes volatile fields (durationMs, pid, startedAt,
sessionId) that change on every invocation. This made every exec call
produce a unique hash, so loop detection never triggered — even when
the same command was called 121+ times with identical output.

Add exec-specific handling that hashes only the stable fields (status,
exitCode, text), matching the existing pattern for the process tool's
poll/log actions.

Fixes #34574

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Keren 2026-03-09 23:55:19 +01:00
parent 391f9430ca
commit 17a0bd633c
2 changed files with 138 additions and 0 deletions

View File

@ -543,6 +543,132 @@ describe("tool-loop-detection", () => {
const result = detectToolCallLoop(state, "tool", { arg: 1 }, enabledLoopDetectionConfig);
expect(result.stuck).toBe(false);
});
// Regression test for #34574: the same exec command is recorded
// WARNING_THRESHOLD times with identical output, while `durationMs` differs
// on every outcome. The detector is expected to report a "warning" loop.
it("detects repeated exec calls with volatile details fields (#34574)", () => {
  const state = createState();
  const execParams = { command: "echo hello", cwd: "/workspace" };

  let run = 0;
  while (run < WARNING_THRESHOLD) {
    const toolCallId = `exec-${run}`;
    // The only value that changes between recorded outcomes.
    const durationMs = 100 + run * 7;

    recordToolCall(state, "exec", execParams, toolCallId);
    recordToolCallOutcome(state, {
      toolName: "exec",
      toolParams: execParams,
      toolCallId,
      result: {
        content: [{ type: "text", text: "hello" }],
        details: {
          status: "completed",
          exitCode: 0,
          durationMs,
          aggregated: "hello",
          cwd: "/workspace",
        },
      },
    });

    run += 1;
  }

  const verdict = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);
  expect(verdict.stuck).toBe(true);
  // Guard kept so `level` is only read on a stuck result.
  if (verdict.stuck) {
    expect(verdict.level).toBe("warning");
  }
});
// Negative case: every recorded exec call uses a distinct command and yields
// distinct output, so a further, previously unseen command must not be
// reported as stuck.
it("does not flag exec calls with different commands", () => {
  const state = createState();
  const totalCalls = WARNING_THRESHOLD + 5;

  let run = 0;
  while (run < totalCalls) {
    const execParams = { command: `cmd-${run}`, cwd: "/workspace" };
    const toolCallId = `exec-${run}`;
    // The same text is reported both as content and as aggregated output.
    const output = `output ${run}`;

    recordToolCall(state, "exec", execParams, toolCallId);
    recordToolCallOutcome(state, {
      toolName: "exec",
      toolParams: execParams,
      toolCallId,
      result: {
        content: [{ type: "text", text: output }],
        details: {
          status: "completed",
          exitCode: 0,
          durationMs: 50 + run,
          aggregated: output,
          cwd: "/workspace",
        },
      },
    });

    run += 1;
  }

  // Probe with a command that was never recorded above.
  const unseenParams = { command: "cmd-new", cwd: "/workspace" };
  const verdict = detectToolCallLoop(state, "exec", unseenParams, enabledLoopDetectionConfig);
  expect(verdict.stuck).toBe(false);
});
// Regression test for #34574: the same exec command is recorded
// CRITICAL_THRESHOLD times with identical output and a varying `durationMs`.
// The assertions below pin the reported level to "warning" even at this
// repetition count, so the title describes a warning rather than a block.
// NOTE(review): presumably exec is covered by a warn-only repeat detector;
// confirm against detectToolCallLoop before tightening this to "critical".
it("keeps repeated exec calls at warning level at critical threshold (#34574)", () => {
  const state = createState();
  const execParams = { command: "cat /tmp/status", cwd: "/workspace" };

  for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) {
    const toolCallId = `exec-crit-${i}`;
    recordToolCall(state, "exec", execParams, toolCallId);
    recordToolCallOutcome(state, {
      toolName: "exec",
      toolParams: execParams,
      toolCallId,
      result: {
        content: [{ type: "text", text: "same output" }],
        details: {
          status: "completed",
          exitCode: 0,
          // Volatile field: differs on every recorded outcome.
          durationMs: 200 + i * 3,
          aggregated: "same output",
          cwd: "/workspace",
        },
      },
    });
  }

  const loopResult = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);
  expect(loopResult.stuck).toBe(true);
  // Guard kept so `level` is only read on a stuck result.
  if (loopResult.stuck) {
    expect(loopResult.level).toBe("warning");
  }
});
// Regression test for #34574, "still running" variant: each recorded outcome
// carries a different `sessionId`, `pid` and `startedAt`, while the command,
// status and text stay the same. A "warning" loop is expected.
it("detects exec loop even with varying pid and startedAt (#34574)", () => {
  const state = createState();
  const execParams = { command: "sleep 1 &", cwd: "/workspace" };

  let run = 0;
  while (run < WARNING_THRESHOLD) {
    const toolCallId = `exec-bg-${run}`;
    recordToolCall(state, "exec", execParams, toolCallId);

    // Per-invocation identifiers that differ on every iteration.
    const sessionId = `sess-${1000 + run}`;
    const pid = 40000 + run;

    recordToolCallOutcome(state, {
      toolName: "exec",
      toolParams: execParams,
      toolCallId,
      result: {
        content: [{ type: "text", text: "Command still running" }],
        details: {
          status: "running",
          sessionId,
          pid,
          startedAt: Date.now() + run * 1000,
          cwd: "/workspace",
        },
      },
    });

    run += 1;
  }

  const verdict = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);
  expect(verdict.stuck).toBe(true);
  // Guard kept so `level` is only read on a stuck result.
  if (verdict.stuck) {
    expect(verdict.level).toBe("warning");
  }
});
});
describe("getToolCallStats", () => {

View File

@ -223,6 +223,18 @@ function hashToolOutcome(
}
}
// Exec tool results contain volatile fields (durationMs, pid, startedAt,
// sessionId) that change on every invocation. Hash only the stable
// fields so that repeated identical commands are correctly detected as
// a loop. See https://github.com/nicepkg/openclaw/issues/34574.
if (toolName === "exec") {
return digestStable({
status: details.status,
exitCode: details.exitCode ?? null,
text,
});
}
return digestStable({
details,
text,