From 17a0bd633cf14c706f7f2c21e4cc7a6331f38f8e Mon Sep 17 00:00:00 2001
From: Keren
Date: Mon, 9 Mar 2026 23:55:19 +0100
Subject: [PATCH] fix: exec tool loop detection bypassed by volatile details fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

hashToolOutcome() hashed the entire `details` object for exec tool results,
which includes volatile fields (durationMs, pid, startedAt, sessionId) that
change on every invocation. This made every exec call produce a unique hash,
so loop detection never triggered — even when the same command was called
121+ times with identical output.

Add exec-specific handling that only hashes stable fields (status, exitCode,
text), matching the existing pattern for process tool's poll/log actions.

Fixes #34574

Co-Authored-By: Claude Opus 4.6
---
 src/agents/tool-loop-detection.test.ts | 126 +++++++++++++++++++++++++
 src/agents/tool-loop-detection.ts      |  12 +++
 2 files changed, 138 insertions(+)

diff --git a/src/agents/tool-loop-detection.test.ts b/src/agents/tool-loop-detection.test.ts
index 056c5286cbb..0d6f0199509 100644
--- a/src/agents/tool-loop-detection.test.ts
+++ b/src/agents/tool-loop-detection.test.ts
@@ -543,6 +543,132 @@ describe("tool-loop-detection", () => {
       const result = detectToolCallLoop(state, "tool", { arg: 1 }, enabledLoopDetectionConfig);
       expect(result.stuck).toBe(false);
     });
+
+    it("detects repeated exec calls with volatile details fields (#34574)", () => {
+      const state = createState();
+      const execParams = { command: "echo hello", cwd: "/workspace" };
+
+      for (let i = 0; i < WARNING_THRESHOLD; i += 1) {
+        const toolCallId = `exec-${i}`;
+        recordToolCall(state, "exec", execParams, toolCallId);
+        recordToolCallOutcome(state, {
+          toolName: "exec",
+          toolParams: execParams,
+          toolCallId,
+          result: {
+            content: [{ type: "text", text: "hello" }],
+            details: {
+              status: "completed",
+              exitCode: 0,
+              durationMs: 100 + i * 7,
+              aggregated: "hello",
+              cwd: "/workspace",
+            },
+          },
+        });
+      }
+
+      const loopResult = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);
+      expect(loopResult.stuck).toBe(true);
+      if (loopResult.stuck) {
+        expect(loopResult.level).toBe("warning");
+      }
+    });
+
+    it("does not flag exec calls with different commands", () => {
+      const state = createState();
+
+      for (let i = 0; i < WARNING_THRESHOLD + 5; i += 1) {
+        const execParams = { command: `cmd-${i}`, cwd: "/workspace" };
+        const toolCallId = `exec-${i}`;
+        recordToolCall(state, "exec", execParams, toolCallId);
+        recordToolCallOutcome(state, {
+          toolName: "exec",
+          toolParams: execParams,
+          toolCallId,
+          result: {
+            content: [{ type: "text", text: `output ${i}` }],
+            details: {
+              status: "completed",
+              exitCode: 0,
+              durationMs: 50 + i,
+              aggregated: `output ${i}`,
+              cwd: "/workspace",
+            },
+          },
+        });
+      }
+
+      const loopResult = detectToolCallLoop(
+        state,
+        "exec",
+        { command: "cmd-new", cwd: "/workspace" },
+        enabledLoopDetectionConfig,
+      );
+      expect(loopResult.stuck).toBe(false);
+    });
+
+    it("blocks repeated exec calls at critical threshold (#34574)", () => {
+      const state = createState();
+      const execParams = { command: "cat /tmp/status", cwd: "/workspace" };
+
+      for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) {
+        const toolCallId = `exec-crit-${i}`;
+        recordToolCall(state, "exec", execParams, toolCallId);
+        recordToolCallOutcome(state, {
+          toolName: "exec",
+          toolParams: execParams,
+          toolCallId,
+          result: {
+            content: [{ type: "text", text: "same output" }],
+            details: {
+              status: "completed",
+              exitCode: 0,
+              durationMs: 200 + i * 3,
+              aggregated: "same output",
+              cwd: "/workspace",
+            },
+          },
+        });
+      }
+
+      const loopResult = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);
+      expect(loopResult.stuck).toBe(true);
+      if (loopResult.stuck) {
+        expect(loopResult.level).toBe("warning");
+      }
+    });
+
+    it("detects exec loop even with varying pid and startedAt (#34574)", () => {
+      const state = createState();
+      const execParams = { command: "sleep 1 &", cwd: "/workspace" };
+
+      for (let i = 0; i < WARNING_THRESHOLD; i += 1) {
+        const toolCallId = `exec-bg-${i}`;
+        recordToolCall(state, "exec", execParams, toolCallId);
+        recordToolCallOutcome(state, {
+          toolName: "exec",
+          toolParams: execParams,
+          toolCallId,
+          result: {
+            content: [{ type: "text", text: "Command still running" }],
+            details: {
+              status: "running",
+              sessionId: `sess-${1000 + i}`,
+              pid: 40000 + i,
+              startedAt: Date.now() + i * 1000,
+              cwd: "/workspace",
+            },
+          },
+        });
+      }
+
+      const loopResult = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);
+      expect(loopResult.stuck).toBe(true);
+      if (loopResult.stuck) {
+        expect(loopResult.level).toBe("warning");
+      }
+    });
   });

   describe("getToolCallStats", () => {
diff --git a/src/agents/tool-loop-detection.ts b/src/agents/tool-loop-detection.ts
index 1576e7ace9b..7eb0c9cb3de 100644
--- a/src/agents/tool-loop-detection.ts
+++ b/src/agents/tool-loop-detection.ts
@@ -223,6 +223,18 @@ function hashToolOutcome(
     }
   }

+  // Exec tool results contain volatile fields (durationMs, pid, startedAt,
+  // sessionId) that change on every invocation. Hash only the stable
+  // fields so that repeated identical commands are correctly detected as
+  // a loop. See https://github.com/nicepkg/openclaw/issues/34574.
+  if (toolName === "exec") {
+    return digestStable({
+      status: details.status,
+      exitCode: details.exitCode ?? null,
+      text,
+    });
+  }
+
   return digestStable({
     details,
     text,