Merge e067ee167dd7984265bc9358d68c05744980adbf into 6b4c24c2e55b5b4013277bd799525086f6a0c40f

2026-03-21 05:40:42 +01:00 · 2026-03-21 05:40:42 +01:00 · 5497925912
commit 5497925912
parent 6b4c24c2e5 e067ee167d
2 changed files with 229 additions and 0 deletions
--- a/src/agents/tool-loop-detection.test.ts
+++ b/src/agents/tool-loop-detection.test.ts
@ -543,6 +543,210 @@ describe("tool-loop-detection", () => {
      const result = detectToolCallLoop(state, "tool", { arg: 1 }, enabledLoopDetectionConfig);
      expect(result.stuck).toBe(false);
    });
+
+    it("detects repeated exec calls with volatile details fields (#34574)", () => {
+      const params = { command: "echo hello", cwd: "/workspace" };
+      const state = createState();
+      // Identical command each round; only durationMs differs between outcomes.
+      for (let round = 0; round < WARNING_THRESHOLD; round += 1) {
+        const callId = `exec-${round}`;
+        recordToolCall(state, "exec", params, callId);
+        recordToolCallOutcome(state, {
+          toolName: "exec",
+          toolParams: params,
+          toolCallId: callId,
+          result: {
+            content: [{ type: "text", text: "hello" }],
+            details: {
+              status: "completed",
+              exitCode: 0,
+              durationMs: 100 + round * 7,
+              aggregated: "hello",
+              cwd: "/workspace",
+            },
+          },
+        });
+      }
+
+      const verdict = detectToolCallLoop(state, "exec", params, enabledLoopDetectionConfig);
+      expect(verdict.stuck).toBe(true);
+      if (verdict.stuck) {
+        expect(verdict.level).toBe("warning");
+      }
+    });
+
+    it("does not flag exec calls with different commands", () => {
+      const state = createState();
+      const totalCalls = WARNING_THRESHOLD + 5;
+
+      // Every round runs a distinct command and produces distinct output.
+      for (let round = 0; round < totalCalls; round += 1) {
+        const callId = `exec-${round}`;
+        const params = { command: `cmd-${round}`, cwd: "/workspace" };
+        recordToolCall(state, "exec", params, callId);
+        recordToolCallOutcome(state, {
+          toolName: "exec",
+          toolParams: params,
+          toolCallId: callId,
+          result: {
+            content: [{ type: "text", text: `output ${round}` }],
+            details: {
+              status: "completed",
+              exitCode: 0,
+              durationMs: 50 + round,
+              aggregated: `output ${round}`,
+              cwd: "/workspace",
+            },
+          },
+        });
+      }
+
+      // A command not seen before must not be reported, even though the
+      // history already holds more than WARNING_THRESHOLD exec calls.
+      const freshParams = { command: "cmd-new", cwd: "/workspace" };
+      const verdict = detectToolCallLoop(state, "exec", freshParams, enabledLoopDetectionConfig);
+      expect(verdict.stuck).toBe(false);
+    });
+
+    it("warns for exec calls repeated past warning threshold (#34574)", () => {
+      const params = { command: "cat /tmp/status", cwd: "/workspace" };
+      const state = createState();
+      // Repeats CRITICAL_THRESHOLD times; the assertions still expect "warning".
+      for (let round = 0; round < CRITICAL_THRESHOLD; round += 1) {
+        const callId = `exec-crit-${round}`;
+        recordToolCall(state, "exec", params, callId);
+        recordToolCallOutcome(state, {
+          toolName: "exec",
+          toolParams: params,
+          toolCallId: callId,
+          result: {
+            content: [{ type: "text", text: "same output" }],
+            details: {
+              status: "completed",
+              exitCode: 0,
+              durationMs: 200 + round * 3,
+              aggregated: "same output",
+              cwd: "/workspace",
+            },
+          },
+        });
+      }
+
+      const verdict = detectToolCallLoop(state, "exec", params, enabledLoopDetectionConfig);
+      expect(verdict.stuck).toBe(true);
+      if (verdict.stuck) {
+        expect(verdict.level).toBe("warning");
+      }
+    });
+
+    it("triggers global circuit breaker for exec at 30 repetitions (#34574)", () => {
+      const params = { command: "cat /tmp/status", cwd: "/workspace" };
+      const state = createState();
+      // Repeats GLOBAL_CIRCUIT_BREAKER_THRESHOLD times with identical output.
+      for (let round = 0; round < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; round += 1) {
+        const callId = `exec-gcb-${round}`;
+        recordToolCall(state, "exec", params, callId);
+        recordToolCallOutcome(state, {
+          toolName: "exec",
+          toolParams: params,
+          toolCallId: callId,
+          result: {
+            content: [{ type: "text", text: "same output" }],
+            details: {
+              status: "completed",
+              exitCode: 0,
+              durationMs: 300 + round * 5,
+              aggregated: "same output",
+              cwd: "/workspace",
+            },
+          },
+        });
+      }
+
+      const verdict = detectToolCallLoop(state, "exec", params, enabledLoopDetectionConfig);
+      expect(verdict.stuck).toBe(true);
+      if (verdict.stuck) {
+        expect(verdict.level).toBe("critical");
+        expect(verdict.detector).toBe("global_circuit_breaker");
+      }
+    });
+
+    it("detects exec loop even with varying pid and startedAt (#34574)", () => {
+      const params = { command: "sleep 1 &", cwd: "/workspace" };
+      const state = createState();
+      for (let round = 0; round < WARNING_THRESHOLD; round += 1) {
+        const callId = `exec-bg-${round}`;
+        const pid = 40000 + round;
+        recordToolCall(state, "exec", params, callId);
+        recordToolCallOutcome(state, {
+          toolName: "exec",
+          toolParams: params,
+          toolCallId: callId,
+          result: {
+            // The content text carries the per-call session id and pid, as real exec does
+            content: [
+              {
+                type: "text",
+                text: `Command still running (session sess-${1000 + round}, pid ${pid}). Use process for follow-up.`,
+              },
+            ],
+            details: {
+              status: "running",
+              sessionId: `sess-${1000 + round}`,
+              pid,
+              startedAt: Date.now() + round * 1000,
+              cwd: "/workspace",
+            },
+          },
+        });
+      }
+
+      const verdict = detectToolCallLoop(state, "exec", params, enabledLoopDetectionConfig);
+      expect(verdict.stuck).toBe(true);
+      if (verdict.stuck) {
+        expect(verdict.level).toBe("warning");
+      }
+    });
+
+    it("keeps running exec loop at warning when tail output progresses (#34574)", () => {
+      const state = createState();
+      const execParams = { command: "make build", cwd: "/workspace" };
+
+      for (let i = 0; i < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; i += 1) {
+        const toolCallId = `exec-tail-${i}`;
+        recordToolCall(state, "exec", execParams, toolCallId);
+        recordToolCallOutcome(state, {
+          toolName: "exec",
+          toolParams: execParams,
+          toolCallId,
+          result: {
+            content: [
+              {
+                type: "text",
+                text: `Command still running (session sess-${i}, pid ${50000 + i}).`,
+              },
+            ],
+            details: {
+              status: "running",
+              sessionId: `sess-${i}`,
+              pid: 50000 + i,
+              startedAt: Date.now() + i * 1000,
+              cwd: "/workspace",
+              tail: `Compiling module ${i}...`,
+            },
+          },
+        });
+      }
+
+      const loopResult = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);
+      // generic_repeat still warns on the repeated args, but the changing tail
+      // means the no-progress streak must not escalate to a critical result.
+      expect(loopResult.stuck).toBe(true);
+      if (loopResult.stuck) {
+        expect(loopResult.level).toBe("warning");
+        expect(loopResult.detector).not.toBe("global_circuit_breaker");
+      }
+    });
  });

  describe("getToolCallStats", () => {
--- a/src/agents/tool-loop-detection.ts
+++ b/src/agents/tool-loop-detection.ts
@ -223,6 +223,31 @@ function hashToolOutcome(
    }
  }

+  // Exec tool results contain volatile fields (durationMs, pid, startedAt,
+  // sessionId) that change on every invocation.  Hash only the stable
+  // fields so that repeated identical commands are correctly detected as
+  // a loop.  See https://github.com/nicepkg/openclaw/issues/34574.
+  //
+  // For "running" results the content text itself embeds volatile metadata
+  // (session id, pid) so we omit it and hash only the status + tail output.
+  // For "completed" results we use details.aggregated rather than content text
+  // because content.text may drop stderr/error (node-host uses short-circuit
+  // OR) or prepend warnings (gateway path), while aggregated always contains
+  // the full combined stdout+stderr+error output.
+  if (toolName === "exec") {
+    if (details.status === "running") {
+      return digestStable({
+        status: "running",
+        tail: details.tail ?? null,
+      });
+    }
+    return digestStable({
+      status: details.status,
+      exitCode: details.exitCode ?? null,
+      aggregated: details.aggregated ?? text,
+    });
+  }
+
  return digestStable({
    details,
    text,