fix: handle volatile text in running exec results

Address Codex review feedback: running exec results embed a volatile session id and pid in content.text ("Command still running (session sess-123, pid 45678)"), so hashing that text still produces a different hash on every call.

For running exec, hash only status + tail output (which reflects actual command progress). For completed exec, content.text already mirrors the stable aggregated output, so keep hashing it.

Add a test for running exec with volatile text and varying pid/sessionId, and a negative test verifying that changing tail output prevents escalation to global_circuit_breaker.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 00:21:25 +01:00 · 2026-03-10 00:21:25 +01:00 · 48d177d102
commit 48d177d102
parent 3f69db6eff
2 changed files with 58 additions and 1 deletions
--- a/src/agents/tool-loop-detection.test.ts
+++ b/src/agents/tool-loop-detection.test.ts
@ -683,7 +683,13 @@ describe("tool-loop-detection", () => {
          toolParams: execParams,
          toolCallId,
          result: {
-            content: [{ type: "text", text: "Command still running" }],
+            // Real exec embeds volatile session/pid in content text
+            content: [
+              {
+                type: "text",
+                text: `Command still running (session sess-${1000 + i}, pid ${40000 + i}). Use process for follow-up.`,
+              },
+            ],
            details: {
              status: "running",
              sessionId: `sess-${1000 + i}`,
@ -701,6 +707,46 @@ describe("tool-loop-detection", () => {
        expect(loopResult.level).toBe("warning");
      }
    });
+
+    it("does not flag running exec loop when tail output progresses (#34574)", () => {
+      const state = createState();
+      const execParams = { command: "make build", cwd: "/workspace" };
+
+      for (let i = 0; i < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; i += 1) {
+        const toolCallId = `exec-tail-${i}`;
+        recordToolCall(state, "exec", execParams, toolCallId);
+        recordToolCallOutcome(state, {
+          toolName: "exec",
+          toolParams: execParams,
+          toolCallId,
+          result: {
+            content: [
+              {
+                type: "text",
+                text: `Command still running (session sess-${i}, pid ${50000 + i}).`,
+              },
+            ],
+            details: {
+              status: "running",
+              sessionId: `sess-${i}`,
+              pid: 50000 + i,
+              startedAt: Date.now() + i * 1000,
+              cwd: "/workspace",
+              tail: `Compiling module ${i}...`,
+            },
+          },
+        });
+      }
+
+      const loopResult = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);
+      // generic_repeat still fires (warning) because the args repeat, but the no-progress
+      // streak must not escalate to critical: the tail output differs on every call
+      expect(loopResult.stuck).toBe(true);
+      if (loopResult.stuck) {
+        expect(loopResult.level).toBe("warning");
+        expect(loopResult.detector).not.toBe("global_circuit_breaker");
+      }
+    });
  });

  describe("getToolCallStats", () => {
--- a/src/agents/tool-loop-detection.ts
+++ b/src/agents/tool-loop-detection.ts
@ -227,7 +227,18 @@ function hashToolOutcome(
  // sessionId) that change on every invocation.  Hash only the stable
  // fields so that repeated identical commands are correctly detected as
  // a loop.  See https://github.com/nicepkg/openclaw/issues/34574.
+  //
+  // For "running" results the content text itself embeds volatile metadata
+  // (session id, pid) so we omit it and hash only the status + tail output.
+  // For "completed" results the content text mirrors `aggregated` which is
+  // stable, so we include it.
  if (toolName === "exec") {
+    if (details.status === "running") {
+      return digestStable({
+        status: "running",
+        tail: details.tail ?? null,
+      });
+    }
    return digestStable({
      status: details.status,
      exitCode: details.exitCode ?? null,