fix: handle volatile text in running exec results

Address Codex review feedback: running exec results embed a volatile
session id and pid in content.text ("Command still running (session
sess-123, pid 45678)"), so hashing that text still yields a different
hash on every call and the repeated command is never detected as a loop.

For running exec, hash only status + tail output (which reflects
actual command progress). For completed exec, content.text already
mirrors the stable aggregated output, so keep hashing it.

Add test for running exec with volatile text + varying pid/sessionId,
and a negative test verifying that changing tail output prevents
escalation to global_circuit_breaker.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Keren 2026-03-10 00:21:25 +01:00
parent 3f69db6eff
commit 48d177d102
2 changed files with 58 additions and 1 deletions

View File

@ -683,7 +683,13 @@ describe("tool-loop-detection", () => {
toolParams: execParams,
toolCallId,
result: {
content: [{ type: "text", text: "Command still running" }],
// Real exec embeds volatile session/pid in content text
content: [
{
type: "text",
text: `Command still running (session sess-${1000 + i}, pid ${40000 + i}). Use process for follow-up.`,
},
],
details: {
status: "running",
sessionId: `sess-${1000 + i}`,
@ -701,6 +707,46 @@ describe("tool-loop-detection", () => {
expect(loopResult.level).toBe("warning");
}
});
// Negative case for #34574: every poll of the same still-running command
// reports a different tail, so the outcome changes from call to call even
// though the arguments are repeated.
it("does not flag running exec loop when tail output progresses (#34574)", () => {
  const state = createState();
  const execParams = { command: "make build", cwd: "/workspace" };

  let attempt = 0;
  while (attempt < GLOBAL_CIRCUIT_BREAKER_THRESHOLD) {
    const toolCallId = `exec-tail-${attempt}`;
    const sessionId = `sess-${attempt}`;
    const pid = 50000 + attempt;

    recordToolCall(state, "exec", execParams, toolCallId);

    // Session id, pid and start time vary on every iteration, as does the
    // tail line; only the tail is meant to count as command progress.
    const details = {
      status: "running",
      sessionId,
      pid,
      startedAt: Date.now() + attempt * 1000,
      cwd: "/workspace",
      tail: `Compiling module ${attempt}...`,
    };
    const content = [
      {
        type: "text",
        text: `Command still running (session ${sessionId}, pid ${pid}).`,
      },
    ];
    recordToolCallOutcome(state, {
      toolName: "exec",
      toolParams: execParams,
      toolCallId,
      result: { content, details },
    });

    attempt += 1;
  }

  const loopResult = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);

  // Repeating identical args is still reported (generic_repeat), but because
  // the tail keeps changing the no-progress streak must stay at warning
  // level and must not trip the global circuit breaker.
  expect(loopResult.stuck).toBe(true);
  if (!loopResult.stuck) {
    return;
  }
  expect(loopResult.level).toBe("warning");
  expect(loopResult.detector).not.toBe("global_circuit_breaker");
});
});
describe("getToolCallStats", () => {

View File

@ -227,7 +227,18 @@ function hashToolOutcome(
// sessionId) that change on every invocation. Hash only the stable
// fields so that repeated identical commands are correctly detected as
// a loop. See https://github.com/nicepkg/openclaw/issues/34574.
//
// For "running" results the content text itself embeds volatile metadata
// (session id, pid) so we omit it and hash only the status + tail output.
// For "completed" results the content text mirrors `aggregated` which is
// stable, so we include it.
if (toolName === "exec") {
if (details.status === "running") {
return digestStable({
status: "running",
tail: details.tail ?? null,
});
}
return digestStable({
status: details.status,
exitCode: details.exitCode ?? null,