fix: handle volatile text in running exec results
Address Codex review feedback: running exec results embed a volatile
session id and pid in content.text ("Command still running (session
sess-123, pid 45678)"), so hashing the text still produces a different hash on every call.
For running exec, hash only status + tail output (which reflects
actual command progress). For completed exec, content.text already
mirrors the stable aggregated output, so keep hashing it.
Add a test for running exec with volatile text and varying pid/sessionId,
and a negative test verifying that a changing tail output prevents
escalation to global_circuit_breaker.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
3f69db6eff
commit
48d177d102
@ -683,7 +683,13 @@ describe("tool-loop-detection", () => {
|
||||
toolParams: execParams,
|
||||
toolCallId,
|
||||
result: {
|
||||
content: [{ type: "text", text: "Command still running" }],
|
||||
// Real exec embeds volatile session/pid in content text
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: `Command still running (session sess-${1000 + i}, pid ${40000 + i}). Use process for follow-up.`,
|
||||
},
|
||||
],
|
||||
details: {
|
||||
status: "running",
|
||||
sessionId: `sess-${1000 + i}`,
|
||||
@ -701,6 +707,46 @@ describe("tool-loop-detection", () => {
|
||||
expect(loopResult.level).toBe("warning");
|
||||
}
|
||||
});
|
||||
|
||||
it("does not flag running exec loop when tail output progresses (#34574)", () => {
  // Negative case: the same command is polled up to the global circuit-breaker
  // threshold, but every poll reports a different `tail`, i.e. the command is
  // visibly making progress between calls.
  const execParams = { command: "make build", cwd: "/workspace" };
  const state = createState();

  let step = 0;
  while (step < GLOBAL_CIRCUIT_BREAKER_THRESHOLD) {
    const toolCallId = `exec-tail-${step}`;
    const pid = 50000 + step;

    // Session id and pid vary per call and are also embedded in the content
    // text; only `tail` carries the progress information.
    const runningResult = {
      content: [
        {
          type: "text",
          text: `Command still running (session sess-${step}, pid ${pid}).`,
        },
      ],
      details: {
        status: "running",
        sessionId: `sess-${step}`,
        pid,
        startedAt: Date.now() + step * 1000,
        cwd: "/workspace",
        tail: `Compiling module ${step}...`,
      },
    };

    recordToolCall(state, "exec", execParams, toolCallId);
    recordToolCallOutcome(state, {
      toolName: "exec",
      toolParams: execParams,
      toolCallId,
      result: runningResult,
    });

    step += 1;
  }

  const loopResult = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);

  // The repeated arguments are still expected to be reported as stuck, but
  // only at warning level: progressing tail output must not escalate the
  // result to the global circuit breaker.
  expect(loopResult.stuck).toBe(true);
  if (!loopResult.stuck) {
    return;
  }
  expect(loopResult.level).toBe("warning");
  expect(loopResult.detector).not.toBe("global_circuit_breaker");
});
|
||||
});
|
||||
|
||||
describe("getToolCallStats", () => {
|
||||
|
||||
@ -227,7 +227,18 @@ function hashToolOutcome(
|
||||
// sessionId) that change on every invocation. Hash only the stable
|
||||
// fields so that repeated identical commands are correctly detected as
|
||||
// a loop. See https://github.com/nicepkg/openclaw/issues/34574.
|
||||
//
|
||||
// For "running" results the content text itself embeds volatile metadata
|
||||
// (session id, pid) so we omit it and hash only the status + tail output.
|
||||
// For "completed" results the content text mirrors `aggregated` which is
|
||||
// stable, so we include it.
|
||||
if (toolName === "exec") {
|
||||
if (details.status === "running") {
|
||||
return digestStable({
|
||||
status: "running",
|
||||
tail: details.tail ?? null,
|
||||
});
|
||||
}
|
||||
return digestStable({
|
||||
status: details.status,
|
||||
exitCode: details.exitCode ?? null,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user