Merge e067ee167dd7984265bc9358d68c05744980adbf into 6b4c24c2e55b5b4013277bd799525086f6a0c40f

This commit is contained in:
Zcg2021 2026-03-21 05:40:42 +01:00 committed by GitHub
commit 5497925912
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 229 additions and 0 deletions

View File

@ -543,6 +543,210 @@ describe("tool-loop-detection", () => {
const result = detectToolCallLoop(state, "tool", { arg: 1 }, enabledLoopDetectionConfig);
expect(result.stuck).toBe(false);
});
it("detects repeated exec calls with volatile details fields (#34574)", () => {
  // The command and its output are identical on every call; the only thing
  // that differs between recorded outcomes is details.durationMs.
  const state = createState();
  const execParams = { command: "echo hello", cwd: "/workspace" };

  let attempt = 0;
  while (attempt < WARNING_THRESHOLD) {
    const toolCallId = `exec-${attempt}`;
    const details = {
      status: "completed",
      exitCode: 0,
      durationMs: 100 + attempt * 7,
      aggregated: "hello",
      cwd: "/workspace",
    };
    const result = { content: [{ type: "text", text: "hello" }], details };

    recordToolCall(state, "exec", execParams, toolCallId);
    recordToolCallOutcome(state, {
      toolName: "exec",
      toolParams: execParams,
      toolCallId,
      result,
    });
    attempt += 1;
  }

  const verdict = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);
  expect(verdict.stuck).toBe(true);
  if (!verdict.stuck) {
    return;
  }
  expect(verdict.level).toBe("warning");
});
it("does not flag exec calls with different commands", () => {
  // Each recorded call uses its own command string and produces its own
  // output; the call that is finally checked uses yet another command.
  const state = createState();
  const totalCalls = WARNING_THRESHOLD + 5;

  for (let n = 0; n < totalCalls; n += 1) {
    const toolCallId = `exec-${n}`;
    const output = `output ${n}`;
    const execParams = { command: `cmd-${n}`, cwd: "/workspace" };

    recordToolCall(state, "exec", execParams, toolCallId);
    recordToolCallOutcome(state, {
      toolName: "exec",
      toolParams: execParams,
      toolCallId,
      result: {
        content: [{ type: "text", text: output }],
        details: {
          status: "completed",
          exitCode: 0,
          durationMs: 50 + n,
          aggregated: output,
          cwd: "/workspace",
        },
      },
    });
  }

  const unseenParams = { command: "cmd-new", cwd: "/workspace" };
  const loopResult = detectToolCallLoop(state, "exec", unseenParams, enabledLoopDetectionConfig);
  expect(loopResult.stuck).toBe(false);
});
it("warns for exec calls repeated past warning threshold (#34574)", () => {
  // Records CRITICAL_THRESHOLD identical exec calls (same params, same
  // output, varying durationMs) and asserts the reported level is "warning".
  const state = createState();
  const execParams = { command: "cat /tmp/status", cwd: "/workspace" };
  const sameOutput = "same output";

  for (let round = 0; round < CRITICAL_THRESHOLD; round += 1) {
    const toolCallId = `exec-crit-${round}`;
    const durationMs = 200 + round * 3;

    recordToolCall(state, "exec", execParams, toolCallId);
    recordToolCallOutcome(state, {
      toolName: "exec",
      toolParams: execParams,
      toolCallId,
      result: {
        content: [{ type: "text", text: sameOutput }],
        details: {
          status: "completed",
          exitCode: 0,
          durationMs,
          aggregated: sameOutput,
          cwd: "/workspace",
        },
      },
    });
  }

  const outcome = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);
  expect(outcome.stuck).toBe(true);
  if (!outcome.stuck) {
    return;
  }
  expect(outcome.level).toBe("warning");
});
it("triggers global circuit breaker for exec at 30 repetitions (#34574)", () => {
  // Records GLOBAL_CIRCUIT_BREAKER_THRESHOLD identical exec calls whose
  // outcomes differ only in durationMs, then expects a critical verdict
  // attributed to the global circuit breaker.
  const state = createState();
  const execParams = { command: "cat /tmp/status", cwd: "/workspace" };

  let iteration = 0;
  while (iteration < GLOBAL_CIRCUIT_BREAKER_THRESHOLD) {
    const toolCallId = `exec-gcb-${iteration}`;
    const details = {
      status: "completed",
      exitCode: 0,
      durationMs: 300 + iteration * 5,
      aggregated: "same output",
      cwd: "/workspace",
    };

    recordToolCall(state, "exec", execParams, toolCallId);
    recordToolCallOutcome(state, {
      toolName: "exec",
      toolParams: execParams,
      toolCallId,
      result: {
        content: [{ type: "text", text: "same output" }],
        details,
      },
    });
    iteration += 1;
  }

  const verdict = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);
  expect(verdict.stuck).toBe(true);
  if (!verdict.stuck) {
    return;
  }
  expect(verdict.level).toBe("critical");
  expect(verdict.detector).toBe("global_circuit_breaker");
});
it("detects exec loop even with varying pid and startedAt (#34574)", () => {
  // Every outcome reports status "running" with a different session id, pid
  // and startedAt; the same volatile session/pid values are also embedded in
  // the content text, mirroring what real exec results look like.
  const state = createState();
  const execParams = { command: "sleep 1 &", cwd: "/workspace" };

  for (let run = 0; run < WARNING_THRESHOLD; run += 1) {
    const toolCallId = `exec-bg-${run}`;
    const sessionId = `sess-${1000 + run}`;
    const pid = 40000 + run;

    recordToolCall(state, "exec", execParams, toolCallId);
    recordToolCallOutcome(state, {
      toolName: "exec",
      toolParams: execParams,
      toolCallId,
      result: {
        content: [
          {
            type: "text",
            text: `Command still running (session ${sessionId}, pid ${pid}). Use process for follow-up.`,
          },
        ],
        details: {
          status: "running",
          sessionId,
          pid,
          startedAt: Date.now() + run * 1000,
          cwd: "/workspace",
        },
      },
    });
  }

  const loopResult = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);
  expect(loopResult.stuck).toBe(true);
  if (!loopResult.stuck) {
    return;
  }
  expect(loopResult.level).toBe("warning");
});
it("does not escalate running exec loop past warning when tail output progresses (#34574)", () => {
  // The previous title claimed the loop is "not flagged", but the assertions
  // below require stuck === true at warning level. What this test actually
  // pins is the absence of escalation: identical params are still reported,
  // yet the verdict must stay at "warning" and must not come from the global
  // circuit breaker while details.tail changes on every call.
  const state = createState();
  const execParams = { command: "make build", cwd: "/workspace" };

  for (let i = 0; i < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; i += 1) {
    const toolCallId = `exec-tail-${i}`;
    recordToolCall(state, "exec", execParams, toolCallId);
    recordToolCallOutcome(state, {
      toolName: "exec",
      toolParams: execParams,
      toolCallId,
      result: {
        // Session id and pid vary per call, both in the text and in details.
        content: [
          {
            type: "text",
            text: `Command still running (session sess-${i}, pid ${50000 + i}).`,
          },
        ],
        details: {
          status: "running",
          sessionId: `sess-${i}`,
          pid: 50000 + i,
          startedAt: Date.now() + i * 1000,
          cwd: "/workspace",
          // A different tail on every call is the "progress" signal here.
          tail: `Compiling module ${i}...`,
        },
      },
    });
  }

  const loopResult = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);
  // generic_repeat fires on the repeated args, so the call is still reported
  // as stuck; the no-progress streak must not escalate it to critical
  // because the tail output keeps changing.
  expect(loopResult.stuck).toBe(true);
  if (loopResult.stuck) {
    expect(loopResult.level).toBe("warning");
    expect(loopResult.detector).not.toBe("global_circuit_breaker");
  }
});
});
describe("getToolCallStats", () => {

View File

@ -223,6 +223,31 @@ function hashToolOutcome(
}
}
// Exec tool results contain volatile fields (durationMs, pid, startedAt,
// sessionId) that change on every invocation. Hash only the stable
// fields so that repeated identical commands are correctly detected as
// a loop. See https://github.com/nicepkg/openclaw/issues/34574.
//
// For "running" results the content text itself embeds volatile metadata
// (session id, pid) so we omit it and hash only the status + tail output.
// For "completed" results we use details.aggregated rather than content text
// because content.text may drop stderr/error (node-host uses short-circuit
// OR) or prepend warnings (gateway path), while aggregated always contains
// the full combined stdout+stderr+error output.
if (toolName === "exec") {
if (details.status === "running") {
return digestStable({
status: "running",
tail: details.tail ?? null,
});
}
return digestStable({
status: details.status,
exitCode: details.exitCode ?? null,
aggregated: details.aggregated ?? text,
});
}
return digestStable({
details,
text,