Merge e067ee167dd7984265bc9358d68c05744980adbf into 6b4c24c2e55b5b4013277bd799525086f6a0c40f
This commit is contained in:
commit
5497925912
@ -543,6 +543,210 @@ describe("tool-loop-detection", () => {
|
||||
const result = detectToolCallLoop(state, "tool", { arg: 1 }, enabledLoopDetectionConfig);
|
||||
expect(result.stuck).toBe(false);
|
||||
});
|
||||
|
||||
it("detects repeated exec calls with volatile details fields (#34574)", () => {
|
||||
const state = createState();
|
||||
const execParams = { command: "echo hello", cwd: "/workspace" };
|
||||
|
||||
for (let i = 0; i < WARNING_THRESHOLD; i += 1) {
|
||||
const toolCallId = `exec-${i}`;
|
||||
recordToolCall(state, "exec", execParams, toolCallId);
|
||||
recordToolCallOutcome(state, {
|
||||
toolName: "exec",
|
||||
toolParams: execParams,
|
||||
toolCallId,
|
||||
result: {
|
||||
content: [{ type: "text", text: "hello" }],
|
||||
details: {
|
||||
status: "completed",
|
||||
exitCode: 0,
|
||||
durationMs: 100 + i * 7,
|
||||
aggregated: "hello",
|
||||
cwd: "/workspace",
|
||||
},
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
const loopResult = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);
|
||||
expect(loopResult.stuck).toBe(true);
|
||||
if (loopResult.stuck) {
|
||||
expect(loopResult.level).toBe("warning");
|
||||
}
|
||||
});
|
||||
|
||||
it("does not flag exec calls with different commands", () => {
|
||||
const state = createState();
|
||||
|
||||
for (let i = 0; i < WARNING_THRESHOLD + 5; i += 1) {
|
||||
const execParams = { command: `cmd-${i}`, cwd: "/workspace" };
|
||||
const toolCallId = `exec-${i}`;
|
||||
recordToolCall(state, "exec", execParams, toolCallId);
|
||||
recordToolCallOutcome(state, {
|
||||
toolName: "exec",
|
||||
toolParams: execParams,
|
||||
toolCallId,
|
||||
result: {
|
||||
content: [{ type: "text", text: `output ${i}` }],
|
||||
details: {
|
||||
status: "completed",
|
||||
exitCode: 0,
|
||||
durationMs: 50 + i,
|
||||
aggregated: `output ${i}`,
|
||||
cwd: "/workspace",
|
||||
},
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
const loopResult = detectToolCallLoop(
|
||||
state,
|
||||
"exec",
|
||||
{ command: "cmd-new", cwd: "/workspace" },
|
||||
enabledLoopDetectionConfig,
|
||||
);
|
||||
expect(loopResult.stuck).toBe(false);
|
||||
});
|
||||
|
||||
it("warns for exec calls repeated past warning threshold (#34574)", () => {
|
||||
const state = createState();
|
||||
const execParams = { command: "cat /tmp/status", cwd: "/workspace" };
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) {
|
||||
const toolCallId = `exec-crit-${i}`;
|
||||
recordToolCall(state, "exec", execParams, toolCallId);
|
||||
recordToolCallOutcome(state, {
|
||||
toolName: "exec",
|
||||
toolParams: execParams,
|
||||
toolCallId,
|
||||
result: {
|
||||
content: [{ type: "text", text: "same output" }],
|
||||
details: {
|
||||
status: "completed",
|
||||
exitCode: 0,
|
||||
durationMs: 200 + i * 3,
|
||||
aggregated: "same output",
|
||||
cwd: "/workspace",
|
||||
},
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
const loopResult = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);
|
||||
expect(loopResult.stuck).toBe(true);
|
||||
if (loopResult.stuck) {
|
||||
expect(loopResult.level).toBe("warning");
|
||||
}
|
||||
});
|
||||
|
||||
it("triggers global circuit breaker for exec at 30 repetitions (#34574)", () => {
|
||||
const state = createState();
|
||||
const execParams = { command: "cat /tmp/status", cwd: "/workspace" };
|
||||
|
||||
for (let i = 0; i < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; i += 1) {
|
||||
const toolCallId = `exec-gcb-${i}`;
|
||||
recordToolCall(state, "exec", execParams, toolCallId);
|
||||
recordToolCallOutcome(state, {
|
||||
toolName: "exec",
|
||||
toolParams: execParams,
|
||||
toolCallId,
|
||||
result: {
|
||||
content: [{ type: "text", text: "same output" }],
|
||||
details: {
|
||||
status: "completed",
|
||||
exitCode: 0,
|
||||
durationMs: 300 + i * 5,
|
||||
aggregated: "same output",
|
||||
cwd: "/workspace",
|
||||
},
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
const loopResult = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);
|
||||
expect(loopResult.stuck).toBe(true);
|
||||
if (loopResult.stuck) {
|
||||
expect(loopResult.level).toBe("critical");
|
||||
expect(loopResult.detector).toBe("global_circuit_breaker");
|
||||
}
|
||||
});
|
||||
|
||||
it("detects exec loop even with varying pid and startedAt (#34574)", () => {
|
||||
const state = createState();
|
||||
const execParams = { command: "sleep 1 &", cwd: "/workspace" };
|
||||
|
||||
for (let i = 0; i < WARNING_THRESHOLD; i += 1) {
|
||||
const toolCallId = `exec-bg-${i}`;
|
||||
recordToolCall(state, "exec", execParams, toolCallId);
|
||||
recordToolCallOutcome(state, {
|
||||
toolName: "exec",
|
||||
toolParams: execParams,
|
||||
toolCallId,
|
||||
result: {
|
||||
// Real exec embeds volatile session/pid in content text
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: `Command still running (session sess-${1000 + i}, pid ${40000 + i}). Use process for follow-up.`,
|
||||
},
|
||||
],
|
||||
details: {
|
||||
status: "running",
|
||||
sessionId: `sess-${1000 + i}`,
|
||||
pid: 40000 + i,
|
||||
startedAt: Date.now() + i * 1000,
|
||||
cwd: "/workspace",
|
||||
},
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
const loopResult = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);
|
||||
expect(loopResult.stuck).toBe(true);
|
||||
if (loopResult.stuck) {
|
||||
expect(loopResult.level).toBe("warning");
|
||||
}
|
||||
});
|
||||
|
||||
it("does not flag running exec loop when tail output progresses (#34574)", () => {
|
||||
const state = createState();
|
||||
const execParams = { command: "make build", cwd: "/workspace" };
|
||||
|
||||
for (let i = 0; i < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; i += 1) {
|
||||
const toolCallId = `exec-tail-${i}`;
|
||||
recordToolCall(state, "exec", execParams, toolCallId);
|
||||
recordToolCallOutcome(state, {
|
||||
toolName: "exec",
|
||||
toolParams: execParams,
|
||||
toolCallId,
|
||||
result: {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: `Command still running (session sess-${i}, pid ${50000 + i}).`,
|
||||
},
|
||||
],
|
||||
details: {
|
||||
status: "running",
|
||||
sessionId: `sess-${i}`,
|
||||
pid: 50000 + i,
|
||||
startedAt: Date.now() + i * 1000,
|
||||
cwd: "/workspace",
|
||||
tail: `Compiling module ${i}...`,
|
||||
},
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
const loopResult = detectToolCallLoop(state, "exec", execParams, enabledLoopDetectionConfig);
|
||||
// generic_repeat fires on args, but no-progress streak should not
|
||||
// escalate to critical because tail output is progressing
|
||||
expect(loopResult.stuck).toBe(true);
|
||||
if (loopResult.stuck) {
|
||||
expect(loopResult.level).toBe("warning");
|
||||
expect(loopResult.detector).not.toBe("global_circuit_breaker");
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("getToolCallStats", () => {
|
||||
|
||||
@ -223,6 +223,31 @@ function hashToolOutcome(
|
||||
}
|
||||
}
|
||||
|
||||
// Exec tool results contain volatile fields (durationMs, pid, startedAt,
|
||||
// sessionId) that change on every invocation. Hash only the stable
|
||||
// fields so that repeated identical commands are correctly detected as
|
||||
// a loop. See https://github.com/nicepkg/openclaw/issues/34574.
|
||||
//
|
||||
// For "running" results the content text itself embeds volatile metadata
|
||||
// (session id, pid) so we omit it and hash only the status + tail output.
|
||||
// For "completed" results we use details.aggregated rather than content text
|
||||
// because content.text may drop stderr/error (node-host uses short-circuit
|
||||
// OR) or prepend warnings (gateway path), while aggregated always contains
|
||||
// the full combined stdout+stderr+error output.
|
||||
if (toolName === "exec") {
|
||||
if (details.status === "running") {
|
||||
return digestStable({
|
||||
status: "running",
|
||||
tail: details.tail ?? null,
|
||||
});
|
||||
}
|
||||
return digestStable({
|
||||
status: details.status,
|
||||
exitCode: details.exitCode ?? null,
|
||||
aggregated: details.aggregated ?? text,
|
||||
});
|
||||
}
|
||||
|
||||
return digestStable({
|
||||
details,
|
||||
text,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user