811 lines
24 KiB
TypeScript
Raw Normal View History

2026-01-14 01:08:15 +00:00
import type { AssistantMessage } from "@mariozechner/pi-ai";
2026-01-30 03:15:10 +01:00
import type { OpenClawConfig } from "../../config/config.js";
2026-01-14 01:08:15 +00:00
import type { FailoverReason } from "./types.js";
import { formatSandboxToolPolicyBlockedMessage } from "../sandbox.js";
2026-01-14 01:08:15 +00:00
export function formatBillingErrorMessage(provider?: string): string {
const providerName = provider?.trim();
if (providerName) {
return `⚠️ ${providerName} returned a billing error — your API key has run out of credits or has an insufficient balance. Check your ${providerName} billing dashboard and top up or switch to a different API key.`;
}
return "⚠️ API provider returned a billing error — your API key has run out of credits or has an insufficient balance. Check your provider's billing dashboard and top up or switch to a different API key.";
}
export const BILLING_ERROR_USER_MESSAGE = formatBillingErrorMessage();
const RATE_LIMIT_ERROR_USER_MESSAGE = "⚠️ API rate limit reached. Please try again later.";
const OVERLOADED_ERROR_USER_MESSAGE =
"The AI service is temporarily overloaded. Please try again in a moment.";
function formatRateLimitOrOverloadedErrorCopy(raw: string): string | undefined {
if (isRateLimitErrorMessage(raw)) {
return RATE_LIMIT_ERROR_USER_MESSAGE;
}
if (isOverloadedErrorMessage(raw)) {
return OVERLOADED_ERROR_USER_MESSAGE;
}
return undefined;
}
2026-01-14 01:08:15 +00:00
export function isContextOverflowError(errorMessage?: string): boolean {
if (!errorMessage) {
return false;
}
2026-01-14 01:08:15 +00:00
const lower = errorMessage.toLowerCase();
fix: auto-compact on context overflow promptError before returning error (#1627) * fix: detect Anthropic 'Request size exceeds model context window' as context overflow Anthropic now returns 'Request size exceeds model context window' instead of the previously detected 'prompt is too long' format. This new error message was not recognized by isContextOverflowError(), causing auto-compaction to NOT trigger. Users would see the raw error twice without any recovery attempt. Changes: - Add 'exceeds model context window' and 'request size exceeds' to isContextOverflowError() detection patterns - Add tests that fail without the fix, verifying both the raw error string and the JSON-wrapped format from Anthropic's API - Add test for formatAssistantErrorText to ensure the friendly 'Context overflow' message is shown instead of the raw error Note: The upstream pi-ai package (@mariozechner/pi-ai) also needs a fix in its OVERFLOW_PATTERNS regex: /exceeds the context window/i should be changed to /exceeds.*context window/i to match both 'the' and 'model' variants for triggering auto-compaction retry. * fix(tests): remove unused imports and helper from test files Remove WorkspaceBootstrapFile references and _makeFile helper that were incorrectly copied from another test file. These caused type errors and were unrelated to the context overflow detection tests. * fix: trigger auto-compaction on context overflow promptError When the LLM rejects a request with a context overflow error that surfaces as a promptError (thrown exception rather than streamed error), the existing auto-compaction in pi-coding-agent never triggers. This happens because the error bypasses the agent's message_end → agent_end → _checkCompaction path. This fix adds a fallback compaction attempt directly in the run loop: - Detects context overflow in promptError (excluding compaction_failure) - Calls compactEmbeddedPiSessionDirect (bypassing lane queues since already in-lane) - Retries the prompt after successful compaction - Limits to one compaction attempt per run to prevent infinite loops Fixes: context overflow errors shown to user without auto-compaction attempt * style: format compact.ts and run.ts with oxfmt * fix: tighten context overflow match (#1627) (thanks @rodrigouroz) --------- Co-authored-by: Claude <claude@anthropic.com> Co-authored-by: Peter Steinberger <steipete@gmail.com>
2026-01-24 19:09:24 -03:00
const hasRequestSizeExceeds = lower.includes("request size exceeds");
const hasContextWindow =
lower.includes("context window") ||
lower.includes("context length") ||
lower.includes("maximum context length");
2026-01-14 01:08:15 +00:00
return (
lower.includes("request_too_large") ||
lower.includes("request exceeds the maximum size") ||
lower.includes("context length exceeded") ||
lower.includes("maximum context length") ||
lower.includes("prompt is too long") ||
fix: auto-compact on context overflow promptError before returning error (#1627) * fix: detect Anthropic 'Request size exceeds model context window' as context overflow Anthropic now returns 'Request size exceeds model context window' instead of the previously detected 'prompt is too long' format. This new error message was not recognized by isContextOverflowError(), causing auto-compaction to NOT trigger. Users would see the raw error twice without any recovery attempt. Changes: - Add 'exceeds model context window' and 'request size exceeds' to isContextOverflowError() detection patterns - Add tests that fail without the fix, verifying both the raw error string and the JSON-wrapped format from Anthropic's API - Add test for formatAssistantErrorText to ensure the friendly 'Context overflow' message is shown instead of the raw error Note: The upstream pi-ai package (@mariozechner/pi-ai) also needs a fix in its OVERFLOW_PATTERNS regex: /exceeds the context window/i should be changed to /exceeds.*context window/i to match both 'the' and 'model' variants for triggering auto-compaction retry. * fix(tests): remove unused imports and helper from test files Remove WorkspaceBootstrapFile references and _makeFile helper that were incorrectly copied from another test file. These caused type errors and were unrelated to the context overflow detection tests. * fix: trigger auto-compaction on context overflow promptError When the LLM rejects a request with a context overflow error that surfaces as a promptError (thrown exception rather than streamed error), the existing auto-compaction in pi-coding-agent never triggers. This happens because the error bypasses the agent's message_end → agent_end → _checkCompaction path. This fix adds a fallback compaction attempt directly in the run loop: - Detects context overflow in promptError (excluding compaction_failure) - Calls compactEmbeddedPiSessionDirect (bypassing lane queues since already in-lane) - Retries the prompt after successful compaction - Limits to one compaction attempt per run to prevent infinite loops Fixes: context overflow errors shown to user without auto-compaction attempt * style: format compact.ts and run.ts with oxfmt * fix: tighten context overflow match (#1627) (thanks @rodrigouroz) --------- Co-authored-by: Claude <claude@anthropic.com> Co-authored-by: Peter Steinberger <steipete@gmail.com>
2026-01-24 19:09:24 -03:00
lower.includes("exceeds model context window") ||
(hasRequestSizeExceeds && hasContextWindow) ||
lower.includes("context overflow:") ||
2026-01-14 01:08:15 +00:00
(lower.includes("413") && lower.includes("too large"))
);
}
const CONTEXT_WINDOW_TOO_SMALL_RE = /context window.*(too small|minimum is)/i;
const CONTEXT_OVERFLOW_HINT_RE =
/context.*overflow|context window.*(too (?:large|long)|exceed|over|limit|max(?:imum)?|requested|sent|tokens)|prompt.*(too (?:large|long)|exceed|over|limit|max(?:imum)?)|(?:request|input).*(?:context|window|length|token).*(too (?:large|long)|exceed|over|limit|max(?:imum)?)/i;
const RATE_LIMIT_HINT_RE =
/rate limit|too many requests|requests per (?:minute|hour|day)|quota|throttl|429\b/i;
export function isLikelyContextOverflowError(errorMessage?: string): boolean {
if (!errorMessage) {
return false;
}
if (CONTEXT_WINDOW_TOO_SMALL_RE.test(errorMessage)) {
return false;
}
// Rate limit errors can match the broad CONTEXT_OVERFLOW_HINT_RE pattern
// (e.g., "request reached organization TPD rate limit" matches request.*limit).
// Exclude them before checking context overflow heuristics.
if (isRateLimitErrorMessage(errorMessage)) {
return false;
}
if (isContextOverflowError(errorMessage)) {
return true;
}
if (RATE_LIMIT_HINT_RE.test(errorMessage)) {
return false;
}
return CONTEXT_OVERFLOW_HINT_RE.test(errorMessage);
}
2026-01-14 01:08:15 +00:00
export function isCompactionFailureError(errorMessage?: string): boolean {
if (!errorMessage) {
return false;
}
2026-01-14 01:08:15 +00:00
const lower = errorMessage.toLowerCase();
const hasCompactionTerm =
2026-01-14 01:08:15 +00:00
lower.includes("summarization failed") ||
lower.includes("auto-compaction") ||
lower.includes("compaction failed") ||
lower.includes("compaction");
if (!hasCompactionTerm) {
return false;
}
// Treat any likely overflow shape as a compaction failure when compaction terms are present.
// Providers often vary wording (e.g. "context window exceeded") across APIs.
if (isLikelyContextOverflowError(errorMessage)) {
return true;
}
// Keep explicit fallback for bare "context overflow" strings.
return lower.includes("context overflow");
2026-01-14 01:08:15 +00:00
}
const ERROR_PAYLOAD_PREFIX_RE =
/^(?:error|api\s*error|apierror|openai\s*error|anthropic\s*error|gateway\s*error)[:\s-]+/i;
const FINAL_TAG_RE = /<\s*\/?\s*final\s*>/gi;
const ERROR_PREFIX_RE =
/^(?:error|api\s*error|openai\s*error|anthropic\s*error|gateway\s*error|request failed|failed|exception)[:\s-]+/i;
const CONTEXT_OVERFLOW_ERROR_HEAD_RE =
/^(?:context overflow:|request_too_large\b|request size exceeds\b|request exceeds the maximum size\b|context length exceeded\b|maximum context length\b|prompt is too long\b|exceeds model context window\b)/i;
Agents: add nested subagent orchestration controls and reduce subagent token waste (#14447) * Agents: add subagent orchestration controls * Agents: add subagent orchestration controls (WIP uncommitted changes) * feat(subagents): add depth-based spawn gating for sub-sub-agents * feat(subagents): tool policy, registry, and announce chain for nested agents * feat(subagents): system prompt, docs, changelog for nested sub-agents * fix(subagents): prevent model fallback override, show model during active runs, and block context overflow fallback Bug 1: When a session has an explicit model override (e.g., gpt/openai-codex), the fallback candidate logic in resolveFallbackCandidates silently appended the global primary model (opus) as a backstop. On reinjection/steer with a transient error, the session could fall back to opus which has a smaller context window and crash. Fix: when storedModelOverride is set, pass fallbacksOverride ?? [] instead of undefined, preventing the implicit primary backstop. Bug 2: Active subagents showed 'model n/a' in /subagents list because resolveModelDisplay only read entry.model/modelProvider (populated after run completes). Fix: fall back to modelOverride/providerOverride fields which are populated at spawn time via sessions.patch. Bug 3: Context overflow errors (prompt too long, context_length_exceeded) could theoretically escape runEmbeddedPiAgent and be treated as failover candidates in runWithModelFallback, causing a switch to a model with a smaller context window. Fix: in runWithModelFallback, detect context overflow errors via isLikelyContextOverflowError and rethrow them immediately instead of trying the next model candidate. * fix(subagents): track spawn depth in session store and fix announce routing for nested agents * Fix compaction status tracking and dedupe overflow compaction triggers * fix(subagents): enforce depth block via session store and implement cascade kill * fix: inject group chat context into system prompt * fix(subagents): always write model to session store at spawn time * Preserve spawnDepth when agent handler rewrites session entry * fix(subagents): suppress announce on steer-restart * fix(subagents): fallback spawned session model to runtime default * fix(subagents): enforce spawn depth when caller key resolves by sessionId * feat(subagents): implement active-first ordering for numeric targets and enhance task display - Added a test to verify that subagents with numeric targets follow an active-first list ordering. - Updated `resolveSubagentTarget` to sort subagent runs based on active status and recent activity. - Enhanced task display in command responses to prevent truncation of long task descriptions. - Introduced new utility functions for compacting task text and managing subagent run states. * fix(subagents): show model for active runs via run record fallback When the spawned model matches the agent's default model, the session store's override fields are intentionally cleared (isDefault: true). The model/modelProvider fields are only populated after the run completes. This left active subagents showing 'model n/a'. Fix: store the resolved model on SubagentRunRecord at registration time, and use it as a fallback in both display paths (subagents tool and /subagents command) when the session store entry has no model info. Changes: - SubagentRunRecord: add optional model field - registerSubagentRun: accept and persist model param - sessions-spawn-tool: pass resolvedModel to registerSubagentRun - subagents-tool: pass run record model as fallback to resolveModelDisplay - commands-subagents: pass run record model as fallback to resolveModelDisplay * feat(chat): implement session key resolution and reset on sidebar navigation - Added functions to resolve the main session key and reset chat state when switching sessions from the sidebar. - Updated the `renderTab` function to handle session key changes when navigating to the chat tab. - Introduced a test to verify that the session resets to "main" when opening chat from the sidebar navigation. * fix: subagent timeout=0 passthrough and fallback prompt duplication Bug 1: runTimeoutSeconds=0 now means 'no timeout' instead of applying 600s default - sessions-spawn-tool: default to undefined (not 0) when neither timeout param is provided; use != null check so explicit 0 passes through to gateway - agent.ts: accept 0 as valid timeout (resolveAgentTimeoutMs already handles 0 → MAX_SAFE_TIMEOUT_MS) Bug 2: model fallback no longer re-injects the original prompt as a duplicate - agent.ts: track fallback attempt index; on retries use a short continuation message instead of the full original prompt since the session file already contains it from the first attempt - Also skip re-sending images on fallback retries (already in session) * feat(subagents): truncate long task descriptions in subagents command output - Introduced a new utility function to format task previews, limiting their length to improve readability. - Updated the command handler to use the new formatting function, ensuring task descriptions are truncated appropriately. - Adjusted related tests to verify that long task descriptions are now truncated in the output. * refactor(subagents): update subagent registry path resolution and improve command output formatting - Replaced direct import of STATE_DIR with a utility function to resolve the state directory dynamically. - Enhanced the formatting of command output for active and recent subagents, adding separators for better readability. - Updated related tests to reflect changes in command output structure. * fix(subagent): default sessions_spawn to no timeout when runTimeoutSeconds omitted The previous fix (75a791106) correctly handled the case where runTimeoutSeconds was explicitly set to 0 ("no timeout"). However, when models omit the parameter entirely (which is common since the schema marks it as optional), runTimeoutSeconds resolved to undefined. undefined flowed through the chain as: sessions_spawn → timeout: undefined (since undefined != null is false) → gateway agent handler → agentCommand opts.timeout: undefined → resolveAgentTimeoutMs({ overrideSeconds: undefined }) → DEFAULT_AGENT_TIMEOUT_SECONDS (600s = 10 minutes) This caused subagents to be killed at exactly 10 minutes even though the user's intent (via TOOLS.md) was for subagents to run without a timeout. Fix: default runTimeoutSeconds to 0 (no timeout) when neither runTimeoutSeconds nor timeoutSeconds is provided by the caller. Subagent spawns are long-running by design and should not inherit the 600s agent-command default timeout. * fix(subagent): accept timeout=0 in agent-via-gateway path (second 600s default) * fix: thread timeout override through getReplyFromConfig dispatch path getReplyFromConfig called resolveAgentTimeoutMs({ cfg }) with no override, always falling back to the config default (600s). Add timeoutOverrideSeconds to GetReplyOptions and pass it through as overrideSeconds so callers of the dispatch chain can specify a custom timeout (0 = no timeout). This complements the existing timeout threading in agentCommand and the cron isolated-agent runner, which already pass overrideSeconds correctly. * feat(model-fallback): normalize OpenAI Codex model references and enhance fallback handling - Added normalization for OpenAI Codex model references, specifically converting "gpt-5.3-codex" to "openai-codex" before execution. - Updated the `resolveFallbackCandidates` function to utilize the new normalization logic. - Enhanced tests to verify the correct behavior of model normalization and fallback mechanisms. - Introduced a new test case to ensure that the normalization process works as expected for various input formats. * feat(tests): add unit tests for steer failure behavior in openclaw-tools - Introduced a new test file to validate the behavior of subagents when steer replacement dispatch fails. - Implemented tests to ensure that the announce behavior is restored correctly and that the suppression reason is cleared as expected. - Enhanced the subagent registry with a new function to clear steer restart suppression. - Updated related components to support the new test scenarios. * fix(subagents): replace stop command with kill in slash commands and documentation - Updated the `/subagents` command to replace `stop` with `kill` for consistency in controlling sub-agent runs. - Modified related documentation to reflect the change in command usage. - Removed legacy timeoutSeconds references from the sessions-spawn-tool schema and tests to streamline timeout handling. - Enhanced tests to ensure correct behavior of the updated commands and their interactions. * feat(tests): add unit tests for readLatestAssistantReply function - Introduced a new test file for the `readLatestAssistantReply` function to validate its behavior with various message scenarios. - Implemented tests to ensure the function correctly retrieves the latest assistant message and handles cases where the latest message has no text. - Mocked the gateway call to simulate different message histories for comprehensive testing. * feat(tests): enhance subagent kill-all cascade tests and announce formatting - Added a new test to verify that the `kill-all` command cascades through ended parents to active descendants in subagents. - Updated the subagent announce formatting tests to reflect changes in message structure, including the replacement of "Findings:" with "Result:" and the addition of new expectations for message content. - Improved the handling of long findings and stats in the announce formatting logic to ensure concise output. - Refactored related functions to enhance clarity and maintainability in the subagent registry and tools. * refactor(subagent): update announce formatting and remove unused constants - Modified the subagent announce formatting to replace "Findings:" with "Result:" and adjusted related expectations in tests. - Removed constants for maximum announce findings characters and summary words, simplifying the announcement logic. - Updated the handling of findings to retain full content instead of truncating, ensuring more informative outputs. - Cleaned up unused imports in the commands-subagents file to enhance code clarity. * feat(tests): enhance billing error handling in user-facing text - Added tests to ensure that normal text mentioning billing plans is not rewritten, preserving user context. - Updated the `isBillingErrorMessage` and `sanitizeUserFacingText` functions to improve handling of billing-related messages. - Introduced new test cases for various scenarios involving billing messages to ensure accurate processing and output. - Enhanced the subagent announce flow to correctly manage active descendant runs, preventing premature announcements. * feat(subagent): enhance workflow guidance and auto-announcement clarity - Added a new guideline in the subagent system prompt to emphasize trust in push-based completion, discouraging busy polling for status updates. - Updated documentation to clarify that sub-agents will automatically announce their results, improving user understanding of the workflow. - Enhanced tests to verify the new guidance on avoiding polling loops and to ensure the accuracy of the updated prompts. * fix(cron): avoid announcing interim subagent spawn acks * chore: clean post-rebase imports * fix(cron): fall back to child replies when parent stays interim * fix(subagents): make active-run guidance advisory * fix(subagents): update announce flow to handle active descendants and enhance test coverage - Modified the announce flow to defer announcements when active descendant runs are present, ensuring accurate status reporting. - Updated tests to verify the new behavior, including scenarios where no fallback requester is available and ensuring proper handling of finished subagents. - Enhanced the announce formatting to include an `expectFinal` flag for better clarity in the announcement process. * fix(subagents): enhance announce flow and formatting for user updates - Updated the announce flow to provide clearer instructions for user updates based on active subagent runs and requester context. - Refactored the announcement logic to improve clarity and ensure internal context remains private. - Enhanced tests to verify the new message expectations and formatting, including updated prompts for user-facing updates. - Introduced a new function to build reply instructions based on session context, improving the overall announcement process. * fix: resolve prep blockers and changelog placement (#14447) (thanks @tyler6204) * fix: restore cron delivery-plan import after rebase (#14447) (thanks @tyler6204) * fix: resolve test failures from rebase conflicts (#14447) (thanks @tyler6204) * fix: apply formatting after rebase (#14447) (thanks @tyler6204)
2026-02-14 22:03:45 -08:00
const BILLING_ERROR_HEAD_RE =
/^(?:error[:\s-]+)?billing(?:\s+error)?(?:[:\s-]+|$)|^(?:error[:\s-]+)?(?:credit balance|insufficient credits?|payment required|http\s*402\b)/i;
const HTTP_STATUS_PREFIX_RE = /^(?:http\s*)?(\d{3})\s+(.+)$/i;
const HTTP_STATUS_CODE_PREFIX_RE = /^(?:http\s*)?(\d{3})(?:\s+([\s\S]+))?$/i;
const HTML_ERROR_PREFIX_RE = /^\s*(?:<!doctype\s+html\b|<html\b)/i;
const CLOUDFLARE_HTML_ERROR_CODES = new Set([521, 522, 523, 524, 525, 526, 530]);
const TRANSIENT_HTTP_ERROR_CODES = new Set([500, 502, 503, 521, 522, 523, 524, 529]);
const HTTP_ERROR_HINTS = [
"error",
"bad request",
"not found",
"unauthorized",
"forbidden",
"internal server",
"service unavailable",
"gateway",
"rate limit",
"overloaded",
"timeout",
"timed out",
"invalid",
"too many requests",
"permission",
];
function extractLeadingHttpStatus(raw: string): { code: number; rest: string } | null {
const match = raw.match(HTTP_STATUS_CODE_PREFIX_RE);
if (!match) {
return null;
}
const code = Number(match[1]);
if (!Number.isFinite(code)) {
return null;
}
return { code, rest: (match[2] ?? "").trim() };
}
export function isCloudflareOrHtmlErrorPage(raw: string): boolean {
const trimmed = raw.trim();
if (!trimmed) {
return false;
}
const status = extractLeadingHttpStatus(trimmed);
if (!status || status.code < 500) {
return false;
}
if (CLOUDFLARE_HTML_ERROR_CODES.has(status.code)) {
return true;
}
return (
status.code < 600 && HTML_ERROR_PREFIX_RE.test(status.rest) && /<\/html>/i.test(status.rest)
);
}
export function isTransientHttpError(raw: string): boolean {
const trimmed = raw.trim();
if (!trimmed) {
return false;
}
const status = extractLeadingHttpStatus(trimmed);
if (!status) {
return false;
}
return TRANSIENT_HTTP_ERROR_CODES.has(status.code);
}
function stripFinalTagsFromText(text: string): string {
if (!text) {
return text;
}
return text.replace(FINAL_TAG_RE, "");
}
function collapseConsecutiveDuplicateBlocks(text: string): string {
const trimmed = text.trim();
if (!trimmed) {
return text;
}
const blocks = trimmed.split(/\n{2,}/);
if (blocks.length < 2) {
return text;
}
const normalizeBlock = (value: string) => value.trim().replace(/\s+/g, " ");
const result: string[] = [];
let lastNormalized: string | null = null;
for (const block of blocks) {
const normalized = normalizeBlock(block);
if (lastNormalized && normalized === lastNormalized) {
continue;
}
result.push(block.trim());
lastNormalized = normalized;
}
if (result.length === blocks.length) {
return text;
}
return result.join("\n\n");
}
function isLikelyHttpErrorText(raw: string): boolean {
if (isCloudflareOrHtmlErrorPage(raw)) {
return true;
}
const match = raw.match(HTTP_STATUS_PREFIX_RE);
if (!match) {
return false;
}
const code = Number(match[1]);
if (!Number.isFinite(code) || code < 400) {
return false;
}
const message = match[2].toLowerCase();
return HTTP_ERROR_HINTS.some((hint) => message.includes(hint));
}
function shouldRewriteContextOverflowText(raw: string): boolean {
if (!isContextOverflowError(raw)) {
return false;
}
return (
isRawApiErrorPayload(raw) ||
isLikelyHttpErrorText(raw) ||
ERROR_PREFIX_RE.test(raw) ||
CONTEXT_OVERFLOW_ERROR_HEAD_RE.test(raw)
);
}
Agents: add nested subagent orchestration controls and reduce subagent token waste (#14447) * Agents: add subagent orchestration controls * Agents: add subagent orchestration controls (WIP uncommitted changes) * feat(subagents): add depth-based spawn gating for sub-sub-agents * feat(subagents): tool policy, registry, and announce chain for nested agents * feat(subagents): system prompt, docs, changelog for nested sub-agents * fix(subagents): prevent model fallback override, show model during active runs, and block context overflow fallback Bug 1: When a session has an explicit model override (e.g., gpt/openai-codex), the fallback candidate logic in resolveFallbackCandidates silently appended the global primary model (opus) as a backstop. On reinjection/steer with a transient error, the session could fall back to opus which has a smaller context window and crash. Fix: when storedModelOverride is set, pass fallbacksOverride ?? [] instead of undefined, preventing the implicit primary backstop. Bug 2: Active subagents showed 'model n/a' in /subagents list because resolveModelDisplay only read entry.model/modelProvider (populated after run completes). Fix: fall back to modelOverride/providerOverride fields which are populated at spawn time via sessions.patch. Bug 3: Context overflow errors (prompt too long, context_length_exceeded) could theoretically escape runEmbeddedPiAgent and be treated as failover candidates in runWithModelFallback, causing a switch to a model with a smaller context window. Fix: in runWithModelFallback, detect context overflow errors via isLikelyContextOverflowError and rethrow them immediately instead of trying the next model candidate. * fix(subagents): track spawn depth in session store and fix announce routing for nested agents * Fix compaction status tracking and dedupe overflow compaction triggers * fix(subagents): enforce depth block via session store and implement cascade kill * fix: inject group chat context into system prompt * fix(subagents): always write model to session store at spawn time * Preserve spawnDepth when agent handler rewrites session entry * fix(subagents): suppress announce on steer-restart * fix(subagents): fallback spawned session model to runtime default * fix(subagents): enforce spawn depth when caller key resolves by sessionId * feat(subagents): implement active-first ordering for numeric targets and enhance task display - Added a test to verify that subagents with numeric targets follow an active-first list ordering. - Updated `resolveSubagentTarget` to sort subagent runs based on active status and recent activity. - Enhanced task display in command responses to prevent truncation of long task descriptions. - Introduced new utility functions for compacting task text and managing subagent run states. * fix(subagents): show model for active runs via run record fallback When the spawned model matches the agent's default model, the session store's override fields are intentionally cleared (isDefault: true). The model/modelProvider fields are only populated after the run completes. This left active subagents showing 'model n/a'. Fix: store the resolved model on SubagentRunRecord at registration time, and use it as a fallback in both display paths (subagents tool and /subagents command) when the session store entry has no model info. Changes: - SubagentRunRecord: add optional model field - registerSubagentRun: accept and persist model param - sessions-spawn-tool: pass resolvedModel to registerSubagentRun - subagents-tool: pass run record model as fallback to resolveModelDisplay - commands-subagents: pass run record model as fallback to resolveModelDisplay * feat(chat): implement session key resolution and reset on sidebar navigation - Added functions to resolve the main session key and reset chat state when switching sessions from the sidebar. - Updated the `renderTab` function to handle session key changes when navigating to the chat tab. - Introduced a test to verify that the session resets to "main" when opening chat from the sidebar navigation. * fix: subagent timeout=0 passthrough and fallback prompt duplication Bug 1: runTimeoutSeconds=0 now means 'no timeout' instead of applying 600s default - sessions-spawn-tool: default to undefined (not 0) when neither timeout param is provided; use != null check so explicit 0 passes through to gateway - agent.ts: accept 0 as valid timeout (resolveAgentTimeoutMs already handles 0 → MAX_SAFE_TIMEOUT_MS) Bug 2: model fallback no longer re-injects the original prompt as a duplicate - agent.ts: track fallback attempt index; on retries use a short continuation message instead of the full original prompt since the session file already contains it from the first attempt - Also skip re-sending images on fallback retries (already in session) * feat(subagents): truncate long task descriptions in subagents command output - Introduced a new utility function to format task previews, limiting their length to improve readability. - Updated the command handler to use the new formatting function, ensuring task descriptions are truncated appropriately. - Adjusted related tests to verify that long task descriptions are now truncated in the output. * refactor(subagents): update subagent registry path resolution and improve command output formatting - Replaced direct import of STATE_DIR with a utility function to resolve the state directory dynamically. - Enhanced the formatting of command output for active and recent subagents, adding separators for better readability. - Updated related tests to reflect changes in command output structure. * fix(subagent): default sessions_spawn to no timeout when runTimeoutSeconds omitted The previous fix (75a791106) correctly handled the case where runTimeoutSeconds was explicitly set to 0 ("no timeout"). However, when models omit the parameter entirely (which is common since the schema marks it as optional), runTimeoutSeconds resolved to undefined. undefined flowed through the chain as: sessions_spawn → timeout: undefined (since undefined != null is false) → gateway agent handler → agentCommand opts.timeout: undefined → resolveAgentTimeoutMs({ overrideSeconds: undefined }) → DEFAULT_AGENT_TIMEOUT_SECONDS (600s = 10 minutes) This caused subagents to be killed at exactly 10 minutes even though the user's intent (via TOOLS.md) was for subagents to run without a timeout. Fix: default runTimeoutSeconds to 0 (no timeout) when neither runTimeoutSeconds nor timeoutSeconds is provided by the caller. Subagent spawns are long-running by design and should not inherit the 600s agent-command default timeout. * fix(subagent): accept timeout=0 in agent-via-gateway path (second 600s default) * fix: thread timeout override through getReplyFromConfig dispatch path getReplyFromConfig called resolveAgentTimeoutMs({ cfg }) with no override, always falling back to the config default (600s). Add timeoutOverrideSeconds to GetReplyOptions and pass it through as overrideSeconds so callers of the dispatch chain can specify a custom timeout (0 = no timeout). This complements the existing timeout threading in agentCommand and the cron isolated-agent runner, which already pass overrideSeconds correctly. * feat(model-fallback): normalize OpenAI Codex model references and enhance fallback handling - Added normalization for OpenAI Codex model references, specifically converting "gpt-5.3-codex" to "openai-codex" before execution. - Updated the `resolveFallbackCandidates` function to utilize the new normalization logic. - Enhanced tests to verify the correct behavior of model normalization and fallback mechanisms. - Introduced a new test case to ensure that the normalization process works as expected for various input formats. * feat(tests): add unit tests for steer failure behavior in openclaw-tools - Introduced a new test file to validate the behavior of subagents when steer replacement dispatch fails. - Implemented tests to ensure that the announce behavior is restored correctly and that the suppression reason is cleared as expected. - Enhanced the subagent registry with a new function to clear steer restart suppression. - Updated related components to support the new test scenarios. * fix(subagents): replace stop command with kill in slash commands and documentation - Updated the `/subagents` command to replace `stop` with `kill` for consistency in controlling sub-agent runs. - Modified related documentation to reflect the change in command usage. - Removed legacy timeoutSeconds references from the sessions-spawn-tool schema and tests to streamline timeout handling. - Enhanced tests to ensure correct behavior of the updated commands and their interactions. * feat(tests): add unit tests for readLatestAssistantReply function - Introduced a new test file for the `readLatestAssistantReply` function to validate its behavior with various message scenarios. - Implemented tests to ensure the function correctly retrieves the latest assistant message and handles cases where the latest message has no text. - Mocked the gateway call to simulate different message histories for comprehensive testing. * feat(tests): enhance subagent kill-all cascade tests and announce formatting - Added a new test to verify that the `kill-all` command cascades through ended parents to active descendants in subagents. - Updated the subagent announce formatting tests to reflect changes in message structure, including the replacement of "Findings:" with "Result:" and the addition of new expectations for message content. - Improved the handling of long findings and stats in the announce formatting logic to ensure concise output. - Refactored related functions to enhance clarity and maintainability in the subagent registry and tools. * refactor(subagent): update announce formatting and remove unused constants - Modified the subagent announce formatting to replace "Findings:" with "Result:" and adjusted related expectations in tests. - Removed constants for maximum announce findings characters and summary words, simplifying the announcement logic. - Updated the handling of findings to retain full content instead of truncating, ensuring more informative outputs. - Cleaned up unused imports in the commands-subagents file to enhance code clarity. * feat(tests): enhance billing error handling in user-facing text - Added tests to ensure that normal text mentioning billing plans is not rewritten, preserving user context. - Updated the `isBillingErrorMessage` and `sanitizeUserFacingText` functions to improve handling of billing-related messages. - Introduced new test cases for various scenarios involving billing messages to ensure accurate processing and output. - Enhanced the subagent announce flow to correctly manage active descendant runs, preventing premature announcements. * feat(subagent): enhance workflow guidance and auto-announcement clarity - Added a new guideline in the subagent system prompt to emphasize trust in push-based completion, discouraging busy polling for status updates. - Updated documentation to clarify that sub-agents will automatically announce their results, improving user understanding of the workflow. - Enhanced tests to verify the new guidance on avoiding polling loops and to ensure the accuracy of the updated prompts. * fix(cron): avoid announcing interim subagent spawn acks * chore: clean post-rebase imports * fix(cron): fall back to child replies when parent stays interim * fix(subagents): make active-run guidance advisory * fix(subagents): update announce flow to handle active descendants and enhance test coverage - Modified the announce flow to defer announcements when active descendant runs are present, ensuring accurate status reporting. - Updated tests to verify the new behavior, including scenarios where no fallback requester is available and ensuring proper handling of finished subagents. - Enhanced the announce formatting to include an `expectFinal` flag for better clarity in the announcement process. * fix(subagents): enhance announce flow and formatting for user updates - Updated the announce flow to provide clearer instructions for user updates based on active subagent runs and requester context. - Refactored the announcement logic to improve clarity and ensure internal context remains private. - Enhanced tests to verify the new message expectations and formatting, including updated prompts for user-facing updates. - Introduced a new function to build reply instructions based on session context, improving the overall announcement process. * fix: resolve prep blockers and changelog placement (#14447) (thanks @tyler6204) * fix: restore cron delivery-plan import after rebase (#14447) (thanks @tyler6204) * fix: resolve test failures from rebase conflicts (#14447) (thanks @tyler6204) * fix: apply formatting after rebase (#14447) (thanks @tyler6204)
2026-02-14 22:03:45 -08:00
function shouldRewriteBillingText(raw: string): boolean {
if (!isBillingErrorMessage(raw)) {
return false;
}
return (
isRawApiErrorPayload(raw) ||
isLikelyHttpErrorText(raw) ||
ERROR_PREFIX_RE.test(raw) ||
BILLING_ERROR_HEAD_RE.test(raw)
);
}
type ErrorPayload = Record<string, unknown>;
function isErrorPayloadObject(payload: unknown): payload is ErrorPayload {
if (!payload || typeof payload !== "object" || Array.isArray(payload)) {
return false;
}
const record = payload as ErrorPayload;
if (record.type === "error") {
return true;
}
if (typeof record.request_id === "string" || typeof record.requestId === "string") {
return true;
}
if ("error" in record) {
const err = record.error;
if (err && typeof err === "object" && !Array.isArray(err)) {
const errRecord = err as ErrorPayload;
if (
typeof errRecord.message === "string" ||
typeof errRecord.type === "string" ||
typeof errRecord.code === "string"
) {
return true;
}
}
}
return false;
}
function parseApiErrorPayload(raw: string): ErrorPayload | null {
if (!raw) {
return null;
}
const trimmed = raw.trim();
if (!trimmed) {
return null;
}
const candidates = [trimmed];
if (ERROR_PAYLOAD_PREFIX_RE.test(trimmed)) {
candidates.push(trimmed.replace(ERROR_PAYLOAD_PREFIX_RE, "").trim());
}
for (const candidate of candidates) {
if (!candidate.startsWith("{") || !candidate.endsWith("}")) {
continue;
}
try {
const parsed = JSON.parse(candidate) as unknown;
if (isErrorPayloadObject(parsed)) {
return parsed;
}
} catch {
// ignore parse errors
}
}
return null;
}
function stableStringify(value: unknown): string {
if (!value || typeof value !== "object") {
return JSON.stringify(value) ?? "null";
}
if (Array.isArray(value)) {
return `[${value.map((entry) => stableStringify(entry)).join(",")}]`;
}
const record = value as Record<string, unknown>;
const keys = Object.keys(record).toSorted();
const entries = keys.map((key) => `${JSON.stringify(key)}:${stableStringify(record[key])}`);
return `{${entries.join(",")}}`;
}
export function getApiErrorPayloadFingerprint(raw?: string): string | null {
if (!raw) {
return null;
}
const payload = parseApiErrorPayload(raw);
if (!payload) {
return null;
}
return stableStringify(payload);
}
export function isRawApiErrorPayload(raw?: string): boolean {
return getApiErrorPayloadFingerprint(raw) !== null;
}
2026-01-15 08:16:44 +00:00
export type ApiErrorInfo = {
httpCode?: string;
type?: string;
message?: string;
requestId?: string;
};
export function parseApiErrorInfo(raw?: string): ApiErrorInfo | null {
if (!raw) {
return null;
}
2026-01-15 08:16:44 +00:00
const trimmed = raw.trim();
if (!trimmed) {
return null;
}
2026-01-15 08:16:44 +00:00
let httpCode: string | undefined;
let candidate = trimmed;
const httpPrefixMatch = candidate.match(/^(\d{3})\s+(.+)$/s);
if (httpPrefixMatch) {
httpCode = httpPrefixMatch[1];
candidate = httpPrefixMatch[2].trim();
}
const payload = parseApiErrorPayload(candidate);
if (!payload) {
return null;
}
2026-01-15 08:16:44 +00:00
const requestId =
typeof payload.request_id === "string"
? payload.request_id
: typeof payload.requestId === "string"
? payload.requestId
: undefined;
const topType = typeof payload.type === "string" ? payload.type : undefined;
const topMessage = typeof payload.message === "string" ? payload.message : undefined;
let errType: string | undefined;
let errMessage: string | undefined;
if (payload.error && typeof payload.error === "object" && !Array.isArray(payload.error)) {
const err = payload.error as Record<string, unknown>;
if (typeof err.type === "string") {
errType = err.type;
}
if (typeof err.code === "string" && !errType) {
errType = err.code;
}
if (typeof err.message === "string") {
errMessage = err.message;
}
2026-01-15 08:16:44 +00:00
}
return {
httpCode,
type: errType ?? topType,
message: errMessage ?? topMessage,
requestId,
};
}
export function formatRawAssistantErrorForUi(raw?: string): string {
const trimmed = (raw ?? "").trim();
if (!trimmed) {
return "LLM request failed with an unknown error.";
}
2026-01-15 08:16:44 +00:00
const leadingStatus = extractLeadingHttpStatus(trimmed);
if (leadingStatus && isCloudflareOrHtmlErrorPage(trimmed)) {
return `The AI service is temporarily unavailable (HTTP ${leadingStatus.code}). Please try again in a moment.`;
}
2026-01-22 22:24:25 +00:00
const httpMatch = trimmed.match(HTTP_STATUS_PREFIX_RE);
if (httpMatch) {
const rest = httpMatch[2].trim();
if (!rest.startsWith("{")) {
return `HTTP ${httpMatch[1]}: ${rest}`;
}
}
2026-01-15 08:16:44 +00:00
const info = parseApiErrorInfo(trimmed);
if (info?.message) {
const prefix = info.httpCode ? `HTTP ${info.httpCode}` : "LLM error";
const type = info.type ? ` ${info.type}` : "";
const requestId = info.requestId ? ` (request_id: ${info.requestId})` : "";
return `${prefix}${type}: ${info.message}${requestId}`;
}
return trimmed.length > 600 ? `${trimmed.slice(0, 600)}` : trimmed;
}
2026-01-14 01:08:15 +00:00
export function formatAssistantErrorText(
msg: AssistantMessage,
opts?: { cfg?: OpenClawConfig; sessionKey?: string; provider?: string },
2026-01-14 01:08:15 +00:00
): string | undefined {
// Also format errors if errorMessage is present, even if stopReason isn't "error"
2026-01-14 01:08:15 +00:00
const raw = (msg.errorMessage ?? "").trim();
if (msg.stopReason !== "error" && !raw) {
return undefined;
}
if (!raw) {
return "LLM request failed with an unknown error.";
}
2026-01-14 01:08:15 +00:00
const unknownTool =
raw.match(/unknown tool[:\s]+["']?([a-z0-9_-]+)["']?/i) ??
raw.match(/tool\s+["']?([a-z0-9_-]+)["']?\s+(?:not found|is not available)/i);
2026-01-14 01:08:15 +00:00
if (unknownTool?.[1]) {
const rewritten = formatSandboxToolPolicyBlockedMessage({
cfg: opts?.cfg,
sessionKey: opts?.sessionKey,
toolName: unknownTool[1],
});
if (rewritten) {
return rewritten;
}
2026-01-14 01:08:15 +00:00
}
if (isContextOverflowError(raw)) {
return (
"Context overflow: prompt too large for the model. " +
"Try /reset (or /new) to start a fresh session, or use a larger-context model."
2026-01-14 01:08:15 +00:00
);
}
// Catch role ordering errors - including JSON-wrapped and "400" prefix variants
if (
/incorrect role information|roles must alternate|400.*role|"message".*role.*information/i.test(
raw,
)
) {
2026-01-14 01:08:15 +00:00
return (
"Message ordering conflict - please try again. " +
"If this persists, use /new to start a fresh session."
);
}
if (isMissingToolCallInputError(raw)) {
return (
"Session history looks corrupted (tool call input missing). " +
"Use /new to start a fresh session. " +
"If this keeps happening, reset the session or delete the corrupted session transcript."
);
}
const invalidRequest = raw.match(/"type":"invalid_request_error".*?"message":"([^"]+)"/);
2026-01-14 01:08:15 +00:00
if (invalidRequest?.[1]) {
return `LLM request rejected: ${invalidRequest[1]}`;
}
const transientCopy = formatRateLimitOrOverloadedErrorCopy(raw);
if (transientCopy) {
return transientCopy;
2026-01-14 01:08:15 +00:00
}
if (isTimeoutErrorMessage(raw)) {
return "LLM request timed out.";
}
if (isBillingErrorMessage(raw)) {
return formatBillingErrorMessage(opts?.provider);
}
2026-01-22 22:24:25 +00:00
if (isLikelyHttpErrorText(raw) || isRawApiErrorPayload(raw)) {
return formatRawAssistantErrorForUi(raw);
}
// Never return raw unhandled errors - log for debugging but return safe message
if (raw.length > 600) {
console.warn("[formatAssistantErrorText] Long error truncated:", raw.slice(0, 200));
}
2026-01-14 01:08:15 +00:00
return raw.length > 600 ? `${raw.slice(0, 600)}` : raw;
}
export function sanitizeUserFacingText(text: string, opts?: { errorContext?: boolean }): string {
if (!text) {
return text;
}
const errorContext = opts?.errorContext ?? false;
const stripped = stripFinalTagsFromText(text);
const trimmed = stripped.trim();
if (!trimmed) {
return "";
}
// Only apply error-pattern rewrites when the caller knows this text is an error payload.
// Otherwise we risk swallowing legitimate assistant text that merely *mentions* these errors.
if (errorContext) {
if (/incorrect role information|roles must alternate/i.test(trimmed)) {
return (
"Message ordering conflict - please try again. " +
"If this persists, use /new to start a fresh session."
);
}
if (shouldRewriteContextOverflowText(trimmed)) {
return (
"Context overflow: prompt too large for the model. " +
"Try /reset (or /new) to start a fresh session, or use a larger-context model."
);
}
if (isBillingErrorMessage(trimmed)) {
return BILLING_ERROR_USER_MESSAGE;
}
if (isRawApiErrorPayload(trimmed) || isLikelyHttpErrorText(trimmed)) {
return formatRawAssistantErrorForUi(trimmed);
}
if (ERROR_PREFIX_RE.test(trimmed)) {
const prefixedCopy = formatRateLimitOrOverloadedErrorCopy(trimmed);
if (prefixedCopy) {
return prefixedCopy;
}
if (isTimeoutErrorMessage(trimmed)) {
return "LLM request timed out.";
}
return formatRawAssistantErrorForUi(trimmed);
}
}
Agents: add nested subagent orchestration controls and reduce subagent token waste (#14447) * Agents: add subagent orchestration controls * Agents: add subagent orchestration controls (WIP uncommitted changes) * feat(subagents): add depth-based spawn gating for sub-sub-agents * feat(subagents): tool policy, registry, and announce chain for nested agents * feat(subagents): system prompt, docs, changelog for nested sub-agents * fix(subagents): prevent model fallback override, show model during active runs, and block context overflow fallback Bug 1: When a session has an explicit model override (e.g., gpt/openai-codex), the fallback candidate logic in resolveFallbackCandidates silently appended the global primary model (opus) as a backstop. On reinjection/steer with a transient error, the session could fall back to opus which has a smaller context window and crash. Fix: when storedModelOverride is set, pass fallbacksOverride ?? [] instead of undefined, preventing the implicit primary backstop. Bug 2: Active subagents showed 'model n/a' in /subagents list because resolveModelDisplay only read entry.model/modelProvider (populated after run completes). Fix: fall back to modelOverride/providerOverride fields which are populated at spawn time via sessions.patch. Bug 3: Context overflow errors (prompt too long, context_length_exceeded) could theoretically escape runEmbeddedPiAgent and be treated as failover candidates in runWithModelFallback, causing a switch to a model with a smaller context window. Fix: in runWithModelFallback, detect context overflow errors via isLikelyContextOverflowError and rethrow them immediately instead of trying the next model candidate. * fix(subagents): track spawn depth in session store and fix announce routing for nested agents * Fix compaction status tracking and dedupe overflow compaction triggers * fix(subagents): enforce depth block via session store and implement cascade kill * fix: inject group chat context into system prompt * fix(subagents): always write model to session store at spawn time * Preserve spawnDepth when agent handler rewrites session entry * fix(subagents): suppress announce on steer-restart * fix(subagents): fallback spawned session model to runtime default * fix(subagents): enforce spawn depth when caller key resolves by sessionId * feat(subagents): implement active-first ordering for numeric targets and enhance task display - Added a test to verify that subagents with numeric targets follow an active-first list ordering. - Updated `resolveSubagentTarget` to sort subagent runs based on active status and recent activity. - Enhanced task display in command responses to prevent truncation of long task descriptions. - Introduced new utility functions for compacting task text and managing subagent run states. * fix(subagents): show model for active runs via run record fallback When the spawned model matches the agent's default model, the session store's override fields are intentionally cleared (isDefault: true). The model/modelProvider fields are only populated after the run completes. This left active subagents showing 'model n/a'. Fix: store the resolved model on SubagentRunRecord at registration time, and use it as a fallback in both display paths (subagents tool and /subagents command) when the session store entry has no model info. Changes: - SubagentRunRecord: add optional model field - registerSubagentRun: accept and persist model param - sessions-spawn-tool: pass resolvedModel to registerSubagentRun - subagents-tool: pass run record model as fallback to resolveModelDisplay - commands-subagents: pass run record model as fallback to resolveModelDisplay * feat(chat): implement session key resolution and reset on sidebar navigation - Added functions to resolve the main session key and reset chat state when switching sessions from the sidebar. - Updated the `renderTab` function to handle session key changes when navigating to the chat tab. - Introduced a test to verify that the session resets to "main" when opening chat from the sidebar navigation. * fix: subagent timeout=0 passthrough and fallback prompt duplication Bug 1: runTimeoutSeconds=0 now means 'no timeout' instead of applying 600s default - sessions-spawn-tool: default to undefined (not 0) when neither timeout param is provided; use != null check so explicit 0 passes through to gateway - agent.ts: accept 0 as valid timeout (resolveAgentTimeoutMs already handles 0 → MAX_SAFE_TIMEOUT_MS) Bug 2: model fallback no longer re-injects the original prompt as a duplicate - agent.ts: track fallback attempt index; on retries use a short continuation message instead of the full original prompt since the session file already contains it from the first attempt - Also skip re-sending images on fallback retries (already in session) * feat(subagents): truncate long task descriptions in subagents command output - Introduced a new utility function to format task previews, limiting their length to improve readability. - Updated the command handler to use the new formatting function, ensuring task descriptions are truncated appropriately. - Adjusted related tests to verify that long task descriptions are now truncated in the output. * refactor(subagents): update subagent registry path resolution and improve command output formatting - Replaced direct import of STATE_DIR with a utility function to resolve the state directory dynamically. - Enhanced the formatting of command output for active and recent subagents, adding separators for better readability. - Updated related tests to reflect changes in command output structure. * fix(subagent): default sessions_spawn to no timeout when runTimeoutSeconds omitted The previous fix (75a791106) correctly handled the case where runTimeoutSeconds was explicitly set to 0 ("no timeout"). However, when models omit the parameter entirely (which is common since the schema marks it as optional), runTimeoutSeconds resolved to undefined. undefined flowed through the chain as: sessions_spawn → timeout: undefined (since undefined != null is false) → gateway agent handler → agentCommand opts.timeout: undefined → resolveAgentTimeoutMs({ overrideSeconds: undefined }) → DEFAULT_AGENT_TIMEOUT_SECONDS (600s = 10 minutes) This caused subagents to be killed at exactly 10 minutes even though the user's intent (via TOOLS.md) was for subagents to run without a timeout. Fix: default runTimeoutSeconds to 0 (no timeout) when neither runTimeoutSeconds nor timeoutSeconds is provided by the caller. Subagent spawns are long-running by design and should not inherit the 600s agent-command default timeout. * fix(subagent): accept timeout=0 in agent-via-gateway path (second 600s default) * fix: thread timeout override through getReplyFromConfig dispatch path getReplyFromConfig called resolveAgentTimeoutMs({ cfg }) with no override, always falling back to the config default (600s). Add timeoutOverrideSeconds to GetReplyOptions and pass it through as overrideSeconds so callers of the dispatch chain can specify a custom timeout (0 = no timeout). This complements the existing timeout threading in agentCommand and the cron isolated-agent runner, which already pass overrideSeconds correctly. * feat(model-fallback): normalize OpenAI Codex model references and enhance fallback handling - Added normalization for OpenAI Codex model references, specifically converting "gpt-5.3-codex" to "openai-codex" before execution. - Updated the `resolveFallbackCandidates` function to utilize the new normalization logic. - Enhanced tests to verify the correct behavior of model normalization and fallback mechanisms. - Introduced a new test case to ensure that the normalization process works as expected for various input formats. * feat(tests): add unit tests for steer failure behavior in openclaw-tools - Introduced a new test file to validate the behavior of subagents when steer replacement dispatch fails. - Implemented tests to ensure that the announce behavior is restored correctly and that the suppression reason is cleared as expected. - Enhanced the subagent registry with a new function to clear steer restart suppression. - Updated related components to support the new test scenarios. * fix(subagents): replace stop command with kill in slash commands and documentation - Updated the `/subagents` command to replace `stop` with `kill` for consistency in controlling sub-agent runs. - Modified related documentation to reflect the change in command usage. - Removed legacy timeoutSeconds references from the sessions-spawn-tool schema and tests to streamline timeout handling. - Enhanced tests to ensure correct behavior of the updated commands and their interactions. * feat(tests): add unit tests for readLatestAssistantReply function - Introduced a new test file for the `readLatestAssistantReply` function to validate its behavior with various message scenarios. - Implemented tests to ensure the function correctly retrieves the latest assistant message and handles cases where the latest message has no text. - Mocked the gateway call to simulate different message histories for comprehensive testing. * feat(tests): enhance subagent kill-all cascade tests and announce formatting - Added a new test to verify that the `kill-all` command cascades through ended parents to active descendants in subagents. - Updated the subagent announce formatting tests to reflect changes in message structure, including the replacement of "Findings:" with "Result:" and the addition of new expectations for message content. - Improved the handling of long findings and stats in the announce formatting logic to ensure concise output. - Refactored related functions to enhance clarity and maintainability in the subagent registry and tools. * refactor(subagent): update announce formatting and remove unused constants - Modified the subagent announce formatting to replace "Findings:" with "Result:" and adjusted related expectations in tests. - Removed constants for maximum announce findings characters and summary words, simplifying the announcement logic. - Updated the handling of findings to retain full content instead of truncating, ensuring more informative outputs. - Cleaned up unused imports in the commands-subagents file to enhance code clarity. * feat(tests): enhance billing error handling in user-facing text - Added tests to ensure that normal text mentioning billing plans is not rewritten, preserving user context. - Updated the `isBillingErrorMessage` and `sanitizeUserFacingText` functions to improve handling of billing-related messages. - Introduced new test cases for various scenarios involving billing messages to ensure accurate processing and output. - Enhanced the subagent announce flow to correctly manage active descendant runs, preventing premature announcements. * feat(subagent): enhance workflow guidance and auto-announcement clarity - Added a new guideline in the subagent system prompt to emphasize trust in push-based completion, discouraging busy polling for status updates. - Updated documentation to clarify that sub-agents will automatically announce their results, improving user understanding of the workflow. - Enhanced tests to verify the new guidance on avoiding polling loops and to ensure the accuracy of the updated prompts. * fix(cron): avoid announcing interim subagent spawn acks * chore: clean post-rebase imports * fix(cron): fall back to child replies when parent stays interim * fix(subagents): make active-run guidance advisory * fix(subagents): update announce flow to handle active descendants and enhance test coverage - Modified the announce flow to defer announcements when active descendant runs are present, ensuring accurate status reporting. - Updated tests to verify the new behavior, including scenarios where no fallback requester is available and ensuring proper handling of finished subagents. - Enhanced the announce formatting to include an `expectFinal` flag for better clarity in the announcement process. * fix(subagents): enhance announce flow and formatting for user updates - Updated the announce flow to provide clearer instructions for user updates based on active subagent runs and requester context. - Refactored the announcement logic to improve clarity and ensure internal context remains private. - Enhanced tests to verify the new message expectations and formatting, including updated prompts for user-facing updates. - Introduced a new function to build reply instructions based on session context, improving the overall announcement process. * fix: resolve prep blockers and changelog placement (#14447) (thanks @tyler6204) * fix: restore cron delivery-plan import after rebase (#14447) (thanks @tyler6204) * fix: resolve test failures from rebase conflicts (#14447) (thanks @tyler6204) * fix: apply formatting after rebase (#14447) (thanks @tyler6204)
2026-02-14 22:03:45 -08:00
// Preserve legacy behavior for explicit billing-head text outside known
// error contexts (e.g., "billing: please upgrade your plan"), while
// keeping conversational billing mentions untouched.
if (shouldRewriteBillingText(trimmed)) {
return BILLING_ERROR_USER_MESSAGE;
}
// Strip leading blank lines (including whitespace-only lines) without clobbering indentation on
// the first content line (e.g. markdown/code blocks).
const withoutLeadingEmptyLines = stripped.replace(/^(?:[ \t]*\r?\n)+/, "");
return collapseConsecutiveDuplicateBlocks(withoutLeadingEmptyLines);
}
export function isRateLimitAssistantError(msg: AssistantMessage | undefined): boolean {
if (!msg || msg.stopReason !== "error") {
return false;
}
2026-01-14 01:08:15 +00:00
return isRateLimitErrorMessage(msg.errorMessage ?? "");
}
type ErrorPattern = RegExp | string;
const ERROR_PATTERNS = {
rateLimit: [
/rate[_ ]limit|too many requests|429/,
"exceeded your current quota",
"resource has been exhausted",
"quota exceeded",
"resource_exhausted",
"usage limit",
],
overloaded: [/overloaded_error|"type"\s*:\s*"overloaded_error"/i, "overloaded"],
timeout: [
"timeout",
"timed out",
"deadline exceeded",
"context deadline exceeded",
/without sending (?:any )?chunks?/i,
],
2026-01-14 01:08:15 +00:00
billing: [
/["']?(?:status|code)["']?\s*[:=]\s*402\b|\bhttp\s*402\b|\berror(?:\s+code)?\s*[:=]?\s*402\b|\b(?:got|returned|received)\s+(?:a\s+)?402\b|^\s*402\s+payment/i,
2026-01-14 01:08:15 +00:00
"payment required",
"insufficient credits",
"credit balance",
"plans & billing",
"insufficient balance",
2026-01-14 01:08:15 +00:00
],
auth: [
/invalid[_ ]?api[_ ]?key/,
"incorrect api key",
"invalid token",
"authentication",
"re-authenticate",
"oauth token refresh failed",
2026-01-14 01:08:15 +00:00
"unauthorized",
"forbidden",
"access denied",
"expired",
"token has expired",
/\b401\b/,
/\b403\b/,
"no credentials found",
"no api key found",
],
format: [
"string should match pattern",
"tool_use.id",
"tool_use_id",
"messages.1.content.1.tool_use.id",
"invalid request format",
],
} as const;
const TOOL_CALL_INPUT_MISSING_RE =
/tool_(?:use|call)\.(?:input|arguments).*?(?:field required|required)/i;
const TOOL_CALL_INPUT_PATH_RE =
/messages\.\d+\.content\.\d+\.tool_(?:use|call)\.(?:input|arguments)/i;
2026-01-18 15:19:25 +00:00
const IMAGE_DIMENSION_ERROR_RE =
/image dimensions exceed max allowed size for many-image requests:\s*(\d+)\s*pixels/i;
const IMAGE_DIMENSION_PATH_RE = /messages\.(\d+)\.content\.(\d+)\.image/i;
const IMAGE_SIZE_ERROR_RE = /image exceeds\s*(\d+(?:\.\d+)?)\s*mb/i;
2026-01-18 15:19:25 +00:00
function matchesErrorPatterns(raw: string, patterns: readonly ErrorPattern[]): boolean {
if (!raw) {
return false;
}
2026-01-14 01:08:15 +00:00
const value = raw.toLowerCase();
return patterns.some((pattern) =>
pattern instanceof RegExp ? pattern.test(value) : value.includes(pattern),
);
}
export function isRateLimitErrorMessage(raw: string): boolean {
return matchesErrorPatterns(raw, ERROR_PATTERNS.rateLimit);
}
export function isTimeoutErrorMessage(raw: string): boolean {
return matchesErrorPatterns(raw, ERROR_PATTERNS.timeout);
}
export function isBillingErrorMessage(raw: string): boolean {
const value = raw.toLowerCase();
if (!value) {
return false;
}
Agents: add nested subagent orchestration controls and reduce subagent token waste (#14447) * Agents: add subagent orchestration controls * Agents: add subagent orchestration controls (WIP uncommitted changes) * feat(subagents): add depth-based spawn gating for sub-sub-agents * feat(subagents): tool policy, registry, and announce chain for nested agents * feat(subagents): system prompt, docs, changelog for nested sub-agents * fix(subagents): prevent model fallback override, show model during active runs, and block context overflow fallback Bug 1: When a session has an explicit model override (e.g., gpt/openai-codex), the fallback candidate logic in resolveFallbackCandidates silently appended the global primary model (opus) as a backstop. On reinjection/steer with a transient error, the session could fall back to opus which has a smaller context window and crash. Fix: when storedModelOverride is set, pass fallbacksOverride ?? [] instead of undefined, preventing the implicit primary backstop. Bug 2: Active subagents showed 'model n/a' in /subagents list because resolveModelDisplay only read entry.model/modelProvider (populated after run completes). Fix: fall back to modelOverride/providerOverride fields which are populated at spawn time via sessions.patch. Bug 3: Context overflow errors (prompt too long, context_length_exceeded) could theoretically escape runEmbeddedPiAgent and be treated as failover candidates in runWithModelFallback, causing a switch to a model with a smaller context window. Fix: in runWithModelFallback, detect context overflow errors via isLikelyContextOverflowError and rethrow them immediately instead of trying the next model candidate. * fix(subagents): track spawn depth in session store and fix announce routing for nested agents * Fix compaction status tracking and dedupe overflow compaction triggers * fix(subagents): enforce depth block via session store and implement cascade kill * fix: inject group chat context into system prompt * fix(subagents): always write model to session store at spawn time * Preserve spawnDepth when agent handler rewrites session entry * fix(subagents): suppress announce on steer-restart * fix(subagents): fallback spawned session model to runtime default * fix(subagents): enforce spawn depth when caller key resolves by sessionId * feat(subagents): implement active-first ordering for numeric targets and enhance task display - Added a test to verify that subagents with numeric targets follow an active-first list ordering. - Updated `resolveSubagentTarget` to sort subagent runs based on active status and recent activity. - Enhanced task display in command responses to prevent truncation of long task descriptions. - Introduced new utility functions for compacting task text and managing subagent run states. * fix(subagents): show model for active runs via run record fallback When the spawned model matches the agent's default model, the session store's override fields are intentionally cleared (isDefault: true). The model/modelProvider fields are only populated after the run completes. This left active subagents showing 'model n/a'. Fix: store the resolved model on SubagentRunRecord at registration time, and use it as a fallback in both display paths (subagents tool and /subagents command) when the session store entry has no model info. Changes: - SubagentRunRecord: add optional model field - registerSubagentRun: accept and persist model param - sessions-spawn-tool: pass resolvedModel to registerSubagentRun - subagents-tool: pass run record model as fallback to resolveModelDisplay - commands-subagents: pass run record model as fallback to resolveModelDisplay * feat(chat): implement session key resolution and reset on sidebar navigation - Added functions to resolve the main session key and reset chat state when switching sessions from the sidebar. - Updated the `renderTab` function to handle session key changes when navigating to the chat tab. - Introduced a test to verify that the session resets to "main" when opening chat from the sidebar navigation. * fix: subagent timeout=0 passthrough and fallback prompt duplication Bug 1: runTimeoutSeconds=0 now means 'no timeout' instead of applying 600s default - sessions-spawn-tool: default to undefined (not 0) when neither timeout param is provided; use != null check so explicit 0 passes through to gateway - agent.ts: accept 0 as valid timeout (resolveAgentTimeoutMs already handles 0 → MAX_SAFE_TIMEOUT_MS) Bug 2: model fallback no longer re-injects the original prompt as a duplicate - agent.ts: track fallback attempt index; on retries use a short continuation message instead of the full original prompt since the session file already contains it from the first attempt - Also skip re-sending images on fallback retries (already in session) * feat(subagents): truncate long task descriptions in subagents command output - Introduced a new utility function to format task previews, limiting their length to improve readability. - Updated the command handler to use the new formatting function, ensuring task descriptions are truncated appropriately. - Adjusted related tests to verify that long task descriptions are now truncated in the output. * refactor(subagents): update subagent registry path resolution and improve command output formatting - Replaced direct import of STATE_DIR with a utility function to resolve the state directory dynamically. - Enhanced the formatting of command output for active and recent subagents, adding separators for better readability. - Updated related tests to reflect changes in command output structure. * fix(subagent): default sessions_spawn to no timeout when runTimeoutSeconds omitted The previous fix (75a791106) correctly handled the case where runTimeoutSeconds was explicitly set to 0 ("no timeout"). However, when models omit the parameter entirely (which is common since the schema marks it as optional), runTimeoutSeconds resolved to undefined. undefined flowed through the chain as: sessions_spawn → timeout: undefined (since undefined != null is false) → gateway agent handler → agentCommand opts.timeout: undefined → resolveAgentTimeoutMs({ overrideSeconds: undefined }) → DEFAULT_AGENT_TIMEOUT_SECONDS (600s = 10 minutes) This caused subagents to be killed at exactly 10 minutes even though the user's intent (via TOOLS.md) was for subagents to run without a timeout. Fix: default runTimeoutSeconds to 0 (no timeout) when neither runTimeoutSeconds nor timeoutSeconds is provided by the caller. Subagent spawns are long-running by design and should not inherit the 600s agent-command default timeout. * fix(subagent): accept timeout=0 in agent-via-gateway path (second 600s default) * fix: thread timeout override through getReplyFromConfig dispatch path getReplyFromConfig called resolveAgentTimeoutMs({ cfg }) with no override, always falling back to the config default (600s). Add timeoutOverrideSeconds to GetReplyOptions and pass it through as overrideSeconds so callers of the dispatch chain can specify a custom timeout (0 = no timeout). This complements the existing timeout threading in agentCommand and the cron isolated-agent runner, which already pass overrideSeconds correctly. * feat(model-fallback): normalize OpenAI Codex model references and enhance fallback handling - Added normalization for OpenAI Codex model references, specifically converting "gpt-5.3-codex" to "openai-codex" before execution. - Updated the `resolveFallbackCandidates` function to utilize the new normalization logic. - Enhanced tests to verify the correct behavior of model normalization and fallback mechanisms. - Introduced a new test case to ensure that the normalization process works as expected for various input formats. * feat(tests): add unit tests for steer failure behavior in openclaw-tools - Introduced a new test file to validate the behavior of subagents when steer replacement dispatch fails. - Implemented tests to ensure that the announce behavior is restored correctly and that the suppression reason is cleared as expected. - Enhanced the subagent registry with a new function to clear steer restart suppression. - Updated related components to support the new test scenarios. * fix(subagents): replace stop command with kill in slash commands and documentation - Updated the `/subagents` command to replace `stop` with `kill` for consistency in controlling sub-agent runs. - Modified related documentation to reflect the change in command usage. - Removed legacy timeoutSeconds references from the sessions-spawn-tool schema and tests to streamline timeout handling. - Enhanced tests to ensure correct behavior of the updated commands and their interactions. * feat(tests): add unit tests for readLatestAssistantReply function - Introduced a new test file for the `readLatestAssistantReply` function to validate its behavior with various message scenarios. - Implemented tests to ensure the function correctly retrieves the latest assistant message and handles cases where the latest message has no text. - Mocked the gateway call to simulate different message histories for comprehensive testing. * feat(tests): enhance subagent kill-all cascade tests and announce formatting - Added a new test to verify that the `kill-all` command cascades through ended parents to active descendants in subagents. - Updated the subagent announce formatting tests to reflect changes in message structure, including the replacement of "Findings:" with "Result:" and the addition of new expectations for message content. - Improved the handling of long findings and stats in the announce formatting logic to ensure concise output. - Refactored related functions to enhance clarity and maintainability in the subagent registry and tools. * refactor(subagent): update announce formatting and remove unused constants - Modified the subagent announce formatting to replace "Findings:" with "Result:" and adjusted related expectations in tests. - Removed constants for maximum announce findings characters and summary words, simplifying the announcement logic. - Updated the handling of findings to retain full content instead of truncating, ensuring more informative outputs. - Cleaned up unused imports in the commands-subagents file to enhance code clarity. * feat(tests): enhance billing error handling in user-facing text - Added tests to ensure that normal text mentioning billing plans is not rewritten, preserving user context. - Updated the `isBillingErrorMessage` and `sanitizeUserFacingText` functions to improve handling of billing-related messages. - Introduced new test cases for various scenarios involving billing messages to ensure accurate processing and output. - Enhanced the subagent announce flow to correctly manage active descendant runs, preventing premature announcements. * feat(subagent): enhance workflow guidance and auto-announcement clarity - Added a new guideline in the subagent system prompt to emphasize trust in push-based completion, discouraging busy polling for status updates. - Updated documentation to clarify that sub-agents will automatically announce their results, improving user understanding of the workflow. - Enhanced tests to verify the new guidance on avoiding polling loops and to ensure the accuracy of the updated prompts. * fix(cron): avoid announcing interim subagent spawn acks * chore: clean post-rebase imports * fix(cron): fall back to child replies when parent stays interim * fix(subagents): make active-run guidance advisory * fix(subagents): update announce flow to handle active descendants and enhance test coverage - Modified the announce flow to defer announcements when active descendant runs are present, ensuring accurate status reporting. - Updated tests to verify the new behavior, including scenarios where no fallback requester is available and ensuring proper handling of finished subagents. - Enhanced the announce formatting to include an `expectFinal` flag for better clarity in the announcement process. * fix(subagents): enhance announce flow and formatting for user updates - Updated the announce flow to provide clearer instructions for user updates based on active subagent runs and requester context. - Refactored the announcement logic to improve clarity and ensure internal context remains private. - Enhanced tests to verify the new message expectations and formatting, including updated prompts for user-facing updates. - Introduced a new function to build reply instructions based on session context, improving the overall announcement process. * fix: resolve prep blockers and changelog placement (#14447) (thanks @tyler6204) * fix: restore cron delivery-plan import after rebase (#14447) (thanks @tyler6204) * fix: resolve test failures from rebase conflicts (#14447) (thanks @tyler6204) * fix: apply formatting after rebase (#14447) (thanks @tyler6204)
2026-02-14 22:03:45 -08:00
if (matchesErrorPatterns(value, ERROR_PATTERNS.billing)) {
return true;
}
if (!BILLING_ERROR_HEAD_RE.test(raw)) {
return false;
}
return (
value.includes("upgrade") ||
value.includes("credits") ||
value.includes("payment") ||
value.includes("plan")
);
2026-01-14 01:08:15 +00:00
}
export function isMissingToolCallInputError(raw: string): boolean {
if (!raw) {
return false;
}
return TOOL_CALL_INPUT_MISSING_RE.test(raw) || TOOL_CALL_INPUT_PATH_RE.test(raw);
}
export function isBillingAssistantError(msg: AssistantMessage | undefined): boolean {
if (!msg || msg.stopReason !== "error") {
return false;
}
2026-01-14 01:08:15 +00:00
return isBillingErrorMessage(msg.errorMessage ?? "");
}
export function isAuthErrorMessage(raw: string): boolean {
return matchesErrorPatterns(raw, ERROR_PATTERNS.auth);
}
export function isOverloadedErrorMessage(raw: string): boolean {
return matchesErrorPatterns(raw, ERROR_PATTERNS.overloaded);
}
2026-01-18 15:19:25 +00:00
export function parseImageDimensionError(raw: string): {
maxDimensionPx?: number;
messageIndex?: number;
contentIndex?: number;
raw: string;
} | null {
if (!raw) {
return null;
}
2026-01-18 15:19:25 +00:00
const lower = raw.toLowerCase();
if (!lower.includes("image dimensions exceed max allowed size")) {
return null;
}
2026-01-18 15:19:25 +00:00
const limitMatch = raw.match(IMAGE_DIMENSION_ERROR_RE);
const pathMatch = raw.match(IMAGE_DIMENSION_PATH_RE);
return {
maxDimensionPx: limitMatch?.[1] ? Number.parseInt(limitMatch[1], 10) : undefined,
messageIndex: pathMatch?.[1] ? Number.parseInt(pathMatch[1], 10) : undefined,
contentIndex: pathMatch?.[2] ? Number.parseInt(pathMatch[2], 10) : undefined,
raw,
};
}
export function isImageDimensionErrorMessage(raw: string): boolean {
return Boolean(parseImageDimensionError(raw));
}
export function parseImageSizeError(raw: string): {
maxMb?: number;
raw: string;
} | null {
if (!raw) {
return null;
}
const lower = raw.toLowerCase();
if (!lower.includes("image exceeds") || !lower.includes("mb")) {
return null;
}
const match = raw.match(IMAGE_SIZE_ERROR_RE);
return {
maxMb: match?.[1] ? Number.parseFloat(match[1]) : undefined,
raw,
};
}
export function isImageSizeError(errorMessage?: string): boolean {
if (!errorMessage) {
return false;
}
return Boolean(parseImageSizeError(errorMessage));
}
2026-01-14 01:08:15 +00:00
export function isCloudCodeAssistFormatError(raw: string): boolean {
2026-01-18 15:19:25 +00:00
return !isImageDimensionErrorMessage(raw) && matchesErrorPatterns(raw, ERROR_PATTERNS.format);
2026-01-14 01:08:15 +00:00
}
export function isAuthAssistantError(msg: AssistantMessage | undefined): boolean {
if (!msg || msg.stopReason !== "error") {
return false;
}
2026-01-14 01:08:15 +00:00
return isAuthErrorMessage(msg.errorMessage ?? "");
}
export function classifyFailoverReason(raw: string): FailoverReason | null {
if (isImageDimensionErrorMessage(raw)) {
return null;
}
if (isImageSizeError(raw)) {
return null;
}
if (isTransientHttpError(raw)) {
// Treat transient 5xx provider failures as retryable transport issues.
return "timeout";
}
if (isRateLimitErrorMessage(raw)) {
return "rate_limit";
}
if (isOverloadedErrorMessage(raw)) {
return "rate_limit";
}
if (isCloudCodeAssistFormatError(raw)) {
return "format";
}
if (isBillingErrorMessage(raw)) {
return "billing";
}
if (isTimeoutErrorMessage(raw)) {
return "timeout";
}
if (isAuthErrorMessage(raw)) {
return "auth";
}
2026-01-14 01:08:15 +00:00
return null;
}
export function isFailoverErrorMessage(raw: string): boolean {
return classifyFailoverReason(raw) !== null;
}
export function isFailoverAssistantError(msg: AssistantMessage | undefined): boolean {
if (!msg || msg.stopReason !== "error") {
return false;
}
2026-01-14 01:08:15 +00:00
return isFailoverErrorMessage(msg.errorMessage ?? "");
}