diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index cfd4f82fad1..6815a92e919 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -948,6 +948,7 @@ export async function runEmbeddedPiAgent( didSendViaMessagingTool: attempt.didSendViaMessagingTool, messagingToolSentTexts: attempt.messagingToolSentTexts, messagingToolSentTargets: attempt.messagingToolSentTargets, + successfulCronAdds: attempt.successfulCronAdds, }; } @@ -989,6 +990,7 @@ export async function runEmbeddedPiAgent( didSendViaMessagingTool: attempt.didSendViaMessagingTool, messagingToolSentTexts: attempt.messagingToolSentTexts, messagingToolSentTargets: attempt.messagingToolSentTargets, + successfulCronAdds: attempt.successfulCronAdds, }; } } finally { diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 75c2c7673ab..084b84a39a5 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -759,6 +759,7 @@ export async function runEmbeddedAttempt( waitForCompactionRetry, getMessagingToolSentTexts, getMessagingToolSentTargets, + getSuccessfulCronAdds, didSendViaMessagingTool, getLastToolError, getUsageTotals, @@ -1172,6 +1173,7 @@ export async function runEmbeddedAttempt( didSendViaMessagingTool: didSendViaMessagingTool(), messagingToolSentTexts: getMessagingToolSentTexts(), messagingToolSentTargets: getMessagingToolSentTargets(), + successfulCronAdds: getSuccessfulCronAdds(), cloudCodeAssistFormatError: Boolean( lastAssistant?.errorMessage && isCloudCodeAssistFormatError(lastAssistant.errorMessage), ), diff --git a/src/agents/pi-embedded-runner/run/types.ts b/src/agents/pi-embedded-runner/run/types.ts index 2d22e0a953f..8436f0e2e57 100644 --- a/src/agents/pi-embedded-runner/run/types.ts +++ b/src/agents/pi-embedded-runner/run/types.ts @@ -43,6 +43,7 @@ export type EmbeddedRunAttemptResult = { didSendViaMessagingTool: boolean; messagingToolSentTexts: string[]; messagingToolSentTargets: MessagingToolSend[]; + successfulCronAdds?: number; cloudCodeAssistFormatError: boolean; attemptUsage?: NormalizedUsage; compactionCount?: number; diff --git a/src/agents/pi-embedded-runner/types.ts b/src/agents/pi-embedded-runner/types.ts index 5f0d0b9897e..30a0a5b56e8 100644 --- a/src/agents/pi-embedded-runner/types.ts +++ b/src/agents/pi-embedded-runner/types.ts @@ -65,6 +65,8 @@ export type EmbeddedPiRunResult = { messagingToolSentTexts?: string[]; // Messaging tool targets that successfully sent a message during the run. messagingToolSentTargets?: MessagingToolSend[]; + // Count of successful cron.add tool calls in this run. + successfulCronAdds?: number; }; export type EmbeddedPiCompactResult = { diff --git a/src/agents/pi-embedded-subscribe.handlers.tools.test.ts b/src/agents/pi-embedded-subscribe.handlers.tools.test.ts index f4a8061c888..30e5b7881ad 100644 --- a/src/agents/pi-embedded-subscribe.handlers.tools.test.ts +++ b/src/agents/pi-embedded-subscribe.handlers.tools.test.ts @@ -1,5 +1,8 @@ import { describe, expect, it, vi } from "vitest"; -import { handleToolExecutionStart } from "./pi-embedded-subscribe.handlers.tools.js"; +import { + handleToolExecutionEnd, + handleToolExecutionStart, +} from "./pi-embedded-subscribe.handlers.tools.js"; function createTestContext() { const onBlockReplyFlush = vi.fn(); @@ -25,6 +28,8 @@ function createTestContext() { messagingToolSentTexts: [], messagingToolSentTextsNormalized: [], messagingToolSentTargets: [], + successfulCronAdds: 0, + toolMetas: [], }, shouldEmitToolResult: () => false, emitToolSummary: vi.fn(), @@ -69,3 +74,57 @@ describe("handleToolExecutionStart read path checks", () => { expect(String(warn.mock.calls[0]?.[0] ?? "")).toContain("read tool called without path"); }); }); + +describe("handleToolExecutionEnd cron.add commitment tracking", () => { + it("increments successfulCronAdds when cron add succeeds", async () => { + const { ctx } = createTestContext(); + await handleToolExecutionStart( + ctx as never, + { + type: "tool_execution_start", + toolName: "cron", + toolCallId: "tool-cron-1", + args: { action: "add", job: { name: "reminder" } }, + } as never, + ); + + await handleToolExecutionEnd( + ctx as never, + { + type: "tool_execution_end", + toolName: "cron", + toolCallId: "tool-cron-1", + isError: false, + result: { details: { status: "ok" } }, + } as never, + ); + + expect(ctx.state.successfulCronAdds).toBe(1); + }); + + it("does not increment successfulCronAdds when cron add fails", async () => { + const { ctx } = createTestContext(); + await handleToolExecutionStart( + ctx as never, + { + type: "tool_execution_start", + toolName: "cron", + toolCallId: "tool-cron-2", + args: { action: "add", job: { name: "reminder" } }, + } as never, + ); + + await handleToolExecutionEnd( + ctx as never, + { + type: "tool_execution_end", + toolName: "cron", + toolCallId: "tool-cron-2", + isError: true, + result: { details: { status: "error" } }, + } as never, + ); + + expect(ctx.state.successfulCronAdds).toBe(0); + }); +}); diff --git a/src/agents/pi-embedded-subscribe.handlers.tools.ts b/src/agents/pi-embedded-subscribe.handlers.tools.ts index 1ae6c1609f8..86b40459946 100644 --- a/src/agents/pi-embedded-subscribe.handlers.tools.ts +++ b/src/agents/pi-embedded-subscribe.handlers.tools.ts @@ -23,6 +23,14 @@ import { normalizeToolName } from "./tool-policy.js"; /** Track tool execution start times and args for after_tool_call hook */ const toolStartData = new Map(); +function isCronAddAction(args: unknown): boolean { + if (!args || typeof args !== "object") { + return false; + } + const action = (args as Record).action; + return typeof action === "string" && action.trim().toLowerCase() === "add"; +} + function buildToolCallSummary(toolName: string, args: unknown, meta?: string): ToolCallSummary { const mutation = buildToolMutationState(toolName, args, meta); return { @@ -188,6 +196,8 @@ export async function handleToolExecutionEnd( const result = evt.result; const isToolError = isError || isToolResultError(result); const sanitizedResult = sanitizeToolResult(result); + const startData = toolStartData.get(toolCallId); + toolStartData.delete(toolCallId); const callSummary = ctx.state.toolMetaById.get(toolCallId); const meta = callSummary?.meta; ctx.state.toolMetas.push({ toolName, meta }); @@ -239,6 +249,11 @@ export async function handleToolExecutionEnd( } } + // Track committed reminders only when cron.add completed successfully. + if (!isToolError && toolName === "cron" && isCronAddAction(startData?.args)) { + ctx.state.successfulCronAdds += 1; + } + emitAgentEvent({ runId: ctx.params.runId, stream: "tool", @@ -290,8 +305,6 @@ export async function handleToolExecutionEnd( // Run after_tool_call plugin hook (fire-and-forget) const hookRunnerAfter = ctx.hookRunner ?? getGlobalHookRunner(); if (hookRunnerAfter?.hasHooks("after_tool_call")) { - const startData = toolStartData.get(toolCallId); - toolStartData.delete(toolCallId); const durationMs = startData?.startTime != null ? Date.now() - startData.startTime : undefined; const toolArgs = startData?.args; const hookEvent: PluginHookAfterToolCallEvent = { @@ -310,7 +323,5 @@ export async function handleToolExecutionEnd( .catch((err) => { ctx.log.warn(`after_tool_call hook failed: tool=${toolName} error=${String(err)}`); }); - } else { - toolStartData.delete(toolCallId); } } diff --git a/src/agents/pi-embedded-subscribe.handlers.types.ts b/src/agents/pi-embedded-subscribe.handlers.types.ts index aa70dc4e912..09ab1328f67 100644 --- a/src/agents/pi-embedded-subscribe.handlers.types.ts +++ b/src/agents/pi-embedded-subscribe.handlers.types.ts @@ -72,6 +72,7 @@ export type EmbeddedPiSubscribeState = { messagingToolSentTargets: MessagingToolSend[]; pendingMessagingTexts: Map; pendingMessagingTargets: Map; + successfulCronAdds: number; lastAssistant?: AgentMessage; }; diff --git a/src/agents/pi-embedded-subscribe.ts b/src/agents/pi-embedded-subscribe.ts index 8f8b1cadd61..d782ed247a1 100644 --- a/src/agents/pi-embedded-subscribe.ts +++ b/src/agents/pi-embedded-subscribe.ts @@ -73,6 +73,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar messagingToolSentTargets: [], pendingMessagingTexts: new Map(), pendingMessagingTargets: new Map(), + successfulCronAdds: 0, }; const usageTotals = { input: 0, @@ -578,6 +579,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar messagingToolSentTargets.length = 0; pendingMessagingTexts.clear(); pendingMessagingTargets.clear(); + state.successfulCronAdds = 0; resetAssistantMessageState(0); }; @@ -662,6 +664,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar isCompactionInFlight: () => state.compactionInFlight, getMessagingToolSentTexts: () => messagingToolSentTexts.slice(), getMessagingToolSentTargets: () => messagingToolSentTargets.slice(), + getSuccessfulCronAdds: () => state.successfulCronAdds, // Returns true if any messaging tool successfully sent a message. // Used to suppress agent's confirmation text (e.g., "Respondi no Telegram!") // which is generated AFTER the tool sends the actual answer. diff --git a/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts b/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts index 99ec07a6245..8ce11f49d83 100644 --- a/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts +++ b/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts @@ -853,6 +853,93 @@ describe("runReplyAgent messaging tool suppression", () => { }); }); +describe("runReplyAgent reminder commitment guard", () => { + function createRun() { + const typing = createMockTypingController(); + const sessionCtx = { + Provider: "telegram", + OriginatingTo: "chat", + AccountId: "primary", + MessageSid: "msg", + Surface: "telegram", + } as unknown as TemplateContext; + const resolvedQueue = { mode: "interrupt" } as unknown as QueueSettings; + const followupRun = { + prompt: "hello", + summaryLine: "hello", + enqueuedAt: Date.now(), + run: { + sessionId: "session", + sessionKey: "main", + messageProvider: "telegram", + sessionFile: "/tmp/session.jsonl", + workspaceDir: "/tmp", + config: {}, + skillsSnapshot: {}, + provider: "anthropic", + model: "claude", + thinkLevel: "low", + verboseLevel: "off", + elevatedLevel: "off", + bashElevated: { + enabled: false, + allowed: false, + defaultLevel: "off", + }, + timeoutMs: 1_000, + blockReplyBreak: "message_end", + }, + } as unknown as FollowupRun; + + return runReplyAgent({ + commandBody: "hello", + followupRun, + queueKey: "main", + resolvedQueue, + shouldSteer: false, + shouldFollowup: false, + isActive: false, + isStreaming: false, + typing, + sessionCtx, + sessionKey: "main", + defaultModel: "anthropic/claude-opus-4-5", + resolvedVerboseLevel: "off", + isNewSession: false, + blockStreamingEnabled: false, + resolvedBlockStreamingBreak: "message_end", + shouldInjectGroupIntro: false, + typingMode: "instant", + }); + } + + it("appends guard note when reminder commitment is not backed by cron.add", async () => { + runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [{ text: "I'll remind you tomorrow morning." }], + meta: {}, + successfulCronAdds: 0, + }); + + const result = await createRun(); + expect(result).toMatchObject({ + text: "I'll remind you tomorrow morning.\n\nNote: I did not schedule a reminder in this turn, so this will not trigger automatically.", + }); + }); + + it("keeps reminder commitment unchanged when cron.add succeeded", async () => { + runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [{ text: "I'll remind you tomorrow morning." }], + meta: {}, + successfulCronAdds: 1, + }); + + const result = await createRun(); + expect(result).toMatchObject({ + text: "I'll remind you tomorrow morning.", + }); + }); +}); + describe("runReplyAgent fallback reasoning tags", () => { type EmbeddedPiAgentParams = { enforceFinalTag?: boolean; diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index c8f8eba129a..ea6d07951be 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -42,6 +42,41 @@ import { incrementRunCompactionCount, persistRunSessionUsage } from "./session-r import { createTypingSignaler } from "./typing-mode.js"; const BLOCK_REPLY_SEND_TIMEOUT_MS = 15_000; +const UNSCHEDULED_REMINDER_NOTE = + "Note: I did not schedule a reminder in this turn, so this will not trigger automatically."; +const REMINDER_COMMITMENT_PATTERNS: RegExp[] = [ + /\b(?:i\s*['’]?ll|i will)\s+(?:make sure to\s+)?(?:remember|remind|ping|follow up|follow-up|check back|circle back)\b/i, + /\b(?:i\s*['’]?ll|i will)\s+(?:set|create|schedule)\s+(?:a\s+)?reminder\b/i, +]; + +function hasUnbackedReminderCommitment(text: string): boolean { + const normalized = text.toLowerCase(); + if (!normalized.trim()) { + return false; + } + if (normalized.includes(UNSCHEDULED_REMINDER_NOTE.toLowerCase())) { + return false; + } + return REMINDER_COMMITMENT_PATTERNS.some((pattern) => pattern.test(text)); +} + +function appendUnscheduledReminderNote(payloads: ReplyPayload[]): ReplyPayload[] { + let appended = false; + return payloads.map((payload) => { + if (appended || payload.isError || typeof payload.text !== "string") { + return payload; + } + if (!hasUnbackedReminderCommitment(payload.text)) { + return payload; + } + appended = true; + const trimmed = payload.text.trimEnd(); + return { + ...payload, + text: `${trimmed}\n\n${UNSCHEDULED_REMINDER_NOTE}`, + }; + }); +} export async function runReplyAgent(params: { commandBody: string; @@ -420,7 +455,19 @@ export async function runReplyAgent(params: { return finalizeWithFollowup(undefined, queueKey, runFollowupTurn); } - await signalTypingIfNeeded(replyPayloads, typingSignals); + const successfulCronAdds = runResult.successfulCronAdds ?? 0; + const hasReminderCommitment = replyPayloads.some( + (payload) => + !payload.isError && + typeof payload.text === "string" && + hasUnbackedReminderCommitment(payload.text), + ); + const guardedReplyPayloads = + hasReminderCommitment && successfulCronAdds === 0 + ? appendUnscheduledReminderNote(replyPayloads) + : replyPayloads; + + await signalTypingIfNeeded(guardedReplyPayloads, typingSignals); if (isDiagnosticsEnabled(cfg) && hasNonzeroUsage(usage)) { const input = usage.input ?? 0; @@ -488,7 +535,7 @@ export async function runReplyAgent(params: { } // If verbose is enabled and this is a new session, prepend a session hint. - let finalPayloads = replyPayloads; + let finalPayloads = guardedReplyPayloads; const verboseEnabled = resolvedVerboseLevel !== "off"; if (autoCompactionCompleted) { const count = await incrementRunCompactionCount({