From 460bf6bf6e254e0092fb3df792b4171a45428dc4 Mon Sep 17 00:00:00 2001 From: kumarabhirup Date: Thu, 5 Mar 2026 12:58:24 -0800 Subject: [PATCH] fix: resolve PostHog trace linkage, cost display, and tool capture Switch TraceContextManager from runId to sessionId-based indexing so tool spans share the same trace_id as their parent generation. Extract tool names from agent_end messages for the PostHog Tools tab. Guard against sending $0 cost and null token counts. --- extensions/posthog-analytics/index.ts | 81 ++++-- .../posthog-analytics/lib/event-mappers.ts | 84 ++++-- .../posthog-analytics/lib/trace-context.ts | 61 +++-- src/telemetry/event-mappers.test.ts | 250 ++++++++++-------- src/telemetry/trace-context.test.ts | 196 +++++++------- 5 files changed, 398 insertions(+), 274 deletions(-) diff --git a/extensions/posthog-analytics/index.ts b/extensions/posthog-analytics/index.ts index eb685e5f761..f5ba3e9389b 100644 --- a/extensions/posthog-analytics/index.ts +++ b/extensions/posthog-analytics/index.ts @@ -1,11 +1,20 @@ import { createPostHogClient, shutdownPostHogClient } from "./lib/posthog-client.js"; -import { TraceContextManager } from "./lib/trace-context.js"; +import { TraceContextManager, resolveSessionKey } from "./lib/trace-context.js"; import { emitGeneration, emitToolSpan, emitTrace, emitCustomEvent } from "./lib/event-mappers.js"; import { readPrivacyMode } from "./lib/privacy.js"; import type { PluginConfig } from "./lib/types.js"; export const id = "posthog-analytics"; +const DEBUG = process.env.DENCHCLAW_POSTHOG_DEBUG === "1"; + +function debugLog(label: string, data: unknown): void { + if (!DEBUG) return; + try { + process.stderr.write(`[posthog-analytics] ${label}: ${JSON.stringify(data, null, 2)}\n`); + } catch { /* ignore serialization errors */ } +} + export default function register(api: any) { const config: PluginConfig | undefined = api.config?.plugins?.entries?.["posthog-analytics"]?.config; @@ -18,12 +27,28 @@ export default function register(api: any) { const getPrivacyMode = () => readPrivacyMode(api.config); + const getConfigModel = (): string | undefined => + api.config?.agents?.defaults?.model?.primary; + + const ensureTrace = (ctx: any): void => { + const sk = resolveSessionKey(ctx); + if (traceCtx.getTrace(sk)) return; + traceCtx.startTrace(sk, ctx.runId ?? sk); + const model = getConfigModel(); + if (model) traceCtx.setModel(sk, model); + }; + api.on( "before_model_resolve", (event: any, ctx: any) => { - traceCtx.startTrace(ctx.sessionId ?? ctx.runId, ctx.runId); - if (event.modelOverride) { - traceCtx.setModel(ctx.runId, event.modelOverride); + debugLog("before_model_resolve event", event); + debugLog("before_model_resolve ctx", { runId: ctx.runId, sessionId: ctx.sessionId, sessionKey: ctx.sessionKey }); + + const sk = resolveSessionKey(ctx); + traceCtx.startTrace(sk, ctx.runId ?? sk); + const model = event.modelOverride || getConfigModel(); + if (model) { + traceCtx.setModel(sk, model); } }, { priority: -10 }, @@ -32,8 +57,12 @@ export default function register(api: any) { api.on( "before_prompt_build", (_event: any, ctx: any) => { + debugLog("before_prompt_build ctx", { runId: ctx.runId, sessionId: ctx.sessionId, hasMessages: Boolean(ctx.messages) }); + + const sk = resolveSessionKey(ctx); + ensureTrace(ctx); if (ctx.messages) { - traceCtx.setInput(ctx.runId, ctx.messages, getPrivacyMode()); + traceCtx.setInput(sk, ctx.messages, getPrivacyMode()); } }, { priority: -10 }, @@ -42,7 +71,11 @@ export default function register(api: any) { api.on( "before_tool_call", (event: any, ctx: any) => { - traceCtx.startToolSpan(ctx.runId, event.toolName, event.params); + debugLog("before_tool_call", { toolName: event.toolName, runId: ctx.runId, sessionId: ctx.sessionId }); + + const sk = resolveSessionKey(ctx); + ensureTrace(ctx); + traceCtx.startToolSpan(sk, event.toolName, event.params); }, { priority: -10 }, ); @@ -50,8 +83,12 @@ export default function register(api: any) { api.on( "after_tool_call", (event: any, ctx: any) => { - traceCtx.endToolSpan(ctx.runId, event.toolName, event.result); - emitToolSpan(ph, traceCtx, ctx.runId, event, getPrivacyMode()); + debugLog("after_tool_call", { toolName: event.toolName, runId: ctx.runId, sessionId: ctx.sessionId, hasError: Boolean(event.error), durationMs: event.durationMs }); + + const sk = resolveSessionKey(ctx); + ensureTrace(ctx); + traceCtx.endToolSpan(sk, event.toolName, event.result); + emitToolSpan(ph, traceCtx, sk, event, getPrivacyMode()); }, { priority: -10 }, ); @@ -59,14 +96,26 @@ export default function register(api: any) { api.on( "agent_end", (event: any, ctx: any) => { - emitGeneration(ph, traceCtx, ctx, event, getPrivacyMode()); - emitTrace(ph, traceCtx, ctx); + debugLog("agent_end event", { success: event.success, error: event.error, durationMs: event.durationMs, messageCount: event.messages?.length }); + debugLog("agent_end ctx", { runId: ctx.runId, sessionId: ctx.sessionId }); + + const sk = resolveSessionKey(ctx); + ensureTrace(ctx); + + const trace = traceCtx.getTrace(sk); + if (trace && !trace.model) { + const model = getConfigModel(); + if (model) traceCtx.setModel(sk, model); + } + + emitGeneration(ph, traceCtx, sk, event, getPrivacyMode()); + emitTrace(ph, traceCtx, sk); emitCustomEvent(ph, "dench_turn_completed", { - session_id: ctx.sessionId, + session_id: sk, run_id: ctx.runId, - model: traceCtx.getModel(ctx.runId), + model: traceCtx.getModel(sk), }); - traceCtx.endTrace(ctx.runId); + traceCtx.endTrace(sk); }, { priority: -10 }, ); @@ -75,7 +124,7 @@ export default function register(api: any) { "message_received", (event: any, ctx: any) => { emitCustomEvent(ph, "dench_message_received", { - channel: ctx.channel, + channel: ctx.channel ?? ctx.channelId, session_id: ctx.sessionId, has_attachments: Boolean(event.attachments?.length), }); @@ -88,7 +137,7 @@ export default function register(api: any) { (_event: any, ctx: any) => { emitCustomEvent(ph, "dench_session_start", { session_id: ctx.sessionId, - channel: ctx.channel, + channel: ctx.channel ?? ctx.channelId, }); }, { priority: -10 }, @@ -99,7 +148,7 @@ export default function register(api: any) { (_event: any, ctx: any) => { emitCustomEvent(ph, "dench_session_end", { session_id: ctx.sessionId, - channel: ctx.channel, + channel: ctx.channel ?? ctx.channelId, }); }, { priority: -10 }, diff --git a/extensions/posthog-analytics/lib/event-mappers.ts b/extensions/posthog-analytics/lib/event-mappers.ts index 39aa2e81bc7..74baed58a1b 100644 --- a/extensions/posthog-analytics/lib/event-mappers.ts +++ b/extensions/posthog-analytics/lib/event-mappers.ts @@ -13,25 +13,66 @@ function getAnonymousId(): string { } } +/** + * Extract tool call names from the messages array provided by agent_end. + * Works regardless of privacy mode since tool names are metadata, not content. + */ +export function extractToolNamesFromMessages(messages: unknown): string[] { + if (!Array.isArray(messages)) return []; + const names: string[] = []; + for (const msg of messages) { + if (!msg || typeof msg !== "object") continue; + const m = msg as Record; + + // OpenAI format: assistant message with tool_calls array + if (Array.isArray(m.tool_calls)) { + for (const tc of m.tool_calls) { + const name = (tc as any)?.function?.name ?? (tc as any)?.name; + if (typeof name === "string" && name) names.push(name); + } + } + // Anthropic format: content blocks with type "tool_use" + if (Array.isArray(m.content)) { + for (const block of m.content) { + if ((block as any)?.type === "tool_use" && typeof (block as any)?.name === "string") { + names.push((block as any).name); + } + if ((block as any)?.type === "tool-call" && typeof (block as any)?.toolName === "string") { + names.push((block as any).toolName); + } + } + } + // Tool result messages have a "name" field with the tool name + if (m.role === "tool" && typeof m.name === "string") { + names.push(m.name); + } + } + return [...new Set(names)]; +} + /** * Emit a `$ai_generation` event from the agent_end hook data. */ export function emitGeneration( ph: PostHogClient, traceCtx: TraceContextManager, - ctx: any, + sessionKey: string, event: any, privacyMode: boolean, ): void { try { - const trace = traceCtx.getTrace(ctx.runId); + const trace = traceCtx.getTrace(sessionKey); if (!trace) return; - const latency = trace.startedAt - ? (Date.now() - trace.startedAt) / 1_000 - : undefined; + const latency = event.durationMs != null + ? event.durationMs / 1_000 + : trace.startedAt + ? (Date.now() - trace.startedAt) / 1_000 + : undefined; - const toolNames = trace.toolSpans.map((s) => s.toolName); + const spanToolNames = trace.toolSpans.map((s) => s.toolName); + const messageToolNames = extractToolNamesFromMessages(event.messages); + const allToolNames = [...new Set([...spanToolNames, ...messageToolNames])]; const properties: Record = { $ai_trace_id: trace.traceId, @@ -39,19 +80,26 @@ export function emitGeneration( $ai_model: trace.model ?? event.model ?? "unknown", $ai_provider: trace.provider ?? event.provider, $ai_latency: latency, - $ai_tools: toolNames.length > 0 ? toolNames : undefined, + $ai_tools: allToolNames.length > 0 + ? allToolNames.map((name) => ({ type: "function", function: { name } })) + : undefined, $ai_stream: event.stream, $ai_temperature: event.temperature, - $ai_is_error: Boolean(event.error), + $ai_is_error: event.success === false || Boolean(event.error), }; if (event.usage) { - properties.$ai_input_tokens = event.usage.inputTokens ?? event.usage.input_tokens; - properties.$ai_output_tokens = event.usage.outputTokens ?? event.usage.output_tokens; + const inputTokens = event.usage.inputTokens ?? event.usage.input_tokens; + const outputTokens = event.usage.outputTokens ?? event.usage.output_tokens; + if (inputTokens != null && inputTokens > 0) properties.$ai_input_tokens = inputTokens; + if (outputTokens != null && outputTokens > 0) properties.$ai_output_tokens = outputTokens; } if (event.cost) { - properties.$ai_total_cost_usd = event.cost.totalUsd ?? event.cost.total_usd; + const cost = event.cost.totalUsd ?? event.cost.total_usd; + if (cost != null && cost > 0) { + properties.$ai_total_cost_usd = cost; + } } properties.$ai_input = sanitizeForCapture(trace.input, privacyMode); @@ -82,18 +130,20 @@ export function emitGeneration( export function emitToolSpan( ph: PostHogClient, traceCtx: TraceContextManager, - runId: string, + sessionKey: string, event: any, privacyMode: boolean, ): void { try { - const trace = traceCtx.getTrace(runId); - const span = traceCtx.getLastToolSpan(runId); + const trace = traceCtx.getTrace(sessionKey); + const span = traceCtx.getLastToolSpan(sessionKey); if (!trace || !span) return; const latency = span.startedAt && span.endedAt ? (span.endedAt - span.startedAt) / 1_000 - : undefined; + : event.durationMs != null + ? event.durationMs / 1_000 + : undefined; const properties: Record = { $ai_trace_id: trace.traceId, @@ -126,10 +176,10 @@ export function emitToolSpan( export function emitTrace( ph: PostHogClient, traceCtx: TraceContextManager, - ctx: any, + sessionKey: string, ): void { try { - const trace = traceCtx.getTrace(ctx.runId); + const trace = traceCtx.getTrace(sessionKey); if (!trace) return; const latency = trace.startedAt diff --git a/extensions/posthog-analytics/lib/trace-context.ts b/extensions/posthog-analytics/lib/trace-context.ts index 4011a52f5f0..f8a3b7e168c 100644 --- a/extensions/posthog-analytics/lib/trace-context.ts +++ b/extensions/posthog-analytics/lib/trace-context.ts @@ -3,24 +3,40 @@ import { redactMessages } from "./privacy.js"; import type { TraceEntry, ToolSpanEntry } from "./types.js"; /** - * Tracks in-flight trace and span state per agent run. - * Each `runId` maps to one trace containing zero or more tool spans. + * Resolve a stable session key from the hook context. + * OpenClaw assigns different `runId` values to different hook phases + * in the same agent loop, so we use sessionId/sessionKey as the + * stable key for trace indexing. + */ +export function resolveSessionKey(ctx: { + sessionId?: string; + sessionKey?: string; + runId?: string; +}): string { + return ctx.sessionId ?? ctx.sessionKey ?? ctx.runId ?? "unknown"; +} + +/** + * Tracks in-flight trace and span state per session. + * Each session has at most one active agent run at a time + * (OpenClaw serializes runs per session), so sessionId is + * a stable key across all hooks in the same agent loop. */ export class TraceContextManager { private traces = new Map(); - startTrace(sessionId: string, runId: string): void { - this.traces.set(runId, { + startTrace(sessionKey: string, runId: string): void { + this.traces.set(sessionKey, { traceId: randomUUID(), - sessionId, + sessionId: sessionKey, runId, startedAt: Date.now(), toolSpans: [], }); } - setModel(runId: string, model: string): void { - const t = this.traces.get(runId); + setModel(sessionKey: string, model: string): void { + const t = this.traces.get(sessionKey); if (!t) return; t.model = model; const slashIdx = model.indexOf("/"); @@ -29,14 +45,14 @@ export class TraceContextManager { } } - setInput(runId: string, messages: unknown, privacyMode: boolean): void { - const t = this.traces.get(runId); + setInput(sessionKey: string, messages: unknown, privacyMode: boolean): void { + const t = this.traces.get(sessionKey); if (!t) return; t.input = privacyMode ? redactMessages(messages) : messages; } - startToolSpan(runId: string, toolName: string, params?: unknown): void { - const t = this.traces.get(runId); + startToolSpan(sessionKey: string, toolName: string, params?: unknown): void { + const t = this.traces.get(sessionKey); if (!t) return; t.toolSpans.push({ toolName, @@ -46,8 +62,8 @@ export class TraceContextManager { }); } - endToolSpan(runId: string, toolName: string, result?: unknown): void { - const t = this.traces.get(runId); + endToolSpan(sessionKey: string, toolName: string, result?: unknown): void { + const t = this.traces.get(sessionKey); if (!t) return; for (let i = t.toolSpans.length - 1; i >= 0; i--) { const span = t.toolSpans[i]; @@ -63,26 +79,25 @@ export class TraceContextManager { } } - getTrace(runId: string): TraceEntry | undefined { - return this.traces.get(runId); + getTrace(sessionKey: string): TraceEntry | undefined { + return this.traces.get(sessionKey); } - getModel(runId: string): string | undefined { - return this.traces.get(runId)?.model; + getModel(sessionKey: string): string | undefined { + return this.traces.get(sessionKey)?.model; } - getLastToolSpan(runId: string): ToolSpanEntry | undefined { - const t = this.traces.get(runId); + getLastToolSpan(sessionKey: string): ToolSpanEntry | undefined { + const t = this.traces.get(sessionKey); if (!t || t.toolSpans.length === 0) return undefined; return t.toolSpans[t.toolSpans.length - 1]; } - endTrace(runId: string): void { - const t = this.traces.get(runId); + endTrace(sessionKey: string): void { + const t = this.traces.get(sessionKey); if (t) { t.endedAt = Date.now(); } - // Clean up after a short delay to allow final event emission. - setTimeout(() => this.traces.delete(runId), 5_000); + setTimeout(() => this.traces.delete(sessionKey), 5_000); } } diff --git a/src/telemetry/event-mappers.test.ts b/src/telemetry/event-mappers.test.ts index ef3e496ac83..e71bcf72c32 100644 --- a/src/telemetry/event-mappers.test.ts +++ b/src/telemetry/event-mappers.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect, beforeEach, vi } from "vitest"; import { TraceContextManager } from "../../extensions/posthog-analytics/lib/trace-context.js"; -import { emitGeneration, emitToolSpan, emitTrace, emitCustomEvent } from "../../extensions/posthog-analytics/lib/event-mappers.js"; +import { emitGeneration, emitToolSpan, emitTrace, emitCustomEvent, extractToolNamesFromMessages } from "../../extensions/posthog-analytics/lib/event-mappers.js"; function createMockPostHog() { return { @@ -9,6 +9,44 @@ function createMockPostHog() { } as any; } +describe("extractToolNamesFromMessages", () => { + it("extracts tool names from OpenAI-format tool_calls in assistant messages", () => { + const messages = [ + { role: "assistant", tool_calls: [{ function: { name: "web_search" } }, { function: { name: "exec" } }] }, + ]; + expect(extractToolNamesFromMessages(messages)).toEqual(["web_search", "exec"]); + }); + + it("extracts tool names from Anthropic-format content blocks with type tool_use", () => { + const messages = [ + { role: "assistant", content: [{ type: "tool_use", name: "read_file" }, { type: "text", text: "hello" }] }, + ]; + expect(extractToolNamesFromMessages(messages)).toEqual(["read_file"]); + }); + + it("extracts tool names from tool result messages with role=tool", () => { + const messages = [ + { role: "tool", name: "exec", content: "output" }, + ]; + expect(extractToolNamesFromMessages(messages)).toEqual(["exec"]); + }); + + it("deduplicates tool names across messages", () => { + const messages = [ + { role: "assistant", tool_calls: [{ function: { name: "exec" } }] }, + { role: "tool", name: "exec", content: "result" }, + { role: "assistant", tool_calls: [{ function: { name: "exec" } }] }, + ]; + expect(extractToolNamesFromMessages(messages)).toEqual(["exec"]); + }); + + it("returns empty array for messages without tool calls", () => { + expect(extractToolNamesFromMessages([{ role: "user", content: "hello" }])).toEqual([]); + expect(extractToolNamesFromMessages(null)).toEqual([]); + expect(extractToolNamesFromMessages([])).toEqual([]); + }); +}); + describe("emitGeneration", () => { let ph: ReturnType; let traceCtx: TraceContextManager; @@ -20,10 +58,10 @@ describe("emitGeneration", () => { it("emits $ai_generation with correct model, provider, and trace linkage", () => { traceCtx.startTrace("sess-1", "run-1"); - traceCtx.setModel("run-1", "anthropic/claude-4-sonnet"); - traceCtx.setInput("run-1", [{ role: "user", content: "hello" }], false); + traceCtx.setModel("sess-1", "anthropic/claude-4-sonnet"); + traceCtx.setInput("sess-1", [{ role: "user", content: "hello" }], false); - emitGeneration(ph, traceCtx, { runId: "run-1" }, { + emitGeneration(ph, traceCtx, "sess-1", { usage: { inputTokens: 10, outputTokens: 20 }, cost: { totalUsd: 0.001 }, output: [{ role: "assistant", content: "hi" }], @@ -34,7 +72,7 @@ describe("emitGeneration", () => { expect(call.event).toBe("$ai_generation"); expect(call.properties.$ai_model).toBe("anthropic/claude-4-sonnet"); expect(call.properties.$ai_provider).toBe("anthropic"); - expect(call.properties.$ai_trace_id).toBe(traceCtx.getTrace("run-1")!.traceId); + expect(call.properties.$ai_trace_id).toBe(traceCtx.getTrace("sess-1")!.traceId); expect(call.properties.$ai_session_id).toBe("sess-1"); expect(call.properties.$ai_input_tokens).toBe(10); expect(call.properties.$ai_output_tokens).toBe(20); @@ -44,9 +82,9 @@ describe("emitGeneration", () => { it("redacts input/output when privacy mode is on (enforces privacy boundary)", () => { traceCtx.startTrace("s", "r"); - traceCtx.setInput("r", [{ role: "user", content: "sensitive" }], true); + traceCtx.setInput("s", [{ role: "user", content: "sensitive" }], true); - emitGeneration(ph, traceCtx, { runId: "r" }, { + emitGeneration(ph, traceCtx, "s", { output: [{ role: "assistant", content: "also sensitive" }], }, true); @@ -55,88 +93,94 @@ describe("emitGeneration", () => { expect(props.$ai_output_choices).toBe("[REDACTED]"); }); - it("includes full input/output when privacy mode is off (allows opt-in capture)", () => { + it("does not set $ai_total_cost_usd when cost is zero (prevents misleading $0 in PostHog)", () => { traceCtx.startTrace("s", "r"); - const input = [{ role: "user", content: "hello" }]; - traceCtx.setInput("r", input, false); - - emitGeneration(ph, traceCtx, { runId: "r" }, { - output: [{ role: "assistant", content: "world" }], - }, false); - - const props = ph.capture.mock.calls[0][0].properties; - expect(props.$ai_input).toEqual(input); - expect(props.$ai_output_choices).toEqual([{ role: "assistant", content: "world" }]); + emitGeneration(ph, traceCtx, "s", { cost: { totalUsd: 0 } }, true); + expect(ph.capture.mock.calls[0][0].properties.$ai_total_cost_usd).toBeUndefined(); }); - it("captures error details when generation fails (enables error tracking in PostHog)", () => { + it("does not set $ai_total_cost_usd when cost object is absent", () => { traceCtx.startTrace("s", "r"); + emitGeneration(ph, traceCtx, "s", {}, true); + expect(ph.capture.mock.calls[0][0].properties.$ai_total_cost_usd).toBeUndefined(); + }); - emitGeneration(ph, traceCtx, { runId: "r" }, { - error: { message: "Rate limit exceeded" }, - }, true); + it("sets $ai_total_cost_usd when cost is positive", () => { + traceCtx.startTrace("s", "r"); + emitGeneration(ph, traceCtx, "s", { cost: { totalUsd: 0.05 } }, true); + expect(ph.capture.mock.calls[0][0].properties.$ai_total_cost_usd).toBe(0.05); + }); + it("does not set token counts when usage is absent (prevents null in PostHog)", () => { + traceCtx.startTrace("s", "r"); + emitGeneration(ph, traceCtx, "s", {}, true); + const props = ph.capture.mock.calls[0][0].properties; + expect(props).not.toHaveProperty("$ai_input_tokens"); + expect(props).not.toHaveProperty("$ai_output_tokens"); + }); + + it("does not set token counts when they are zero", () => { + traceCtx.startTrace("s", "r"); + emitGeneration(ph, traceCtx, "s", { usage: { inputTokens: 0, outputTokens: 0 } }, true); + const props = ph.capture.mock.calls[0][0].properties; + expect(props).not.toHaveProperty("$ai_input_tokens"); + expect(props).not.toHaveProperty("$ai_output_tokens"); + }); + + it("captures error details when generation fails", () => { + traceCtx.startTrace("s", "r"); + emitGeneration(ph, traceCtx, "s", { error: { message: "Rate limit exceeded" } }, true); const props = ph.capture.mock.calls[0][0].properties; expect(props.$ai_is_error).toBe(true); expect(props.$ai_error).toBe("Rate limit exceeded"); }); - it("handles string error (defensive: different error shapes from providers)", () => { + it("detects error from success=false even without error field", () => { traceCtx.startTrace("s", "r"); - emitGeneration(ph, traceCtx, { runId: "r" }, { error: "connection timeout" }, true); - expect(ph.capture.mock.calls[0][0].properties.$ai_error).toBe("connection timeout"); + emitGeneration(ph, traceCtx, "s", { success: false }, true); + expect(ph.capture.mock.calls[0][0].properties.$ai_is_error).toBe(true); }); - it("includes tool names in $ai_tools when tools were called (enables tool usage analytics)", () => { + it("includes tool names from both spans and messages in $ai_tools", () => { traceCtx.startTrace("s", "r"); - traceCtx.startToolSpan("r", "web_search", {}); - traceCtx.endToolSpan("r", "web_search", {}); - traceCtx.startToolSpan("r", "exec", {}); - traceCtx.endToolSpan("r", "exec", {}); + traceCtx.startToolSpan("s", "web_search", {}); + traceCtx.endToolSpan("s", "web_search", {}); - emitGeneration(ph, traceCtx, { runId: "r" }, {}, true); + const messages = [ + { role: "assistant", tool_calls: [{ function: { name: "exec" } }] }, + { role: "tool", name: "exec", content: "result" }, + ]; - const props = ph.capture.mock.calls[0][0].properties; - expect(props.$ai_tools).toEqual(["web_search", "exec"]); + emitGeneration(ph, traceCtx, "s", { messages }, true); + + const tools = ph.capture.mock.calls[0][0].properties.$ai_tools; + expect(tools).toEqual([ + { type: "function", function: { name: "web_search" } }, + { type: "function", function: { name: "exec" } }, + ]); }); - it("sets $ai_tools to undefined when no tools were called (prevents empty array in PostHog)", () => { + it("sets $ai_tools to undefined when no tools were called", () => { traceCtx.startTrace("s", "r"); - emitGeneration(ph, traceCtx, { runId: "r" }, {}, true); + emitGeneration(ph, traceCtx, "s", {}, true); expect(ph.capture.mock.calls[0][0].properties.$ai_tools).toBeUndefined(); }); - it("silently skips when trace does not exist (prevents crash on stale runId)", () => { - emitGeneration(ph, traceCtx, { runId: "ghost" }, {}, true); + it("silently skips when trace does not exist", () => { + emitGeneration(ph, traceCtx, "ghost", {}, true); expect(ph.capture).not.toHaveBeenCalled(); }); - it("falls back to event.model when trace has no model set (handles missing before_model_resolve)", () => { - traceCtx.startTrace("s", "r"); - emitGeneration(ph, traceCtx, { runId: "r" }, { model: "fallback-model" }, true); - expect(ph.capture.mock.calls[0][0].properties.$ai_model).toBe("fallback-model"); - }); - - it("uses 'unknown' when neither trace nor event has model (defensive default)", () => { - traceCtx.startTrace("s", "r"); - emitGeneration(ph, traceCtx, { runId: "r" }, {}, true); - expect(ph.capture.mock.calls[0][0].properties.$ai_model).toBe("unknown"); - }); - - it("handles snake_case usage keys from some providers (input_tokens vs inputTokens)", () => { - traceCtx.startTrace("s", "r"); - emitGeneration(ph, traceCtx, { runId: "r" }, { - usage: { input_tokens: 5, output_tokens: 15 }, - }, true); - const props = ph.capture.mock.calls[0][0].properties; - expect(props.$ai_input_tokens).toBe(5); - expect(props.$ai_output_tokens).toBe(15); - }); - it("never throws even if PostHog capture throws (prevents gateway crash)", () => { ph.capture.mockImplementation(() => { throw new Error("PostHog down"); }); traceCtx.startTrace("s", "r"); - expect(() => emitGeneration(ph, traceCtx, { runId: "r" }, {}, true)).not.toThrow(); + expect(() => emitGeneration(ph, traceCtx, "s", {}, true)).not.toThrow(); + }); + + it("uses durationMs from event for latency when available (more accurate)", () => { + traceCtx.startTrace("s", "r"); + emitGeneration(ph, traceCtx, "s", { durationMs: 5000 }, true); + expect(ph.capture.mock.calls[0][0].properties.$ai_latency).toBe(5); }); }); @@ -149,27 +193,26 @@ describe("emitToolSpan", () => { traceCtx = new TraceContextManager(); }); - it("emits $ai_span with correct tool name, timing, and trace linkage", () => { + it("emits $ai_span with correct tool name, timing, and trace linkage (same session key)", () => { traceCtx.startTrace("sess", "r"); - traceCtx.startToolSpan("r", "web_search", { q: "test" }); - traceCtx.endToolSpan("r", "web_search", { results: [] }); + traceCtx.startToolSpan("sess", "web_search", { q: "test" }); + traceCtx.endToolSpan("sess", "web_search", { results: [] }); - emitToolSpan(ph, traceCtx, "r", {}, false); + emitToolSpan(ph, traceCtx, "sess", {}, false); const call = ph.capture.mock.calls[0][0]; expect(call.event).toBe("$ai_span"); expect(call.properties.$ai_span_name).toBe("web_search"); - expect(call.properties.$ai_trace_id).toBe(traceCtx.getTrace("r")!.traceId); - expect(call.properties.$ai_parent_id).toBe(traceCtx.getTrace("r")!.traceId); - expect(typeof call.properties.$ai_latency).toBe("number"); + expect(call.properties.$ai_trace_id).toBe(traceCtx.getTrace("sess")!.traceId); + expect(call.properties.$ai_parent_id).toBe(traceCtx.getTrace("sess")!.traceId); }); - it("excludes tool_params and tool_result when privacy mode is on (enforces content redaction)", () => { + it("excludes tool_params and tool_result when privacy mode is on", () => { traceCtx.startTrace("s", "r"); - traceCtx.startToolSpan("r", "exec", { cmd: "cat /etc/passwd" }); - traceCtx.endToolSpan("r", "exec", { output: "root:x:0:0:..." }); + traceCtx.startToolSpan("s", "exec", { cmd: "cat /etc/passwd" }); + traceCtx.endToolSpan("s", "exec", { output: "root:x:0:0:..." }); - emitToolSpan(ph, traceCtx, "r", {}, true); + emitToolSpan(ph, traceCtx, "s", {}, true); const props = ph.capture.mock.calls[0][0].properties; expect(props).not.toHaveProperty("tool_params"); @@ -178,78 +221,51 @@ describe("emitToolSpan", () => { it("includes tool_params and tool_result with secrets stripped when privacy is off", () => { traceCtx.startTrace("s", "r"); - traceCtx.startToolSpan("r", "api_call", { url: "https://api.example.com", apiKey: "secret" }); - traceCtx.endToolSpan("r", "api_call", { status: 200, data: "ok" }); + traceCtx.startToolSpan("s", "api_call", { url: "https://example.com", apiKey: "secret" }); + traceCtx.endToolSpan("s", "api_call", { status: 200 }); - emitToolSpan(ph, traceCtx, "r", {}, false); + emitToolSpan(ph, traceCtx, "s", {}, false); const props = ph.capture.mock.calls[0][0].properties; - expect(props.tool_params.url).toBe("https://api.example.com"); + expect(props.tool_params.url).toBe("https://example.com"); expect(props.tool_params.apiKey).toBe("[REDACTED]"); - expect(props.tool_result.status).toBe(200); }); - it("silently skips when no trace or span exists (prevents crash on orphaned events)", () => { - emitToolSpan(ph, traceCtx, "ghost", {}, false); - expect(ph.capture).not.toHaveBeenCalled(); - }); - - it("never throws even if PostHog capture throws (prevents gateway crash)", () => { - ph.capture.mockImplementation(() => { throw new Error("boom"); }); + it("uses event.durationMs for latency when span timing is unavailable", () => { traceCtx.startTrace("s", "r"); - traceCtx.startToolSpan("r", "x", {}); - traceCtx.endToolSpan("r", "x", {}); - expect(() => emitToolSpan(ph, traceCtx, "r", {}, false)).not.toThrow(); + traceCtx.startToolSpan("s", "exec", {}); + + emitToolSpan(ph, traceCtx, "s", { durationMs: 250 }, false); + + expect(ph.capture.mock.calls[0][0].properties.$ai_latency).toBe(0.25); }); }); describe("emitTrace", () => { - let ph: ReturnType; - let traceCtx: TraceContextManager; - - beforeEach(() => { - ph = createMockPostHog(); - traceCtx = new TraceContextManager(); - }); - it("emits $ai_trace with correct trace ID, session ID, and tool count", () => { + const ph = createMockPostHog(); + const traceCtx = new TraceContextManager(); traceCtx.startTrace("sess-1", "r"); - traceCtx.startToolSpan("r", "a", {}); - traceCtx.endToolSpan("r", "a", {}); - traceCtx.startToolSpan("r", "b", {}); - traceCtx.endToolSpan("r", "b", {}); + traceCtx.startToolSpan("sess-1", "a", {}); + traceCtx.endToolSpan("sess-1", "a", {}); + traceCtx.startToolSpan("sess-1", "b", {}); + traceCtx.endToolSpan("sess-1", "b", {}); - emitTrace(ph, traceCtx, { runId: "r" }); + emitTrace(ph, traceCtx, "sess-1"); const call = ph.capture.mock.calls[0][0]; expect(call.event).toBe("$ai_trace"); - expect(call.properties.$ai_trace_id).toBe(traceCtx.getTrace("r")!.traceId); - expect(call.properties.$ai_session_id).toBe("sess-1"); + expect(call.properties.$ai_trace_id).toBe(traceCtx.getTrace("sess-1")!.traceId); expect(call.properties.tool_count).toBe(2); - expect(call.properties.$ai_span_name).toBe("agent_run"); - }); - - it("silently skips for non-existent trace", () => { - emitTrace(ph, traceCtx, { runId: "ghost" }); - expect(ph.capture).not.toHaveBeenCalled(); }); }); describe("emitCustomEvent", () => { it("captures event with $process_person_profile: false (prevents person profile creation)", () => { const ph = createMockPostHog(); - emitCustomEvent(ph, "dench_session_start", { session_id: "abc", channel: "telegram" }); - + emitCustomEvent(ph, "dench_session_start", { session_id: "abc" }); const call = ph.capture.mock.calls[0][0]; expect(call.event).toBe("dench_session_start"); - expect(call.properties.session_id).toBe("abc"); - expect(call.properties.channel).toBe("telegram"); expect(call.properties.$process_person_profile).toBe(false); }); - - it("never throws even if PostHog capture throws", () => { - const ph = createMockPostHog(); - ph.capture.mockImplementation(() => { throw new Error("boom"); }); - expect(() => emitCustomEvent(ph, "test", {})).not.toThrow(); - }); }); diff --git a/src/telemetry/trace-context.test.ts b/src/telemetry/trace-context.test.ts index 603c40f0654..5365d9de240 100644 --- a/src/telemetry/trace-context.test.ts +++ b/src/telemetry/trace-context.test.ts @@ -1,5 +1,23 @@ import { describe, it, expect, beforeEach } from "vitest"; -import { TraceContextManager } from "../../extensions/posthog-analytics/lib/trace-context.js"; +import { TraceContextManager, resolveSessionKey } from "../../extensions/posthog-analytics/lib/trace-context.js"; + +describe("resolveSessionKey", () => { + it("prefers sessionId over sessionKey and runId", () => { + expect(resolveSessionKey({ sessionId: "s", sessionKey: "k", runId: "r" })).toBe("s"); + }); + + it("falls back to sessionKey when sessionId is absent", () => { + expect(resolveSessionKey({ sessionKey: "k", runId: "r" })).toBe("k"); + }); + + it("falls back to runId when both session fields are absent", () => { + expect(resolveSessionKey({ runId: "r" })).toBe("r"); + }); + + it("returns 'unknown' when no identifiers are present", () => { + expect(resolveSessionKey({})).toBe("unknown"); + }); +}); describe("TraceContextManager", () => { let ctx: TraceContextManager; @@ -8,42 +26,55 @@ describe("TraceContextManager", () => { ctx = new TraceContextManager(); }); - // ── Trace lifecycle ── + // ── Trace lifecycle (session-keyed) ── it("generates a unique UUID traceId for each trace (ensures PostHog trace grouping)", () => { ctx.startTrace("session-1", "run-1"); - ctx.startTrace("session-1", "run-2"); - const t1 = ctx.getTrace("run-1")!; - const t2 = ctx.getTrace("run-2")!; + ctx.startTrace("session-2", "run-2"); + const t1 = ctx.getTrace("session-1")!; + const t2 = ctx.getTrace("session-2")!; expect(t1.traceId).toMatch(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/); expect(t1.traceId).not.toBe(t2.traceId); }); - it("records sessionId and runId on the trace (ensures trace-to-session linkage)", () => { - ctx.startTrace("sess-abc", "run-xyz"); - const trace = ctx.getTrace("run-xyz")!; - expect(trace.sessionId).toBe("sess-abc"); - expect(trace.runId).toBe("run-xyz"); + it("indexes by sessionKey so different runIds in the same session share one trace", () => { + ctx.startTrace("sess-abc", "run-1"); + ctx.startToolSpan("sess-abc", "search", { q: "test" }); + ctx.endToolSpan("sess-abc", "search", { ok: true }); + + const trace = ctx.getTrace("sess-abc")!; + expect(trace.toolSpans).toHaveLength(1); + expect(trace.runId).toBe("run-1"); + }); + + it("replaces existing trace when startTrace is called again for the same session (new run)", () => { + ctx.startTrace("sess", "run-old"); + ctx.startToolSpan("sess", "exec", {}); + expect(ctx.getTrace("sess")!.toolSpans).toHaveLength(1); + + ctx.startTrace("sess", "run-new"); + expect(ctx.getTrace("sess")!.runId).toBe("run-new"); + expect(ctx.getTrace("sess")!.toolSpans).toHaveLength(0); }); it("records startedAt timestamp on trace creation (enables latency calculation)", () => { const before = Date.now(); ctx.startTrace("s", "r"); const after = Date.now(); - const trace = ctx.getTrace("r")!; + const trace = ctx.getTrace("s")!; expect(trace.startedAt).toBeGreaterThanOrEqual(before); expect(trace.startedAt).toBeLessThanOrEqual(after); }); - it("endTrace sets endedAt on the trace (enables accurate latency measurement)", () => { + it("endTrace sets endedAt on the trace", () => { ctx.startTrace("s", "r"); - ctx.endTrace("r"); - const trace = ctx.getTrace("r")!; + ctx.endTrace("s"); + const trace = ctx.getTrace("s")!; expect(trace.endedAt).toBeDefined(); expect(trace.endedAt!).toBeGreaterThanOrEqual(trace.startedAt); }); - it("returns undefined for non-existent runId (defensive: no crash on stale references)", () => { + it("returns undefined for non-existent session key (defensive)", () => { expect(ctx.getTrace("nonexistent")).toBeUndefined(); expect(ctx.getModel("nonexistent")).toBeUndefined(); expect(ctx.getLastToolSpan("nonexistent")).toBeUndefined(); @@ -51,140 +82,103 @@ describe("TraceContextManager", () => { // ── Model resolution ── - it("extracts provider from model string with slash separator (enables PostHog $ai_provider)", () => { + it("extracts provider from model string with slash separator", () => { ctx.startTrace("s", "r"); - ctx.setModel("r", "anthropic/claude-4-sonnet"); - expect(ctx.getTrace("r")!.model).toBe("anthropic/claude-4-sonnet"); - expect(ctx.getTrace("r")!.provider).toBe("anthropic"); + ctx.setModel("s", "anthropic/claude-4-sonnet"); + expect(ctx.getTrace("s")!.model).toBe("anthropic/claude-4-sonnet"); + expect(ctx.getTrace("s")!.provider).toBe("anthropic"); }); - it("does not set provider for models without a slash (e.g. 'gpt-4o')", () => { + it("does not set provider for models without a slash", () => { ctx.startTrace("s", "r"); - ctx.setModel("r", "gpt-4o"); - expect(ctx.getTrace("r")!.model).toBe("gpt-4o"); - expect(ctx.getTrace("r")!.provider).toBeUndefined(); + ctx.setModel("s", "gpt-4o"); + expect(ctx.getTrace("s")!.model).toBe("gpt-4o"); + expect(ctx.getTrace("s")!.provider).toBeUndefined(); }); it("handles multi-segment provider paths like vercel-ai-gateway/anthropic/claude-4", () => { ctx.startTrace("s", "r"); - ctx.setModel("r", "vercel-ai-gateway/anthropic/claude-4"); - expect(ctx.getTrace("r")!.provider).toBe("vercel-ai-gateway"); - }); - - it("ignores setModel for non-existent run (no crash on race between model resolve and cleanup)", () => { - ctx.setModel("ghost-run", "gpt-4o"); - expect(ctx.getModel("ghost-run")).toBeUndefined(); + ctx.setModel("s", "vercel-ai-gateway/anthropic/claude-4"); + expect(ctx.getTrace("s")!.provider).toBe("vercel-ai-gateway"); }); // ── Input capture with privacy ── - it("redacts message content when privacy mode is on (prevents content leakage in PostHog)", () => { + it("redacts message content when privacy mode is on (prevents content leakage)", () => { ctx.startTrace("s", "r"); - ctx.setInput("r", [ + ctx.setInput("s", [ { role: "user", content: "My SSN is 123-45-6789" }, - { role: "assistant", content: "I should not store that." }, ], true); - const input = ctx.getTrace("r")!.input as Array>; + const input = ctx.getTrace("s")!.input as Array>; expect(input[0].content).toBe("[REDACTED]"); - expect(input[1].content).toBe("[REDACTED]"); expect(input[0].role).toBe("user"); }); - it("preserves message content when privacy mode is off (allows opt-in content capture)", () => { + it("preserves message content when privacy mode is off", () => { ctx.startTrace("s", "r"); - ctx.setInput("r", [{ role: "user", content: "Hello world" }], false); - const input = ctx.getTrace("r")!.input as Array>; - expect(input[0].content).toBe("Hello world"); - }); - - it("ignores setInput for non-existent run (no crash on stale context)", () => { - ctx.setInput("ghost", [{ role: "user", content: "test" }], true); - expect(ctx.getTrace("ghost")).toBeUndefined(); + ctx.setInput("s", [{ role: "user", content: "Hello" }], false); + const input = ctx.getTrace("s")!.input as Array>; + expect(input[0].content).toBe("Hello"); }); // ── Tool span lifecycle ── - it("tracks tool span start/end with timing and error detection (enables $ai_span events)", () => { + it("tracks tool span start/end with timing and error detection", () => { ctx.startTrace("s", "r"); - const before = Date.now(); - ctx.startToolSpan("r", "web_search", { query: "test" }); - const span = ctx.getLastToolSpan("r")!; + ctx.startToolSpan("s", "web_search", { query: "test" }); + const span = ctx.getLastToolSpan("s")!; expect(span.toolName).toBe("web_search"); - expect(span.startedAt).toBeGreaterThanOrEqual(before); expect(span.endedAt).toBeUndefined(); - expect(span.spanId).toMatch(/^[0-9a-f]{8}-/); - ctx.endToolSpan("r", "web_search", { results: ["a", "b"] }); + ctx.endToolSpan("s", "web_search", { results: ["a"] }); expect(span.endedAt).toBeDefined(); expect(span.isError).toBe(false); }); - it("marks tool span as error when result contains an 'error' key (enables $ai_is_error flag)", () => { + it("marks tool span as error when result contains an 'error' key", () => { ctx.startTrace("s", "r"); - ctx.startToolSpan("r", "exec", { cmd: "rm -rf /" }); - ctx.endToolSpan("r", "exec", { error: "permission denied" }); - expect(ctx.getLastToolSpan("r")!.isError).toBe(true); + ctx.startToolSpan("s", "exec", { cmd: "rm -rf /" }); + ctx.endToolSpan("s", "exec", { error: "permission denied" }); + expect(ctx.getLastToolSpan("s")!.isError).toBe(true); }); - it("does not mark as error for results without error key (prevents false error flags)", () => { + it("handles multiple tool spans in order", () => { ctx.startTrace("s", "r"); - ctx.startToolSpan("r", "read_file", { path: "/tmp/x" }); - ctx.endToolSpan("r", "read_file", { content: "file data" }); - expect(ctx.getLastToolSpan("r")!.isError).toBe(false); + ctx.startToolSpan("s", "search", {}); + ctx.endToolSpan("s", "search", { ok: true }); + ctx.startToolSpan("s", "read", {}); + ctx.endToolSpan("s", "read", { ok: true }); + + expect(ctx.getTrace("s")!.toolSpans).toHaveLength(2); + expect(ctx.getLastToolSpan("s")!.toolName).toBe("read"); }); - it("handles multiple tool spans in order (enables correct span-to-trace nesting)", () => { + it("matches end to the most recent unfinished span of the same tool name", () => { ctx.startTrace("s", "r"); - ctx.startToolSpan("r", "search", { q: "a" }); - ctx.endToolSpan("r", "search", { ok: true }); - ctx.startToolSpan("r", "read", { path: "/tmp" }); - ctx.endToolSpan("r", "read", { ok: true }); + ctx.startToolSpan("s", "exec", { cmd: "ls" }); + ctx.endToolSpan("s", "exec", { output: "file1" }); + ctx.startToolSpan("s", "exec", { cmd: "pwd" }); + ctx.endToolSpan("s", "exec", { output: "/home" }); - const trace = ctx.getTrace("r")!; - expect(trace.toolSpans).toHaveLength(2); - expect(trace.toolSpans[0].toolName).toBe("search"); - expect(trace.toolSpans[1].toolName).toBe("read"); - expect(ctx.getLastToolSpan("r")!.toolName).toBe("read"); - }); - - it("matches end to the most recent unfinished span of the same tool name (prevents mismatched close)", () => { - ctx.startTrace("s", "r"); - ctx.startToolSpan("r", "exec", { cmd: "ls" }); - ctx.endToolSpan("r", "exec", { output: "file1" }); - ctx.startToolSpan("r", "exec", { cmd: "pwd" }); - ctx.endToolSpan("r", "exec", { output: "/home" }); - - const spans = ctx.getTrace("r")!.toolSpans; + const spans = ctx.getTrace("s")!.toolSpans; expect(spans).toHaveLength(2); expect(spans[0].endedAt).toBeDefined(); expect(spans[1].endedAt).toBeDefined(); }); - it("ignores startToolSpan/endToolSpan for non-existent run (no crash on orphaned tool events)", () => { - ctx.startToolSpan("ghost", "search", {}); - ctx.endToolSpan("ghost", "search", {}); - expect(ctx.getLastToolSpan("ghost")).toBeUndefined(); - }); + // ── Concurrent sessions ── - it("getLastToolSpan returns undefined when no spans exist (defensive edge case)", () => { - ctx.startTrace("s", "r"); - expect(ctx.getLastToolSpan("r")).toBeUndefined(); - }); - - // ── Concurrent runs ── - - it("isolates traces across concurrent runs (prevents cross-run data contamination)", () => { + it("isolates traces across concurrent sessions (prevents cross-session contamination)", () => { ctx.startTrace("s1", "run-a"); ctx.startTrace("s2", "run-b"); - ctx.setModel("run-a", "gpt-4o"); - ctx.setModel("run-b", "claude-4-sonnet"); - ctx.startToolSpan("run-a", "search", {}); - ctx.startToolSpan("run-b", "exec", {}); + ctx.setModel("s1", "gpt-4o"); + ctx.setModel("s2", "claude-4-sonnet"); + ctx.startToolSpan("s1", "search", {}); + ctx.startToolSpan("s2", "exec", {}); - expect(ctx.getModel("run-a")).toBe("gpt-4o"); - expect(ctx.getModel("run-b")).toBe("claude-4-sonnet"); - expect(ctx.getTrace("run-a")!.toolSpans).toHaveLength(1); - expect(ctx.getTrace("run-a")!.toolSpans[0].toolName).toBe("search"); - expect(ctx.getTrace("run-b")!.toolSpans[0].toolName).toBe("exec"); + expect(ctx.getModel("s1")).toBe("gpt-4o"); + expect(ctx.getModel("s2")).toBe("claude-4-sonnet"); + expect(ctx.getTrace("s1")!.toolSpans[0].toolName).toBe("search"); + expect(ctx.getTrace("s2")!.toolSpans[0].toolName).toBe("exec"); }); });