fix: resolve PostHog trace linkage, cost display, and tool capture
Switch TraceContextManager from runId to sessionId-based indexing so tool spans share the same trace_id as their parent generation. Extract tool names from agent_end messages for the PostHog Tools tab. Guard against sending $0 cost and null token counts.
This commit is contained in:
parent
fdd89b4e6f
commit
460bf6bf6e
@ -1,11 +1,20 @@
|
||||
import { createPostHogClient, shutdownPostHogClient } from "./lib/posthog-client.js";
|
||||
import { TraceContextManager } from "./lib/trace-context.js";
|
||||
import { TraceContextManager, resolveSessionKey } from "./lib/trace-context.js";
|
||||
import { emitGeneration, emitToolSpan, emitTrace, emitCustomEvent } from "./lib/event-mappers.js";
|
||||
import { readPrivacyMode } from "./lib/privacy.js";
|
||||
import type { PluginConfig } from "./lib/types.js";
|
||||
|
||||
export const id = "posthog-analytics";
|
||||
|
||||
const DEBUG = process.env.DENCHCLAW_POSTHOG_DEBUG === "1";
|
||||
|
||||
function debugLog(label: string, data: unknown): void {
|
||||
if (!DEBUG) return;
|
||||
try {
|
||||
process.stderr.write(`[posthog-analytics] ${label}: ${JSON.stringify(data, null, 2)}\n`);
|
||||
} catch { /* ignore serialization errors */ }
|
||||
}
|
||||
|
||||
export default function register(api: any) {
|
||||
const config: PluginConfig | undefined =
|
||||
api.config?.plugins?.entries?.["posthog-analytics"]?.config;
|
||||
@ -18,12 +27,28 @@ export default function register(api: any) {
|
||||
|
||||
const getPrivacyMode = () => readPrivacyMode(api.config);
|
||||
|
||||
const getConfigModel = (): string | undefined =>
|
||||
api.config?.agents?.defaults?.model?.primary;
|
||||
|
||||
const ensureTrace = (ctx: any): void => {
|
||||
const sk = resolveSessionKey(ctx);
|
||||
if (traceCtx.getTrace(sk)) return;
|
||||
traceCtx.startTrace(sk, ctx.runId ?? sk);
|
||||
const model = getConfigModel();
|
||||
if (model) traceCtx.setModel(sk, model);
|
||||
};
|
||||
|
||||
api.on(
|
||||
"before_model_resolve",
|
||||
(event: any, ctx: any) => {
|
||||
traceCtx.startTrace(ctx.sessionId ?? ctx.runId, ctx.runId);
|
||||
if (event.modelOverride) {
|
||||
traceCtx.setModel(ctx.runId, event.modelOverride);
|
||||
debugLog("before_model_resolve event", event);
|
||||
debugLog("before_model_resolve ctx", { runId: ctx.runId, sessionId: ctx.sessionId, sessionKey: ctx.sessionKey });
|
||||
|
||||
const sk = resolveSessionKey(ctx);
|
||||
traceCtx.startTrace(sk, ctx.runId ?? sk);
|
||||
const model = event.modelOverride || getConfigModel();
|
||||
if (model) {
|
||||
traceCtx.setModel(sk, model);
|
||||
}
|
||||
},
|
||||
{ priority: -10 },
|
||||
@ -32,8 +57,12 @@ export default function register(api: any) {
|
||||
api.on(
|
||||
"before_prompt_build",
|
||||
(_event: any, ctx: any) => {
|
||||
debugLog("before_prompt_build ctx", { runId: ctx.runId, sessionId: ctx.sessionId, hasMessages: Boolean(ctx.messages) });
|
||||
|
||||
const sk = resolveSessionKey(ctx);
|
||||
ensureTrace(ctx);
|
||||
if (ctx.messages) {
|
||||
traceCtx.setInput(ctx.runId, ctx.messages, getPrivacyMode());
|
||||
traceCtx.setInput(sk, ctx.messages, getPrivacyMode());
|
||||
}
|
||||
},
|
||||
{ priority: -10 },
|
||||
@ -42,7 +71,11 @@ export default function register(api: any) {
|
||||
api.on(
|
||||
"before_tool_call",
|
||||
(event: any, ctx: any) => {
|
||||
traceCtx.startToolSpan(ctx.runId, event.toolName, event.params);
|
||||
debugLog("before_tool_call", { toolName: event.toolName, runId: ctx.runId, sessionId: ctx.sessionId });
|
||||
|
||||
const sk = resolveSessionKey(ctx);
|
||||
ensureTrace(ctx);
|
||||
traceCtx.startToolSpan(sk, event.toolName, event.params);
|
||||
},
|
||||
{ priority: -10 },
|
||||
);
|
||||
@ -50,8 +83,12 @@ export default function register(api: any) {
|
||||
api.on(
|
||||
"after_tool_call",
|
||||
(event: any, ctx: any) => {
|
||||
traceCtx.endToolSpan(ctx.runId, event.toolName, event.result);
|
||||
emitToolSpan(ph, traceCtx, ctx.runId, event, getPrivacyMode());
|
||||
debugLog("after_tool_call", { toolName: event.toolName, runId: ctx.runId, sessionId: ctx.sessionId, hasError: Boolean(event.error), durationMs: event.durationMs });
|
||||
|
||||
const sk = resolveSessionKey(ctx);
|
||||
ensureTrace(ctx);
|
||||
traceCtx.endToolSpan(sk, event.toolName, event.result);
|
||||
emitToolSpan(ph, traceCtx, sk, event, getPrivacyMode());
|
||||
},
|
||||
{ priority: -10 },
|
||||
);
|
||||
@ -59,14 +96,26 @@ export default function register(api: any) {
|
||||
api.on(
|
||||
"agent_end",
|
||||
(event: any, ctx: any) => {
|
||||
emitGeneration(ph, traceCtx, ctx, event, getPrivacyMode());
|
||||
emitTrace(ph, traceCtx, ctx);
|
||||
debugLog("agent_end event", { success: event.success, error: event.error, durationMs: event.durationMs, messageCount: event.messages?.length });
|
||||
debugLog("agent_end ctx", { runId: ctx.runId, sessionId: ctx.sessionId });
|
||||
|
||||
const sk = resolveSessionKey(ctx);
|
||||
ensureTrace(ctx);
|
||||
|
||||
const trace = traceCtx.getTrace(sk);
|
||||
if (trace && !trace.model) {
|
||||
const model = getConfigModel();
|
||||
if (model) traceCtx.setModel(sk, model);
|
||||
}
|
||||
|
||||
emitGeneration(ph, traceCtx, sk, event, getPrivacyMode());
|
||||
emitTrace(ph, traceCtx, sk);
|
||||
emitCustomEvent(ph, "dench_turn_completed", {
|
||||
session_id: ctx.sessionId,
|
||||
session_id: sk,
|
||||
run_id: ctx.runId,
|
||||
model: traceCtx.getModel(ctx.runId),
|
||||
model: traceCtx.getModel(sk),
|
||||
});
|
||||
traceCtx.endTrace(ctx.runId);
|
||||
traceCtx.endTrace(sk);
|
||||
},
|
||||
{ priority: -10 },
|
||||
);
|
||||
@ -75,7 +124,7 @@ export default function register(api: any) {
|
||||
"message_received",
|
||||
(event: any, ctx: any) => {
|
||||
emitCustomEvent(ph, "dench_message_received", {
|
||||
channel: ctx.channel,
|
||||
channel: ctx.channel ?? ctx.channelId,
|
||||
session_id: ctx.sessionId,
|
||||
has_attachments: Boolean(event.attachments?.length),
|
||||
});
|
||||
@ -88,7 +137,7 @@ export default function register(api: any) {
|
||||
(_event: any, ctx: any) => {
|
||||
emitCustomEvent(ph, "dench_session_start", {
|
||||
session_id: ctx.sessionId,
|
||||
channel: ctx.channel,
|
||||
channel: ctx.channel ?? ctx.channelId,
|
||||
});
|
||||
},
|
||||
{ priority: -10 },
|
||||
@ -99,7 +148,7 @@ export default function register(api: any) {
|
||||
(_event: any, ctx: any) => {
|
||||
emitCustomEvent(ph, "dench_session_end", {
|
||||
session_id: ctx.sessionId,
|
||||
channel: ctx.channel,
|
||||
channel: ctx.channel ?? ctx.channelId,
|
||||
});
|
||||
},
|
||||
{ priority: -10 },
|
||||
|
||||
@ -13,25 +13,66 @@ function getAnonymousId(): string {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract tool call names from the messages array provided by agent_end.
|
||||
* Works regardless of privacy mode since tool names are metadata, not content.
|
||||
*/
|
||||
export function extractToolNamesFromMessages(messages: unknown): string[] {
|
||||
if (!Array.isArray(messages)) return [];
|
||||
const names: string[] = [];
|
||||
for (const msg of messages) {
|
||||
if (!msg || typeof msg !== "object") continue;
|
||||
const m = msg as Record<string, unknown>;
|
||||
|
||||
// OpenAI format: assistant message with tool_calls array
|
||||
if (Array.isArray(m.tool_calls)) {
|
||||
for (const tc of m.tool_calls) {
|
||||
const name = (tc as any)?.function?.name ?? (tc as any)?.name;
|
||||
if (typeof name === "string" && name) names.push(name);
|
||||
}
|
||||
}
|
||||
// Anthropic format: content blocks with type "tool_use"
|
||||
if (Array.isArray(m.content)) {
|
||||
for (const block of m.content) {
|
||||
if ((block as any)?.type === "tool_use" && typeof (block as any)?.name === "string") {
|
||||
names.push((block as any).name);
|
||||
}
|
||||
if ((block as any)?.type === "tool-call" && typeof (block as any)?.toolName === "string") {
|
||||
names.push((block as any).toolName);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Tool result messages have a "name" field with the tool name
|
||||
if (m.role === "tool" && typeof m.name === "string") {
|
||||
names.push(m.name);
|
||||
}
|
||||
}
|
||||
return [...new Set(names)];
|
||||
}
|
||||
|
||||
/**
|
||||
* Emit a `$ai_generation` event from the agent_end hook data.
|
||||
*/
|
||||
export function emitGeneration(
|
||||
ph: PostHogClient,
|
||||
traceCtx: TraceContextManager,
|
||||
ctx: any,
|
||||
sessionKey: string,
|
||||
event: any,
|
||||
privacyMode: boolean,
|
||||
): void {
|
||||
try {
|
||||
const trace = traceCtx.getTrace(ctx.runId);
|
||||
const trace = traceCtx.getTrace(sessionKey);
|
||||
if (!trace) return;
|
||||
|
||||
const latency = trace.startedAt
|
||||
? (Date.now() - trace.startedAt) / 1_000
|
||||
: undefined;
|
||||
const latency = event.durationMs != null
|
||||
? event.durationMs / 1_000
|
||||
: trace.startedAt
|
||||
? (Date.now() - trace.startedAt) / 1_000
|
||||
: undefined;
|
||||
|
||||
const toolNames = trace.toolSpans.map((s) => s.toolName);
|
||||
const spanToolNames = trace.toolSpans.map((s) => s.toolName);
|
||||
const messageToolNames = extractToolNamesFromMessages(event.messages);
|
||||
const allToolNames = [...new Set([...spanToolNames, ...messageToolNames])];
|
||||
|
||||
const properties: Record<string, unknown> = {
|
||||
$ai_trace_id: trace.traceId,
|
||||
@ -39,19 +80,26 @@ export function emitGeneration(
|
||||
$ai_model: trace.model ?? event.model ?? "unknown",
|
||||
$ai_provider: trace.provider ?? event.provider,
|
||||
$ai_latency: latency,
|
||||
$ai_tools: toolNames.length > 0 ? toolNames : undefined,
|
||||
$ai_tools: allToolNames.length > 0
|
||||
? allToolNames.map((name) => ({ type: "function", function: { name } }))
|
||||
: undefined,
|
||||
$ai_stream: event.stream,
|
||||
$ai_temperature: event.temperature,
|
||||
$ai_is_error: Boolean(event.error),
|
||||
$ai_is_error: event.success === false || Boolean(event.error),
|
||||
};
|
||||
|
||||
if (event.usage) {
|
||||
properties.$ai_input_tokens = event.usage.inputTokens ?? event.usage.input_tokens;
|
||||
properties.$ai_output_tokens = event.usage.outputTokens ?? event.usage.output_tokens;
|
||||
const inputTokens = event.usage.inputTokens ?? event.usage.input_tokens;
|
||||
const outputTokens = event.usage.outputTokens ?? event.usage.output_tokens;
|
||||
if (inputTokens != null && inputTokens > 0) properties.$ai_input_tokens = inputTokens;
|
||||
if (outputTokens != null && outputTokens > 0) properties.$ai_output_tokens = outputTokens;
|
||||
}
|
||||
|
||||
if (event.cost) {
|
||||
properties.$ai_total_cost_usd = event.cost.totalUsd ?? event.cost.total_usd;
|
||||
const cost = event.cost.totalUsd ?? event.cost.total_usd;
|
||||
if (cost != null && cost > 0) {
|
||||
properties.$ai_total_cost_usd = cost;
|
||||
}
|
||||
}
|
||||
|
||||
properties.$ai_input = sanitizeForCapture(trace.input, privacyMode);
|
||||
@ -82,18 +130,20 @@ export function emitGeneration(
|
||||
export function emitToolSpan(
|
||||
ph: PostHogClient,
|
||||
traceCtx: TraceContextManager,
|
||||
runId: string,
|
||||
sessionKey: string,
|
||||
event: any,
|
||||
privacyMode: boolean,
|
||||
): void {
|
||||
try {
|
||||
const trace = traceCtx.getTrace(runId);
|
||||
const span = traceCtx.getLastToolSpan(runId);
|
||||
const trace = traceCtx.getTrace(sessionKey);
|
||||
const span = traceCtx.getLastToolSpan(sessionKey);
|
||||
if (!trace || !span) return;
|
||||
|
||||
const latency = span.startedAt && span.endedAt
|
||||
? (span.endedAt - span.startedAt) / 1_000
|
||||
: undefined;
|
||||
: event.durationMs != null
|
||||
? event.durationMs / 1_000
|
||||
: undefined;
|
||||
|
||||
const properties: Record<string, unknown> = {
|
||||
$ai_trace_id: trace.traceId,
|
||||
@ -126,10 +176,10 @@ export function emitToolSpan(
|
||||
export function emitTrace(
|
||||
ph: PostHogClient,
|
||||
traceCtx: TraceContextManager,
|
||||
ctx: any,
|
||||
sessionKey: string,
|
||||
): void {
|
||||
try {
|
||||
const trace = traceCtx.getTrace(ctx.runId);
|
||||
const trace = traceCtx.getTrace(sessionKey);
|
||||
if (!trace) return;
|
||||
|
||||
const latency = trace.startedAt
|
||||
|
||||
@ -3,24 +3,40 @@ import { redactMessages } from "./privacy.js";
|
||||
import type { TraceEntry, ToolSpanEntry } from "./types.js";
|
||||
|
||||
/**
|
||||
* Tracks in-flight trace and span state per agent run.
|
||||
* Each `runId` maps to one trace containing zero or more tool spans.
|
||||
* Resolve a stable session key from the hook context.
|
||||
* OpenClaw assigns different `runId` values to different hook phases
|
||||
* in the same agent loop, so we use sessionId/sessionKey as the
|
||||
* stable key for trace indexing.
|
||||
*/
|
||||
export function resolveSessionKey(ctx: {
|
||||
sessionId?: string;
|
||||
sessionKey?: string;
|
||||
runId?: string;
|
||||
}): string {
|
||||
return ctx.sessionId ?? ctx.sessionKey ?? ctx.runId ?? "unknown";
|
||||
}
|
||||
|
||||
/**
|
||||
* Tracks in-flight trace and span state per session.
|
||||
* Each session has at most one active agent run at a time
|
||||
* (OpenClaw serializes runs per session), so sessionId is
|
||||
* a stable key across all hooks in the same agent loop.
|
||||
*/
|
||||
export class TraceContextManager {
|
||||
private traces = new Map<string, TraceEntry>();
|
||||
|
||||
startTrace(sessionId: string, runId: string): void {
|
||||
this.traces.set(runId, {
|
||||
startTrace(sessionKey: string, runId: string): void {
|
||||
this.traces.set(sessionKey, {
|
||||
traceId: randomUUID(),
|
||||
sessionId,
|
||||
sessionId: sessionKey,
|
||||
runId,
|
||||
startedAt: Date.now(),
|
||||
toolSpans: [],
|
||||
});
|
||||
}
|
||||
|
||||
setModel(runId: string, model: string): void {
|
||||
const t = this.traces.get(runId);
|
||||
setModel(sessionKey: string, model: string): void {
|
||||
const t = this.traces.get(sessionKey);
|
||||
if (!t) return;
|
||||
t.model = model;
|
||||
const slashIdx = model.indexOf("/");
|
||||
@ -29,14 +45,14 @@ export class TraceContextManager {
|
||||
}
|
||||
}
|
||||
|
||||
setInput(runId: string, messages: unknown, privacyMode: boolean): void {
|
||||
const t = this.traces.get(runId);
|
||||
setInput(sessionKey: string, messages: unknown, privacyMode: boolean): void {
|
||||
const t = this.traces.get(sessionKey);
|
||||
if (!t) return;
|
||||
t.input = privacyMode ? redactMessages(messages) : messages;
|
||||
}
|
||||
|
||||
startToolSpan(runId: string, toolName: string, params?: unknown): void {
|
||||
const t = this.traces.get(runId);
|
||||
startToolSpan(sessionKey: string, toolName: string, params?: unknown): void {
|
||||
const t = this.traces.get(sessionKey);
|
||||
if (!t) return;
|
||||
t.toolSpans.push({
|
||||
toolName,
|
||||
@ -46,8 +62,8 @@ export class TraceContextManager {
|
||||
});
|
||||
}
|
||||
|
||||
endToolSpan(runId: string, toolName: string, result?: unknown): void {
|
||||
const t = this.traces.get(runId);
|
||||
endToolSpan(sessionKey: string, toolName: string, result?: unknown): void {
|
||||
const t = this.traces.get(sessionKey);
|
||||
if (!t) return;
|
||||
for (let i = t.toolSpans.length - 1; i >= 0; i--) {
|
||||
const span = t.toolSpans[i];
|
||||
@ -63,26 +79,25 @@ export class TraceContextManager {
|
||||
}
|
||||
}
|
||||
|
||||
getTrace(runId: string): TraceEntry | undefined {
|
||||
return this.traces.get(runId);
|
||||
getTrace(sessionKey: string): TraceEntry | undefined {
|
||||
return this.traces.get(sessionKey);
|
||||
}
|
||||
|
||||
getModel(runId: string): string | undefined {
|
||||
return this.traces.get(runId)?.model;
|
||||
getModel(sessionKey: string): string | undefined {
|
||||
return this.traces.get(sessionKey)?.model;
|
||||
}
|
||||
|
||||
getLastToolSpan(runId: string): ToolSpanEntry | undefined {
|
||||
const t = this.traces.get(runId);
|
||||
getLastToolSpan(sessionKey: string): ToolSpanEntry | undefined {
|
||||
const t = this.traces.get(sessionKey);
|
||||
if (!t || t.toolSpans.length === 0) return undefined;
|
||||
return t.toolSpans[t.toolSpans.length - 1];
|
||||
}
|
||||
|
||||
endTrace(runId: string): void {
|
||||
const t = this.traces.get(runId);
|
||||
endTrace(sessionKey: string): void {
|
||||
const t = this.traces.get(sessionKey);
|
||||
if (t) {
|
||||
t.endedAt = Date.now();
|
||||
}
|
||||
// Clean up after a short delay to allow final event emission.
|
||||
setTimeout(() => this.traces.delete(runId), 5_000);
|
||||
setTimeout(() => this.traces.delete(sessionKey), 5_000);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import { describe, it, expect, beforeEach, vi } from "vitest";
|
||||
import { TraceContextManager } from "../../extensions/posthog-analytics/lib/trace-context.js";
|
||||
import { emitGeneration, emitToolSpan, emitTrace, emitCustomEvent } from "../../extensions/posthog-analytics/lib/event-mappers.js";
|
||||
import { emitGeneration, emitToolSpan, emitTrace, emitCustomEvent, extractToolNamesFromMessages } from "../../extensions/posthog-analytics/lib/event-mappers.js";
|
||||
|
||||
function createMockPostHog() {
|
||||
return {
|
||||
@ -9,6 +9,44 @@ function createMockPostHog() {
|
||||
} as any;
|
||||
}
|
||||
|
||||
describe("extractToolNamesFromMessages", () => {
|
||||
it("extracts tool names from OpenAI-format tool_calls in assistant messages", () => {
|
||||
const messages = [
|
||||
{ role: "assistant", tool_calls: [{ function: { name: "web_search" } }, { function: { name: "exec" } }] },
|
||||
];
|
||||
expect(extractToolNamesFromMessages(messages)).toEqual(["web_search", "exec"]);
|
||||
});
|
||||
|
||||
it("extracts tool names from Anthropic-format content blocks with type tool_use", () => {
|
||||
const messages = [
|
||||
{ role: "assistant", content: [{ type: "tool_use", name: "read_file" }, { type: "text", text: "hello" }] },
|
||||
];
|
||||
expect(extractToolNamesFromMessages(messages)).toEqual(["read_file"]);
|
||||
});
|
||||
|
||||
it("extracts tool names from tool result messages with role=tool", () => {
|
||||
const messages = [
|
||||
{ role: "tool", name: "exec", content: "output" },
|
||||
];
|
||||
expect(extractToolNamesFromMessages(messages)).toEqual(["exec"]);
|
||||
});
|
||||
|
||||
it("deduplicates tool names across messages", () => {
|
||||
const messages = [
|
||||
{ role: "assistant", tool_calls: [{ function: { name: "exec" } }] },
|
||||
{ role: "tool", name: "exec", content: "result" },
|
||||
{ role: "assistant", tool_calls: [{ function: { name: "exec" } }] },
|
||||
];
|
||||
expect(extractToolNamesFromMessages(messages)).toEqual(["exec"]);
|
||||
});
|
||||
|
||||
it("returns empty array for messages without tool calls", () => {
|
||||
expect(extractToolNamesFromMessages([{ role: "user", content: "hello" }])).toEqual([]);
|
||||
expect(extractToolNamesFromMessages(null)).toEqual([]);
|
||||
expect(extractToolNamesFromMessages([])).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("emitGeneration", () => {
|
||||
let ph: ReturnType<typeof createMockPostHog>;
|
||||
let traceCtx: TraceContextManager;
|
||||
@ -20,10 +58,10 @@ describe("emitGeneration", () => {
|
||||
|
||||
it("emits $ai_generation with correct model, provider, and trace linkage", () => {
|
||||
traceCtx.startTrace("sess-1", "run-1");
|
||||
traceCtx.setModel("run-1", "anthropic/claude-4-sonnet");
|
||||
traceCtx.setInput("run-1", [{ role: "user", content: "hello" }], false);
|
||||
traceCtx.setModel("sess-1", "anthropic/claude-4-sonnet");
|
||||
traceCtx.setInput("sess-1", [{ role: "user", content: "hello" }], false);
|
||||
|
||||
emitGeneration(ph, traceCtx, { runId: "run-1" }, {
|
||||
emitGeneration(ph, traceCtx, "sess-1", {
|
||||
usage: { inputTokens: 10, outputTokens: 20 },
|
||||
cost: { totalUsd: 0.001 },
|
||||
output: [{ role: "assistant", content: "hi" }],
|
||||
@ -34,7 +72,7 @@ describe("emitGeneration", () => {
|
||||
expect(call.event).toBe("$ai_generation");
|
||||
expect(call.properties.$ai_model).toBe("anthropic/claude-4-sonnet");
|
||||
expect(call.properties.$ai_provider).toBe("anthropic");
|
||||
expect(call.properties.$ai_trace_id).toBe(traceCtx.getTrace("run-1")!.traceId);
|
||||
expect(call.properties.$ai_trace_id).toBe(traceCtx.getTrace("sess-1")!.traceId);
|
||||
expect(call.properties.$ai_session_id).toBe("sess-1");
|
||||
expect(call.properties.$ai_input_tokens).toBe(10);
|
||||
expect(call.properties.$ai_output_tokens).toBe(20);
|
||||
@ -44,9 +82,9 @@ describe("emitGeneration", () => {
|
||||
|
||||
it("redacts input/output when privacy mode is on (enforces privacy boundary)", () => {
|
||||
traceCtx.startTrace("s", "r");
|
||||
traceCtx.setInput("r", [{ role: "user", content: "sensitive" }], true);
|
||||
traceCtx.setInput("s", [{ role: "user", content: "sensitive" }], true);
|
||||
|
||||
emitGeneration(ph, traceCtx, { runId: "r" }, {
|
||||
emitGeneration(ph, traceCtx, "s", {
|
||||
output: [{ role: "assistant", content: "also sensitive" }],
|
||||
}, true);
|
||||
|
||||
@ -55,88 +93,94 @@ describe("emitGeneration", () => {
|
||||
expect(props.$ai_output_choices).toBe("[REDACTED]");
|
||||
});
|
||||
|
||||
it("includes full input/output when privacy mode is off (allows opt-in capture)", () => {
|
||||
it("does not set $ai_total_cost_usd when cost is zero (prevents misleading $0 in PostHog)", () => {
|
||||
traceCtx.startTrace("s", "r");
|
||||
const input = [{ role: "user", content: "hello" }];
|
||||
traceCtx.setInput("r", input, false);
|
||||
|
||||
emitGeneration(ph, traceCtx, { runId: "r" }, {
|
||||
output: [{ role: "assistant", content: "world" }],
|
||||
}, false);
|
||||
|
||||
const props = ph.capture.mock.calls[0][0].properties;
|
||||
expect(props.$ai_input).toEqual(input);
|
||||
expect(props.$ai_output_choices).toEqual([{ role: "assistant", content: "world" }]);
|
||||
emitGeneration(ph, traceCtx, "s", { cost: { totalUsd: 0 } }, true);
|
||||
expect(ph.capture.mock.calls[0][0].properties.$ai_total_cost_usd).toBeUndefined();
|
||||
});
|
||||
|
||||
it("captures error details when generation fails (enables error tracking in PostHog)", () => {
|
||||
it("does not set $ai_total_cost_usd when cost object is absent", () => {
|
||||
traceCtx.startTrace("s", "r");
|
||||
emitGeneration(ph, traceCtx, "s", {}, true);
|
||||
expect(ph.capture.mock.calls[0][0].properties.$ai_total_cost_usd).toBeUndefined();
|
||||
});
|
||||
|
||||
emitGeneration(ph, traceCtx, { runId: "r" }, {
|
||||
error: { message: "Rate limit exceeded" },
|
||||
}, true);
|
||||
it("sets $ai_total_cost_usd when cost is positive", () => {
|
||||
traceCtx.startTrace("s", "r");
|
||||
emitGeneration(ph, traceCtx, "s", { cost: { totalUsd: 0.05 } }, true);
|
||||
expect(ph.capture.mock.calls[0][0].properties.$ai_total_cost_usd).toBe(0.05);
|
||||
});
|
||||
|
||||
it("does not set token counts when usage is absent (prevents null in PostHog)", () => {
|
||||
traceCtx.startTrace("s", "r");
|
||||
emitGeneration(ph, traceCtx, "s", {}, true);
|
||||
const props = ph.capture.mock.calls[0][0].properties;
|
||||
expect(props).not.toHaveProperty("$ai_input_tokens");
|
||||
expect(props).not.toHaveProperty("$ai_output_tokens");
|
||||
});
|
||||
|
||||
it("does not set token counts when they are zero", () => {
|
||||
traceCtx.startTrace("s", "r");
|
||||
emitGeneration(ph, traceCtx, "s", { usage: { inputTokens: 0, outputTokens: 0 } }, true);
|
||||
const props = ph.capture.mock.calls[0][0].properties;
|
||||
expect(props).not.toHaveProperty("$ai_input_tokens");
|
||||
expect(props).not.toHaveProperty("$ai_output_tokens");
|
||||
});
|
||||
|
||||
it("captures error details when generation fails", () => {
|
||||
traceCtx.startTrace("s", "r");
|
||||
emitGeneration(ph, traceCtx, "s", { error: { message: "Rate limit exceeded" } }, true);
|
||||
const props = ph.capture.mock.calls[0][0].properties;
|
||||
expect(props.$ai_is_error).toBe(true);
|
||||
expect(props.$ai_error).toBe("Rate limit exceeded");
|
||||
});
|
||||
|
||||
it("handles string error (defensive: different error shapes from providers)", () => {
|
||||
it("detects error from success=false even without error field", () => {
|
||||
traceCtx.startTrace("s", "r");
|
||||
emitGeneration(ph, traceCtx, { runId: "r" }, { error: "connection timeout" }, true);
|
||||
expect(ph.capture.mock.calls[0][0].properties.$ai_error).toBe("connection timeout");
|
||||
emitGeneration(ph, traceCtx, "s", { success: false }, true);
|
||||
expect(ph.capture.mock.calls[0][0].properties.$ai_is_error).toBe(true);
|
||||
});
|
||||
|
||||
it("includes tool names in $ai_tools when tools were called (enables tool usage analytics)", () => {
|
||||
it("includes tool names from both spans and messages in $ai_tools", () => {
|
||||
traceCtx.startTrace("s", "r");
|
||||
traceCtx.startToolSpan("r", "web_search", {});
|
||||
traceCtx.endToolSpan("r", "web_search", {});
|
||||
traceCtx.startToolSpan("r", "exec", {});
|
||||
traceCtx.endToolSpan("r", "exec", {});
|
||||
traceCtx.startToolSpan("s", "web_search", {});
|
||||
traceCtx.endToolSpan("s", "web_search", {});
|
||||
|
||||
emitGeneration(ph, traceCtx, { runId: "r" }, {}, true);
|
||||
const messages = [
|
||||
{ role: "assistant", tool_calls: [{ function: { name: "exec" } }] },
|
||||
{ role: "tool", name: "exec", content: "result" },
|
||||
];
|
||||
|
||||
const props = ph.capture.mock.calls[0][0].properties;
|
||||
expect(props.$ai_tools).toEqual(["web_search", "exec"]);
|
||||
emitGeneration(ph, traceCtx, "s", { messages }, true);
|
||||
|
||||
const tools = ph.capture.mock.calls[0][0].properties.$ai_tools;
|
||||
expect(tools).toEqual([
|
||||
{ type: "function", function: { name: "web_search" } },
|
||||
{ type: "function", function: { name: "exec" } },
|
||||
]);
|
||||
});
|
||||
|
||||
it("sets $ai_tools to undefined when no tools were called (prevents empty array in PostHog)", () => {
|
||||
it("sets $ai_tools to undefined when no tools were called", () => {
|
||||
traceCtx.startTrace("s", "r");
|
||||
emitGeneration(ph, traceCtx, { runId: "r" }, {}, true);
|
||||
emitGeneration(ph, traceCtx, "s", {}, true);
|
||||
expect(ph.capture.mock.calls[0][0].properties.$ai_tools).toBeUndefined();
|
||||
});
|
||||
|
||||
it("silently skips when trace does not exist (prevents crash on stale runId)", () => {
|
||||
emitGeneration(ph, traceCtx, { runId: "ghost" }, {}, true);
|
||||
it("silently skips when trace does not exist", () => {
|
||||
emitGeneration(ph, traceCtx, "ghost", {}, true);
|
||||
expect(ph.capture).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("falls back to event.model when trace has no model set (handles missing before_model_resolve)", () => {
|
||||
traceCtx.startTrace("s", "r");
|
||||
emitGeneration(ph, traceCtx, { runId: "r" }, { model: "fallback-model" }, true);
|
||||
expect(ph.capture.mock.calls[0][0].properties.$ai_model).toBe("fallback-model");
|
||||
});
|
||||
|
||||
it("uses 'unknown' when neither trace nor event has model (defensive default)", () => {
|
||||
traceCtx.startTrace("s", "r");
|
||||
emitGeneration(ph, traceCtx, { runId: "r" }, {}, true);
|
||||
expect(ph.capture.mock.calls[0][0].properties.$ai_model).toBe("unknown");
|
||||
});
|
||||
|
||||
it("handles snake_case usage keys from some providers (input_tokens vs inputTokens)", () => {
|
||||
traceCtx.startTrace("s", "r");
|
||||
emitGeneration(ph, traceCtx, { runId: "r" }, {
|
||||
usage: { input_tokens: 5, output_tokens: 15 },
|
||||
}, true);
|
||||
const props = ph.capture.mock.calls[0][0].properties;
|
||||
expect(props.$ai_input_tokens).toBe(5);
|
||||
expect(props.$ai_output_tokens).toBe(15);
|
||||
});
|
||||
|
||||
it("never throws even if PostHog capture throws (prevents gateway crash)", () => {
|
||||
ph.capture.mockImplementation(() => { throw new Error("PostHog down"); });
|
||||
traceCtx.startTrace("s", "r");
|
||||
expect(() => emitGeneration(ph, traceCtx, { runId: "r" }, {}, true)).not.toThrow();
|
||||
expect(() => emitGeneration(ph, traceCtx, "s", {}, true)).not.toThrow();
|
||||
});
|
||||
|
||||
it("uses durationMs from event for latency when available (more accurate)", () => {
|
||||
traceCtx.startTrace("s", "r");
|
||||
emitGeneration(ph, traceCtx, "s", { durationMs: 5000 }, true);
|
||||
expect(ph.capture.mock.calls[0][0].properties.$ai_latency).toBe(5);
|
||||
});
|
||||
});
|
||||
|
||||
@ -149,27 +193,26 @@ describe("emitToolSpan", () => {
|
||||
traceCtx = new TraceContextManager();
|
||||
});
|
||||
|
||||
it("emits $ai_span with correct tool name, timing, and trace linkage", () => {
|
||||
it("emits $ai_span with correct tool name, timing, and trace linkage (same session key)", () => {
|
||||
traceCtx.startTrace("sess", "r");
|
||||
traceCtx.startToolSpan("r", "web_search", { q: "test" });
|
||||
traceCtx.endToolSpan("r", "web_search", { results: [] });
|
||||
traceCtx.startToolSpan("sess", "web_search", { q: "test" });
|
||||
traceCtx.endToolSpan("sess", "web_search", { results: [] });
|
||||
|
||||
emitToolSpan(ph, traceCtx, "r", {}, false);
|
||||
emitToolSpan(ph, traceCtx, "sess", {}, false);
|
||||
|
||||
const call = ph.capture.mock.calls[0][0];
|
||||
expect(call.event).toBe("$ai_span");
|
||||
expect(call.properties.$ai_span_name).toBe("web_search");
|
||||
expect(call.properties.$ai_trace_id).toBe(traceCtx.getTrace("r")!.traceId);
|
||||
expect(call.properties.$ai_parent_id).toBe(traceCtx.getTrace("r")!.traceId);
|
||||
expect(typeof call.properties.$ai_latency).toBe("number");
|
||||
expect(call.properties.$ai_trace_id).toBe(traceCtx.getTrace("sess")!.traceId);
|
||||
expect(call.properties.$ai_parent_id).toBe(traceCtx.getTrace("sess")!.traceId);
|
||||
});
|
||||
|
||||
it("excludes tool_params and tool_result when privacy mode is on (enforces content redaction)", () => {
|
||||
it("excludes tool_params and tool_result when privacy mode is on", () => {
|
||||
traceCtx.startTrace("s", "r");
|
||||
traceCtx.startToolSpan("r", "exec", { cmd: "cat /etc/passwd" });
|
||||
traceCtx.endToolSpan("r", "exec", { output: "root:x:0:0:..." });
|
||||
traceCtx.startToolSpan("s", "exec", { cmd: "cat /etc/passwd" });
|
||||
traceCtx.endToolSpan("s", "exec", { output: "root:x:0:0:..." });
|
||||
|
||||
emitToolSpan(ph, traceCtx, "r", {}, true);
|
||||
emitToolSpan(ph, traceCtx, "s", {}, true);
|
||||
|
||||
const props = ph.capture.mock.calls[0][0].properties;
|
||||
expect(props).not.toHaveProperty("tool_params");
|
||||
@ -178,78 +221,51 @@ describe("emitToolSpan", () => {
|
||||
|
||||
it("includes tool_params and tool_result with secrets stripped when privacy is off", () => {
|
||||
traceCtx.startTrace("s", "r");
|
||||
traceCtx.startToolSpan("r", "api_call", { url: "https://api.example.com", apiKey: "secret" });
|
||||
traceCtx.endToolSpan("r", "api_call", { status: 200, data: "ok" });
|
||||
traceCtx.startToolSpan("s", "api_call", { url: "https://example.com", apiKey: "secret" });
|
||||
traceCtx.endToolSpan("s", "api_call", { status: 200 });
|
||||
|
||||
emitToolSpan(ph, traceCtx, "r", {}, false);
|
||||
emitToolSpan(ph, traceCtx, "s", {}, false);
|
||||
|
||||
const props = ph.capture.mock.calls[0][0].properties;
|
||||
expect(props.tool_params.url).toBe("https://api.example.com");
|
||||
expect(props.tool_params.url).toBe("https://example.com");
|
||||
expect(props.tool_params.apiKey).toBe("[REDACTED]");
|
||||
expect(props.tool_result.status).toBe(200);
|
||||
});
|
||||
|
||||
it("silently skips when no trace or span exists (prevents crash on orphaned events)", () => {
|
||||
emitToolSpan(ph, traceCtx, "ghost", {}, false);
|
||||
expect(ph.capture).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("never throws even if PostHog capture throws (prevents gateway crash)", () => {
|
||||
ph.capture.mockImplementation(() => { throw new Error("boom"); });
|
||||
it("uses event.durationMs for latency when span timing is unavailable", () => {
|
||||
traceCtx.startTrace("s", "r");
|
||||
traceCtx.startToolSpan("r", "x", {});
|
||||
traceCtx.endToolSpan("r", "x", {});
|
||||
expect(() => emitToolSpan(ph, traceCtx, "r", {}, false)).not.toThrow();
|
||||
traceCtx.startToolSpan("s", "exec", {});
|
||||
|
||||
emitToolSpan(ph, traceCtx, "s", { durationMs: 250 }, false);
|
||||
|
||||
expect(ph.capture.mock.calls[0][0].properties.$ai_latency).toBe(0.25);
|
||||
});
|
||||
});
|
||||
|
||||
describe("emitTrace", () => {
|
||||
let ph: ReturnType<typeof createMockPostHog>;
|
||||
let traceCtx: TraceContextManager;
|
||||
|
||||
beforeEach(() => {
|
||||
ph = createMockPostHog();
|
||||
traceCtx = new TraceContextManager();
|
||||
});
|
||||
|
||||
it("emits $ai_trace with correct trace ID, session ID, and tool count", () => {
|
||||
const ph = createMockPostHog();
|
||||
const traceCtx = new TraceContextManager();
|
||||
traceCtx.startTrace("sess-1", "r");
|
||||
traceCtx.startToolSpan("r", "a", {});
|
||||
traceCtx.endToolSpan("r", "a", {});
|
||||
traceCtx.startToolSpan("r", "b", {});
|
||||
traceCtx.endToolSpan("r", "b", {});
|
||||
traceCtx.startToolSpan("sess-1", "a", {});
|
||||
traceCtx.endToolSpan("sess-1", "a", {});
|
||||
traceCtx.startToolSpan("sess-1", "b", {});
|
||||
traceCtx.endToolSpan("sess-1", "b", {});
|
||||
|
||||
emitTrace(ph, traceCtx, { runId: "r" });
|
||||
emitTrace(ph, traceCtx, "sess-1");
|
||||
|
||||
const call = ph.capture.mock.calls[0][0];
|
||||
expect(call.event).toBe("$ai_trace");
|
||||
expect(call.properties.$ai_trace_id).toBe(traceCtx.getTrace("r")!.traceId);
|
||||
expect(call.properties.$ai_session_id).toBe("sess-1");
|
||||
expect(call.properties.$ai_trace_id).toBe(traceCtx.getTrace("sess-1")!.traceId);
|
||||
expect(call.properties.tool_count).toBe(2);
|
||||
expect(call.properties.$ai_span_name).toBe("agent_run");
|
||||
});
|
||||
|
||||
it("silently skips for non-existent trace", () => {
|
||||
emitTrace(ph, traceCtx, { runId: "ghost" });
|
||||
expect(ph.capture).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe("emitCustomEvent", () => {
|
||||
it("captures event with $process_person_profile: false (prevents person profile creation)", () => {
|
||||
const ph = createMockPostHog();
|
||||
emitCustomEvent(ph, "dench_session_start", { session_id: "abc", channel: "telegram" });
|
||||
|
||||
emitCustomEvent(ph, "dench_session_start", { session_id: "abc" });
|
||||
const call = ph.capture.mock.calls[0][0];
|
||||
expect(call.event).toBe("dench_session_start");
|
||||
expect(call.properties.session_id).toBe("abc");
|
||||
expect(call.properties.channel).toBe("telegram");
|
||||
expect(call.properties.$process_person_profile).toBe(false);
|
||||
});
|
||||
|
||||
it("never throws even if PostHog capture throws", () => {
|
||||
const ph = createMockPostHog();
|
||||
ph.capture.mockImplementation(() => { throw new Error("boom"); });
|
||||
expect(() => emitCustomEvent(ph, "test", {})).not.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
@ -1,5 +1,23 @@
|
||||
import { describe, it, expect, beforeEach } from "vitest";
|
||||
import { TraceContextManager } from "../../extensions/posthog-analytics/lib/trace-context.js";
|
||||
import { TraceContextManager, resolveSessionKey } from "../../extensions/posthog-analytics/lib/trace-context.js";
|
||||
|
||||
describe("resolveSessionKey", () => {
|
||||
it("prefers sessionId over sessionKey and runId", () => {
|
||||
expect(resolveSessionKey({ sessionId: "s", sessionKey: "k", runId: "r" })).toBe("s");
|
||||
});
|
||||
|
||||
it("falls back to sessionKey when sessionId is absent", () => {
|
||||
expect(resolveSessionKey({ sessionKey: "k", runId: "r" })).toBe("k");
|
||||
});
|
||||
|
||||
it("falls back to runId when both session fields are absent", () => {
|
||||
expect(resolveSessionKey({ runId: "r" })).toBe("r");
|
||||
});
|
||||
|
||||
it("returns 'unknown' when no identifiers are present", () => {
|
||||
expect(resolveSessionKey({})).toBe("unknown");
|
||||
});
|
||||
});
|
||||
|
||||
describe("TraceContextManager", () => {
|
||||
let ctx: TraceContextManager;
|
||||
@ -8,42 +26,55 @@ describe("TraceContextManager", () => {
|
||||
ctx = new TraceContextManager();
|
||||
});
|
||||
|
||||
// ── Trace lifecycle ──
|
||||
// ── Trace lifecycle (session-keyed) ──
|
||||
|
||||
it("generates a unique UUID traceId for each trace (ensures PostHog trace grouping)", () => {
|
||||
ctx.startTrace("session-1", "run-1");
|
||||
ctx.startTrace("session-1", "run-2");
|
||||
const t1 = ctx.getTrace("run-1")!;
|
||||
const t2 = ctx.getTrace("run-2")!;
|
||||
ctx.startTrace("session-2", "run-2");
|
||||
const t1 = ctx.getTrace("session-1")!;
|
||||
const t2 = ctx.getTrace("session-2")!;
|
||||
expect(t1.traceId).toMatch(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/);
|
||||
expect(t1.traceId).not.toBe(t2.traceId);
|
||||
});
|
||||
|
||||
it("records sessionId and runId on the trace (ensures trace-to-session linkage)", () => {
|
||||
ctx.startTrace("sess-abc", "run-xyz");
|
||||
const trace = ctx.getTrace("run-xyz")!;
|
||||
expect(trace.sessionId).toBe("sess-abc");
|
||||
expect(trace.runId).toBe("run-xyz");
|
||||
it("indexes by sessionKey so different runIds in the same session share one trace", () => {
|
||||
ctx.startTrace("sess-abc", "run-1");
|
||||
ctx.startToolSpan("sess-abc", "search", { q: "test" });
|
||||
ctx.endToolSpan("sess-abc", "search", { ok: true });
|
||||
|
||||
const trace = ctx.getTrace("sess-abc")!;
|
||||
expect(trace.toolSpans).toHaveLength(1);
|
||||
expect(trace.runId).toBe("run-1");
|
||||
});
|
||||
|
||||
it("replaces existing trace when startTrace is called again for the same session (new run)", () => {
|
||||
ctx.startTrace("sess", "run-old");
|
||||
ctx.startToolSpan("sess", "exec", {});
|
||||
expect(ctx.getTrace("sess")!.toolSpans).toHaveLength(1);
|
||||
|
||||
ctx.startTrace("sess", "run-new");
|
||||
expect(ctx.getTrace("sess")!.runId).toBe("run-new");
|
||||
expect(ctx.getTrace("sess")!.toolSpans).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("records startedAt timestamp on trace creation (enables latency calculation)", () => {
|
||||
const before = Date.now();
|
||||
ctx.startTrace("s", "r");
|
||||
const after = Date.now();
|
||||
const trace = ctx.getTrace("r")!;
|
||||
const trace = ctx.getTrace("s")!;
|
||||
expect(trace.startedAt).toBeGreaterThanOrEqual(before);
|
||||
expect(trace.startedAt).toBeLessThanOrEqual(after);
|
||||
});
|
||||
|
||||
it("endTrace sets endedAt on the trace (enables accurate latency measurement)", () => {
|
||||
it("endTrace sets endedAt on the trace", () => {
|
||||
ctx.startTrace("s", "r");
|
||||
ctx.endTrace("r");
|
||||
const trace = ctx.getTrace("r")!;
|
||||
ctx.endTrace("s");
|
||||
const trace = ctx.getTrace("s")!;
|
||||
expect(trace.endedAt).toBeDefined();
|
||||
expect(trace.endedAt!).toBeGreaterThanOrEqual(trace.startedAt);
|
||||
});
|
||||
|
||||
it("returns undefined for non-existent runId (defensive: no crash on stale references)", () => {
|
||||
it("returns undefined for non-existent session key (defensive)", () => {
|
||||
expect(ctx.getTrace("nonexistent")).toBeUndefined();
|
||||
expect(ctx.getModel("nonexistent")).toBeUndefined();
|
||||
expect(ctx.getLastToolSpan("nonexistent")).toBeUndefined();
|
||||
@ -51,140 +82,103 @@ describe("TraceContextManager", () => {
|
||||
|
||||
// ── Model resolution ──
|
||||
|
||||
it("extracts provider from model string with slash separator (enables PostHog $ai_provider)", () => {
|
||||
it("extracts provider from model string with slash separator", () => {
|
||||
ctx.startTrace("s", "r");
|
||||
ctx.setModel("r", "anthropic/claude-4-sonnet");
|
||||
expect(ctx.getTrace("r")!.model).toBe("anthropic/claude-4-sonnet");
|
||||
expect(ctx.getTrace("r")!.provider).toBe("anthropic");
|
||||
ctx.setModel("s", "anthropic/claude-4-sonnet");
|
||||
expect(ctx.getTrace("s")!.model).toBe("anthropic/claude-4-sonnet");
|
||||
expect(ctx.getTrace("s")!.provider).toBe("anthropic");
|
||||
});
|
||||
|
||||
it("does not set provider for models without a slash (e.g. 'gpt-4o')", () => {
|
||||
it("does not set provider for models without a slash", () => {
|
||||
ctx.startTrace("s", "r");
|
||||
ctx.setModel("r", "gpt-4o");
|
||||
expect(ctx.getTrace("r")!.model).toBe("gpt-4o");
|
||||
expect(ctx.getTrace("r")!.provider).toBeUndefined();
|
||||
ctx.setModel("s", "gpt-4o");
|
||||
expect(ctx.getTrace("s")!.model).toBe("gpt-4o");
|
||||
expect(ctx.getTrace("s")!.provider).toBeUndefined();
|
||||
});
|
||||
|
||||
it("handles multi-segment provider paths like vercel-ai-gateway/anthropic/claude-4", () => {
|
||||
ctx.startTrace("s", "r");
|
||||
ctx.setModel("r", "vercel-ai-gateway/anthropic/claude-4");
|
||||
expect(ctx.getTrace("r")!.provider).toBe("vercel-ai-gateway");
|
||||
});
|
||||
|
||||
it("ignores setModel for non-existent run (no crash on race between model resolve and cleanup)", () => {
|
||||
ctx.setModel("ghost-run", "gpt-4o");
|
||||
expect(ctx.getModel("ghost-run")).toBeUndefined();
|
||||
ctx.setModel("s", "vercel-ai-gateway/anthropic/claude-4");
|
||||
expect(ctx.getTrace("s")!.provider).toBe("vercel-ai-gateway");
|
||||
});
|
||||
|
||||
// ── Input capture with privacy ──
|
||||
|
||||
it("redacts message content when privacy mode is on (prevents content leakage in PostHog)", () => {
|
||||
it("redacts message content when privacy mode is on (prevents content leakage)", () => {
|
||||
ctx.startTrace("s", "r");
|
||||
ctx.setInput("r", [
|
||||
ctx.setInput("s", [
|
||||
{ role: "user", content: "My SSN is 123-45-6789" },
|
||||
{ role: "assistant", content: "I should not store that." },
|
||||
], true);
|
||||
const input = ctx.getTrace("r")!.input as Array<Record<string, unknown>>;
|
||||
const input = ctx.getTrace("s")!.input as Array<Record<string, unknown>>;
|
||||
expect(input[0].content).toBe("[REDACTED]");
|
||||
expect(input[1].content).toBe("[REDACTED]");
|
||||
expect(input[0].role).toBe("user");
|
||||
});
|
||||
|
||||
it("preserves message content when privacy mode is off (allows opt-in content capture)", () => {
|
||||
it("preserves message content when privacy mode is off", () => {
|
||||
ctx.startTrace("s", "r");
|
||||
ctx.setInput("r", [{ role: "user", content: "Hello world" }], false);
|
||||
const input = ctx.getTrace("r")!.input as Array<Record<string, unknown>>;
|
||||
expect(input[0].content).toBe("Hello world");
|
||||
});
|
||||
|
||||
it("ignores setInput for non-existent run (no crash on stale context)", () => {
|
||||
ctx.setInput("ghost", [{ role: "user", content: "test" }], true);
|
||||
expect(ctx.getTrace("ghost")).toBeUndefined();
|
||||
ctx.setInput("s", [{ role: "user", content: "Hello" }], false);
|
||||
const input = ctx.getTrace("s")!.input as Array<Record<string, unknown>>;
|
||||
expect(input[0].content).toBe("Hello");
|
||||
});
|
||||
|
||||
// ── Tool span lifecycle ──
|
||||
|
||||
it("tracks tool span start/end with timing and error detection (enables $ai_span events)", () => {
|
||||
it("tracks tool span start/end with timing and error detection", () => {
|
||||
ctx.startTrace("s", "r");
|
||||
const before = Date.now();
|
||||
ctx.startToolSpan("r", "web_search", { query: "test" });
|
||||
const span = ctx.getLastToolSpan("r")!;
|
||||
ctx.startToolSpan("s", "web_search", { query: "test" });
|
||||
const span = ctx.getLastToolSpan("s")!;
|
||||
expect(span.toolName).toBe("web_search");
|
||||
expect(span.startedAt).toBeGreaterThanOrEqual(before);
|
||||
expect(span.endedAt).toBeUndefined();
|
||||
expect(span.spanId).toMatch(/^[0-9a-f]{8}-/);
|
||||
|
||||
ctx.endToolSpan("r", "web_search", { results: ["a", "b"] });
|
||||
ctx.endToolSpan("s", "web_search", { results: ["a"] });
|
||||
expect(span.endedAt).toBeDefined();
|
||||
expect(span.isError).toBe(false);
|
||||
});
|
||||
|
||||
it("marks tool span as error when result contains an 'error' key (enables $ai_is_error flag)", () => {
|
||||
it("marks tool span as error when result contains an 'error' key", () => {
|
||||
ctx.startTrace("s", "r");
|
||||
ctx.startToolSpan("r", "exec", { cmd: "rm -rf /" });
|
||||
ctx.endToolSpan("r", "exec", { error: "permission denied" });
|
||||
expect(ctx.getLastToolSpan("r")!.isError).toBe(true);
|
||||
ctx.startToolSpan("s", "exec", { cmd: "rm -rf /" });
|
||||
ctx.endToolSpan("s", "exec", { error: "permission denied" });
|
||||
expect(ctx.getLastToolSpan("s")!.isError).toBe(true);
|
||||
});
|
||||
|
||||
it("does not mark as error for results without error key (prevents false error flags)", () => {
|
||||
it("handles multiple tool spans in order", () => {
|
||||
ctx.startTrace("s", "r");
|
||||
ctx.startToolSpan("r", "read_file", { path: "/tmp/x" });
|
||||
ctx.endToolSpan("r", "read_file", { content: "file data" });
|
||||
expect(ctx.getLastToolSpan("r")!.isError).toBe(false);
|
||||
ctx.startToolSpan("s", "search", {});
|
||||
ctx.endToolSpan("s", "search", { ok: true });
|
||||
ctx.startToolSpan("s", "read", {});
|
||||
ctx.endToolSpan("s", "read", { ok: true });
|
||||
|
||||
expect(ctx.getTrace("s")!.toolSpans).toHaveLength(2);
|
||||
expect(ctx.getLastToolSpan("s")!.toolName).toBe("read");
|
||||
});
|
||||
|
||||
it("handles multiple tool spans in order (enables correct span-to-trace nesting)", () => {
|
||||
it("matches end to the most recent unfinished span of the same tool name", () => {
|
||||
ctx.startTrace("s", "r");
|
||||
ctx.startToolSpan("r", "search", { q: "a" });
|
||||
ctx.endToolSpan("r", "search", { ok: true });
|
||||
ctx.startToolSpan("r", "read", { path: "/tmp" });
|
||||
ctx.endToolSpan("r", "read", { ok: true });
|
||||
ctx.startToolSpan("s", "exec", { cmd: "ls" });
|
||||
ctx.endToolSpan("s", "exec", { output: "file1" });
|
||||
ctx.startToolSpan("s", "exec", { cmd: "pwd" });
|
||||
ctx.endToolSpan("s", "exec", { output: "/home" });
|
||||
|
||||
const trace = ctx.getTrace("r")!;
|
||||
expect(trace.toolSpans).toHaveLength(2);
|
||||
expect(trace.toolSpans[0].toolName).toBe("search");
|
||||
expect(trace.toolSpans[1].toolName).toBe("read");
|
||||
expect(ctx.getLastToolSpan("r")!.toolName).toBe("read");
|
||||
});
|
||||
|
||||
it("matches end to the most recent unfinished span of the same tool name (prevents mismatched close)", () => {
|
||||
ctx.startTrace("s", "r");
|
||||
ctx.startToolSpan("r", "exec", { cmd: "ls" });
|
||||
ctx.endToolSpan("r", "exec", { output: "file1" });
|
||||
ctx.startToolSpan("r", "exec", { cmd: "pwd" });
|
||||
ctx.endToolSpan("r", "exec", { output: "/home" });
|
||||
|
||||
const spans = ctx.getTrace("r")!.toolSpans;
|
||||
const spans = ctx.getTrace("s")!.toolSpans;
|
||||
expect(spans).toHaveLength(2);
|
||||
expect(spans[0].endedAt).toBeDefined();
|
||||
expect(spans[1].endedAt).toBeDefined();
|
||||
});
|
||||
|
||||
it("ignores startToolSpan/endToolSpan for non-existent run (no crash on orphaned tool events)", () => {
|
||||
ctx.startToolSpan("ghost", "search", {});
|
||||
ctx.endToolSpan("ghost", "search", {});
|
||||
expect(ctx.getLastToolSpan("ghost")).toBeUndefined();
|
||||
});
|
||||
// ── Concurrent sessions ──
|
||||
|
||||
it("getLastToolSpan returns undefined when no spans exist (defensive edge case)", () => {
|
||||
ctx.startTrace("s", "r");
|
||||
expect(ctx.getLastToolSpan("r")).toBeUndefined();
|
||||
});
|
||||
|
||||
// ── Concurrent runs ──
|
||||
|
||||
it("isolates traces across concurrent runs (prevents cross-run data contamination)", () => {
|
||||
it("isolates traces across concurrent sessions (prevents cross-session contamination)", () => {
|
||||
ctx.startTrace("s1", "run-a");
|
||||
ctx.startTrace("s2", "run-b");
|
||||
ctx.setModel("run-a", "gpt-4o");
|
||||
ctx.setModel("run-b", "claude-4-sonnet");
|
||||
ctx.startToolSpan("run-a", "search", {});
|
||||
ctx.startToolSpan("run-b", "exec", {});
|
||||
ctx.setModel("s1", "gpt-4o");
|
||||
ctx.setModel("s2", "claude-4-sonnet");
|
||||
ctx.startToolSpan("s1", "search", {});
|
||||
ctx.startToolSpan("s2", "exec", {});
|
||||
|
||||
expect(ctx.getModel("run-a")).toBe("gpt-4o");
|
||||
expect(ctx.getModel("run-b")).toBe("claude-4-sonnet");
|
||||
expect(ctx.getTrace("run-a")!.toolSpans).toHaveLength(1);
|
||||
expect(ctx.getTrace("run-a")!.toolSpans[0].toolName).toBe("search");
|
||||
expect(ctx.getTrace("run-b")!.toolSpans[0].toolName).toBe("exec");
|
||||
expect(ctx.getModel("s1")).toBe("gpt-4o");
|
||||
expect(ctx.getModel("s2")).toBe("claude-4-sonnet");
|
||||
expect(ctx.getTrace("s1")!.toolSpans[0].toolName).toBe("search");
|
||||
expect(ctx.getTrace("s2")!.toolSpans[0].toolName).toBe("exec");
|
||||
});
|
||||
});
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user