From 35be87b09b0c2463491c146a1a785382b208e961 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Fri, 20 Feb 2026 23:52:43 -0500 Subject: [PATCH] fix(tui): strip inbound metadata blocks from user messages (clean rewrite) (#22345) * fix(tui): strip inbound metadata blocks from user text * chore: clean up metadata-strip format and changelog credit * chore: format tui metadata-strip tests * test: align metadata-strip regression expectations * refactor: reuse canonical inbound metadata stripper * test: allow tmp media fixture paths in media-understanding tests * refactor: reuse canonical inbound metadata stripper * format: fix changelog blank line after headings * test: fix unrelated check typing regressions * test: align memory async mock embedding signatures * test: avoid tsgo mock typing pitfall * test: restore async search mock typings in merge tree * test: trigger ci rerun without behavior change * chore: dedupe today's changelog entries * fix: dedupe sqlite mock keys in qmd manager test * Update qmd-manager.test.ts * test: align chat metadata sanitization expectation --- CHANGELOG.md | 4 +- src/discord/send.components.test.ts | 4 +- src/gateway/chat-sanitize.test.ts | 6 +- src/gateway/chat-sanitize.ts | 13 +- ...ver.chat.gateway-server-chat-b.e2e.test.ts | 2 + .../runner.auto-audio.test.ts | 5 +- .../runner.deepgram.test.ts | 146 ++++++++++-------- src/memory/manager.async-search.test.ts | 9 +- src/shared/chat-envelope.ts | 27 ---- src/tui/tui-formatters.ts | 4 +- 10 files changed, 101 insertions(+), 119 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c54ca26ff2..d71b4798ba6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,7 +33,8 @@ Docs: https://docs.openclaw.ai - Security/OpenClawKit/UI: strip inbound metadata blocks from user messages in TUI rendering while preserving user-authored content. (#22345) Thanks @kansodata, @vincentkoc. 
- Security/OpenClawKit/UI: prevent inbound metadata leaks and reply-tag streaming artifacts in TUI rendering by stripping untrusted metadata prefixes at display boundaries. (#22346) Thanks @akramcodez, @vincentkoc. - Agents/System Prompt: label allowlisted senders as authorized senders to avoid implying ownership. Thanks @thewilloftheshadow. -- Agents/Tool display: fix exec cwd suffix inference so `pushd ... && popd ... && ` does not keep stale `(in )` context in summaries. (#21925) thanks @Lukavyi. +- Agents/Tool display: fix exec cwd suffix inference so `pushd ... && popd ... && ` does not keep stale `(in )` context in summaries. (#21925) Thanks @Lukavyi. +- Discord: restore model picker back navigation when a provider is missing and document the Discord picker flow. (#21458) Thanks @pejmanjohn and @thewilloftheshadow. - Gateway/Auth: allow trusted-proxy mode with loopback bind for same-host reverse-proxy deployments, while still requiring configured `gateway.trustedProxies`. (#20097) thanks @xinhuagu. - Gateway/Auth: allow authenticated clients across roles/scopes to call `health` while preserving role and scope enforcement for non-health methods. (#19699) thanks @Nachx639. - Gateway/Security: remove shared-IP fallback for canvas endpoints and require token or session capability for canvas access. Thanks @thewilloftheshadow. @@ -59,7 +60,6 @@ Docs: https://docs.openclaw.ai - WhatsApp/Cron/Heartbeat: enforce allowlisted routing for implicit scheduled/system delivery by merging pairing-store + configured `allowFrom` recipients, selecting authorized recipients when last-route context points to a non-allowlisted chat, and preventing heartbeat fan-out to recent unauthorized chats. - Heartbeat/Active hours: constrain active-hours `24` sentinel parsing to `24:00` in time validation so invalid values like `24:30` are rejected early. (#21410) thanks @adhitShet. 
- Heartbeat: treat `activeHours` windows with identical `start`/`end` times as zero-width (always outside the window) instead of always-active. (#21408) thanks @adhitShet. -- Discord: restore model picker back navigation when a provider is missing and document the Discord picker flow. (#21458) Thanks @pejmanjohn and @thewilloftheshadow. - Gateway/Pairing: tolerate legacy paired devices missing `roles`/`scopes` metadata in websocket upgrade checks and backfill metadata on reconnect. (#21447, fixes #21236) Thanks @joshavant. - Gateway/Pairing/CLI: align read-scope compatibility in pairing/device-token checks and add local `openclaw devices` fallback recovery for loopback `pairing required` deadlocks, with explicit fallback notice to unblock approval bootstrap flows. (#21616) Thanks @shakkernerd. - CLI/Pairing: default `pairing list` and `pairing approve` to the sole available pairing channel when omitted, so TUI-only setups can recover from `pairing required` without guessing channel arguments. (#21527) Thanks @losts1. 
diff --git a/src/discord/send.components.test.ts b/src/discord/send.components.test.ts index 2dd89d76e8d..41a05acbb19 100644 --- a/src/discord/send.components.test.ts +++ b/src/discord/send.components.test.ts @@ -25,7 +25,7 @@ describe("sendDiscordComponentMessage", () => { vi.clearAllMocks(); }); - it("registers component entries for DM channel targets", async () => { + it("keeps direct-channel DM session keys on component entries", async () => { const { rest, postMock, getMock } = makeDiscordRest(); getMock.mockResolvedValueOnce({ type: ChannelType.DM, @@ -48,6 +48,6 @@ describe("sendDiscordComponentMessage", () => { expect(registerMock).toHaveBeenCalledTimes(1); const args = registerMock.mock.calls[0]?.[0]; - expect(args?.entries[0]).toBeDefined(); + expect(args?.entries[0]?.sessionKey).toBe("agent:main:discord:channel:dm-1"); }); }); diff --git a/src/gateway/chat-sanitize.test.ts b/src/gateway/chat-sanitize.test.ts index bc55ef8476c..715c0e3db4a 100644 --- a/src/gateway/chat-sanitize.test.ts +++ b/src/gateway/chat-sanitize.test.ts @@ -59,15 +59,13 @@ describe("stripEnvelopeFromMessage", () => { expect(result.content).toBe("Actual user message"); }); - test("does not strip metadata-like blocks that are not a prefix", () => { + test("strips metadata-like blocks even when not a prefix", () => { const input = { role: "user", content: 'Actual text\nConversation info (untrusted metadata):\n```json\n{"message_id": "123"}\n```\n\nFollow-up', }; const result = stripEnvelopeFromMessage(input) as { content?: string }; - expect(result.content).toBe( - 'Actual text\nConversation info (untrusted metadata):\n```json\n{"message_id": "123"}\n```\n\nFollow-up', - ); + expect(result.content).toBe("Actual text\n\nFollow-up"); }); }); diff --git a/src/gateway/chat-sanitize.ts b/src/gateway/chat-sanitize.ts index 91238c58225..f87262ab5d3 100644 --- a/src/gateway/chat-sanitize.ts +++ b/src/gateway/chat-sanitize.ts @@ -1,8 +1,5 @@ -import { - stripEnvelope, - 
stripInboundMetadataBlocks, - stripMessageIdHints, -} from "../shared/chat-envelope.js"; +import { stripInboundMetadata } from "../auto-reply/reply/strip-inbound-meta.js"; +import { stripEnvelope, stripMessageIdHints } from "../shared/chat-envelope.js"; export { stripEnvelope }; @@ -16,7 +13,7 @@ function stripEnvelopeFromContent(content: unknown[]): { content: unknown[]; cha if (entry.type !== "text" || typeof entry.text !== "string") { return item; } - const stripped = stripMessageIdHints(stripEnvelope(stripInboundMetadataBlocks(entry.text))); + const stripped = stripMessageIdHints(stripEnvelope(stripInboundMetadata(entry.text))); if (stripped === entry.text) { return item; } @@ -43,7 +40,7 @@ export function stripEnvelopeFromMessage(message: unknown): unknown { const next: Record = { ...entry }; if (typeof entry.content === "string") { - const stripped = stripMessageIdHints(stripEnvelope(stripInboundMetadataBlocks(entry.content))); + const stripped = stripMessageIdHints(stripEnvelope(stripInboundMetadata(entry.content))); if (stripped !== entry.content) { next.content = stripped; changed = true; @@ -55,7 +52,7 @@ export function stripEnvelopeFromMessage(message: unknown): unknown { changed = true; } } else if (typeof entry.text === "string") { - const stripped = stripMessageIdHints(stripEnvelope(stripInboundMetadataBlocks(entry.text))); + const stripped = stripMessageIdHints(stripEnvelope(stripInboundMetadata(entry.text))); if (stripped !== entry.text) { next.text = stripped; changed = true; diff --git a/src/gateway/server.chat.gateway-server-chat-b.e2e.test.ts b/src/gateway/server.chat.gateway-server-chat-b.e2e.test.ts index 2255443a079..937089ea5a9 100644 --- a/src/gateway/server.chat.gateway-server-chat-b.e2e.test.ts +++ b/src/gateway/server.chat.gateway-server-chat-b.e2e.test.ts @@ -150,6 +150,7 @@ describe("gateway server chat", () => { let capturedOpts: GetReplyOptions | undefined; spy.mockImplementationOnce(async (_ctx: unknown, opts?: GetReplyOptions) => 
{ capturedOpts = opts; + return undefined; }); const sendRes = await rpcReq(ws, "chat.send", { @@ -314,6 +315,7 @@ describe("gateway server chat", () => { { once: true }, ); }); + return undefined; }); const sendResP = onceMessage(ws, (o) => o.type === "res" && o.id === "send-abort-1", 8_000); diff --git a/src/media-understanding/runner.auto-audio.test.ts b/src/media-understanding/runner.auto-audio.test.ts index 143891b5d04..b01291c8831 100644 --- a/src/media-understanding/runner.auto-audio.test.ts +++ b/src/media-understanding/runner.auto-audio.test.ts @@ -4,7 +4,6 @@ import path from "node:path"; import { describe, expect, it } from "vitest"; import type { MsgContext } from "../auto-reply/templating.js"; import type { OpenClawConfig } from "../config/config.js"; -import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js"; import { buildProviderRegistry, createMediaAttachmentCache, @@ -20,13 +19,13 @@ async function withAudioFixture( }) => Promise, ) { const originalPath = process.env.PATH; - process.env.PATH = "/usr/bin:/bin"; + process.env.PATH = ""; const tmpPath = path.join(os.tmpdir(), `openclaw-auto-audio-${Date.now()}.wav`); await fs.writeFile(tmpPath, Buffer.from("RIFF")); const ctx: MsgContext = { MediaPath: tmpPath, MediaType: "audio/wav" }; const media = normalizeMediaAttachments(ctx); const cache = createMediaAttachmentCache(media, { - localPathRoots: [resolvePreferredOpenClawTmpDir(), os.tmpdir()], + localPathRoots: [path.dirname(tmpPath)], }); try { diff --git a/src/media-understanding/runner.deepgram.test.ts b/src/media-understanding/runner.deepgram.test.ts index 8246c7cd087..e4c42d0e64a 100644 --- a/src/media-understanding/runner.deepgram.test.ts +++ b/src/media-understanding/runner.deepgram.test.ts @@ -4,7 +4,6 @@ import path from "node:path"; import { describe, expect, it } from "vitest"; import type { MsgContext } from "../auto-reply/templating.js"; import type { OpenClawConfig } from "../config/config.js"; -import { 
resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js"; import { buildProviderRegistry, createMediaAttachmentCache, @@ -12,78 +11,96 @@ import { runCapability, } from "./runner.js"; +async function withAudioFixture( + run: (params: { + ctx: MsgContext; + media: ReturnType; + cache: ReturnType; + }) => Promise, +) { + const originalPath = process.env.PATH; + process.env.PATH = ""; + const tmpPath = path.join(os.tmpdir(), `openclaw-deepgram-${Date.now()}.wav`); + await fs.writeFile(tmpPath, Buffer.from("RIFF")); + const ctx: MsgContext = { MediaPath: tmpPath, MediaType: "audio/wav" }; + const media = normalizeMediaAttachments(ctx); + const cache = createMediaAttachmentCache(media, { + localPathRoots: [path.dirname(tmpPath)], + }); + + try { + await run({ ctx, media, cache }); + } finally { + process.env.PATH = originalPath; + await cache.cleanup(); + await fs.unlink(tmpPath).catch(() => {}); + } +} + describe("runCapability deepgram provider options", () => { it("merges provider options, headers, and baseUrl overrides", async () => { - const tmpPath = path.join(os.tmpdir(), `openclaw-deepgram-${Date.now()}.wav`); - await fs.writeFile(tmpPath, Buffer.from("RIFF")); - const ctx: MsgContext = { MediaPath: tmpPath, MediaType: "audio/wav" }; - const media = normalizeMediaAttachments(ctx); - const cache = createMediaAttachmentCache(media, { - localPathRoots: [resolvePreferredOpenClawTmpDir(), os.tmpdir()], - }); + await withAudioFixture(async ({ ctx, media, cache }) => { + let seenQuery: Record | undefined; + let seenBaseUrl: string | undefined; + let seenHeaders: Record | undefined; - let seenQuery: Record | undefined; - let seenBaseUrl: string | undefined; - let seenHeaders: Record | undefined; - - const providerRegistry = buildProviderRegistry({ - deepgram: { - id: "deepgram", - capabilities: ["audio"], - transcribeAudio: async (req) => { - seenQuery = req.query; - seenBaseUrl = req.baseUrl; - seenHeaders = req.headers; - return { text: "ok", model: 
req.model }; - }, - }, - }); - - const cfg = { - models: { - providers: { - deepgram: { - baseUrl: "https://provider.example", - apiKey: "test-key", - headers: { "X-Provider": "1" }, - models: [], + const providerRegistry = buildProviderRegistry({ + deepgram: { + id: "deepgram", + capabilities: ["audio"], + transcribeAudio: async (req) => { + seenQuery = req.query; + seenBaseUrl = req.baseUrl; + seenHeaders = req.headers; + return { text: "ok", model: req.model }; }, }, - }, - tools: { - media: { - audio: { - enabled: true, - baseUrl: "https://config.example", - headers: { "X-Config": "2" }, - providerOptions: { - deepgram: { - detect_language: true, - punctuate: true, - }, + }); + + const cfg = { + models: { + providers: { + deepgram: { + baseUrl: "https://provider.example", + apiKey: "test-key", + headers: { "X-Provider": "1" }, + models: [], }, - deepgram: { smartFormat: true }, - models: [ - { - provider: "deepgram", - model: "nova-3", - baseUrl: "https://entry.example", - headers: { "X-Entry": "3" }, - providerOptions: { - deepgram: { - detectLanguage: false, - punctuate: false, - smart_format: true, - }, + }, + }, + tools: { + media: { + audio: { + enabled: true, + baseUrl: "https://config.example", + headers: { "X-Config": "2" }, + providerOptions: { + deepgram: { + detect_language: true, + punctuate: true, }, }, - ], + deepgram: { smartFormat: true }, + models: [ + { + provider: "deepgram", + model: "nova-3", + baseUrl: "https://entry.example", + headers: { "X-Entry": "3" }, + providerOptions: { + deepgram: { + detectLanguage: false, + punctuate: false, + smart_format: true, + }, + }, + }, + ], + }, }, }, - }, - } as unknown as OpenClawConfig; + } as unknown as OpenClawConfig; - try { const result = await runCapability({ capability: "audio", cfg, @@ -105,9 +122,6 @@ describe("runCapability deepgram provider options", () => { smart_format: true, }); expect((seenQuery as Record)["detectLanguage"]).toBeUndefined(); - } finally { - await cache.cleanup(); - 
await fs.unlink(tmpPath).catch(() => {}); - } + }); }); }); diff --git a/src/memory/manager.async-search.test.ts b/src/memory/manager.async-search.test.ts index 30ac2dc07d0..ef26fc394e4 100644 --- a/src/memory/manager.async-search.test.ts +++ b/src/memory/manager.async-search.test.ts @@ -7,8 +7,8 @@ import { getMemorySearchManager, type MemoryIndexManager } from "./index.js"; import { createOpenAIEmbeddingProviderMock } from "./test-embeddings-mock.js"; import { createMemoryManagerOrThrow } from "./test-manager.js"; -const embedBatch = vi.fn(async (_input: string[]) => [] as number[][]); -const embedQuery = vi.fn(async (_input: string) => [0.2, 0.2, 0.2] as number[]); +const embedBatch = vi.fn(async (_input: string[]): Promise => []); +const embedQuery = vi.fn(async (_input: string): Promise => [0.2, 0.2, 0.2]); vi.mock("./embeddings.js", () => ({ createEmbeddingProvider: async (_options: unknown) => @@ -61,7 +61,6 @@ describe("memory search async sync", () => { it("does not await sync when searching", async () => { const cfg = buildConfig(); - manager = await createMemoryManagerOrThrow(cfg); const pending = new Promise(() => {}); @@ -78,9 +77,9 @@ describe("memory search async sync", () => { it("waits for in-flight search sync during close", async () => { const cfg = buildConfig(); - let releaseSync!: (value?: void) => void; + let releaseSync = () => {}; const syncGate = new Promise((resolve) => { - releaseSync = resolve; + releaseSync = () => resolve(); }); embedBatch.mockImplementation(async (input: string[]) => { await syncGate; diff --git a/src/shared/chat-envelope.ts b/src/shared/chat-envelope.ts index c96a231af8b..409a41357a1 100644 --- a/src/shared/chat-envelope.ts +++ b/src/shared/chat-envelope.ts @@ -16,21 +16,6 @@ const ENVELOPE_CHANNELS = [ ]; const MESSAGE_ID_LINE = /^\s*\[message_id:\s*[^\]]+\]\s*$/i; -const INBOUND_METADATA_HEADERS = [ - "Conversation info (untrusted metadata):", - "Sender (untrusted metadata):", - "Thread starter (untrusted, for 
context):", - "Replied message (untrusted, for context):", - "Forwarded message context (untrusted metadata):", - "Chat history since last reply (untrusted, for context):", -]; -const REGEX_ESCAPE_RE = /[.*+?^${}()|[\]\\-]/g; -const INBOUND_METADATA_PREFIX_RE = new RegExp( - "^\\s*(?:" + - INBOUND_METADATA_HEADERS.map((header) => header.replace(REGEX_ESCAPE_RE, "\\$&")).join("|") + - ")\\r?\\n```json\\r?\\n[\\s\\S]*?\\r?\\n```(?:\\r?\\n)*", -); - function looksLikeEnvelopeHeader(header: string): boolean { if (/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}Z\b/.test(header)) { return true; @@ -61,15 +46,3 @@ export function stripMessageIdHints(text: string): string { const filtered = lines.filter((line) => !MESSAGE_ID_LINE.test(line)); return filtered.length === lines.length ? text : filtered.join("\n"); } - -export function stripInboundMetadataBlocks(text: string): string { - let remaining = text; - for (;;) { - const match = INBOUND_METADATA_PREFIX_RE.exec(remaining); - if (!match) { - break; - } - remaining = remaining.slice(match[0].length).replace(/^\r?\n+/, ""); - } - return remaining.trim(); -} diff --git a/src/tui/tui-formatters.ts b/src/tui/tui-formatters.ts index d4bca178b66..9d2ea82842e 100644 --- a/src/tui/tui-formatters.ts +++ b/src/tui/tui-formatters.ts @@ -1,5 +1,5 @@ import { formatRawAssistantErrorForUi } from "../agents/pi-embedded-helpers.js"; -import { stripInboundMetadataBlocks } from "../shared/chat-envelope.js"; +import { stripInboundMetadata } from "../auto-reply/reply/strip-inbound-meta.js"; import { stripAnsi } from "../terminal/ansi.js"; import { formatTokenCount } from "../utils/usage-format.js"; @@ -275,7 +275,7 @@ export function extractTextFromMessage( const text = extractTextBlocks(record.content, opts); if (text) { if (record.role === "user") { - return stripInboundMetadataBlocks(text); + return stripInboundMetadata(text); } return text; }