// Source: openclaw/src/auto-reply/chunk.test.ts (398 lines, 14 KiB, TypeScript)
import { describe, expect, it } from "vitest";
import {
  chunkByNewline,
  chunkMarkdownText,
  chunkMarkdownTextWithMode,
  chunkText,
  chunkTextWithMode,
  resolveChunkMode,
  resolveTextChunkLimit,
} from "./chunk.js";
/**
 * Asserts that no chunk ends while a fenced code block is still open.
 *
 * A line counts as a fence when it has at most three leading spaces followed
 * by a run of three or more backticks or tildes. The first fence seen in a
 * chunk opens a block; a later fence closes it only when it uses the same
 * marker character and is at least as long as the opener. Any other fence
 * line met while a block is open is ignored.
 *
 * @param chunks - Chunks to inspect; each one is checked independently.
 */
function expectFencesBalanced(chunks: string[]) {
  const fenceLine = /^( {0,3})(`{3,}|~{3,})(.*)$/;
  for (const chunk of chunks) {
    // Keep only the marker runs (capture group 2) of lines that look like fences.
    const markers = chunk.split("\n").flatMap((line) => {
      const hit = fenceLine.exec(line);
      return hit ? [hit[2]] : [];
    });

    let pending: { markerChar: string; markerLen: number } | null = null;
    for (const marker of markers) {
      if (pending === null) {
        pending = { markerChar: marker[0], markerLen: marker.length };
      } else if (pending.markerChar === marker[0] && marker.length >= pending.markerLen) {
        pending = null;
      }
    }
    expect(pending).toBe(null);
  }
}
// Table-driven case shape, runner and shared fixtures.
/** One table-driven chunking scenario. */
interface ChunkCase {
  /** Title used when the case is registered as a test. */
  name: string;
  /** Input handed to the chunker. */
  text: string;
  /** Limit handed to the chunker alongside `text`. */
  limit: number;
  /** Exact chunks the chunker is expected to return, in order. */
  expected: string[];
}
/**
 * Registers one `it` test per case; each test calls `chunker` with the case's
 * text and limit and expects the result to equal the case's `expected` chunks.
 *
 * @param chunker - Function under test.
 * @param cases - Scenarios to register, in order.
 */
function runChunkCases(chunker: (text: string, limit: number) => string[], cases: ChunkCase[]) {
  cases.forEach(({ name, text, limit, expected }) => {
    it(name, () => {
      const actual = chunker(text, limit);
      expect(actual).toEqual(expected);
    });
  });
}
/**
 * Shared scenarios about splitting around parentheses. Each entry is an exact
 * input/limit/output triple consumed through `runChunkCases`.
 */
const parentheticalCases: ChunkCase[] = [
  // The 39-character input exceeds the limit and the 35th character falls
  // inside the parenthetical, so the expected break is before the "(".
  {
    name: "keeps parenthetical phrases together",
    limit: 35,
    text: "Heads up now (Though now I'm curious)ok",
    expected: ["Heads up now", "(Though now I'm curious)ok"],
  },
  // The nested group ends at character 25, just inside the limit of 26.
  {
    name: "handles nested parentheses",
    limit: 26,
    text: "Hello (outer (inner) end) world",
    expected: ["Hello (outer (inner) end)", "world"],
  },
  // A stray ")" with no opener; the expected break is the space right after it.
  {
    name: "ignores unmatched closing parentheses",
    limit: 12,
    text: "Hello) world (ok)",
    expected: ["Hello)", "world (ok)"],
  },
];
// chunkText: text under the limit stays whole; otherwise the break is expected
// at a newline, else at whitespace, else as a hard cut at the limit.
describe("chunkText", () => {
  it("keeps multi-line text in one chunk when under limit", () => {
    const text = "Line one\n\nLine two\n\nLine three";
    const chunks = chunkText(text, 1600);
    expect(chunks).toEqual([text]);
  });

  it("splits only when text exceeds the limit", () => {
    const part = "a".repeat(20);
    const text = part.repeat(5); // 100 chars
    const chunks = chunkText(text, 60);
    expect(chunks.length).toBe(2);
    expect(chunks[0].length).toBe(60);
    expect(chunks[1].length).toBe(40);
    expect(chunks.join("")).toBe(text);
  });

  it("prefers breaking at a newline before the limit", () => {
    const text = `paragraph one line\n\nparagraph two starts here and continues`;
    const chunks = chunkText(text, 40);
    expect(chunks).toEqual(["paragraph one line", "paragraph two starts here and continues"]);
  });

  it("otherwise breaks at the last whitespace under the limit", () => {
    const text = "This is a message that should break nicely near a word boundary.";
    const limit = 30;
    const chunks = chunkText(text, limit);
    // The input is 64 chars, so at least three chunks are needed at this
    // limit; check every chunk rather than only the first two.
    expect(chunks.length).toBeGreaterThan(1);
    for (const chunk of chunks) {
      expect(chunk.length).toBeLessThanOrEqual(limit);
    }
    // Apart from whitespace at the break points, no content may be lost.
    expect(chunks.join(" ").replace(/\s+/g, " ").trim()).toBe(text.replace(/\s+/g, " ").trim());
  });

  it("falls back to a hard break when no whitespace is present", () => {
    const text = "Supercalifragilisticexpialidocious"; // 34 chars
    const chunks = chunkText(text, 10);
    expect(chunks).toEqual(["Supercalif", "ragilistic", "expialidoc", "ious"]);
  });

  // Only the first shared parenthetical case is run against chunkText.
  runChunkCases(chunkText, [parentheticalCases[0]]);
});
// resolveTextChunkLimit: default limit, caller-supplied fallback, and
// provider-level and account-level `textChunkLimit` overrides from config.
describe("resolveTextChunkLimit", () => {
  it("uses per-provider defaults", () => {
    expect(resolveTextChunkLimit(undefined, "whatsapp")).toBe(4000);
    expect(resolveTextChunkLimit(undefined, "telegram")).toBe(4000);
    expect(resolveTextChunkLimit(undefined, "slack")).toBe(4000);
    expect(resolveTextChunkLimit(undefined, "signal")).toBe(4000);
    expect(resolveTextChunkLimit(undefined, "imessage")).toBe(4000);
    expect(resolveTextChunkLimit(undefined, "discord")).toBe(4000);
    // With no config, an explicit fallbackLimit is returned instead of 4000.
    expect(
      resolveTextChunkLimit(undefined, "discord", undefined, {
        fallbackLimit: 2000,
      }),
    ).toBe(2000);
  });

  it("supports provider overrides", () => {
    const cfg = { channels: { telegram: { textChunkLimit: 1234 } } };
    expect(resolveTextChunkLimit(cfg, "whatsapp")).toBe(4000);
    expect(resolveTextChunkLimit(cfg, "telegram")).toBe(1234);
  });

  it("prefers account overrides when provided", () => {
    const cfg = {
      channels: {
        telegram: {
          textChunkLimit: 2000,
          accounts: {
            default: { textChunkLimit: 1234 },
            primary: { textChunkLimit: 777 },
          },
        },
      },
    };
    // Both account ids resolve to their own value, not the provider-level 2000.
    expect(resolveTextChunkLimit(cfg, "telegram", "primary")).toBe(777);
    expect(resolveTextChunkLimit(cfg, "telegram", "default")).toBe(1234);
  });

  it("uses the matching provider override", () => {
    const cfg = {
      channels: {
        discord: { textChunkLimit: 111 },
        slack: { textChunkLimit: 222 },
      },
    };
    expect(resolveTextChunkLimit(cfg, "discord")).toBe(111);
    expect(resolveTextChunkLimit(cfg, "slack")).toBe(222);
    // A provider without an entry in cfg keeps the default.
    expect(resolveTextChunkLimit(cfg, "telegram")).toBe(4000);
  });
});
// chunkMarkdownText: fenced code blocks must stay balanced in every chunk,
// whether the fence fits in one chunk or has to be split.
describe("chunkMarkdownText", () => {
  /**
   * Chunks `text` at `limit`, requires more than one chunk, and checks that
   * every chunk fits the limit, starts with `opener`, ends (ignoring trailing
   * whitespace) with `closer`, and leaves no fence open.
   */
  const expectEveryChunkFenced = (
    text: string,
    limit: number,
    opener: string,
    closer: string,
  ) => {
    const chunks = chunkMarkdownText(text, limit);
    expect(chunks.length).toBeGreaterThan(1);
    for (const chunk of chunks) {
      expect(chunk.length).toBeLessThanOrEqual(limit);
      expect(chunk.startsWith(opener)).toBe(true);
      expect(chunk.trimEnd().endsWith(closer)).toBe(true);
    }
    expectFencesBalanced(chunks);
  };

  it("keeps fenced blocks intact when a safe break exists", () => {
    const fence = "```bash\nline1\nline2\n```";
    const text = ["p".repeat(60), fence, "s".repeat(60)].join("\n\n");
    const chunks = chunkMarkdownText(text, 40);
    const fenceChunkFound = chunks.some((chunk) => chunk.trimEnd() === fence);
    expect(fenceChunkFound).toBe(true);
    expectFencesBalanced(chunks);
  });

  it("reopens fenced blocks when forced to split inside them", () => {
    expectEveryChunkFenced(`\`\`\`txt\n${"a".repeat(500)}\n\`\`\``, 120, "```txt\n", "```");
  });

  it("supports tilde fences", () => {
    expectEveryChunkFenced(`~~~sh\n${"x".repeat(600)}\n~~~`, 140, "~~~sh\n", "~~~");
  });

  it("supports longer fence markers for close", () => {
    expectEveryChunkFenced(`\`\`\`\`md\n${"y".repeat(600)}\n\`\`\`\``, 140, "````md\n", "````");
  });

  it("preserves indentation for indented fences", () => {
    expectEveryChunkFenced(` \`\`\`js\n ${"z".repeat(600)}\n \`\`\``, 160, " ```js\n", " ```");
  });

  it("never produces an empty fenced chunk when splitting", () => {
    const fenceLine = /^( {0,3})(`{3,}|~{3,})(.*)$/;
    const text = `\`\`\`txt\n${"a".repeat(300)}\n\`\`\``;
    for (const chunk of chunkMarkdownText(text, 60)) {
      // Drop the fence lines; something non-blank must be left over.
      const body = chunk
        .split("\n")
        .filter((line) => !fenceLine.test(line))
        .join("\n");
      expect(body.trim()).not.toBe("");
    }
  });

  runChunkCases(chunkMarkdownText, parentheticalCases);

  it("hard-breaks when a parenthetical exceeds the limit", () => {
    const text = `(${"a".repeat(80)})`;
    const chunks = chunkMarkdownText(text, 20);
    expect(chunks[0]?.length).toBe(20);
    expect(chunks.join("")).toBe(text);
  });
});
// chunkByNewline: one chunk per line, with blank-line handling, optional
// trimming, and a length-based fallback for lines over the limit.
describe("chunkByNewline", () => {
  it("splits text on newlines", () => {
    const text = "Line one\nLine two\nLine three";
    const chunks = chunkByNewline(text, 1000);
    expect(chunks).toEqual(["Line one", "Line two", "Line three"]);
  });

  it("preserves blank lines by folding into the next chunk", () => {
    const text = "Line one\n\n\nLine two\n\nLine three";
    const chunks = chunkByNewline(text, 1000);
    expect(chunks).toEqual(["Line one", "\n\nLine two", "\nLine three"]);
  });

  it("trims whitespace from lines", () => {
    const text = " Line one \n Line two ";
    const chunks = chunkByNewline(text, 1000);
    expect(chunks).toEqual(["Line one", "Line two"]);
  });

  it("preserves leading blank lines on the first chunk", () => {
    const text = "\n\nLine one\nLine two";
    const chunks = chunkByNewline(text, 1000);
    expect(chunks).toEqual(["\n\nLine one", "Line two"]);
  });

  it("falls back to length-based for long lines", () => {
    const text = "Short line\n" + "a".repeat(50) + "\nAnother short";
    const chunks = chunkByNewline(text, 20);
    expect(chunks[0]).toBe("Short line");
    // Long line gets split into multiple chunks
    expect(chunks[1].length).toBe(20);
    expect(chunks[2].length).toBe(20);
    expect(chunks[3].length).toBe(10);
    expect(chunks[4]).toBe("Another short");
  });

  it("does not split long lines when splitLongLines is false", () => {
    const text = "a".repeat(50);
    const chunks = chunkByNewline(text, 20, { splitLongLines: false });
    expect(chunks).toEqual([text]);
  });

  it("returns empty array for empty input", () => {
    expect(chunkByNewline("", 100)).toEqual([]);
  });

  it("returns empty array for whitespace-only input", () => {
    expect(chunkByNewline(" \n\n ", 100)).toEqual([]);
  });

  it("preserves trailing blank lines on the last chunk", () => {
    const text = "Line one\n\n";
    const chunks = chunkByNewline(text, 1000);
    expect(chunks).toEqual(["Line one\n\n"]);
  });

  it("keeps whitespace when trimLines is false", () => {
    const text = " indented line \nNext";
    const chunks = chunkByNewline(text, 1000, { trimLines: false });
    expect(chunks).toEqual([" indented line ", "Next"]);
  });
});
// chunkTextWithMode: "length" mode keeps text under the limit whole, while
// "newline" mode splits on blank lines but not on single newlines.
describe("chunkTextWithMode", () => {
  it("uses length-based chunking for length mode", () => {
    const text = "Line one\nLine two";
    const chunks = chunkTextWithMode(text, 1000, "length");
    expect(chunks).toEqual(["Line one\nLine two"]);
  });

  it("uses paragraph-based chunking for newline mode", () => {
    const text = "Line one\nLine two";
    const chunks = chunkTextWithMode(text, 1000, "newline");
    expect(chunks).toEqual(["Line one\nLine two"]);
  });

  it("splits on blank lines for newline mode", () => {
    const text = "Para one\n\nPara two";
    const chunks = chunkTextWithMode(text, 1000, "newline");
    expect(chunks).toEqual(["Para one", "Para two"]);
  });
});
// chunkMarkdownTextWithMode: "length" mode must match chunkMarkdownText;
// "newline" mode splits on blank lines except inside fenced code blocks.
describe("chunkMarkdownTextWithMode", () => {
  it("uses markdown-aware chunking for length mode", () => {
    const text = "Line one\nLine two";
    expect(chunkMarkdownTextWithMode(text, 1000, "length")).toEqual(chunkMarkdownText(text, 1000));
  });

  it("uses paragraph-based chunking for newline mode", () => {
    const text = "Line one\nLine two";
    expect(chunkMarkdownTextWithMode(text, 1000, "newline")).toEqual(["Line one\nLine two"]);
  });

  it("splits on blank lines for newline mode", () => {
    const text = "Para one\n\nPara two";
    expect(chunkMarkdownTextWithMode(text, 1000, "newline")).toEqual(["Para one", "Para two"]);
  });

  it("does not split single-newline code fences in newline mode", () => {
    const text = "```js\nconst a = 1;\nconst b = 2;\n```\nAfter";
    expect(chunkMarkdownTextWithMode(text, 1000, "newline")).toEqual([text]);
  });

  it("defers long markdown paragraphs to markdown chunking in newline mode", () => {
    // A single fenced paragraph far longer than the limit of 40.
    const text = `\`\`\`js\n${"const a = 1;\n".repeat(20)}\`\`\``;
    expect(chunkMarkdownTextWithMode(text, 40, "newline")).toEqual(chunkMarkdownText(text, 40));
  });

  it("does not split on blank lines inside a fenced code block", () => {
    const text = "```python\ndef my_function():\n x = 1\n\n y = 2\n return x + y\n```";
    expect(chunkMarkdownTextWithMode(text, 1000, "newline")).toEqual([text]);
  });

  it("splits on blank lines between a code fence and following paragraph", () => {
    const fence = "```python\ndef my_function():\n x = 1\n\n y = 2\n return x + y\n```";
    const text = `${fence}\n\nAfter`;
    expect(chunkMarkdownTextWithMode(text, 1000, "newline")).toEqual([fence, "After"]);
  });
});
// resolveChunkMode: "length" by default, with provider-level and
// account-level `chunkMode` overrides read from config.
describe("resolveChunkMode", () => {
  it("returns length as default", () => {
    expect(resolveChunkMode(undefined, "telegram")).toBe("length");
    expect(resolveChunkMode({}, "discord")).toBe("length");
    expect(resolveChunkMode(undefined, "bluebubbles")).toBe("length");
  });

  it("returns length for internal channel", () => {
    // A newline override on another channel must not affect "__internal__".
    const cfg = { channels: { bluebubbles: { chunkMode: "newline" as const } } };
    expect(resolveChunkMode(cfg, "__internal__")).toBe("length");
  });

  it("supports provider-level overrides for slack", () => {
    const cfg = { channels: { slack: { chunkMode: "newline" as const } } };
    expect(resolveChunkMode(cfg, "slack")).toBe("newline");
    expect(resolveChunkMode(cfg, "discord")).toBe("length");
  });

  it("supports account-level overrides for slack", () => {
    const cfg = {
      channels: {
        slack: {
          chunkMode: "length" as const,
          accounts: {
            primary: { chunkMode: "newline" as const },
          },
        },
      },
    };
    expect(resolveChunkMode(cfg, "slack", "primary")).toBe("newline");
    // An account id with no entry falls back to the provider-level mode.
    expect(resolveChunkMode(cfg, "slack", "other")).toBe("length");
  });
});