import { describe, expect, it } from "vitest";
import { EmbeddedBlockChunker } from "./pi-embedded-block-chunker.js";

/**
 * Builds a chunker that prefers paragraph breaks and has `flushOnParagraph`
 * enabled, using the supplied size limits.
 */
function createFlushOnParagraphChunker(params: { minChars: number; maxChars: number }) {
  const { minChars, maxChars } = params;
  return new EmbeddedBlockChunker({
    minChars,
    maxChars,
    breakPreference: "paragraph",
    flushOnParagraph: true,
  });
}

/**
 * Runs a single non-forced drain on the chunker and returns every chunk it
 * emitted, in emission order.
 */
function drainChunks(chunker: EmbeddedBlockChunker) {
  const emitted: string[] = [];
  chunker.drain({ force: false, emit: (piece) => emitted.push(piece) });
  return emitted;
}

/**
 * Feeds `text` to a flush-on-paragraph chunker (minChars 100, maxChars 200)
 * and asserts that only "First paragraph." is emitted while
 * "Second paragraph." remains buffered.
 */
function expectFlushAtFirstParagraphBreak(text: string) {
  const subject = createFlushOnParagraphChunker({ minChars: 100, maxChars: 200 });
  subject.append(text);

  expect(drainChunks(subject)).toEqual(["First paragraph."]);
  expect(subject.bufferedText).toBe("Second paragraph.");
}

describe("EmbeddedBlockChunker", () => {
  it("breaks at paragraph boundary right after fence close", () => {
    const input = [
      "Intro",
      "```js",
      "console.log('x')",
      "```",
      "",
      "After first line",
      "After second line",
    ].join("\n");

    // This case does not enable flushOnParagraph, so the chunker is built directly.
    const subject = new EmbeddedBlockChunker({
      minChars: 1,
      maxChars: 40,
      breakPreference: "paragraph",
    });
    subject.append(input);

    const emitted = drainChunks(subject);
    expect(emitted.length).toBe(1);

    // The single chunk must end with the closing fence and exclude the text after it.
    const [onlyChunk] = emitted;
    expect(onlyChunk).toContain("console.log");
    expect(onlyChunk).toMatch(/```\n?$/);
    expect(onlyChunk).not.toContain("After");
    expect(subject.bufferedText).toMatch(/^After/);
  });

  it("flushes paragraph boundaries before minChars when flushOnParagraph is set", () => {
    // The input is shorter than the helper's minChars of 100.
    expectFlushAtFirstParagraphBreak("First paragraph.\n\nSecond paragraph.");
  });

  it("treats blank lines with whitespace as paragraph boundaries when flushOnParagraph is set", () => {
    // The separating line holds a space instead of being empty.
    expectFlushAtFirstParagraphBreak("First paragraph.\n \nSecond paragraph.");
  });

  it("falls back to maxChars when flushOnParagraph is set and no paragraph break exists", () => {
    const subject = createFlushOnParagraphChunker({ minChars: 1, maxChars: 10 });
    subject.append("abcdefghijKLMNOP");

    // Expect exactly the first ten characters to be emitted; the rest stays buffered.
    expect(drainChunks(subject)).toEqual(["abcdefghij"]);
    expect(subject.bufferedText).toBe("KLMNOP");
  });

  it("clamps long paragraphs to maxChars when flushOnParagraph is set", () => {
    const subject = createFlushOnParagraphChunker({ minChars: 1, maxChars: 10 });
    subject.append("abcdefghijk\n\nRest");

    // The eleven-character paragraph is expected to be split so no chunk exceeds ten.
    const emitted = drainChunks(subject);
    const allWithinLimit = emitted.every((piece) => piece.length <= 10);
    expect(allWithinLimit).toBe(true);
    expect(emitted).toEqual(["abcdefghij", "k"]);
    expect(subject.bufferedText).toBe("Rest");
  });

  it("ignores paragraph breaks inside fences when flushOnParagraph is set", () => {
    const input = [
      "Intro",
      "```js",
      "const a = 1;",
      "",
      "const b = 2;",
      "```",
      "",
      "After fence",
    ].join("\n");

    const subject = createFlushOnParagraphChunker({ minChars: 100, maxChars: 200 });
    subject.append(input);

    // The blank line inside the fence must not split the chunk; only the one after the fence does.
    expect(drainChunks(subject)).toEqual(["Intro\n```js\nconst a = 1;\n\nconst b = 2;\n```"]);
    expect(subject.bufferedText).toBe("After fence");
  });
});