From 2bef2910f179b36d332a05d4921cbc72f9a38777 Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Sat, 28 Feb 2026 08:06:06 +0530 Subject: [PATCH] fix: preserve whitespace in telegram html retry chunking --- src/telegram/format.ts | 62 ++++++++++++++++++++++++++++- src/telegram/format.wrap-md.test.ts | 8 ++++ 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/src/telegram/format.ts b/src/telegram/format.ts index acefd8f75d9..f74b508b42d 100644 --- a/src/telegram/format.ts +++ b/src/telegram/format.ts @@ -258,11 +258,69 @@ function splitTelegramChunkByHtmlLimit( Number.isFinite(candidateLimit) && candidateLimit > 0 ? candidateLimit : Math.max(1, Math.floor(currentTextLength / 2)); - const split = chunkMarkdownIR(chunk, splitLimit); + const split = splitMarkdownIRPreserveWhitespace(chunk, splitLimit); if (split.length > 1) { return split; } - return chunkMarkdownIR(chunk, Math.max(1, Math.floor(currentTextLength / 2))); + return splitMarkdownIRPreserveWhitespace(chunk, Math.max(1, Math.floor(currentTextLength / 2))); +} + +/** Returns the style spans that overlap the half-open window [start, end), clipped to the window and rebased so that offset 0 corresponds to start. Spans outside the window, and spans that are empty after clipping, are dropped; all other span fields are copied unchanged. */ function sliceStyleSpans( + styles: MarkdownIR["styles"], + start: number, + end: number, +): MarkdownIR["styles"] { + return styles.flatMap((span) => { + if (span.end <= start || span.start >= end) { + return []; + } + const nextStart = Math.max(span.start, start) - start; + const nextEnd = Math.min(span.end, end) - start; + if (nextEnd <= nextStart) { + return []; + } + return [{ ...span, start: nextStart, end: nextEnd }]; + }); +} + +/** Link-span counterpart of sliceStyleSpans: keeps the links that overlap [start, end), clips them to the window and rebases their offsets to start; all other link fields are copied unchanged. NOTE(review): the body matches sliceStyleSpans apart from the element type, so one generic helper could serve both. */ function sliceLinkSpans( + links: MarkdownIR["links"], + start: number, + end: number, +): MarkdownIR["links"] { + return links.flatMap((link) => { + if (link.end <= start || link.start >= end) { + return []; + } + const nextStart = Math.max(link.start, start) - start; + const nextEnd = Math.min(link.end, end) - start; + if (nextEnd <= nextStart) { + return []; + } + return [{ ...link, start: nextStart, end: nextEnd }]; + }); +} + +/** Cuts ir into consecutive windows of at most max(1, floor(limit)) text units, slicing text, styles and links at the same offsets without any trimming, so the chunk texts concatenate back to ir.text. Returns [] when ir.text is empty and [ir] (the same object, not a copy) when the text already fits. NOTE(review): offsets are UTF-16 code units as used by String.prototype.slice, so a cut can land inside a surrogate pair; confirm whether the HTML renderer tolerates that. NOTE(review): a NaN limit survives Math.floor and Math.max and would produce a single chunk with empty text; confirm callers never pass one. */ function splitMarkdownIRPreserveWhitespace(ir: MarkdownIR, limit: number): 
MarkdownIR[] { + if (!ir.text) { + return []; + } + const normalizedLimit = Math.max(1, Math.floor(limit)); + /* NOTE(review): normalizedLimit <= 0 can never be true after the Math.max(1, ...) above, so only the length comparison decides this branch. */ if (normalizedLimit <= 0 || ir.text.length <= normalizedLimit) { + return [ir]; + } + const chunks: MarkdownIR[] = []; + let cursor = 0; + while (cursor < ir.text.length) { + /* Fixed-width window; the final one may be shorter. */ const end = Math.min(ir.text.length, cursor + normalizedLimit); + chunks.push({ + text: ir.text.slice(cursor, end), + styles: sliceStyleSpans(ir.styles, cursor, end), + links: sliceLinkSpans(ir.links, cursor, end), + }); + /* Resume exactly where this window ended so no text unit is skipped or repeated. */ cursor = end; + } + return chunks; } function renderTelegramChunksWithinHtmlLimit( diff --git a/src/telegram/format.wrap-md.test.ts b/src/telegram/format.wrap-md.test.ts index 8d003eba320..9921b669973 100644 --- a/src/telegram/format.wrap-md.test.ts +++ b/src/telegram/format.wrap-md.test.ts @@ -166,6 +166,14 @@ describe("markdownToTelegramChunks - file reference wrapping", () => { expect(chunks.map((chunk) => chunk.text).join("")).toBe(input); expect(chunks.every((chunk) => chunk.html.length <= 512)).toBe(true); }); + + it("preserves whitespace when html-limit retry splitting runs", () => { + /* Presumably "<" is escaped to an entity in chunk.html, making the HTML longer than the 5-character text and forcing the retry split; verify against the renderer. */ const input = "a < b"; + const chunks = markdownToTelegramChunks(input, 5); + expect(chunks.length).toBeGreaterThan(1); + expect(chunks.map((chunk) => chunk.text).join("")).toBe(input); + expect(chunks.every((chunk) => chunk.html.length <= 5)).toBe(true); + }); }); describe("edge cases", () => {