/**
 * Extract a snippet of at most `maxChars` UTF-16 code units from `text`,
 * positioned around the first query term that occurs in it.
 *
 * Behavior:
 * - If `text` already fits in `maxChars`, it is returned unchanged.
 * - The query is lower-cased and split on whitespace; terms of two characters
 *   or fewer are ignored. Terms are tried in query order and the first
 *   occurrence of the first term found anchors the window.
 * - If no term is found, the beginning of `text` is returned (truncated).
 * - Otherwise the window is roughly centered on the match, pulled back to the
 *   start of its line when that line start is fewer than 100 characters away
 *   AND the matched term still fits inside the window after the move.
 * - Window edges never split a UTF-16 surrogate pair.
 *
 * @param text Full chunk text to cut the snippet from.
 * @param query Raw search query; matched case-insensitively, term by term.
 * @param maxChars Maximum snippet length in UTF-16 code units.
 * @returns The snippet and `offsetLines`, the number of newline characters
 *   that precede the snippet in `text` (0 when the snippet starts at the
 *   beginning), so callers can shift reported line numbers.
 */
function extractRelevantSnippet(
  text: string,
  query: string,
  maxChars: number,
): { snippet: string; offsetLines: number } {
  // Short texts need no windowing at all.
  if (text.length <= maxChars) {
    return { snippet: text, offsetLines: 0 };
  }

  // NOTE(review): toLowerCase() can change the length of a few characters
  // (e.g. U+0130), which would shift indices relative to `text` — rare, but
  // worth confirming if such input is expected.
  const lowerText = text.toLowerCase();
  const queryTerms = query
    .toLowerCase()
    .split(/\s+/)
    .filter((term) => term.length > 2);

  // Locate the first query term (in query order) that appears in the text.
  let matchIndex = -1;
  let matchLength = 0;
  for (const term of queryTerms) {
    const idx = lowerText.indexOf(term);
    if (idx !== -1) {
      matchIndex = idx;
      matchLength = term.length;
      break;
    }
  }

  // No term found: fall back to the head of the text.
  // truncateUtf16Safe is defined outside this block; presumably it caps the
  // length without splitting a surrogate pair — confirm against its source.
  if (matchIndex === -1) {
    return { snippet: truncateUtf16Safe(text, maxChars), offsetLines: 0 };
  }

  const isHighSurrogate = (code: number): boolean => code >= 0xd800 && code <= 0xdbff;
  const isLowSurrogate = (code: number): boolean => code >= 0xdc00 && code <= 0xdfff;

  // Center the window on the match.
  const halfWindow = Math.floor(maxChars / 2);
  let windowStart = Math.max(0, matchIndex - halfWindow);
  let windowEnd = Math.min(text.length, windowStart + maxChars);

  // Near the end of the text the window would be short; slide it left so it
  // still spans maxChars.
  if (windowEnd === text.length && windowEnd - windowStart < maxChars) {
    windowStart = Math.max(0, windowEnd - maxChars);
  }

  // Prefer starting on a line boundary for cleaner output, but only when the
  // boundary is close and the matched term stays inside the window. Without
  // the containment check a small maxChars lets the snap push the match out.
  const maxSnapDistance = 100;
  if (windowStart > 0) {
    const lineStart = text.lastIndexOf("\n", windowStart);
    if (lineStart !== -1 && windowStart - lineStart < maxSnapDistance) {
      const snappedStart = lineStart + 1;
      const matchStaysVisible =
        matchIndex >= snappedStart && matchIndex + matchLength <= snappedStart + maxChars;
      if (matchStaysVisible) {
        windowStart = snappedStart;
        // Keep the window at maxChars after moving its start.
        windowEnd = Math.min(text.length, windowStart + maxChars);
      }
    }
  }

  // Do not begin in the middle of a surrogate pair.
  if (
    windowStart > 0 &&
    windowStart < windowEnd &&
    isLowSurrogate(text.charCodeAt(windowStart)) &&
    isHighSurrogate(text.charCodeAt(windowStart - 1))
  ) {
    windowStart += 1;
  }

  // Do not end in the middle of a surrogate pair either.
  if (
    windowEnd < text.length &&
    windowEnd > windowStart &&
    isHighSurrogate(text.charCodeAt(windowEnd - 1)) &&
    isLowSurrogate(text.charCodeAt(windowEnd))
  ) {
    windowEnd -= 1;
  }

  // Count newlines before the window so callers can adjust line numbers.
  let offsetLines = 0;
  for (let i = 0; i < windowStart; i++) {
    if (text.charCodeAt(i) === 10) {
      offsetLines += 1;
    }
  }

  const snippet = text.substring(windowStart, windowEnd);
  return { snippet: truncateUtf16Safe(snippet, maxChars), offsetLines };
}
scored .toSorted((a, b) => b.score - a.score) .slice(0, params.limit) -<<<<<<< HEAD - .map((entry) => ({ - id: entry.chunk.id, - path: entry.chunk.path, - startLine: entry.chunk.startLine, - endLine: entry.chunk.endLine, - score: entry.score, - snippet: truncateUtf16Safe(entry.chunk.text, params.snippetMaxChars), - source: entry.chunk.source, - })); -======= .map((entry) => { const { snippet, offsetLines } = extractRelevantSnippet( entry.chunk.text, @@ -112,7 +167,6 @@ export async function searchVector(params: { source: entry.chunk.source, }; }); ->>>>>>> 58e5ec73b (fix(memory): use offsetLines to report accurate snippet start positions) } export function listChunks(params: { @@ -199,10 +253,7 @@ export async function searchKeyword(params: { return rows.map((row) => { const textScore = params.bm25RankToScore(row.rank); -<<<<<<< HEAD -======= const { snippet, offsetLines } = extractRelevantSnippet(row.text, params.query, params.snippetMaxChars); ->>>>>>> 58e5ec73b (fix(memory): use offsetLines to report accurate snippet start positions) return { id: row.id, path: row.path, @@ -210,7 +261,7 @@ export async function searchKeyword(params: { endLine: row.end_line, score: textScore, textScore, - snippet: truncateUtf16Safe(row.text, params.snippetMaxChars), + snippet, source: row.source, }; });