From 4b309e205e6999fda4243f96359bd76d33814113 Mon Sep 17 00:00:00 2001 From: Jerry-Xin Date: Mon, 16 Mar 2026 16:11:14 +0800 Subject: [PATCH 1/3] fix(memory): use offsetLines to report accurate snippet start positions Apply the already-computed offsetLines from extractRelevantSnippet to adjust startLine in all three search mappers (vector, keyword, and hybrid paths). This ensures the reported line range matches the actual snippet content rather than always pointing to the raw chunk start. --- src/memory/manager-search.ts | 46 ++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/src/memory/manager-search.ts b/src/memory/manager-search.ts index a3c8c06146a..4706a42a708 100644 --- a/src/memory/manager-search.ts +++ b/src/memory/manager-search.ts @@ -57,15 +57,18 @@ export async function searchVector(params: { source: SearchSource; dist: number; }>; - return rows.map((row) => ({ - id: row.id, - path: row.path, - startLine: row.start_line, - endLine: row.end_line, - score: 1 - row.dist, - snippet: truncateUtf16Safe(row.text, params.snippetMaxChars), - source: row.source, - })); + return rows.map((row) => { + const { snippet, offsetLines } = extractRelevantSnippet(row.text, params.queryText, params.snippetMaxChars); + return { + id: row.id, + path: row.path, + startLine: row.start_line + offsetLines, + endLine: row.end_line, + score: 1 - row.dist, + snippet, + source: row.source, + }; + }); } const candidates = listChunks({ @@ -82,6 +85,7 @@ export async function searchVector(params: { return scored .toSorted((a, b) => b.score - a.score) .slice(0, params.limit) +<<<<<<< HEAD .map((entry) => ({ id: entry.chunk.id, path: entry.chunk.path, @@ -91,6 +95,24 @@ export async function searchVector(params: { snippet: truncateUtf16Safe(entry.chunk.text, params.snippetMaxChars), source: entry.chunk.source, })); +======= + .map((entry) => { + const { snippet, offsetLines } = extractRelevantSnippet( + entry.chunk.text, + 
params.queryText, + params.snippetMaxChars, + ); + return { + id: entry.chunk.id, + path: entry.chunk.path, + startLine: entry.chunk.startLine + offsetLines, + endLine: entry.chunk.endLine, + score: entry.score, + snippet, + source: entry.chunk.source, + }; + }); +>>>>>>> 58e5ec73b (fix(memory): use offsetLines to report accurate snippet start positions) } export function listChunks(params: { @@ -177,10 +199,14 @@ export async function searchKeyword(params: { return rows.map((row) => { const textScore = params.bm25RankToScore(row.rank); +<<<<<<< HEAD +======= + const { snippet, offsetLines } = extractRelevantSnippet(row.text, params.query, params.snippetMaxChars); +>>>>>>> 58e5ec73b (fix(memory): use offsetLines to report accurate snippet start positions) return { id: row.id, path: row.path, - startLine: row.start_line, + startLine: row.start_line + offsetLines, endLine: row.end_line, score: textScore, textScore, From 6cb5cae6cf73467c6e446abf81778067ede10751 Mon Sep 17 00:00:00 2001 From: Jerry-Xin Date: Mon, 16 Mar 2026 18:07:05 +0800 Subject: [PATCH 2/3] fix(memory): recalculate window end after snapping to line boundary When windowStart is moved back to align with a line boundary, windowEnd must be recalculated to maintain the maxChars window size. Without this, the snippet could exceed maxChars and truncateUtf16Safe would trim the tail, potentially removing the matched query term. --- src/memory/manager-search.ts | 83 +++++++++++++++++++++++++++++------- 1 file changed, 67 insertions(+), 16 deletions(-) diff --git a/src/memory/manager-search.ts b/src/memory/manager-search.ts index 4706a42a708..6bb11c373b0 100644 --- a/src/memory/manager-search.ts +++ b/src/memory/manager-search.ts @@ -5,6 +5,72 @@ import { cosineSimilarity, parseEmbedding } from "./internal.js"; const vectorToBlob = (embedding: number[]): Buffer => Buffer.from(new Float32Array(embedding).buffer); +/** + * Extract a relevant snippet window around the query match in the text. 
/**
 * Extract a snippet of at most `maxChars` UTF-16 code units from `text`,
 * positioned around the first query term that occurs in it.
 *
 * Behaviour:
 * - If `text` already fits in `maxChars`, it is returned unchanged.
 * - The query is lower-cased and split on whitespace; terms of 1-2 characters
 *   are ignored. Terms are tried in query order and the first one found in
 *   the text (case-insensitively) anchors the window.
 * - If no term is found, the snippet is the beginning of the text.
 * - The window is roughly centred on the match. When a line break lies fewer
 *   than 100 characters before the window start, the window is moved back to
 *   start right after it, but only if the matched term still fits inside the
 *   window afterwards.
 * - The window edges never split a UTF-16 surrogate pair.
 *
 * @param text - Full chunk text to take the snippet from.
 * @param query - Raw query text; whitespace-separated terms are searched for.
 * @param maxChars - Maximum snippet length in UTF-16 code units.
 * @returns `snippet` is the extracted text; `offsetLines` is the number of
 *   newline characters that precede the snippet in `text`, i.e. how many
 *   lines the snippet start is below the first line of `text`.
 */
function extractRelevantSnippet(
  text: string,
  query: string,
  maxChars: number,
): { snippet: string; offsetLines: number } {
  // Short texts are returned whole; there is nothing to window.
  if (text.length <= maxChars) {
    return { snippet: text, offsetLines: 0 };
  }

  // A non-positive budget cannot hold any text. Bail out before the window
  // arithmetic below, which would otherwise produce an inverted range.
  if (maxChars <= 0) {
    return { snippet: "", offsetLines: 0 };
  }

  // NOTE(review): toLowerCase() can change the length of a few code points
  // (e.g. U+0130), in which case indices into `lowerText` drift from indices
  // into `text`. Rare enough to accept here, but worth knowing.
  const lowerText = text.toLowerCase();
  const queryTerms = query
    .toLowerCase()
    .split(/\s+/)
    .filter((term) => term.length > 2);

  // Locate the first query term (in query order) that appears in the text.
  let matchIndex = -1;
  let matchLength = 0;
  for (const term of queryTerms) {
    const idx = lowerText.indexOf(term);
    if (idx !== -1) {
      matchIndex = idx;
      matchLength = term.length;
      break;
    }
  }

  // No term found: fall back to the beginning of the text.
  if (matchIndex === -1) {
    return { snippet: truncateUtf16Safe(text, maxChars), offsetLines: 0 };
  }
  const matchEnd = matchIndex + matchLength;

  // Centre the window on the match start.
  const halfWindow = Math.floor(maxChars / 2);
  let windowStart = Math.max(0, matchIndex - halfWindow);
  let windowEnd = Math.min(text.length, windowStart + maxChars);

  // Near the end of the text, slide the window back so it stays full-size.
  if (windowEnd === text.length && windowEnd - windowStart < maxChars) {
    windowStart = Math.max(0, windowEnd - maxChars);
  }

  // Prefer starting at a line boundary for cleaner output. Skip the snap when
  // it would shift the window so far back that the matched term falls off the
  // end; showing the match matters more than a tidy first line.
  if (windowStart > 0) {
    const lineBreak = text.lastIndexOf("\n", windowStart);
    const snappedStart = lineBreak + 1;
    const matchStaysVisible = matchEnd <= snappedStart + maxChars;
    if (lineBreak !== -1 && windowStart - lineBreak < 100 && matchStaysVisible) {
      windowStart = snappedStart;
      // Recalculate the end so the window keeps its maxChars length.
      windowEnd = Math.min(text.length, windowStart + maxChars);
    }
  }

  // Keep both edges off the middle of a surrogate pair so the snippet never
  // begins or ends with half of an astral character.
  const isHighSurrogate = (index: number): boolean => {
    const code = text.charCodeAt(index);
    return code >= 0xd800 && code <= 0xdbff;
  };
  const isLowSurrogate = (index: number): boolean => {
    const code = text.charCodeAt(index);
    return code >= 0xdc00 && code <= 0xdfff;
  };
  if (
    windowStart > 0 &&
    windowStart < matchIndex &&
    isLowSurrogate(windowStart) &&
    isHighSurrogate(windowStart - 1)
  ) {
    windowStart += 1;
  }
  if (
    windowEnd < text.length &&
    windowEnd > windowStart &&
    isHighSurrogate(windowEnd - 1) &&
    isLowSurrogate(windowEnd)
  ) {
    windowEnd -= 1;
  }

  // Count the newlines that precede the window without allocating an array.
  let offsetLines = 0;
  for (
    let newlineAt = text.indexOf("\n");
    newlineAt !== -1 && newlineAt < windowStart;
    newlineAt = text.indexOf("\n", newlineAt + 1)
  ) {
    offsetLines += 1;
  }

  // The window is already within budget; the final truncate is a safety cap.
  const snippet = text.substring(windowStart, windowEnd);
  return { snippet: truncateUtf16Safe(snippet, maxChars), offsetLines };
}
SearchSource = string; export type SearchRowResult = { @@ -85,17 +151,6 @@ export async function searchVector(params: { return scored .toSorted((a, b) => b.score - a.score) .slice(0, params.limit) -<<<<<<< HEAD - .map((entry) => ({ - id: entry.chunk.id, - path: entry.chunk.path, - startLine: entry.chunk.startLine, - endLine: entry.chunk.endLine, - score: entry.score, - snippet: truncateUtf16Safe(entry.chunk.text, params.snippetMaxChars), - source: entry.chunk.source, - })); -======= .map((entry) => { const { snippet, offsetLines } = extractRelevantSnippet( entry.chunk.text, @@ -112,7 +167,6 @@ export async function searchVector(params: { source: entry.chunk.source, }; }); ->>>>>>> 58e5ec73b (fix(memory): use offsetLines to report accurate snippet start positions) } export function listChunks(params: { @@ -199,10 +253,7 @@ export async function searchKeyword(params: { return rows.map((row) => { const textScore = params.bm25RankToScore(row.rank); -<<<<<<< HEAD -======= const { snippet, offsetLines } = extractRelevantSnippet(row.text, params.query, params.snippetMaxChars); ->>>>>>> 58e5ec73b (fix(memory): use offsetLines to report accurate snippet start positions) return { id: row.id, path: row.path, @@ -210,7 +261,7 @@ export async function searchKeyword(params: { endLine: row.end_line, score: textScore, textScore, - snippet: truncateUtf16Safe(row.text, params.snippetMaxChars), + snippet, source: row.source, }; }); From 11c02404e4edd818c1d3b181b3f55412279342c4 Mon Sep 17 00:00:00 2001 From: Jerry-Xin Date: Mon, 16 Mar 2026 20:07:38 +0800 Subject: [PATCH 3/3] fix(memory): pass queryText to searchVector for snippet extraction --- src/memory/manager-search.ts | 1 + src/memory/manager.ts | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/memory/manager-search.ts b/src/memory/manager-search.ts index 6bb11c373b0..111b1061f81 100644 --- a/src/memory/manager-search.ts +++ b/src/memory/manager-search.ts @@ -88,6 +88,7 @@ export async function 
searchVector(params: { vectorTable: string; providerModel: string; queryVec: number[]; + queryText: string; limit: number; snippetMaxChars: number; ensureVectorReady: (dimensions: number) => Promise; diff --git a/src/memory/manager.ts b/src/memory/manager.ts index 61e2cd71af8..6e759682120 100644 --- a/src/memory/manager.ts +++ b/src/memory/manager.ts @@ -327,7 +327,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem const queryVec = await this.embedQueryWithTimeout(cleaned); const hasVector = queryVec.some((v) => v !== 0); const vectorResults = hasVector - ? await this.searchVector(queryVec, candidates).catch(() => []) + ? await this.searchVector(queryVec, cleaned, candidates).catch(() => []) : []; if (!hybrid.enabled || !this.fts.enabled || !this.fts.available) { @@ -368,6 +368,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem private async searchVector( queryVec: number[], + queryText: string, limit: number, ): Promise> { // This method should never be called without a provider @@ -379,6 +380,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem vectorTable: VECTOR_TABLE, providerModel: this.provider.model, queryVec, + queryText, limit, snippetMaxChars: SNIPPET_MAX_CHARS, ensureVectorReady: async (dimensions) => await this.ensureVectorReady(dimensions),