diff --git a/src/memory/manager-search.ts b/src/memory/manager-search.ts index 4706a42a708..6bb11c373b0 100644 --- a/src/memory/manager-search.ts +++ b/src/memory/manager-search.ts @@ -5,6 +5,72 @@ import { cosineSimilarity, parseEmbedding } from "./internal.js"; const vectorToBlob = (embedding: number[]): Buffer => Buffer.from(new Float32Array(embedding).buffer); +/** + * Extract a relevant snippet window around the query match in the text. + * If the query is found, returns a window centered on the match. + * Otherwise falls back to the beginning of the text. + */ +function extractRelevantSnippet( + text: string, + query: string, + maxChars: number, +): { snippet: string; offsetLines: number } { + if (text.length <= maxChars) { + return { snippet: text, offsetLines: 0 }; + } + + // Try to find the query (case-insensitive) in the text + const lowerText = text.toLowerCase(); + const queryTerms = query + .toLowerCase() + .split(/\s+/) + .filter((term) => term.length > 2); + + let matchIndex = -1; + + // Find the first matching term + for (const term of queryTerms) { + const idx = lowerText.indexOf(term); + if (idx !== -1) { + matchIndex = idx; + break; + } + } + + // If no match found, fall back to beginning + if (matchIndex === -1) { + return { snippet: truncateUtf16Safe(text, maxChars), offsetLines: 0 }; + } + + // Calculate window start, trying to center the match + const halfWindow = Math.floor(maxChars / 2); + let windowStart = Math.max(0, matchIndex - halfWindow); + let windowEnd = Math.min(text.length, windowStart + maxChars); + + // Adjust if we're near the end + if (windowEnd === text.length && windowEnd - windowStart < maxChars) { + windowStart = Math.max(0, windowEnd - maxChars); + } + + // Try to start at a line boundary for cleaner output + if (windowStart > 0) { + const lineStart = text.lastIndexOf("\n", windowStart); + if (lineStart !== -1 && windowStart - lineStart < 100) { + windowStart = lineStart + 1; + // Recalculate windowEnd to maintain maxChars length after snap + windowEnd = Math.min(text.length, windowStart + maxChars); + } + } + + // Count lines before the window to adjust startLine/endLine display + const textBeforeWindow = text.substring(0, windowStart); + const offsetLines = (textBeforeWindow.match(/\n/g) || []).length; + + const snippet = text.substring(windowStart, windowEnd); + return { snippet: truncateUtf16Safe(snippet, maxChars), offsetLines }; +} + + export type SearchSource = string; export type SearchRowResult = { @@ -85,17 +151,6 @@ export async function searchVector(params: { return scored .toSorted((a, b) => b.score - a.score) .slice(0, params.limit) -<<<<<<< HEAD - .map((entry) => ({ - id: entry.chunk.id, - path: entry.chunk.path, - startLine: entry.chunk.startLine, - endLine: entry.chunk.endLine, - score: entry.score, - snippet: truncateUtf16Safe(entry.chunk.text, params.snippetMaxChars), - source: entry.chunk.source, - })); -======= .map((entry) => { const { snippet, offsetLines } = extractRelevantSnippet( entry.chunk.text, @@ -112,7 +167,6 @@ export async function searchVector(params: { source: entry.chunk.source, }; }); ->>>>>>> 58e5ec73b (fix(memory): use offsetLines to report accurate snippet start positions) } export function listChunks(params: { @@ -199,10 +253,7 @@ export async function searchKeyword(params: { return rows.map((row) => { const textScore = params.bm25RankToScore(row.rank); -<<<<<<< HEAD -======= const { snippet, offsetLines } = extractRelevantSnippet(row.text, params.query, params.snippetMaxChars); ->>>>>>> 58e5ec73b (fix(memory): use offsetLines to report accurate snippet start positions) return { id: row.id, path: row.path, @@ -210,7 +261,7 @@ export async function searchKeyword(params: { endLine: row.end_line, score: textScore, textScore, - snippet: truncateUtf16Safe(row.text, params.snippetMaxChars), + snippet, source: row.source, }; });