diff --git a/src/memory/manager-search.ts b/src/memory/manager-search.ts index a3c8c06146a..111b1061f81 100644 --- a/src/memory/manager-search.ts +++ b/src/memory/manager-search.ts @@ -5,6 +5,72 @@ import { cosineSimilarity, parseEmbedding } from "./internal.js"; const vectorToBlob = (embedding: number[]): Buffer => Buffer.from(new Float32Array(embedding).buffer); +/** + * Extract a relevant snippet window around the query match in the text. + * If the query is found, returns a window centered on the match. + * Otherwise falls back to the beginning of the text. + */ +function extractRelevantSnippet( + text: string, + query: string, + maxChars: number, +): { snippet: string; offsetLines: number } { + if (text.length <= maxChars) { + return { snippet: text, offsetLines: 0 }; + } + + // Try to find the query (case-insensitive) in the text + const lowerText = text.toLowerCase(); + const queryTerms = query + .toLowerCase() + .split(/\s+/) + .filter((term) => term.length > 2); + + let matchIndex = -1; + + // Find the first matching term + for (const term of queryTerms) { + const idx = lowerText.indexOf(term); + if (idx !== -1) { + matchIndex = idx; + break; + } + } + + // If no match found, fall back to beginning + if (matchIndex === -1) { + return { snippet: truncateUtf16Safe(text, maxChars), offsetLines: 0 }; + } + + // Calculate window start, trying to center the match + const halfWindow = Math.floor(maxChars / 2); + let windowStart = Math.max(0, matchIndex - halfWindow); + let windowEnd = Math.min(text.length, windowStart + maxChars); + + // Adjust if we're near the end + if (windowEnd === text.length && windowEnd - windowStart < maxChars) { + windowStart = Math.max(0, windowEnd - maxChars); + } + + // Try to start at a line boundary for cleaner output + if (windowStart > 0) { + const lineStart = text.lastIndexOf("\n", windowStart); + if (lineStart !== -1 && windowStart - lineStart < 100) { + windowStart = lineStart + 1; + // Recalculate windowEnd to maintain maxChars length after snap + windowEnd = Math.min(text.length, windowStart + maxChars); + } + } + + // Count lines before the window to adjust startLine/endLine display + const textBeforeWindow = text.substring(0, windowStart); + const offsetLines = (textBeforeWindow.match(/\n/g) || []).length; + + const snippet = text.substring(windowStart, windowEnd); + return { snippet: truncateUtf16Safe(snippet, maxChars), offsetLines }; +} + + export type SearchSource = string; export type SearchRowResult = { @@ -22,6 +88,7 @@ export async function searchVector(params: { vectorTable: string; providerModel: string; queryVec: number[]; + queryText: string; limit: number; snippetMaxChars: number; ensureVectorReady: (dimensions: number) => Promise; @@ -57,15 +124,18 @@ export async function searchVector(params: { source: SearchSource; dist: number; }>; - return rows.map((row) => ({ - id: row.id, - path: row.path, - startLine: row.start_line, - endLine: row.end_line, - score: 1 - row.dist, - snippet: truncateUtf16Safe(row.text, params.snippetMaxChars), - source: row.source, - })); + return rows.map((row) => { + const { snippet, offsetLines } = extractRelevantSnippet(row.text, params.queryText, params.snippetMaxChars); + return { + id: row.id, + path: row.path, + startLine: row.start_line + offsetLines, + endLine: row.end_line, + score: 1 - row.dist, + snippet, + source: row.source, + }; + }); } const candidates = listChunks({ @@ -82,15 +152,22 @@ export async function searchVector(params: { return scored .toSorted((a, b) => b.score - a.score) .slice(0, params.limit) - .map((entry) => ({ - id: entry.chunk.id, - path: entry.chunk.path, - startLine: entry.chunk.startLine, - endLine: entry.chunk.endLine, - score: entry.score, - snippet: truncateUtf16Safe(entry.chunk.text, params.snippetMaxChars), - source: entry.chunk.source, - })); + .map((entry) => { + const { snippet, offsetLines } = extractRelevantSnippet( + entry.chunk.text, + params.queryText, + params.snippetMaxChars, + ); + return { + id: entry.chunk.id, + path: entry.chunk.path, + startLine: entry.chunk.startLine + offsetLines, + endLine: entry.chunk.endLine, + score: entry.score, + snippet, + source: entry.chunk.source, + }; + }); } export function listChunks(params: { @@ -177,14 +254,15 @@ export async function searchKeyword(params: { return rows.map((row) => { const textScore = params.bm25RankToScore(row.rank); + const { snippet, offsetLines } = extractRelevantSnippet(row.text, params.query, params.snippetMaxChars); return { id: row.id, path: row.path, - startLine: row.start_line, + startLine: row.start_line + offsetLines, endLine: row.end_line, score: textScore, textScore, - snippet: truncateUtf16Safe(row.text, params.snippetMaxChars), + snippet, source: row.source, }; }); diff --git a/src/memory/manager.ts b/src/memory/manager.ts index 93a2332c9a9..7bae48d8e5d 100644 --- a/src/memory/manager.ts +++ b/src/memory/manager.ts @@ -345,7 +345,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem const queryVec = await this.embedQueryWithTimeout(cleaned); const hasVector = queryVec.some((v) => v !== 0); const vectorResults = hasVector - ? await this.searchVector(queryVec, candidates).catch(() => []) + ? await this.searchVector(queryVec, cleaned, candidates).catch(() => []) : []; if (!hybrid.enabled || !this.fts.enabled || !this.fts.available) { @@ -386,6 +386,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem private async searchVector( queryVec: number[], + queryText: string, limit: number, ): Promise> { // This method should never be called without a provider @@ -397,6 +398,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem vectorTable: VECTOR_TABLE, providerModel: this.provider.model, queryVec, + queryText, limit, snippetMaxChars: SNIPPET_MAX_CHARS, ensureVectorReady: async (dimensions) => await this.ensureVectorReady(dimensions),