fix(memory): recalculate window end after snapping to line boundary

When windowStart is moved back to align with a line boundary,
windowEnd must be recalculated to maintain the maxChars window size.
Without this, the snippet could exceed maxChars and truncateUtf16Safe
would trim the tail, potentially removing the matched query term.
This commit is contained in:
Jerry-Xin 2026-03-16 18:07:05 +08:00
parent 4b309e205e
commit 6cb5cae6cf

View File

@ -5,6 +5,72 @@ import { cosineSimilarity, parseEmbedding } from "./internal.js";
const vectorToBlob = (embedding: number[]): Buffer =>
Buffer.from(new Float32Array(embedding).buffer);
/**
 * Extract a snippet of at most `maxChars` UTF-16 code units from `text`,
 * positioned around the first query term that occurs in it.
 *
 * Behaviour:
 * - Text that already fits in `maxChars` is returned unchanged.
 * - The query is lowercased and split on whitespace; terms of one or two
 *   characters are ignored. Terms are tried in query order and the first one
 *   found anywhere in the text (case-insensitively) anchors the window.
 * - With no matching term, the snippet is the truncated beginning of the text.
 * - Otherwise the window is roughly centered on the match, shifted left when
 *   it would run past the end of the text, and finally snapped back to the
 *   start of the current line when that line start is fewer than 100
 *   characters away AND the snap does not push the match out of the window.
 *
 * NOTE(review): match offsets are computed on the lowercased copy of `text`
 * and then applied to `text` itself; this assumes lowercasing does not change
 * the string length — confirm for non-ASCII input.
 *
 * @param text - Full chunk text to take the snippet from.
 * @param query - Raw search query.
 * @param maxChars - Maximum snippet length.
 * @returns The snippet plus `offsetLines`, the number of newline characters
 *   preceding the snippet in `text` (used by callers to adjust the reported
 *   start/end lines).
 */
function extractRelevantSnippet(
  text: string,
  query: string,
  maxChars: number,
): { snippet: string; offsetLines: number } {
  if (text.length <= maxChars) {
    return { snippet: text, offsetLines: 0 };
  }

  // Try to find the query (case-insensitive) in the text.
  const lowerText = text.toLowerCase();
  const queryTerms = query
    .toLowerCase()
    .split(/\s+/)
    .filter((term) => term.length > 2);

  // Locate the first term (in query order) that occurs in the text, and
  // remember its length so the window logic can keep the whole match visible.
  let matchIndex = -1;
  let matchLength = 0;
  for (const term of queryTerms) {
    const idx = lowerText.indexOf(term);
    if (idx !== -1) {
      matchIndex = idx;
      matchLength = term.length;
      break;
    }
  }

  // If no match found, fall back to the beginning of the text.
  if (matchIndex === -1) {
    return { snippet: truncateUtf16Safe(text, maxChars), offsetLines: 0 };
  }

  // Calculate the window, trying to center the match.
  const halfWindow = Math.floor(maxChars / 2);
  let windowStart = Math.max(0, matchIndex - halfWindow);
  let windowEnd = Math.min(text.length, windowStart + maxChars);

  // Near the end of the text the window would be shorter than maxChars;
  // pull the start back so the full budget is used.
  if (windowEnd === text.length && windowEnd - windowStart < maxChars) {
    windowStart = Math.max(0, windowEnd - maxChars);
  }

  // Try to start at a line boundary for cleaner output.
  if (windowStart > 0) {
    const lineStart = text.lastIndexOf("\n", windowStart);
    if (lineStart !== -1 && windowStart - lineStart < 100) {
      const snappedStart = lineStart + 1;
      // The end must follow the start to keep the window within maxChars.
      const snappedEnd = Math.min(text.length, snappedStart + maxChars);
      // Moving the start left also moves the end left. Only accept the snap
      // when it still covers as much of the match as the current window does;
      // otherwise the matched term would be cut out of the snippet.
      const coveredMatchEnd = Math.min(matchIndex + matchLength, windowEnd);
      if (snappedEnd >= coveredMatchEnd) {
        windowStart = snappedStart;
        windowEnd = snappedEnd;
      }
    }
  }

  // Count lines before the window to adjust startLine/endLine display.
  const textBeforeWindow = text.substring(0, windowStart);
  const offsetLines = (textBeforeWindow.match(/\n/g) ?? []).length;

  const snippet = text.substring(windowStart, windowEnd);
  return { snippet: truncateUtf16Safe(snippet, maxChars), offsetLines };
}
export type SearchSource = string;
export type SearchRowResult = {
@ -85,17 +151,6 @@ export async function searchVector(params: {
return scored
.toSorted((a, b) => b.score - a.score)
.slice(0, params.limit)
<<<<<<< HEAD
.map((entry) => ({
id: entry.chunk.id,
path: entry.chunk.path,
startLine: entry.chunk.startLine,
endLine: entry.chunk.endLine,
score: entry.score,
snippet: truncateUtf16Safe(entry.chunk.text, params.snippetMaxChars),
source: entry.chunk.source,
}));
=======
.map((entry) => {
const { snippet, offsetLines } = extractRelevantSnippet(
entry.chunk.text,
@ -112,7 +167,6 @@ export async function searchVector(params: {
source: entry.chunk.source,
};
});
>>>>>>> 58e5ec73b (fix(memory): use offsetLines to report accurate snippet start positions)
}
export function listChunks(params: {
@ -199,10 +253,7 @@ export async function searchKeyword(params: {
return rows.map((row) => {
const textScore = params.bm25RankToScore(row.rank);
<<<<<<< HEAD
=======
const { snippet, offsetLines } = extractRelevantSnippet(row.text, params.query, params.snippetMaxChars);
>>>>>>> 58e5ec73b (fix(memory): use offsetLines to report accurate snippet start positions)
return {
id: row.id,
path: row.path,
@ -210,7 +261,7 @@ export async function searchKeyword(params: {
endLine: row.end_line,
score: textScore,
textScore,
snippet: truncateUtf16Safe(row.text, params.snippetMaxChars),
snippet,
source: row.source,
};
});