Merge 11c02404e4edd818c1d3b181b3f55412279342c4 into 8a05c05596ca9ba0735dafd8e359885de4c2c969

This commit is contained in:
Jerry-Xin 2026-03-21 14:04:07 +08:00 committed by GitHub
commit 2ec94df9f1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 101 additions and 21 deletions

View File

@ -5,6 +5,72 @@ import { cosineSimilarity, parseEmbedding } from "./internal.js";
/**
 * Packs an embedding into a Node `Buffer` backed by its 32-bit float encoding.
 *
 * Each number is narrowed to a 32-bit float by `Float32Array`; the returned
 * buffer wraps that typed array's underlying `ArrayBuffer`.
 */
const vectorToBlob = (embedding: number[]): Buffer => {
  const floats = new Float32Array(embedding);
  return Buffer.from(floats.buffer);
};
/**
 * Extract a snippet of at most `maxChars` UTF-16 code units from `text`,
 * positioned around the first query term that occurs in it.
 *
 * - Text that already fits in `maxChars` is returned unchanged.
 * - Query terms are the whitespace-separated, lower-cased pieces of `query`
 *   longer than two characters; they are tried in query order and the first
 *   one found (case-insensitively) anchors the window.
 * - If no term is found, the snippet is the beginning of the text.
 *
 * @param text - Full chunk text to excerpt.
 * @param query - Raw search query.
 * @param maxChars - Maximum snippet length in UTF-16 code units.
 * @returns The snippet and `offsetLines`, the number of newline characters in
 *   `text` that precede the snippet (0 when the snippet starts at the top).
 */
function extractRelevantSnippet(
  text: string,
  query: string,
  maxChars: number,
): { snippet: string; offsetLines: number } {
  if (text.length <= maxChars) {
    return { snippet: text, offsetLines: 0 };
  }

  // NOTE: indices found in the lower-cased copy are applied to the original
  // text. This relies on lower-casing not changing the string length, which
  // holds for most text but not for a few characters (e.g. U+0130).
  const lowerText = text.toLowerCase();
  const queryTerms = query
    .toLowerCase()
    .split(/\s+/)
    .filter((term) => term.length > 2);

  // Find the first query term (in query order) that occurs in the text.
  let matchIndex = -1;
  let matchLength = 0;
  for (const term of queryTerms) {
    const idx = lowerText.indexOf(term);
    if (idx !== -1) {
      matchIndex = idx;
      matchLength = term.length;
      break;
    }
  }

  // If no match found, fall back to the beginning of the text.
  if (matchIndex === -1) {
    return { snippet: truncateUtf16Safe(text, maxChars), offsetLines: 0 };
  }

  // Center the window on the match, clamped to the text bounds.
  const halfWindow = Math.floor(maxChars / 2);
  let windowStart = Math.max(0, matchIndex - halfWindow);
  let windowEnd = Math.min(text.length, windowStart + maxChars);

  // Near the end of the text: pull the start back so the window stays full.
  if (windowEnd === text.length && windowEnd - windowStart < maxChars) {
    windowStart = Math.max(0, windowEnd - maxChars);
  }

  // Prefer starting at a nearby line boundary for cleaner output, but only
  // when doing so keeps the matched term inside the window. Moving the start
  // also moves the end, which could otherwise cut off a match that sits close
  // to the end of the window.
  if (windowStart > 0) {
    const lineStart = text.lastIndexOf("\n", windowStart);
    if (lineStart !== -1 && windowStart - lineStart < 100) {
      const snappedStart = lineStart + 1;
      const snappedEnd = Math.min(text.length, snappedStart + maxChars);
      const visibleMatchEnd = Math.min(matchIndex + matchLength, windowEnd);
      if (snappedStart <= matchIndex && visibleMatchEnd <= snappedEnd) {
        windowStart = snappedStart;
        windowEnd = snappedEnd;
      }
    }
  }

  // Do not begin in the middle of a surrogate pair: skip an orphaned low
  // surrogate at the window start.
  if (windowStart > 0) {
    const first = text.charCodeAt(windowStart);
    const before = text.charCodeAt(windowStart - 1);
    const startsMidPair =
      first >= 0xdc00 && first <= 0xdfff && before >= 0xd800 && before <= 0xdbff;
    if (startsMidPair) {
      windowStart += 1;
      windowEnd = Math.min(text.length, windowStart + maxChars);
    }
  }

  // Do not end in the middle of a surrogate pair: drop an orphaned high
  // surrogate at the window end.
  if (windowEnd < text.length && windowEnd > windowStart) {
    const last = text.charCodeAt(windowEnd - 1);
    const after = text.charCodeAt(windowEnd);
    const endsMidPair =
      last >= 0xd800 && last <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (endsMidPair) {
      windowEnd -= 1;
    }
  }

  // Count the newlines before the window so callers can shift line numbers.
  const offsetLines = text.substring(0, windowStart).split("\n").length - 1;

  const snippet = text.substring(windowStart, windowEnd);
  return { snippet: truncateUtf16Safe(snippet, maxChars), offsetLines };
}
export type SearchSource = string;
export type SearchRowResult = {
@ -22,6 +88,7 @@ export async function searchVector(params: {
vectorTable: string;
providerModel: string;
queryVec: number[];
queryText: string;
limit: number;
snippetMaxChars: number;
ensureVectorReady: (dimensions: number) => Promise<boolean>;
@ -57,15 +124,18 @@ export async function searchVector(params: {
source: SearchSource;
dist: number;
}>;
return rows.map((row) => ({
id: row.id,
path: row.path,
startLine: row.start_line,
endLine: row.end_line,
score: 1 - row.dist,
snippet: truncateUtf16Safe(row.text, params.snippetMaxChars),
source: row.source,
}));
return rows.map((row) => {
const { snippet, offsetLines } = extractRelevantSnippet(row.text, params.queryText, params.snippetMaxChars);
return {
id: row.id,
path: row.path,
startLine: row.start_line + offsetLines,
endLine: row.end_line,
score: 1 - row.dist,
snippet,
source: row.source,
};
});
}
const candidates = listChunks({
@ -82,15 +152,22 @@ export async function searchVector(params: {
return scored
.toSorted((a, b) => b.score - a.score)
.slice(0, params.limit)
.map((entry) => ({
id: entry.chunk.id,
path: entry.chunk.path,
startLine: entry.chunk.startLine,
endLine: entry.chunk.endLine,
score: entry.score,
snippet: truncateUtf16Safe(entry.chunk.text, params.snippetMaxChars),
source: entry.chunk.source,
}));
.map((entry) => {
const { snippet, offsetLines } = extractRelevantSnippet(
entry.chunk.text,
params.queryText,
params.snippetMaxChars,
);
return {
id: entry.chunk.id,
path: entry.chunk.path,
startLine: entry.chunk.startLine + offsetLines,
endLine: entry.chunk.endLine,
score: entry.score,
snippet,
source: entry.chunk.source,
};
});
}
export function listChunks(params: {
@ -177,14 +254,15 @@ export async function searchKeyword(params: {
return rows.map((row) => {
const textScore = params.bm25RankToScore(row.rank);
const { snippet, offsetLines } = extractRelevantSnippet(row.text, params.query, params.snippetMaxChars);
return {
id: row.id,
path: row.path,
startLine: row.start_line,
startLine: row.start_line + offsetLines,
endLine: row.end_line,
score: textScore,
textScore,
snippet: truncateUtf16Safe(row.text, params.snippetMaxChars),
snippet,
source: row.source,
};
});

View File

@ -345,7 +345,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
const queryVec = await this.embedQueryWithTimeout(cleaned);
const hasVector = queryVec.some((v) => v !== 0);
const vectorResults = hasVector
? await this.searchVector(queryVec, candidates).catch(() => [])
? await this.searchVector(queryVec, cleaned, candidates).catch(() => [])
: [];
if (!hybrid.enabled || !this.fts.enabled || !this.fts.available) {
@ -386,6 +386,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
private async searchVector(
queryVec: number[],
queryText: string,
limit: number,
): Promise<Array<MemorySearchResult & { id: string }>> {
// This method should never be called without a provider
@ -397,6 +398,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
vectorTable: VECTOR_TABLE,
providerModel: this.provider.model,
queryVec,
queryText,
limit,
snippetMaxChars: SNIPPET_MAX_CHARS,
ensureVectorReady: async (dimensions) => await this.ensureVectorReady(dimensions),