fix(memory): hard-cap embedding inputs before batch
This commit is contained in:
parent
d306fc8ef1
commit
1000ff04ea
@ -122,6 +122,7 @@ Docs: https://docs.openclaw.ai
|
|||||||
- Memory/Embeddings: apply configured remote-base host pinning (`allowedHostnames`) across OpenAI/Voyage/Gemini embedding requests to keep private/self-hosted endpoints working without cross-host drift. (#18198) Thanks @ianpcook.
|
- Memory/Embeddings: apply configured remote-base host pinning (`allowedHostnames`) across OpenAI/Voyage/Gemini embedding requests to keep private/self-hosted endpoints working without cross-host drift. (#18198) Thanks @ianpcook.
|
||||||
- Memory/Batch: route OpenAI/Voyage/Gemini batch upload/create/status/download requests through the same guarded HTTP path for consistent SSRF policy enforcement.
|
- Memory/Batch: route OpenAI/Voyage/Gemini batch upload/create/status/download requests through the same guarded HTTP path for consistent SSRF policy enforcement.
|
||||||
- Memory/Index: detect memory source-set changes (for example enabling `sessions` after an existing memory-only index) and trigger a full reindex so existing session transcripts are indexed without requiring `--force`. (#17576) Thanks @TarsAI-Agent.
|
- Memory/Index: detect memory source-set changes (for example enabling `sessions` after an existing memory-only index) and trigger a full reindex so existing session transcripts are indexed without requiring `--force`. (#17576) Thanks @TarsAI-Agent.
|
||||||
|
- Memory/Embeddings: enforce a per-input 8k safety cap before embedding batching and apply a conservative 2k fallback limit for local providers without declared input limits, preventing oversized session/memory chunks from triggering provider context-size failures during sync/indexing. (#6016) Thanks @batumilove.
|
||||||
- Memory/QMD: on Windows, resolve bare `qmd`/`mcporter` command names to npm shim executables (`.cmd`) before spawning, so qmd boot updates and mcporter-backed searches no longer fail with `spawn ... ENOENT` on default npm installs. (#23899) Thanks @arcbuilder-ai.
|
- Memory/QMD: on Windows, resolve bare `qmd`/`mcporter` command names to npm shim executables (`.cmd`) before spawning, so qmd boot updates and mcporter-backed searches no longer fail with `spawn ... ENOENT` on default npm installs. (#23899) Thanks @arcbuilder-ai.
|
||||||
- Memory/QMD: parse plain-text `qmd collection list --json` output when older qmd builds ignore JSON mode, and retry memory searches once after re-ensuring managed collections when qmd returns `Collection not found ...`. (#23613) Thanks @leozhucn.
|
- Memory/QMD: parse plain-text `qmd collection list --json` output when older qmd builds ignore JSON mode, and retry memory searches once after re-ensuring managed collections when qmd returns `Collection not found ...`. (#23613) Thanks @leozhucn.
|
||||||
- Signal/RPC: guard malformed Signal RPC JSON responses with a clear status-scoped error and add regression coverage for invalid JSON responses. (#22995) Thanks @adhitShet.
|
- Signal/RPC: guard malformed Signal RPC JSON responses with a clear status-scoped error and add regression coverage for invalid JSON responses. (#22995) Thanks @adhitShet.
|
||||||
|
|||||||
@ -13,6 +13,18 @@ function createProvider(maxInputTokens: number): EmbeddingProvider {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Test helper: builds an embedding provider stub that carries only the given
 * `id` and `model`; the returned object has no `maxInputTokens` property.
 *
 * Both embedding methods ignore their arguments: `embedQuery` resolves to `[0]`
 * and `embedBatch` resolves to `[[0]]`.
 *
 * @param params - The provider `id` and `model` to copy onto the stub.
 * @returns The stub provider object.
 */
function createProviderWithoutMaxInputTokens(params: {
|
||||||
|
id: string;
|
||||||
|
model: string;
|
||||||
|
}): EmbeddingProvider {
|
||||||
|
return {
|
||||||
|
id: params.id,
|
||||||
|
model: params.model,
|
||||||
|
embedQuery: async () => [0],
|
||||||
|
embedBatch: async () => [[0]],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
describe("embedding chunk limits", () => {
|
describe("embedding chunk limits", () => {
|
||||||
it("splits oversized chunks so each embedding input stays <= maxInputTokens bytes", () => {
|
it("splits oversized chunks so each embedding input stays <= maxInputTokens bytes", () => {
|
||||||
const provider = createProvider(8192);
|
const provider = createProvider(8192);
|
||||||
@ -49,4 +61,42 @@ describe("embedding chunk limits", () => {
|
|||||||
// If we split inside surrogate pairs we'd likely end up with replacement chars.
|
// If we split inside surrogate pairs we'd likely end up with replacement chars.
|
||||||
expect(out.map((chunk) => chunk.text).join("")).not.toContain("\uFFFD");
|
expect(out.map((chunk) => chunk.text).join("")).not.toContain("\uFFFD");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Provider stub with id "local" and no maxInputTokens property: a single chunk of
// 3000 "x" characters must come back as more than one chunk, each measuring at
// most 2048 by estimateUtf8Bytes.
it("uses conservative fallback limits for local providers without declared maxInputTokens", () => {
|
||||||
|
const provider = createProviderWithoutMaxInputTokens({
|
||||||
|
id: "local",
|
||||||
|
model: "unknown-local-embedding",
|
||||||
|
});
|
||||||
|
|
||||||
|
const out = enforceEmbeddingMaxInputTokens(provider, [
|
||||||
|
{
|
||||||
|
startLine: 1,
|
||||||
|
endLine: 1,
|
||||||
|
text: "x".repeat(3000),
|
||||||
|
hash: "ignored",
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
|
||||||
|
expect(out.length).toBeGreaterThan(1);
|
||||||
|
expect(out.every((chunk) => estimateUtf8Bytes(chunk.text) <= 2048)).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Provider created with 8192, plus 8000 passed as the third (hard cap) argument:
// a single chunk of 8100 "x" characters must come back as more than one chunk,
// each measuring at most 8000 by estimateUtf8Bytes (the lower of the two limits).
it("honors hard safety caps lower than provider maxInputTokens", () => {
|
||||||
|
const provider = createProvider(8192);
|
||||||
|
const out = enforceEmbeddingMaxInputTokens(
|
||||||
|
provider,
|
||||||
|
[
|
||||||
|
{
|
||||||
|
startLine: 1,
|
||||||
|
endLine: 1,
|
||||||
|
text: "x".repeat(8100),
|
||||||
|
hash: "ignored",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
8000,
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(out.length).toBeGreaterThan(1);
|
||||||
|
expect(out.every((chunk) => estimateUtf8Bytes(chunk.text) <= 8000)).toBe(true);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@ -6,8 +6,13 @@ import { hashText, type MemoryChunk } from "./internal.js";
|
|||||||
export function enforceEmbeddingMaxInputTokens(
|
export function enforceEmbeddingMaxInputTokens(
|
||||||
provider: EmbeddingProvider,
|
provider: EmbeddingProvider,
|
||||||
chunks: MemoryChunk[],
|
chunks: MemoryChunk[],
|
||||||
|
hardMaxInputTokens?: number,
|
||||||
): MemoryChunk[] {
|
): MemoryChunk[] {
|
||||||
const maxInputTokens = resolveEmbeddingMaxInputTokens(provider);
|
const providerMaxInputTokens = resolveEmbeddingMaxInputTokens(provider);
|
||||||
|
const maxInputTokens =
|
||||||
|
typeof hardMaxInputTokens === "number" && hardMaxInputTokens > 0
|
||||||
|
? Math.min(providerMaxInputTokens, hardMaxInputTokens)
|
||||||
|
: providerMaxInputTokens;
|
||||||
const out: MemoryChunk[] = [];
|
const out: MemoryChunk[] = [];
|
||||||
|
|
||||||
for (const chunk of chunks) {
|
for (const chunk of chunks) {
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
import type { EmbeddingProvider } from "./embeddings.js";
|
import type { EmbeddingProvider } from "./embeddings.js";
|
||||||
|
|
||||||
const DEFAULT_EMBEDDING_MAX_INPUT_TOKENS = 8192;
|
const DEFAULT_EMBEDDING_MAX_INPUT_TOKENS = 8192;
|
||||||
|
const DEFAULT_LOCAL_EMBEDDING_MAX_INPUT_TOKENS = 2048;
|
||||||
|
|
||||||
const KNOWN_EMBEDDING_MAX_INPUT_TOKENS: Record<string, number> = {
|
const KNOWN_EMBEDDING_MAX_INPUT_TOKENS: Record<string, number> = {
|
||||||
"openai:text-embedding-3-small": 8192,
|
"openai:text-embedding-3-small": 8192,
|
||||||
@ -30,6 +31,9 @@ export function resolveEmbeddingMaxInputTokens(provider: EmbeddingProvider): num
|
|||||||
if (provider.id.toLowerCase() === "gemini") {
|
if (provider.id.toLowerCase() === "gemini") {
|
||||||
return 2048;
|
return 2048;
|
||||||
}
|
}
|
||||||
|
if (provider.id.toLowerCase() === "local") {
|
||||||
|
return DEFAULT_LOCAL_EMBEDDING_MAX_INPUT_TOKENS;
|
||||||
|
}
|
||||||
|
|
||||||
return DEFAULT_EMBEDDING_MAX_INPUT_TOKENS;
|
return DEFAULT_EMBEDDING_MAX_INPUT_TOKENS;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -709,6 +709,7 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
|||||||
chunkMarkdown(content, this.settings.chunking).filter(
|
chunkMarkdown(content, this.settings.chunking).filter(
|
||||||
(chunk) => chunk.text.trim().length > 0,
|
(chunk) => chunk.text.trim().length > 0,
|
||||||
),
|
),
|
||||||
|
EMBEDDING_BATCH_MAX_TOKENS,
|
||||||
);
|
);
|
||||||
if (options.source === "sessions" && "lineMap" in entry) {
|
if (options.source === "sessions" && "lineMap" in entry) {
|
||||||
remapChunkLines(chunks, entry.lineMap);
|
remapChunkLines(chunks, entry.lineMap);
|
||||||
|
|||||||
@ -6,7 +6,7 @@ import { installEmbeddingManagerFixture } from "./embedding-manager.test-harness
|
|||||||
|
|
||||||
const fx = installEmbeddingManagerFixture({
|
const fx = installEmbeddingManagerFixture({
|
||||||
fixturePrefix: "openclaw-mem-",
|
fixturePrefix: "openclaw-mem-",
|
||||||
largeTokens: 1250,
|
largeTokens: 4000,
|
||||||
smallTokens: 200,
|
smallTokens: 200,
|
||||||
createCfg: ({ workspaceDir, indexPath, tokens }) => ({
|
createCfg: ({ workspaceDir, indexPath, tokens }) => ({
|
||||||
agents: {
|
agents: {
|
||||||
@ -50,6 +50,10 @@ describe("memory embedding batches", () => {
|
|||||||
);
|
);
|
||||||
expect(totalTexts).toBe(status.chunks);
|
expect(totalTexts).toBe(status.chunks);
|
||||||
expect(embedBatch.mock.calls.length).toBeGreaterThan(1);
|
expect(embedBatch.mock.calls.length).toBeGreaterThan(1);
|
||||||
|
const inputs: string[] = embedBatch.mock.calls.flatMap(
|
||||||
|
(call: unknown[]) => (call[0] as string[] | undefined) ?? [],
|
||||||
|
);
|
||||||
|
expect(inputs.every((text) => Buffer.byteLength(text, "utf8") <= 8000)).toBe(true);
|
||||||
expect(updates.length).toBeGreaterThan(0);
|
expect(updates.length).toBeGreaterThan(0);
|
||||||
expect(updates.some((update) => update.label?.includes("/"))).toBe(true);
|
expect(updates.some((update) => update.label?.includes("/"))).toBe(true);
|
||||||
const last = updates[updates.length - 1];
|
const last = updates[updates.length - 1];
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user