From 3e00460cdc93c171a2b256ac612fae33a5c97396 Mon Sep 17 00:00:00 2001 From: fan Date: Sun, 15 Feb 2026 07:38:29 +0800 Subject: [PATCH 1/3] feat(memory-lancedb): make auto-capture max length configurable --- extensions/memory-lancedb/config.ts | 24 ++++++++++++++++++- extensions/memory-lancedb/index.test.ts | 17 +++++++++++++ extensions/memory-lancedb/index.ts | 9 ++++--- .../memory-lancedb/openclaw.plugin.json | 11 +++++++++ 4 files changed, 57 insertions(+), 4 deletions(-) diff --git a/extensions/memory-lancedb/config.ts b/extensions/memory-lancedb/config.ts index d3ab87d20df..4caa48cdced 100644 --- a/extensions/memory-lancedb/config.ts +++ b/extensions/memory-lancedb/config.ts @@ -11,12 +11,14 @@ export type MemoryConfig = { dbPath?: string; autoCapture?: boolean; autoRecall?: boolean; + captureMaxChars?: number; }; export const MEMORY_CATEGORIES = ["preference", "fact", "decision", "entity", "other"] as const; export type MemoryCategory = (typeof MEMORY_CATEGORIES)[number]; const DEFAULT_MODEL = "text-embedding-3-small"; +const DEFAULT_CAPTURE_MAX_CHARS = 1500; const LEGACY_STATE_DIRS: string[] = []; function resolveDefaultDbPath(): string { @@ -89,7 +91,11 @@ export const memoryConfigSchema = { throw new Error("memory config required"); } const cfg = value as Record; - assertAllowedKeys(cfg, ["embedding", "dbPath", "autoCapture", "autoRecall"], "memory config"); + assertAllowedKeys( + cfg, + ["embedding", "dbPath", "autoCapture", "autoRecall", "captureMaxChars"], + "memory config", + ); const embedding = cfg.embedding as Record | undefined; if (!embedding || typeof embedding.apiKey !== "string") { @@ -99,6 +105,15 @@ export const memoryConfigSchema = { const model = resolveEmbeddingModel(embedding); + const captureMaxChars = + typeof cfg.captureMaxChars === "number" ? Math.floor(cfg.captureMaxChars) : undefined; + if ( + typeof captureMaxChars === "number" && + (captureMaxChars < 100 || captureMaxChars > 10_000) + ) { + throw new Error("captureMaxChars must be between 100 and 10000"); + } + return { embedding: { provider: "openai", @@ -108,6 +123,7 @@ export const memoryConfigSchema = { dbPath: typeof cfg.dbPath === "string" ? cfg.dbPath : DEFAULT_DB_PATH, autoCapture: cfg.autoCapture !== false, autoRecall: cfg.autoRecall !== false, + captureMaxChars: captureMaxChars ?? DEFAULT_CAPTURE_MAX_CHARS, }; }, uiHints: { @@ -135,5 +151,11 @@ export const memoryConfigSchema = { label: "Auto-Recall", help: "Automatically inject relevant memories into context", }, + captureMaxChars: { + label: "Capture Max Chars", + help: "Maximum message length eligible for auto-capture", + advanced: true, + placeholder: String(DEFAULT_CAPTURE_MAX_CHARS), + }, }, }; diff --git a/extensions/memory-lancedb/index.test.ts b/extensions/memory-lancedb/index.test.ts index d51eb66ad7f..b5261f848a0 100644 --- a/extensions/memory-lancedb/index.test.ts +++ b/extensions/memory-lancedb/index.test.ts @@ -61,6 +61,7 @@ describe("memory plugin e2e", () => { expect(config).toBeDefined(); expect(config?.embedding?.apiKey).toBe(OPENAI_API_KEY); expect(config?.dbPath).toBe(dbPath); + expect(config?.captureMaxChars).toBe(1500); }); test("config schema resolves env vars", async () => { @@ -92,6 +93,18 @@ describe("memory plugin e2e", () => { }).toThrow("embedding.apiKey is required"); }); + test("config schema validates captureMaxChars range", async () => { + const { default: memoryPlugin } = await import("./index.js"); + + expect(() => { + memoryPlugin.configSchema?.parse?.({ + embedding: { apiKey: OPENAI_API_KEY }, + dbPath, + captureMaxChars: 99, + }); + }).toThrow("captureMaxChars must be between 100 and 10000"); + }); + test("shouldCapture applies real capture rules", async () => { const { shouldCapture } = await import("./index.js"); @@ -104,6 +117,10 @@ describe("memory plugin e2e", () => { expect(shouldCapture("injected")).toBe(false); expect(shouldCapture("status")).toBe(false); expect(shouldCapture("Here is a short **summary**\n- bullet")).toBe(false); + const longButAllowed = `I always prefer this style. ${"x".repeat(1200)}`; + const tooLong = `I always prefer this style. ${"x".repeat(1600)}`; + expect(shouldCapture(longButAllowed, { maxChars: 1500 })).toBe(true); + expect(shouldCapture(tooLong, { maxChars: 1500 })).toBe(false); }); test("detectCategory classifies using production logic", async () => { diff --git a/extensions/memory-lancedb/index.ts b/extensions/memory-lancedb/index.ts index 64f557ea954..aa53d834dd2 100644 --- a/extensions/memory-lancedb/index.ts +++ b/extensions/memory-lancedb/index.ts @@ -194,8 +194,9 @@ const MEMORY_TRIGGERS = [ /always|never|important/i, ]; -export function shouldCapture(text: string): boolean { - if (text.length < 10 || text.length > 500) { +export function shouldCapture(text: string, options?: { maxChars?: number }): boolean { + const maxChars = options?.maxChars ?? 1500; + if (text.length < 10 || text.length > maxChars) { return false; } // Skip injected context from memory recall @@ -570,7 +571,9 @@ const memoryPlugin = { } // Filter for capturable content - const toCapture = texts.filter((text) => text && shouldCapture(text)); + const toCapture = texts.filter( + (text) => text && shouldCapture(text, { maxChars: cfg.captureMaxChars }), + ); if (toCapture.length === 0) { return; } diff --git a/extensions/memory-lancedb/openclaw.plugin.json b/extensions/memory-lancedb/openclaw.plugin.json index de25c49529b..007ea3d63dd 100644 --- a/extensions/memory-lancedb/openclaw.plugin.json +++ b/extensions/memory-lancedb/openclaw.plugin.json @@ -25,6 +25,12 @@ "autoRecall": { "label": "Auto-Recall", "help": "Automatically inject relevant memories into context" + }, + "captureMaxChars": { + "label": "Capture Max Chars", + "help": "Maximum message length eligible for auto-capture", + "advanced": true, + "placeholder": "1500" } }, "configSchema": { @@ -53,6 +59,11 @@ }, "autoRecall": { "type": "boolean" + }, + "captureMaxChars": { + "type": "number", + "minimum": 100, + "maximum": 10000 } }, "required": ["embedding"] From 8cb0373bc1e778cd80d79114e5f7fa5a45ed1f1e Mon Sep 17 00:00:00 2001 From: Vignesh Natarajan Date: Sat, 14 Feb 2026 15:54:01 -0800 Subject: [PATCH 2/3] Memory-lancedb: configurable capture limit (#16624) (thanks @ciberponk) --- extensions/memory-lancedb/config.ts | 2 +- extensions/memory-lancedb/index.test.ts | 29 +++++++++++++++---- extensions/memory-lancedb/index.ts | 3 +- .../memory-lancedb/openclaw.plugin.json | 2 +- 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/extensions/memory-lancedb/config.ts b/extensions/memory-lancedb/config.ts index 4caa48cdced..339e5c8cd7a 100644 --- a/extensions/memory-lancedb/config.ts +++ b/extensions/memory-lancedb/config.ts @@ -18,7 +18,7 @@ export const MEMORY_CATEGORIES = ["preference", "fact", "decision", "entity", "o export type MemoryCategory = (typeof MEMORY_CATEGORIES)[number]; const DEFAULT_MODEL = "text-embedding-3-small"; -const DEFAULT_CAPTURE_MAX_CHARS = 1500; +export const DEFAULT_CAPTURE_MAX_CHARS = 500; const LEGACY_STATE_DIRS: string[] = []; function resolveDefaultDbPath(): string { diff --git a/extensions/memory-lancedb/index.test.ts b/extensions/memory-lancedb/index.test.ts index b5261f848a0..c2cb431e78a 100644 --- a/extensions/memory-lancedb/index.test.ts +++ b/extensions/memory-lancedb/index.test.ts @@ -61,7 +61,7 @@ describe("memory plugin e2e", () => { expect(config).toBeDefined(); expect(config?.embedding?.apiKey).toBe(OPENAI_API_KEY); expect(config?.dbPath).toBe(dbPath); - expect(config?.captureMaxChars).toBe(1500); + expect(config?.captureMaxChars).toBe(500); }); test("config schema resolves env vars", async () => { @@ -105,6 +105,21 @@ describe("memory plugin e2e", () => { }).toThrow("captureMaxChars must be between 100 and 10000"); }); + test("config schema accepts captureMaxChars override", async () => { + const { default: memoryPlugin } = await import("./index.js"); + + const config = memoryPlugin.configSchema?.parse?.({ + embedding: { + apiKey: OPENAI_API_KEY, + model: "text-embedding-3-small", + }, + dbPath, + captureMaxChars: 1800, + }); + + expect(config?.captureMaxChars).toBe(1800); + }); + test("shouldCapture applies real capture rules", async () => { const { shouldCapture } = await import("./index.js"); @@ -117,10 +132,14 @@ describe("memory plugin e2e", () => { expect(shouldCapture("injected")).toBe(false); expect(shouldCapture("status")).toBe(false); expect(shouldCapture("Here is a short **summary**\n- bullet")).toBe(false); - const longButAllowed = `I always prefer this style. ${"x".repeat(1200)}`; - const tooLong = `I always prefer this style. ${"x".repeat(1600)}`; - expect(shouldCapture(longButAllowed, { maxChars: 1500 })).toBe(true); - expect(shouldCapture(tooLong, { maxChars: 1500 })).toBe(false); + const defaultAllowed = `I always prefer this style. ${"x".repeat(400)}`; + const defaultTooLong = `I always prefer this style. ${"x".repeat(600)}`; + expect(shouldCapture(defaultAllowed)).toBe(true); + expect(shouldCapture(defaultTooLong)).toBe(false); + const customAllowed = `I always prefer this style. ${"x".repeat(1200)}`; + const customTooLong = `I always prefer this style. ${"x".repeat(1600)}`; + expect(shouldCapture(customAllowed, { maxChars: 1500 })).toBe(true); + expect(shouldCapture(customTooLong, { maxChars: 1500 })).toBe(false); }); test("detectCategory classifies using production logic", async () => { diff --git a/extensions/memory-lancedb/index.ts b/extensions/memory-lancedb/index.ts index aa53d834dd2..0778006c7bd 100644 --- a/extensions/memory-lancedb/index.ts +++ b/extensions/memory-lancedb/index.ts @@ -12,6 +12,7 @@ import { Type } from "@sinclair/typebox"; import { randomUUID } from "node:crypto"; import OpenAI from "openai"; import { + DEFAULT_CAPTURE_MAX_CHARS, MEMORY_CATEGORIES, type MemoryCategory, memoryConfigSchema, @@ -195,7 +196,7 @@ const MEMORY_TRIGGERS = [ ]; export function shouldCapture(text: string, options?: { maxChars?: number }): boolean { - const maxChars = options?.maxChars ?? 1500; + const maxChars = options?.maxChars ?? DEFAULT_CAPTURE_MAX_CHARS; if (text.length < 10 || text.length > maxChars) { return false; } diff --git a/extensions/memory-lancedb/openclaw.plugin.json b/extensions/memory-lancedb/openclaw.plugin.json index 007ea3d63dd..44ee0dcd04f 100644 --- a/extensions/memory-lancedb/openclaw.plugin.json +++ b/extensions/memory-lancedb/openclaw.plugin.json @@ -30,7 +30,7 @@ "label": "Capture Max Chars", "help": "Maximum message length eligible for auto-capture", "advanced": true, - "placeholder": "1500" + "placeholder": "500" } }, "configSchema": { From 3f69607d8c629fb87307e66ed77101c6fb3989f6 Mon Sep 17 00:00:00 2001 From: Vignesh Natarajan Date: Sat, 14 Feb 2026 16:01:30 -0800 Subject: [PATCH 3/3] Changelog: configurable LanceDB capture limit --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a215c25dcc9..67df4de0c11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -132,6 +132,7 @@ Docs: https://docs.openclaw.ai - Telegram: scope skill commands to the resolved agent for default accounts so `setMyCommands` no longer triggers `BOT_COMMANDS_TOO_MUCH` when multiple agents are configured. (#15599) - Discord: avoid misrouting numeric guild allowlist entries to `/channels/` by prefixing guild-only inputs with `guild:` during resolution. (#12326) Thanks @headswim. - Memory/QMD: default `memory.qmd.searchMode` to `search` for faster CPU-only recall and always scope `search`/`vsearch` requests to managed collections (auto-falling back to `query` when required). (#16047) Thanks @togotago. +- Memory/LanceDB: add configurable `captureMaxChars` for auto-capture while keeping the legacy 500-char default. (#16641) Thanks @ciberponk. - MS Teams: preserve parsed mention entities/text when appending OneDrive fallback file links, and accept broader real-world Teams mention ID formats (`29:...`, `8:orgid:...`) while still rejecting placeholder patterns. (#15436) Thanks @hyojin. - Media: classify `text/*` MIME types as documents in media-kind routing so text attachments are no longer treated as unknown. (#12237) Thanks @arosstale. - Inbound/Web UI: preserve literal `\n` sequences when normalizing inbound text so Windows paths like `C:\\Work\\nxxx\\README.md` are not corrupted. (#11547) Thanks @mcaxtr.