diff --git a/docs/concepts/memory.md b/docs/concepts/memory.md index b3940945249..f14105a70ef 100644 --- a/docs/concepts/memory.md +++ b/docs/concepts/memory.md @@ -310,6 +310,28 @@ Notes: - `remote.baseUrl` is optional (defaults to the Gemini API base URL). - `remote.headers` lets you add extra headers if needed. - Default model: `gemini-embedding-001`. +- `gemini-embedding-2-preview` is also supported: multimodal inputs, 8192 token limit, configurable dimensions (768 / 1536 / 3072, default 3072). + +#### Gemini Embedding 2 (preview) + +```json5 +agents: { + defaults: { + memorySearch: { + provider: "gemini", + model: "gemini-embedding-2-preview", + outputDimensionality: 3072, // optional: 768, 1536, or 3072 (default) + remote: { + apiKey: "YOUR_GEMINI_API_KEY" + } + } + } +} +``` + +> **⚠️ Re-index required:** Switching from `gemini-embedding-001` (768 dimensions) +> to `gemini-embedding-2-preview` (3072 dimensions) changes the vector size. +> OpenClaw will automatically reindex when it detects the model change. If you want to use a **custom OpenAI-compatible endpoint** (OpenRouter, vLLM, or a proxy), you can use the `remote` configuration with the OpenAI provider: diff --git a/src/memory/embedding-model-limits.ts b/src/memory/embedding-model-limits.ts index b9960009606..0819686b905 100644 --- a/src/memory/embedding-model-limits.ts +++ b/src/memory/embedding-model-limits.ts @@ -8,6 +8,8 @@ const KNOWN_EMBEDDING_MAX_INPUT_TOKENS: Record = { "openai:text-embedding-3-large": 8192, "openai:text-embedding-ada-002": 8191, "gemini:text-embedding-004": 2048, + "gemini:gemini-embedding-001": 2048, + "gemini:gemini-embedding-2-preview": 8192, "voyage:voyage-3": 32000, "voyage:voyage-3-lite": 16000, "voyage:voyage-code-3": 32000, diff --git a/src/memory/embeddings-gemini.test.ts b/src/memory/embeddings-gemini.test.ts new file mode 100644 index 00000000000..b91dd9bd29f --- /dev/null +++ b/src/memory/embeddings-gemini.test.ts @@ -0,0 +1,411 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import * as authModule from "../agents/model-auth.js"; +import { + buildFileDataPart, + buildGeminiParts, + buildInlineDataPart, + createGeminiEmbeddingProvider, + DEFAULT_GEMINI_EMBEDDING_MODEL, + GEMINI_EMBEDDING_2_MODELS, + isGeminiEmbedding2Model, + resolveGeminiOutputDimensionality, + type GeminiPart, +} from "./embeddings-gemini.js"; + +vi.mock("../agents/model-auth.js", async () => { + const { createModelAuthMockModule } = await import("../test-utils/model-auth-mock.js"); + return createModelAuthMockModule(); +}); + +const createGeminiFetchMock = (embeddingValues = [1, 2, 3]) => + vi.fn(async (_input?: unknown, _init?: unknown) => ({ + ok: true, + status: 200, + json: async () => ({ embedding: { values: embeddingValues } }), + })); + +const createGeminiBatchFetchMock = (count: number, embeddingValues = [1, 2, 3]) => + vi.fn(async (_input?: unknown, _init?: unknown) => ({ + ok: true, + status: 200, + json: async () => ({ + embeddings: Array.from({ length: count }, () => ({ values: embeddingValues })), + }), + })); + +function readFirstFetchRequest(fetchMock: { mock: { calls: unknown[][] } }) { + const [url, init] = fetchMock.mock.calls[0] ?? []; + return { url, init: init as RequestInit | undefined }; +} + +function parseFetchBody(fetchMock: { mock: { calls: unknown[][] } }, callIndex = 0) { + const init = fetchMock.mock.calls[callIndex]?.[1] as RequestInit | undefined; + return JSON.parse((init?.body as string) ?? "{}") as Record; +} + +afterEach(() => { + vi.resetAllMocks(); + vi.unstubAllGlobals(); +}); + +function mockResolvedProviderKey(apiKey = "test-key") { + vi.mocked(authModule.resolveApiKeyForProvider).mockResolvedValue({ + apiKey, + mode: "api-key", + source: "test", + }); +} + +// ---------- Helper function tests ---------- + +describe("buildGeminiParts", () => { + it("wraps a string into a single text part", () => { + expect(buildGeminiParts("hello")).toEqual([{ text: "hello" }]); + }); + + it("passes through an existing parts array", () => { + const parts: GeminiPart[] = [ + { text: "hello" }, + { inlineData: { mimeType: "image/png", data: "base64data" } }, + ]; + expect(buildGeminiParts(parts)).toBe(parts); + }); +}); + +describe("buildInlineDataPart", () => { + it("produces the correct shape", () => { + const part = buildInlineDataPart("image/jpeg", "abc123"); + expect(part).toEqual({ + inlineData: { mimeType: "image/jpeg", data: "abc123" }, + }); + }); +}); + +describe("buildFileDataPart", () => { + it("produces the correct shape", () => { + const part = buildFileDataPart("application/pdf", "gs://bucket/file.pdf"); + expect(part).toEqual({ + fileData: { mimeType: "application/pdf", fileUri: "gs://bucket/file.pdf" }, + }); + }); +}); + +// ---------- Model detection ---------- + +describe("isGeminiEmbedding2Model", () => { + it("returns true for gemini-embedding-2-preview", () => { + expect(isGeminiEmbedding2Model("gemini-embedding-2-preview")).toBe(true); + }); + + it("returns false for gemini-embedding-001", () => { + expect(isGeminiEmbedding2Model("gemini-embedding-001")).toBe(false); + }); + + it("returns false for text-embedding-004", () => { + expect(isGeminiEmbedding2Model("text-embedding-004")).toBe(false); + }); +}); + +describe("GEMINI_EMBEDDING_2_MODELS", () => { + it("contains gemini-embedding-2-preview", () => { + expect(GEMINI_EMBEDDING_2_MODELS.has("gemini-embedding-2-preview")).toBe(true); + }); +}); + +// ---------- Dimension resolution ---------- + +describe("resolveGeminiOutputDimensionality", () => { + it("returns undefined for non-v2 models", () => { + expect(resolveGeminiOutputDimensionality("gemini-embedding-001")).toBeUndefined(); + expect(resolveGeminiOutputDimensionality("text-embedding-004")).toBeUndefined(); + }); + + it("returns 3072 by default for v2 models", () => { + expect(resolveGeminiOutputDimensionality("gemini-embedding-2-preview")).toBe(3072); + }); + + it("accepts valid dimension values", () => { + expect(resolveGeminiOutputDimensionality("gemini-embedding-2-preview", 768)).toBe(768); + expect(resolveGeminiOutputDimensionality("gemini-embedding-2-preview", 1536)).toBe(1536); + expect(resolveGeminiOutputDimensionality("gemini-embedding-2-preview", 3072)).toBe(3072); + }); + + it("throws for invalid dimension values", () => { + expect(() => resolveGeminiOutputDimensionality("gemini-embedding-2-preview", 512)).toThrow( + /Invalid outputDimensionality 512/, + ); + expect(() => resolveGeminiOutputDimensionality("gemini-embedding-2-preview", 1024)).toThrow( + /Valid values: 768, 1536, 3072/, + ); + }); +}); + +// ---------- Provider: gemini-embedding-001 (backward compat) ---------- + +describe("gemini-embedding-001 provider (backward compat)", () => { + it("does NOT include outputDimensionality in embedQuery", async () => { + const fetchMock = createGeminiFetchMock(); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-001", + fallback: "none", + }); + + await provider.embedQuery("test query"); + + const body = parseFetchBody(fetchMock); + expect(body).not.toHaveProperty("outputDimensionality"); + expect(body.taskType).toBe("RETRIEVAL_QUERY"); + expect(body.content).toEqual({ parts: [{ text: "test query" }] }); + }); + + it("does NOT include outputDimensionality in embedBatch", async () => { + const fetchMock = createGeminiBatchFetchMock(2); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-001", + fallback: "none", + }); + + await provider.embedBatch(["text1", "text2"]); + + const body = parseFetchBody(fetchMock); + expect(body).not.toHaveProperty("outputDimensionality"); + }); +}); + +// ---------- Provider: gemini-embedding-2-preview ---------- + +describe("gemini-embedding-2-preview provider", () => { + it("includes outputDimensionality in embedQuery request", async () => { + const fetchMock = createGeminiFetchMock(); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-2-preview", + fallback: "none", + }); + + await provider.embedQuery("test query"); + + const body = parseFetchBody(fetchMock); + expect(body.outputDimensionality).toBe(3072); + expect(body.taskType).toBe("RETRIEVAL_QUERY"); + expect(body.content).toEqual({ parts: [{ text: "test query" }] }); + }); + + it("includes outputDimensionality in embedBatch request", async () => { + const fetchMock = createGeminiBatchFetchMock(2); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-2-preview", + fallback: "none", + }); + + await provider.embedBatch(["text1", "text2"]); + + const body = parseFetchBody(fetchMock); + expect(body.outputDimensionality).toBe(3072); + expect(body.requests).toEqual([ + { + model: "models/gemini-embedding-2-preview", + content: { parts: [{ text: "text1" }] }, + taskType: "RETRIEVAL_DOCUMENT", + }, + { + model: "models/gemini-embedding-2-preview", + content: { parts: [{ text: "text2" }] }, + taskType: "RETRIEVAL_DOCUMENT", + }, + ]); + }); + + it("respects custom outputDimensionality", async () => { + const fetchMock = createGeminiFetchMock(); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-2-preview", + fallback: "none", + outputDimensionality: 768, + }); + + await provider.embedQuery("test"); + + const body = parseFetchBody(fetchMock); + expect(body.outputDimensionality).toBe(768); + }); + + it("throws for invalid outputDimensionality", async () => { + mockResolvedProviderKey(); + + await expect( + createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-2-preview", + fallback: "none", + outputDimensionality: 512, + }), + ).rejects.toThrow(/Invalid outputDimensionality 512/); + }); + + it("uses correct endpoint URL", async () => { + const fetchMock = createGeminiFetchMock(); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-2-preview", + fallback: "none", + }); + + await provider.embedQuery("test"); + + const { url } = readFirstFetchRequest(fetchMock); + expect(url).toBe( + "https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-2-preview:embedContent", + ); + }); + + it("allows taskType override via options", async () => { + const fetchMock = createGeminiFetchMock(); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-2-preview", + fallback: "none", + taskType: "SEMANTIC_SIMILARITY", + }); + + await provider.embedQuery("test"); + + const body = parseFetchBody(fetchMock); + expect(body.taskType).toBe("SEMANTIC_SIMILARITY"); + }); +}); + +// ---------- Model normalization ---------- + +describe("gemini model normalization", () => { + it("handles models/ prefix for v2 model", async () => { + const fetchMock = createGeminiFetchMock(); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "models/gemini-embedding-2-preview", + fallback: "none", + }); + + await provider.embedQuery("test"); + + const body = parseFetchBody(fetchMock); + expect(body.outputDimensionality).toBe(3072); + }); + + it("handles gemini/ prefix for v2 model", async () => { + const fetchMock = createGeminiFetchMock(); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini/gemini-embedding-2-preview", + fallback: "none", + }); + + await provider.embedQuery("test"); + + const body = parseFetchBody(fetchMock); + expect(body.outputDimensionality).toBe(3072); + }); + + it("handles google/ prefix for v2 model", async () => { + const fetchMock = createGeminiFetchMock(); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "google/gemini-embedding-2-preview", + fallback: "none", + }); + + await provider.embedQuery("test"); + + const body = parseFetchBody(fetchMock); + expect(body.outputDimensionality).toBe(3072); + }); + + it("defaults to gemini-embedding-001 when model is empty", async () => { + const fetchMock = createGeminiFetchMock(); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider, client } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "", + fallback: "none", + }); + + expect(client.model).toBe(DEFAULT_GEMINI_EMBEDDING_MODEL); + expect(provider.model).toBe(DEFAULT_GEMINI_EMBEDDING_MODEL); + }); + + it("returns empty array for blank query text", async () => { + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-2-preview", + fallback: "none", + }); + + const result = await provider.embedQuery(" "); + expect(result).toEqual([]); + }); + + it("returns empty array for empty batch", async () => { + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-2-preview", + fallback: "none", + }); + + const result = await provider.embedBatch([]); + expect(result).toEqual([]); + }); +}); diff --git a/src/memory/embeddings-gemini.ts b/src/memory/embeddings-gemini.ts index 1d5cc5876ea..f9b87997519 100644 --- a/src/memory/embeddings-gemini.ts +++ b/src/memory/embeddings-gemini.ts @@ -24,6 +24,83 @@ export const DEFAULT_GEMINI_EMBEDDING_MODEL = "gemini-embedding-001"; const GEMINI_MAX_INPUT_TOKENS: Record = { "text-embedding-004": 2048, }; + +// --- gemini-embedding-2-preview support --- + +export const GEMINI_EMBEDDING_2_MODELS = new Set([ + "gemini-embedding-2-preview", + // Add the GA model name here once released. +]); + +const GEMINI_EMBEDDING_2_DEFAULT_DIMENSIONS = 3072; +const GEMINI_EMBEDDING_2_VALID_DIMENSIONS = [768, 1536, 3072] as const; + +export type GeminiTaskType = + | "RETRIEVAL_QUERY" + | "RETRIEVAL_DOCUMENT" + | "SEMANTIC_SIMILARITY" + | "CLASSIFICATION" + | "CLUSTERING" + | "QUESTION_ANSWERING" + | "FACT_VERIFICATION"; + +export type GeminiTextPart = { text: string }; +export type GeminiInlinePart = { + inlineData: { mimeType: string; data: string }; +}; +export type GeminiFilePart = { + fileData: { mimeType: string; fileUri: string }; +}; +export type GeminiPart = GeminiTextPart | GeminiInlinePart | GeminiFilePart; + +/** Convert a string or pre-built parts array into `GeminiPart[]`. */ +export function buildGeminiParts(input: string | GeminiPart[]): GeminiPart[] { + if (typeof input === "string") { + return [{ text: input }]; + } + return input; +} + +/** Convenience: build an inline-data part for multimodal embeddings. */ +export function buildInlineDataPart(mimeType: string, base64Data: string): GeminiInlinePart { + return { inlineData: { mimeType, data: base64Data } }; +} + +/** Convenience: build a file-data part for multimodal embeddings. */ +export function buildFileDataPart(mimeType: string, fileUri: string): GeminiFilePart { + return { fileData: { mimeType, fileUri } }; +} + +/** + * Returns true if the given model name is a gemini-embedding-2 variant that + * supports `outputDimensionality` and extended task types. + */ +export function isGeminiEmbedding2Model(model: string): boolean { + return GEMINI_EMBEDDING_2_MODELS.has(model); +} + +/** + * Validate and return the `outputDimensionality` for gemini-embedding-2 models. + * Returns `undefined` for older models (they don't support the param). + */ +export function resolveGeminiOutputDimensionality( + model: string, + requested?: number, +): number | undefined { + if (!isGeminiEmbedding2Model(model)) { + return undefined; + } + if (requested == null) { + return GEMINI_EMBEDDING_2_DEFAULT_DIMENSIONS; + } + const valid: readonly number[] = GEMINI_EMBEDDING_2_VALID_DIMENSIONS; + if (!valid.includes(requested)) { + throw new Error( + `Invalid outputDimensionality ${requested} for ${model}. Valid values: ${valid.join(", ")}`, + ); + } + return requested; +} function resolveRemoteApiKey(remoteApiKey: unknown): string | undefined { const trimmed = resolveMemorySecretInputString({ value: remoteApiKey, @@ -73,6 +150,11 @@ export async function createGeminiEmbeddingProvider( const baseUrl = client.baseUrl.replace(/\/$/, ""); const embedUrl = `${baseUrl}/${client.modelPath}:embedContent`; const batchUrl = `${baseUrl}/${client.modelPath}:batchEmbedContents`; + const isV2 = isGeminiEmbedding2Model(client.model); + const outputDimensionality = resolveGeminiOutputDimensionality( + client.model, + options.outputDimensionality, + ); const fetchWithGeminiAuth = async (apiKey: string, endpoint: string, body: unknown) => { const authHeaders = parseGeminiAuth(apiKey); @@ -106,14 +188,17 @@ export async function createGeminiEmbeddingProvider( if (!text.trim()) { return []; } + const body: Record = { + content: { parts: [{ text }] }, + taskType: options.taskType ?? "RETRIEVAL_QUERY", + }; + if (isV2 && outputDimensionality != null) { + body.outputDimensionality = outputDimensionality; + } const payload = await executeWithApiKeyRotation({ provider: "google", apiKeys: client.apiKeys, - execute: (apiKey) => - fetchWithGeminiAuth(apiKey, embedUrl, { - content: { parts: [{ text }] }, - taskType: "RETRIEVAL_QUERY", - }), + execute: (apiKey) => fetchWithGeminiAuth(apiKey, embedUrl, body), }); return payload.embedding?.values ?? []; }; @@ -122,18 +207,22 @@ export async function createGeminiEmbeddingProvider( if (texts.length === 0) { return []; } - const requests = texts.map((text) => ({ - model: client.modelPath, - content: { parts: [{ text }] }, - taskType: "RETRIEVAL_DOCUMENT", - })); + const requests = texts.map((text) => { + const req: Record = { + model: client.modelPath, + content: { parts: [{ text }] }, + taskType: options.taskType ?? "RETRIEVAL_DOCUMENT", + }; + return req; + }); + const batchBody: Record = { requests }; + if (isV2 && outputDimensionality != null) { + batchBody.outputDimensionality = outputDimensionality; + } const payload = await executeWithApiKeyRotation({ provider: "google", apiKeys: client.apiKeys, - execute: (apiKey) => - fetchWithGeminiAuth(apiKey, batchUrl, { - requests, - }), + execute: (apiKey) => fetchWithGeminiAuth(apiKey, batchUrl, batchBody), }); const embeddings = Array.isArray(payload.embeddings) ? payload.embeddings : []; return texts.map((_, index) => embeddings[index]?.values ?? []); diff --git a/src/memory/embeddings.ts b/src/memory/embeddings.ts index ca6b4046e2c..9d55e226210 100644 --- a/src/memory/embeddings.ts +++ b/src/memory/embeddings.ts @@ -74,6 +74,10 @@ export type EmbeddingProviderOptions = { modelPath?: string; modelCacheDir?: string; }; + /** Gemini embedding-2: output vector dimensions (768, 1536, or 3072). */ + outputDimensionality?: number; + /** Gemini: override the default task type sent with embedding requests. */ + taskType?: string; }; export const DEFAULT_LOCAL_MODEL =