From df539d50851f7f0ce245afde10942a5a045510a7 Mon Sep 17 00:00:00 2001 From: jianxh Date: Sat, 21 Mar 2026 08:42:52 +0800 Subject: [PATCH] fix(agents): accept model options in config and safely recover Ollama tool calls Closes #50894 Add options field to ModelDefinitionSchema and propagate it through applyConfiguredProviderOverrides so user-specified Ollama settings (e.g. num_ctx) reach the runtime instead of being dropped during model resolution. Use options.num_ctx as fallback for contextWindow in both native Ollama and OpenAI-compat num_ctx injection paths (extracted once, reused in both places). Add recoverToolCallsFromText for tool_call XML tags only (no fenced JSON blocks per security review). Strip recovered markup from text content to prevent history pollution on subsequent turns. Made-with: Cursor --- src/agents/ollama-stream.test.ts | 173 +++++++++++++++++++ src/agents/ollama-stream.ts | 86 ++++++++- src/agents/pi-embedded-runner/model.ts | 4 +- src/agents/pi-embedded-runner/run/attempt.ts | 15 +- src/config/defaults.ts | 10 +- src/config/types.models.ts | 1 + src/config/zod-schema.core.ts | 1 + 7 files changed, 279 insertions(+), 11 deletions(-) diff --git a/src/agents/ollama-stream.test.ts b/src/agents/ollama-stream.test.ts index ded8064ea19..7b6fb0febcf 100644 --- a/src/agents/ollama-stream.test.ts +++ b/src/agents/ollama-stream.test.ts @@ -4,6 +4,7 @@ import { createOllamaStreamFn, convertToOllamaMessages, buildAssistantMessage, + recoverToolCallsFromText, parseNdjsonStream, resolveOllamaBaseUrlForRun, } from "./ollama-stream.js"; @@ -182,6 +183,119 @@ describe("buildAssistantMessage", () => { total: 0, }); }); + + it("recovers tool calls from tags and strips markup from text", () => { + const response = { + model: "qwen3.5:9b", + created_at: "2026-01-01T00:00:00Z", + message: { + role: "assistant" as const, + content: + 'I\'ll save that for you.\n\n{"name": "write_file", "arguments": {"path": "/tmp/note.md", "content": "hello"}}\n', + }, + done: true, + prompt_eval_count: 50, + eval_count: 30, + }; + const result = buildAssistantMessage(response, { + api: "ollama", + provider: "ollama", + id: "qwen3.5:9b", + }); + expect(result.stopReason).toBe("toolUse"); + const textParts = result.content.filter((c) => c.type === "text"); + expect(textParts.length).toBe(1); + expect((textParts[0] as { text: string }).text).toBe("I'll save that for you."); + const toolCalls = result.content.filter((c) => c.type === "toolCall"); + expect(toolCalls.length).toBe(1); + const tc = toolCalls[0] as { name: string; arguments: Record }; + expect(tc.name).toBe("write_file"); + expect(tc.arguments).toEqual({ path: "/tmp/note.md", content: "hello" }); + }); + + it("does not recover tool calls when proper tool_calls exist", () => { + const response = { + model: "qwen3.5:9b", + created_at: "2026-01-01T00:00:00Z", + message: { + role: "assistant" as const, + content: '{"name": "ghost", "arguments": {}}', + tool_calls: [{ function: { name: "bash", arguments: { command: "ls" } } }], + }, + done: true, + }; + const result = buildAssistantMessage(response, modelInfo); + const toolCalls = result.content.filter((c) => c.type === "toolCall"); + expect(toolCalls.length).toBe(1); + expect((toolCalls[0] as { name: string }).name).toBe("bash"); + }); + + it("does not recover tool calls from fenced JSON code blocks", () => { + const response = { + model: "qwen3.5:9b", + created_at: "2026-01-01T00:00:00Z", + message: { + role: "assistant" as const, + content: + 'Here is an example:\n```json\n{"name": "bash", "arguments": {"command": "rm -rf /"}}\n```', + }, + done: true, + }; + const result = buildAssistantMessage(response, { + api: "ollama", + provider: "ollama", + id: "qwen3.5:9b", + }); + expect(result.stopReason).toBe("stop"); + expect(result.content.filter((c) => c.type === "toolCall").length).toBe(0); + }); +}); + +describe("recoverToolCallsFromText", () => { + it("extracts tool calls from XML tags", () => { + const text = + 'Sure!\n\n{"name": "bash", "arguments": {"command": "ls"}}\n'; + const result = recoverToolCallsFromText(text); + expect(result).toEqual([{ function: { name: "bash", arguments: { command: "ls" } } }]); + }); + + it("handles function-wrapped format", () => { + const text = + '{"function": {"name": "bash", "arguments": {"command": "pwd"}}}'; + const result = recoverToolCallsFromText(text); + expect(result).toEqual([{ function: { name: "bash", arguments: { command: "pwd" } } }]); + }); + + it("handles parameters key instead of arguments", () => { + const text = '{"name": "read", "parameters": {"path": "/tmp/x"}}'; + const result = recoverToolCallsFromText(text); + expect(result).toEqual([{ function: { name: "read", arguments: { path: "/tmp/x" } } }]); + }); + + it("returns empty for plain text without tool patterns", () => { + expect(recoverToolCallsFromText("Just a normal reply.")).toEqual([]); + }); + + it("returns empty for malformed JSON inside tags", () => { + expect(recoverToolCallsFromText("not json")).toEqual([]); + }); + + it("extracts multiple tool calls", () => { + const text = [ + '{"name": "bash", "arguments": {"command": "ls"}}', + '{"name": "read", "arguments": {"path": "a.txt"}}', + ].join("\n"); + const result = recoverToolCallsFromText(text); + expect(result.length).toBe(2); + expect(result[0]!.function.name).toBe("bash"); + expect(result[1]!.function.name).toBe("read"); + }); + + it("ignores fenced JSON blocks (security: no code-block recovery)", () => { + const text = + '```json\n{"name": "bash", "arguments": {"command": "rm -rf /"}}\n```'; + expect(recoverToolCallsFromText(text)).toEqual([]); + }); }); // Helper: build a ReadableStreamDefaultReader from NDJSON lines @@ -399,6 +513,65 @@ describe("createOllamaStreamFn", () => { ); }); + it("uses model options num_ctx when contextWindow is not set", async () => { + await withMockNdjsonFetch( + [ + '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":true,"prompt_eval_count":1,"eval_count":1}', + ], + async (fetchMock) => { + const streamFn = createOllamaStreamFn("http://ollama-host:11434", undefined, { + num_ctx: 8192, + }); + const stream = await Promise.resolve( + streamFn( + { id: "qwen3.5:9b", api: "ollama", provider: "ollama" } as never, + { messages: [{ role: "user", content: "hi" }] } as never, + {} as never, + ), + ); + await collectStreamEvents(stream); + + const [, requestInit] = fetchMock.mock.calls[0] as unknown as [string, RequestInit]; + const body = JSON.parse(requestInit.body as string) as { + options: Record; + }; + expect(body.options.num_ctx).toBe(8192); + }, + ); + }); + + it("contextWindow takes precedence over model options num_ctx", async () => { + await withMockNdjsonFetch( + [ + '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":true,"prompt_eval_count":1,"eval_count":1}', + ], + async (fetchMock) => { + const streamFn = createOllamaStreamFn("http://ollama-host:11434", undefined, { + num_ctx: 8192, + }); + const stream = await Promise.resolve( + streamFn( + { + id: "qwen3.5:9b", + api: "ollama", + provider: "ollama", + contextWindow: 131072, + } as never, + { messages: [{ role: "user", content: "hi" }] } as never, + {} as never, + ), + ); + await collectStreamEvents(stream); + + const [, requestInit] = fetchMock.mock.calls[0] as unknown as [string, RequestInit]; + const body = JSON.parse(requestInit.body as string) as { + options: Record; + }; + expect(body.options.num_ctx).toBe(131072); + }, + ); + }); + it("merges default headers and allows request headers to override them", async () => { await withMockNdjsonFetch( [ diff --git a/src/agents/ollama-stream.ts b/src/agents/ollama-stream.ts index f332ad1fd83..c7aca409e5e 100644 --- a/src/agents/ollama-stream.ts +++ b/src/agents/ollama-stream.ts @@ -335,6 +335,49 @@ function extractOllamaTools(tools: Tool[] | undefined): OllamaTool[] { // ── Response conversion ───────────────────────────────────────────────────── +/** + * Recover structured tool calls from `` tags in text. + * + * Some smaller local models (e.g. Qwen3.5-9B) understand tool definitions but + * emit the call as text instead of a proper `tool_calls` array. We only match + * the explicit `` tag pattern to minimise false-positive risk; + * generic fenced JSON blocks are intentionally ignored. + */ +export function recoverToolCallsFromText(text: string): OllamaToolCall[] { + const recovered: OllamaToolCall[] = []; + const tagRegex = /\s*([\s\S]*?)\s*<\/tool_call>/gi; + let match: RegExpExecArray | null; + while ((match = tagRegex.exec(text)) !== null) { + const parsed = tryParseToolCallJson(match[1]!); + if (parsed) { + recovered.push(parsed); + } + } + return recovered; +} + +function tryParseToolCallJson(raw: string): OllamaToolCall | null { + try { + const obj = parseJsonPreservingUnsafeIntegers(raw.trim()) as Record; + const name = obj.name ?? (obj.function as Record | undefined)?.name; + const args = + obj.arguments ?? + obj.parameters ?? + (obj.function as Record | undefined)?.arguments; + if (typeof name === "string" && name && args && typeof args === "object" && !Array.isArray(args)) { + return { function: { name, arguments: args as Record } }; + } + } catch { + // Not valid JSON – skip. + } + return null; +} + +/** Strip `` blocks from text after recovery. */ +function stripToolCallTags(text: string): string { + return text.replace(/[\s\S]*?<\/tool_call>/gi, "").replace(/\n{3,}/g, "\n\n").trim(); +} + export function buildAssistantMessage( response: OllamaChatResponse, modelInfo: { api: string; provider: string; id: string }, @@ -344,11 +387,31 @@ export function buildAssistantMessage( // Native Ollama reasoning fields are internal model output. The reply text // must come from `content`; reasoning visibility is controlled elsewhere. const text = response.message.content || ""; - if (text) { - content.push({ type: "text", text }); + + let toolCalls = response.message.tool_calls; + let didRecover = false; + + // Fallback: recover tool calls from tags when the model + // produced no native tool_calls. Only the explicit XML-tag pattern is + // matched to reduce false-positive risk (see security review). + if ((!toolCalls || toolCalls.length === 0) && text) { + const recovered = recoverToolCallsFromText(text); + if (recovered.length > 0) { + toolCalls = recovered; + didRecover = true; + log.info( + `Recovered ${recovered.length} tool call(s) from text output for model ${modelInfo.id}`, + ); + } + } + + // When tool calls were recovered from text, strip the raw markup so it + // does not pollute conversation history on subsequent turns. + const visibleText = didRecover ? stripToolCallTags(text) : text; + if (visibleText) { + content.push({ type: "text", text: visibleText }); } - const toolCalls = response.message.tool_calls; if (toolCalls && toolCalls.length > 0) { for (const tc of toolCalls) { content.push({ @@ -434,6 +497,7 @@ function resolveOllamaModelHeaders(model: { export function createOllamaStreamFn( baseUrl: string, defaultHeaders?: Record, + modelOptions?: Record, ): StreamFn { const chatUrl = resolveOllamaChatUrl(baseUrl); @@ -449,9 +513,16 @@ export function createOllamaStreamFn( const ollamaTools = extractOllamaTools(context.tools); - // Ollama defaults to num_ctx=4096 which is too small for large - // system prompts + many tool definitions. Use model's contextWindow. - const ollamaOptions: Record = { num_ctx: model.contextWindow ?? 65536 }; + const userNumCtx = + modelOptions && + typeof modelOptions.num_ctx === "number" && + Number.isFinite(modelOptions.num_ctx) + ? modelOptions.num_ctx + : undefined; + const ollamaOptions: Record = { + ...(modelOptions ?? {}), + num_ctx: model.contextWindow ?? userNumCtx ?? 65536, + }; if (typeof options?.temperature === "number") { ollamaOptions.temperature = options.temperature; } @@ -561,7 +632,7 @@ export function createOllamaStreamFn( } export function createConfiguredOllamaStreamFn(params: { - model: { baseUrl?: string; headers?: unknown }; + model: { baseUrl?: string; headers?: unknown; options?: Record }; providerBaseUrl?: string; }): StreamFn { const modelBaseUrl = typeof params.model.baseUrl === "string" ? params.model.baseUrl : undefined; @@ -571,5 +642,6 @@ export function createConfiguredOllamaStreamFn(params: { providerBaseUrl: params.providerBaseUrl, }), resolveOllamaModelHeaders(params.model), + params.model.options, ); } diff --git a/src/agents/pi-embedded-runner/model.ts b/src/agents/pi-embedded-runner/model.ts index 5bf97a683d0..7b396128323 100644 --- a/src/agents/pi-embedded-runner/model.ts +++ b/src/agents/pi-embedded-runner/model.ts @@ -146,7 +146,8 @@ function applyConfiguredProviderOverrides(params: { } : undefined, compat: configuredModel?.compat ?? discoveredModel.compat, - }; + ...(configuredModel?.options != null ? { options: configuredModel.options } : {}), + } as Model; } export function buildInlineProviderModels( @@ -301,6 +302,7 @@ export function resolveModelWithRegistry(params: { DEFAULT_CONTEXT_TOKENS, headers: providerHeaders || modelHeaders ? { ...providerHeaders, ...modelHeaders } : undefined, + ...(configuredModel?.options != null ? { options: configuredModel.options } : {}), } as Model, }); } diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index d785218f819..4dd553b878a 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -2159,12 +2159,20 @@ export async function runEmbeddedAttempt( queueYieldInterruptForSession = () => { queueSessionsYieldInterruptMessage(activeSession); }; + const modelWithOptions = params.model as { options?: Record }; + const optionsNumCtx = + typeof modelWithOptions.options?.num_ctx === "number" + ? modelWithOptions.options.num_ctx + : undefined; removeToolResultContextGuard = installToolResultContextGuard({ agent: activeSession.agent, contextWindowTokens: Math.max( 1, Math.floor( - params.model.contextWindow ?? params.model.maxTokens ?? DEFAULT_CONTEXT_TOKENS, + params.model.contextWindow ?? + optionsNumCtx ?? + params.model.maxTokens ?? + DEFAULT_CONTEXT_TOKENS, ), ), }); @@ -2237,7 +2245,10 @@ export async function runEmbeddedAttempt( const numCtx = Math.max( 1, Math.floor( - params.model.contextWindow ?? params.model.maxTokens ?? DEFAULT_CONTEXT_TOKENS, + params.model.contextWindow ?? + optionsNumCtx ?? + params.model.maxTokens ?? + DEFAULT_CONTEXT_TOKENS, ), ); activeSession.agent.streamFn = wrapOllamaCompatNumCtx(activeSession.agent.streamFn, numCtx); diff --git a/src/config/defaults.ts b/src/config/defaults.ts index 2febc3869ee..6db7e0c25e4 100644 --- a/src/config/defaults.ts +++ b/src/config/defaults.ts @@ -254,9 +254,17 @@ export function applyModelDefaults(cfg: OpenClawConfig): OpenClawConfig { modelMutated = true; } + const optionsNumCtx = + raw.options && + typeof raw.options === "object" && + typeof (raw.options as Record).num_ctx === "number" && + Number.isFinite((raw.options as Record).num_ctx) && + ((raw.options as Record).num_ctx as number) > 0 + ? ((raw.options as Record).num_ctx as number) + : undefined; const contextWindow = isPositiveNumber(raw.contextWindow) ? raw.contextWindow - : DEFAULT_CONTEXT_TOKENS; + : optionsNumCtx ?? DEFAULT_CONTEXT_TOKENS; if (raw.contextWindow !== contextWindow) { modelMutated = true; } diff --git a/src/config/types.models.ts b/src/config/types.models.ts index e1d60bcf695..03feb104d67 100644 --- a/src/config/types.models.ts +++ b/src/config/types.models.ts @@ -59,6 +59,7 @@ export type ModelDefinitionConfig = { maxTokens: number; headers?: Record; compat?: ModelCompatConfig; + options?: Record; }; export type ModelProviderConfig = { diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts index 22c589c8490..8f3340709eb 100644 --- a/src/config/zod-schema.core.ts +++ b/src/config/zod-schema.core.ts @@ -242,6 +242,7 @@ export const ModelDefinitionSchema = z maxTokens: z.number().positive().optional(), headers: z.record(z.string(), z.string()).optional(), compat: ModelCompatSchema, + options: z.record(z.string(), z.unknown()).optional(), }) .strict();