From df539d50851f7f0ce245afde10942a5a045510a7 Mon Sep 17 00:00:00 2001
From: jianxh <jianxh@111.com>
Date: Sat, 21 Mar 2026 08:42:52 +0800
Subject: [PATCH] fix(agents): accept model options in config and safely
 recover Ollama tool calls

Closes #50894

Add options field to ModelDefinitionSchema and propagate it through applyConfiguredProviderOverrides so user-specified Ollama settings (e.g. num_ctx) reach the runtime instead of being dropped during model resolution.

Use options.num_ctx as fallback for contextWindow in both native Ollama and OpenAI-compat num_ctx injection paths (extracted once, reused in both places).

Add recoverToolCallsFromText for tool_call XML tags only (no fenced JSON blocks per security review). Strip recovered markup from text content to prevent history pollution on subsequent turns.

Made-with: Cursor
---
 src/agents/ollama-stream.test.ts             | 173 +++++++++++++++++++
 src/agents/ollama-stream.ts                  |  86 ++++++++-
 src/agents/pi-embedded-runner/model.ts       |   4 +-
 src/agents/pi-embedded-runner/run/attempt.ts |  15 +-
 src/config/defaults.ts                       |  10 +-
 src/config/types.models.ts                   |   1 +
 src/config/zod-schema.core.ts                |   1 +
 7 files changed, 279 insertions(+), 11 deletions(-)

diff --git a/src/agents/ollama-stream.test.ts b/src/agents/ollama-stream.test.ts
index ded8064ea19..7b6fb0febcf 100644
--- a/src/agents/ollama-stream.test.ts
+++ b/src/agents/ollama-stream.test.ts
@@ -4,6 +4,7 @@ import {
   createOllamaStreamFn,
   convertToOllamaMessages,
   buildAssistantMessage,
+  recoverToolCallsFromText,
   parseNdjsonStream,
   resolveOllamaBaseUrlForRun,
 } from "./ollama-stream.js";
@@ -182,6 +183,119 @@ describe("buildAssistantMessage", () => {
       total: 0,
     });
   });
+
+  it("recovers tool calls from <tool_call> tags and strips markup from text", () => {
+    const response = {
+      model: "qwen3.5:9b",
+      created_at: "2026-01-01T00:00:00Z",
+      message: {
+        role: "assistant" as const,
+        content:
+          'I\'ll save that for you.\n<tool_call>\n{"name": "write_file", "arguments": {"path": "/tmp/note.md", "content": "hello"}}\n</tool_call>',
+      },
+      done: true,
+      prompt_eval_count: 50,
+      eval_count: 30,
+    };
+    const result = buildAssistantMessage(response, {
+      api: "ollama",
+      provider: "ollama",
+      id: "qwen3.5:9b",
+    });
+    expect(result.stopReason).toBe("toolUse");
+    const textParts = result.content.filter((c) => c.type === "text");
+    expect(textParts.length).toBe(1);
+    expect((textParts[0] as { text: string }).text).toBe("I'll save that for you.");
+    const toolCalls = result.content.filter((c) => c.type === "toolCall");
+    expect(toolCalls.length).toBe(1);
+    const tc = toolCalls[0] as { name: string; arguments: Record<string, unknown> };
+    expect(tc.name).toBe("write_file");
+    expect(tc.arguments).toEqual({ path: "/tmp/note.md", content: "hello" });
+  });
+
+  it("does not recover tool calls when proper tool_calls exist", () => {
+    const response = {
+      model: "qwen3.5:9b",
+      created_at: "2026-01-01T00:00:00Z",
+      message: {
+        role: "assistant" as const,
+        content: '<tool_call>{"name": "ghost", "arguments": {}}</tool_call>',
+        tool_calls: [{ function: { name: "bash", arguments: { command: "ls" } } }],
+      },
+      done: true,
+    };
+    const result = buildAssistantMessage(response, modelInfo);
+    const toolCalls = result.content.filter((c) => c.type === "toolCall");
+    expect(toolCalls.length).toBe(1);
+    expect((toolCalls[0] as { name: string }).name).toBe("bash");
+  });
+
+  it("does not recover tool calls from fenced JSON code blocks", () => {
+    const response = {
+      model: "qwen3.5:9b",
+      created_at: "2026-01-01T00:00:00Z",
+      message: {
+        role: "assistant" as const,
+        content:
+          'Here is an example:\n```json\n{"name": "bash", "arguments": {"command": "rm -rf /"}}\n```',
+      },
+      done: true,
+    };
+    const result = buildAssistantMessage(response, {
+      api: "ollama",
+      provider: "ollama",
+      id: "qwen3.5:9b",
+    });
+    expect(result.stopReason).toBe("stop");
+    expect(result.content.filter((c) => c.type === "toolCall").length).toBe(0);
+  });
+});
+
+describe("recoverToolCallsFromText", () => {
+  it("extracts tool calls from <tool_call> XML tags", () => {
+    const text =
+      'Sure!\n<tool_call>\n{"name": "bash", "arguments": {"command": "ls"}}\n</tool_call>';
+    const result = recoverToolCallsFromText(text);
+    expect(result).toEqual([{ function: { name: "bash", arguments: { command: "ls" } } }]);
+  });
+
+  it("handles function-wrapped format", () => {
+    const text =
+      '<tool_call>{"function": {"name": "bash", "arguments": {"command": "pwd"}}}</tool_call>';
+    const result = recoverToolCallsFromText(text);
+    expect(result).toEqual([{ function: { name: "bash", arguments: { command: "pwd" } } }]);
+  });
+
+  it("handles parameters key instead of arguments", () => {
+    const text = '<tool_call>{"name": "read", "parameters": {"path": "/tmp/x"}}</tool_call>';
+    const result = recoverToolCallsFromText(text);
+    expect(result).toEqual([{ function: { name: "read", arguments: { path: "/tmp/x" } } }]);
+  });
+
+  it("returns empty for plain text without tool patterns", () => {
+    expect(recoverToolCallsFromText("Just a normal reply.")).toEqual([]);
+  });
+
+  it("returns empty for malformed JSON inside tags", () => {
+    expect(recoverToolCallsFromText("<tool_call>not json</tool_call>")).toEqual([]);
+  });
+
+  it("extracts multiple tool calls", () => {
+    const text = [
+      '<tool_call>{"name": "bash", "arguments": {"command": "ls"}}</tool_call>',
+      '<tool_call>{"name": "read", "arguments": {"path": "a.txt"}}</tool_call>',
+    ].join("\n");
+    const result = recoverToolCallsFromText(text);
+    expect(result.length).toBe(2);
+    expect(result[0]!.function.name).toBe("bash");
+    expect(result[1]!.function.name).toBe("read");
+  });
+
+  it("ignores fenced JSON blocks (security: no code-block recovery)", () => {
+    const text =
+      '```json\n{"name": "bash", "arguments": {"command": "rm -rf /"}}\n```';
+    expect(recoverToolCallsFromText(text)).toEqual([]);
+  });
 });
 
 // Helper: build a ReadableStreamDefaultReader from NDJSON lines
@@ -399,6 +513,65 @@ describe("createOllamaStreamFn", () => {
     );
   });
 
+  it("uses model options num_ctx when contextWindow is not set", async () => {
+    await withMockNdjsonFetch(
+      [
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":true,"prompt_eval_count":1,"eval_count":1}',
+      ],
+      async (fetchMock) => {
+        const streamFn = createOllamaStreamFn("http://ollama-host:11434", undefined, {
+          num_ctx: 8192,
+        });
+        const stream = await Promise.resolve(
+          streamFn(
+            { id: "qwen3.5:9b", api: "ollama", provider: "ollama" } as never,
+            { messages: [{ role: "user", content: "hi" }] } as never,
+            {} as never,
+          ),
+        );
+        await collectStreamEvents(stream);
+
+        const [, requestInit] = fetchMock.mock.calls[0] as unknown as [string, RequestInit];
+        const body = JSON.parse(requestInit.body as string) as {
+          options: Record<string, unknown>;
+        };
+        expect(body.options.num_ctx).toBe(8192);
+      },
+    );
+  });
+
+  it("contextWindow takes precedence over model options num_ctx", async () => {
+    await withMockNdjsonFetch(
+      [
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":true,"prompt_eval_count":1,"eval_count":1}',
+      ],
+      async (fetchMock) => {
+        const streamFn = createOllamaStreamFn("http://ollama-host:11434", undefined, {
+          num_ctx: 8192,
+        });
+        const stream = await Promise.resolve(
+          streamFn(
+            {
+              id: "qwen3.5:9b",
+              api: "ollama",
+              provider: "ollama",
+              contextWindow: 131072,
+            } as never,
+            { messages: [{ role: "user", content: "hi" }] } as never,
+            {} as never,
+          ),
+        );
+        await collectStreamEvents(stream);
+
+        const [, requestInit] = fetchMock.mock.calls[0] as unknown as [string, RequestInit];
+        const body = JSON.parse(requestInit.body as string) as {
+          options: Record<string, unknown>;
+        };
+        expect(body.options.num_ctx).toBe(131072);
+      },
+    );
+  });
+
   it("merges default headers and allows request headers to override them", async () => {
     await withMockNdjsonFetch(
       [
diff --git a/src/agents/ollama-stream.ts b/src/agents/ollama-stream.ts
index f332ad1fd83..c7aca409e5e 100644
--- a/src/agents/ollama-stream.ts
+++ b/src/agents/ollama-stream.ts
@@ -335,6 +335,49 @@ function extractOllamaTools(tools: Tool[] | undefined): OllamaTool[] {
 
 // ── Response conversion ─────────────────────────────────────────────────────
 
+/**
+ * Recover structured tool calls from `<tool_call>…</tool_call>` tags in text.
+ *
+ * Some smaller local models (e.g. Qwen3.5-9B) understand tool definitions but
+ * emit the call as text instead of a proper `tool_calls` array.  We only match
+ * the explicit `<tool_call>` tag pattern to minimise false-positive risk;
+ * generic fenced JSON blocks are intentionally ignored.
+ */
+export function recoverToolCallsFromText(text: string): OllamaToolCall[] {
+  const recovered: OllamaToolCall[] = [];
+  const tagRegex = /<tool_call>\s*([\s\S]*?)\s*<\/tool_call>/gi;
+  let match: RegExpExecArray | null;
+  while ((match = tagRegex.exec(text)) !== null) {
+    const parsed = tryParseToolCallJson(match[1]!);
+    if (parsed) {
+      recovered.push(parsed);
+    }
+  }
+  return recovered;
+}
+
+function tryParseToolCallJson(raw: string): OllamaToolCall | null {
+  try {
+    const obj = parseJsonPreservingUnsafeIntegers(raw.trim()) as Record<string, unknown>;
+    const name = obj.name ?? (obj.function as Record<string, unknown> | undefined)?.name;
+    const args =
+      obj.arguments ??
+      obj.parameters ??
+      (obj.function as Record<string, unknown> | undefined)?.arguments;
+    if (typeof name === "string" && name && args && typeof args === "object" && !Array.isArray(args)) {
+      return { function: { name, arguments: args as Record<string, unknown> } };
+    }
+  } catch {
+    // Not valid JSON – skip.
+  }
+  return null;
+}
+
+/** Strip `<tool_call>…</tool_call>` blocks from text after recovery. */
+function stripToolCallTags(text: string): string {
+  return text.replace(/<tool_call>[\s\S]*?<\/tool_call>/gi, "").replace(/\n{3,}/g, "\n\n").trim();
+}
+
 export function buildAssistantMessage(
   response: OllamaChatResponse,
   modelInfo: { api: string; provider: string; id: string },
@@ -344,11 +387,31 @@ export function buildAssistantMessage(
   // Native Ollama reasoning fields are internal model output. The reply text
   // must come from `content`; reasoning visibility is controlled elsewhere.
   const text = response.message.content || "";
-  if (text) {
-    content.push({ type: "text", text });
+
+  let toolCalls = response.message.tool_calls;
+  let didRecover = false;
+
+  // Fallback: recover tool calls from <tool_call> tags when the model
+  // produced no native tool_calls.  Only the explicit XML-tag pattern is
+  // matched to reduce false-positive risk (see security review).
+  if ((!toolCalls || toolCalls.length === 0) && text) {
+    const recovered = recoverToolCallsFromText(text);
+    if (recovered.length > 0) {
+      toolCalls = recovered;
+      didRecover = true;
+      log.info(
+        `Recovered ${recovered.length} tool call(s) from text output for model ${modelInfo.id}`,
+      );
+    }
+  }
+
+  // When tool calls were recovered from text, strip the raw markup so it
+  // does not pollute conversation history on subsequent turns.
+  const visibleText = didRecover ? stripToolCallTags(text) : text;
+  if (visibleText) {
+    content.push({ type: "text", text: visibleText });
   }
 
-  const toolCalls = response.message.tool_calls;
   if (toolCalls && toolCalls.length > 0) {
     for (const tc of toolCalls) {
       content.push({
@@ -434,6 +497,7 @@ function resolveOllamaModelHeaders(model: {
 export function createOllamaStreamFn(
   baseUrl: string,
   defaultHeaders?: Record<string, string>,
+  modelOptions?: Record<string, unknown>,
 ): StreamFn {
   const chatUrl = resolveOllamaChatUrl(baseUrl);
 
@@ -449,9 +513,16 @@ export function createOllamaStreamFn(
 
         const ollamaTools = extractOllamaTools(context.tools);
 
-        // Ollama defaults to num_ctx=4096 which is too small for large
-        // system prompts + many tool definitions. Use model's contextWindow.
-        const ollamaOptions: Record<string, unknown> = { num_ctx: model.contextWindow ?? 65536 };
+        const userNumCtx =
+          modelOptions &&
+          typeof modelOptions.num_ctx === "number" &&
+          Number.isFinite(modelOptions.num_ctx)
+            ? modelOptions.num_ctx
+            : undefined;
+        const ollamaOptions: Record<string, unknown> = {
+          ...(modelOptions ?? {}),
+          num_ctx: model.contextWindow ?? userNumCtx ?? 65536,
+        };
         if (typeof options?.temperature === "number") {
           ollamaOptions.temperature = options.temperature;
         }
@@ -561,7 +632,7 @@ export function createOllamaStreamFn(
 }
 
 export function createConfiguredOllamaStreamFn(params: {
-  model: { baseUrl?: string; headers?: unknown };
+  model: { baseUrl?: string; headers?: unknown; options?: Record<string, unknown> };
   providerBaseUrl?: string;
 }): StreamFn {
   const modelBaseUrl = typeof params.model.baseUrl === "string" ? params.model.baseUrl : undefined;
@@ -571,5 +642,6 @@ export function createConfiguredOllamaStreamFn(params: {
       providerBaseUrl: params.providerBaseUrl,
     }),
     resolveOllamaModelHeaders(params.model),
+    params.model.options,
   );
 }
diff --git a/src/agents/pi-embedded-runner/model.ts b/src/agents/pi-embedded-runner/model.ts
index 5bf97a683d0..7b396128323 100644
--- a/src/agents/pi-embedded-runner/model.ts
+++ b/src/agents/pi-embedded-runner/model.ts
@@ -146,7 +146,8 @@ function applyConfiguredProviderOverrides(params: {
           }
         : undefined,
     compat: configuredModel?.compat ?? discoveredModel.compat,
-  };
+    ...(configuredModel?.options != null ? { options: configuredModel.options } : {}),
+  } as Model<Api>;
 }
 
 export function buildInlineProviderModels(
@@ -301,6 +302,7 @@ export function resolveModelWithRegistry(params: {
           DEFAULT_CONTEXT_TOKENS,
         headers:
           providerHeaders || modelHeaders ? { ...providerHeaders, ...modelHeaders } : undefined,
+        ...(configuredModel?.options != null ? { options: configuredModel.options } : {}),
       } as Model<Api>,
     });
   }
diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts
index d785218f819..4dd553b878a 100644
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -2159,12 +2159,20 @@ export async function runEmbeddedAttempt(
       queueYieldInterruptForSession = () => {
         queueSessionsYieldInterruptMessage(activeSession);
       };
+      const modelWithOptions = params.model as { options?: Record<string, unknown> };
+      const optionsNumCtx =
+        typeof modelWithOptions.options?.num_ctx === "number"
+          ? modelWithOptions.options.num_ctx
+          : undefined;
       removeToolResultContextGuard = installToolResultContextGuard({
         agent: activeSession.agent,
         contextWindowTokens: Math.max(
           1,
           Math.floor(
-            params.model.contextWindow ?? params.model.maxTokens ?? DEFAULT_CONTEXT_TOKENS,
+            params.model.contextWindow ??
+              optionsNumCtx ??
+              params.model.maxTokens ??
+              DEFAULT_CONTEXT_TOKENS,
           ),
         ),
       });
@@ -2237,7 +2245,10 @@ export async function runEmbeddedAttempt(
         const numCtx = Math.max(
           1,
           Math.floor(
-            params.model.contextWindow ?? params.model.maxTokens ?? DEFAULT_CONTEXT_TOKENS,
+            params.model.contextWindow ??
+              optionsNumCtx ??
+              params.model.maxTokens ??
+              DEFAULT_CONTEXT_TOKENS,
           ),
         );
         activeSession.agent.streamFn = wrapOllamaCompatNumCtx(activeSession.agent.streamFn, numCtx);
diff --git a/src/config/defaults.ts b/src/config/defaults.ts
index 2febc3869ee..6db7e0c25e4 100644
--- a/src/config/defaults.ts
+++ b/src/config/defaults.ts
@@ -254,9 +254,17 @@ export function applyModelDefaults(cfg: OpenClawConfig): OpenClawConfig {
           modelMutated = true;
         }
 
+        const optionsNumCtx =
+          raw.options &&
+          typeof raw.options === "object" &&
+          typeof (raw.options as Record<string, unknown>).num_ctx === "number" &&
+          Number.isFinite((raw.options as Record<string, unknown>).num_ctx) &&
+          ((raw.options as Record<string, unknown>).num_ctx as number) > 0
+            ? ((raw.options as Record<string, unknown>).num_ctx as number)
+            : undefined;
         const contextWindow = isPositiveNumber(raw.contextWindow)
           ? raw.contextWindow
-          : DEFAULT_CONTEXT_TOKENS;
+          : optionsNumCtx ?? DEFAULT_CONTEXT_TOKENS;
         if (raw.contextWindow !== contextWindow) {
           modelMutated = true;
         }
diff --git a/src/config/types.models.ts b/src/config/types.models.ts
index e1d60bcf695..03feb104d67 100644
--- a/src/config/types.models.ts
+++ b/src/config/types.models.ts
@@ -59,6 +59,7 @@ export type ModelDefinitionConfig = {
   maxTokens: number;
   headers?: Record<string, string>;
   compat?: ModelCompatConfig;
+  options?: Record<string, unknown>;
 };
 
 export type ModelProviderConfig = {
diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts
index 22c589c8490..8f3340709eb 100644
--- a/src/config/zod-schema.core.ts
+++ b/src/config/zod-schema.core.ts
@@ -242,6 +242,7 @@ export const ModelDefinitionSchema = z
     maxTokens: z.number().positive().optional(),
     headers: z.record(z.string(), z.string()).optional(),
     compat: ModelCompatSchema,
+    options: z.record(z.string(), z.unknown()).optional(),
   })
   .strict();