fix(web-fetch): tighten cache key and docs

2026-03-14 16:36:33 +08:00 · 2026-03-14 16:36:33 +08:00 · f54dac6d52
commit f54dac6d52
parent 7022767fbc
5 changed files with 106 additions and 45 deletions
--- a/docs/tools/web.md
+++ b/docs/tools/web.md
@ -387,25 +387,30 @@ Notes:
 - If you use tool profiles/allowlists, add `web_search`/`web_fetch` or `group:web`.
 - If the API key is missing, `web_search` returns a short setup hint with a docs link.

-* +### SSRF policy for web_fetch
-* +`tools.web.fetch.ssrfPolicy` lets you tighten or relax the SSRF guard for `web_fetch` requests without affecting other tools. The optional fields mirror the browser-level settings:
-* +- `allowPrivateNetwork`: legacy alias for `dangerouslyAllowPrivateNetwork`. Set to `true` to permit private/internal IP addresses.
-  +- `dangerouslyAllowPrivateNetwork`: high-risk toggle that removes private/internal/special-use blocking. Enable only in trusted, isolated environments.
-  +- `allowedHostnames`: explicitly allowed hostnames or IPs that bypass private network checks even when private access is blocked globally.
-  +- `hostnameAllowlist`: pattern-based allowlist (e.g. `*.internal`) that shortlists which hostnames `web_fetch` is allowed to reach.
-* +Example:
-* +```json5
-  +{
-* tools: {
-* web: {
-*      fetch: {
-*        ssrfPolicy: {
-*          hostnameAllowlist: ["example.com", "*.example.internal"],
-*          allowedHostnames: ["192.168.1.42"],
-*        },
-*      },
-* },
-* },
-  +}
-  +```
-* +**Risk note:** `dangerouslyAllowPrivateNetwork` (and its alias `allowPrivateNetwork`) undermines the default SSRF blocking, so avoid enabling it unless you fully trust the target network. Prefer allowlists/whitelists to expand access only for specific hosts. If you do need to relax the guard, keep a narrow `hostnameAllowlist` or `allowedHostnames` and monitor the usage closely.
+### SSRF policy for web_fetch
+
+`tools.web.fetch.ssrfPolicy` lets you tighten or relax the SSRF guard for `web_fetch` requests without affecting other tools. The optional fields mirror the browser-level settings:
+
+- `allowPrivateNetwork`: legacy alias for `dangerouslyAllowPrivateNetwork`. Set to `true` to permit private/internal IP addresses.
+- `dangerouslyAllowPrivateNetwork`: high-risk toggle that removes private/internal/special-use blocking. Enable only in trusted, isolated environments.
+- `allowedHostnames`: explicitly allowed hostnames or IPs that bypass private network checks even when private access is blocked globally.
+- `hostnameAllowlist`: pattern-based allowlist (e.g. `*.internal`) that restricts which hostnames `web_fetch` is allowed to reach.
+
+Example:
+
+```json5
+{
+  tools: {
+    web: {
+      fetch: {
+        ssrfPolicy: {
+          hostnameAllowlist: ["example.com", "*.example.internal"],
+          allowedHostnames: ["192.168.1.42"],
+        },
+      },
+    },
+  },
+}
+```
+
+**Risk note:** `dangerouslyAllowPrivateNetwork` (and its alias `allowPrivateNetwork`) undermines the default SSRF blocking, so avoid enabling it unless you fully trust the target network. Prefer an explicit allowlist to expand access only for specific hosts. If you do need to relax the guard, keep a narrow `hostnameAllowlist` or `allowedHostnames` and monitor the usage closely.
--- a/docs/zh-CN/tools/web.md
+++ b/docs/zh-CN/tools/web.md
@ -257,24 +257,28 @@ await web_search({
 - 如果缺少 Brave 密钥，`web_search` 返回一个简短的设置提示和文档链接。

 * +### web_fetch 的 SSRF 策略
-* +`tools.web.fetch.ssrfPolicy` 允许你在不影响其他工具的前提下，放宽或收紧 `web_fetch` 的 SSRF 保护。可选字段沿用了浏览器层的语义：
-* +- `allowPrivateNetwork`：`dangerouslyAllowPrivateNetwork` 的兼容别名。设置为 `true` 可允许访问私有/内部 IP。
-  +- `dangerouslyAllowPrivateNetwork`：高风险开关，禁用私网/特殊用途地址拦截。仅在完全受控的环境中启用。
-  +- `allowedHostnames`：显式允许的主机名或 IP，哪怕私网检查仍在，也能绕过拦截。
-  +- `hostnameAllowlist`：支持模式（例如 `*.internal`）的 hostname 白名单，用于限定 `web_fetch` 的目标范围。
-* +示例：
-* +```json5
-  +{
-* tools: {
-* web: {
-*      fetch: {
-*        ssrfPolicy: {
-*          hostnameAllowlist: ["example.com", "*.example.internal"],
-*          allowedHostnames: ["192.168.1.42"],
-*        },
-*      },
-* },
-* },
-  +}
-  +```
-* +**风险提示：** `dangerouslyAllowPrivateNetwork`（及其别名 `allowPrivateNetwork`）会弱化默认 SSRF 拦截，非必要不要启用。优先使用 allowlist/hostname 精准放行特定主机；若必须放宽，务必限制 `hostnameAllowlist` 或 `allowedHostnames`，并密切监控访问。
+  `tools.web.fetch.ssrfPolicy` 允许你在不影响其他工具的前提下，放宽或收紧 `web_fetch` 的 SSRF 保护。可选字段沿用了浏览器层的语义：
+
+- `allowPrivateNetwork`：`dangerouslyAllowPrivateNetwork` 的兼容别名。设置为 `true` 可允许访问私有/内部 IP。
+- `dangerouslyAllowPrivateNetwork`：高风险开关，禁用私网/特殊用途地址拦截。仅在完全受控的环境中启用。
+- `allowedHostnames`：显式允许的主机名或 IP，哪怕私网检查仍在，也能绕过拦截。
+- `hostnameAllowlist`：支持模式（例如 `*.internal`）的 hostname 白名单，用于限定 `web_fetch` 的目标范围。
+
+示例：
+
+```json5
+{
+  tools: {
+    web: {
+      fetch: {
+        ssrfPolicy: {
+          hostnameAllowlist: ["example.com", "*.example.internal"],
+          allowedHostnames: ["192.168.1.42"],
+        },
+      },
+    },
+  },
+}
+```
+
+**风险提示：** `dangerouslyAllowPrivateNetwork`（及其别名 `allowPrivateNetwork`）会弱化默认 SSRF 拦截，非必要不要启用。优先使用 allowlist/hostname 精准放行特定主机；若必须放宽，务必限制 `hostnameAllowlist` 或 `allowedHostnames`，并密切监控访问。
--- a/src/agents/tools/web-fetch.ssrf.test.ts
+++ b/src/agents/tools/web-fetch.ssrf.test.ts
@ -174,4 +174,53 @@ describe("web_fetch SSRF protection", () => {

    await expect(tool?.execute?.("call", { url: "http://192.168.1.1" })).resolves.toBeDefined();
  });
+
+  it("cache key differentiates between different SSRF policies", async () => {
+    const { createWebFetchTool } = await import("./web-tools.js");
+
+    lookupMock.mockResolvedValue([{ address: "93.184.216.34", family: 4 }]);
+
+    const testUrl = "https://example.com/page";
+    let callCount = 0;
+    setMockFetch().mockImplementation(async () => {
+      callCount++;
+      return textResponse(`response-${callCount}`);
+    });
+
+    // Create tool with caching ENABLED (non-zero cacheTtlMinutes)
+    const createTool = (ssrfPolicy?: ssrf.SsrFPolicy) => {
+      const fetchConfig: Record<string, unknown> = {
+        cacheTtlMinutes: 15, // Enable caching for this test
+        firecrawl: { enabled: false },
+      };
+      if (ssrfPolicy) {
+        fetchConfig.ssrfPolicy = ssrfPolicy;
+      }
+      return createWebFetchTool({
+        config: {
+          tools: {
+            web: {
+              fetch: fetchConfig,
+            },
+          },
+        },
+      });
+    };
+
+    // First, fetch with no SSRF policy
+    const toolNoPolicy = createTool();
+    const result1 = await toolNoPolicy?.execute?.("call", { url: testUrl });
+    expect(callCount).toBe(1);
+    expect(result1?.details?.text).toContain("response-1");
+
+    // Fetch the same URL with a different SSRF policy
+    // This should NOT hit the cache from the first call, creating a new fetch
+    const toolWithPolicy = createTool({ allowPrivateNetwork: true });
+    const result2 = await toolWithPolicy?.execute?.("call", { url: testUrl });
+    expect(callCount).toBe(2); // Should have called fetch again, not used cache
+    expect(result2?.details?.text).toContain("response-2");
+
+    // Verify different policies produce different results due to separate cache entries
+    expect(result1?.details?.text).not.toEqual(result2?.details?.text);
+  });
 });
--- a/src/agents/tools/web-fetch.ts
+++ b/src/agents/tools/web-fetch.ts
@ -508,7 +508,7 @@ async function maybeFetchFirecrawlWebFetchPayload(

 async function runWebFetch(params: WebFetchRuntimeParams): Promise<Record<string, unknown>> {
  const cacheKey = normalizeCacheKey(
-    `fetch:${params.url}:${params.extractMode}:${params.maxChars}`,
+    `fetch:${params.url}:${params.extractMode}:${params.maxChars}:${JSON.stringify(params.ssrfPolicy ?? null)}`,
  );
  const cached = readCache(FETCH_CACHE, cacheKey);
  if (cached) {
--- a/src/agents/tools/web-guarded-fetch.ts
+++ b/src/agents/tools/web-guarded-fetch.ts
@ -20,7 +20,10 @@ type WebToolGuardedFetchOptions = Omit<
  useEnvProxy?: boolean;
  ssrfPolicy?: SsrFPolicy;
 };
-type WebToolEndpointFetchOptions = Omit<WebToolGuardedFetchOptions, "policy" | "useEnvProxy">;
+type WebToolEndpointFetchOptions = Omit<
+  WebToolGuardedFetchOptions,
+  "policy" | "useEnvProxy" | "ssrfPolicy"
+>;

 function resolveTimeoutMs(params: {
  timeoutMs?: number;