* feat: add PDF analysis tool with native provider support New `pdf` tool for analyzing PDF documents with model-powered analysis. Architecture: - Native PDF path: sends raw PDF bytes directly to providers that support inline document input (Anthropic via DocumentBlockParam, Google Gemini via inlineData with application/pdf MIME type) - Extraction fallback: for providers without native PDF support, extracts text via pdfjs-dist and rasterizes pages to images via @napi-rs/canvas, then sends through the standard vision/text completion path Key features: - Single PDF (`pdf` param) or multiple PDFs (`pdfs` array, up to 10) - Page range selection (`pages` param, e.g. "1-5", "1,3,7-9") - Model override (`model` param) and file size limits (`maxBytesMb`) - Auto-detects provider capability and falls back gracefully - Same security patterns as image tool (SSRF guards, sandbox support, local path roots, workspace-only policy) Config (agents.defaults): - pdfModel: primary/fallbacks (defaults to imageModel, then session model) - pdfMaxBytesMb: max PDF file size (default: 10) - pdfMaxPages: max pages to process (default: 20) Model catalog: - Extended ModelInputType to include "document" alongside "text"/"image" - Added modelSupportsDocument() capability check Files: - src/agents/tools/pdf-tool.ts - main tool factory - src/agents/tools/pdf-tool.helpers.ts - helpers (page range, config, etc.) - src/agents/tools/pdf-native-providers.ts - direct API calls for Anthropic/Google - src/agents/tools/pdf-tool.test.ts - 43 tests covering all paths - Modified: model-catalog.ts, openclaw-tools.ts, config schema/types/labels/help * fix: prepare pdf tool for merge (#31319) (thanks @tyler6204)
34 lines
1.1 KiB
TypeScript
34 lines
1.1 KiB
TypeScript
import fs from "node:fs/promises";
|
|
import os from "node:os";
|
|
import path from "node:path";
|
|
import { describe, expect, it } from "vitest";
|
|
import type { OpenClawConfig } from "../config/config.js";
|
|
import "./test-helpers/fast-core-tools.js";
|
|
import { createOpenClawTools } from "./openclaw-tools.js";
|
|
|
|
/**
 * Runs `run` against a freshly created temporary agent directory and removes
 * that directory afterwards.
 *
 * The directory is created under the OS temp dir with the
 * `openclaw-tools-pdf-` prefix. Cleanup happens in a `finally` clause, so it
 * runs whether the callback resolves or rejects.
 *
 * @param run - Callback that receives the absolute path of the temp directory.
 * @returns Whatever `run` resolves to.
 */
async function withTempAgentDir<T>(run: (agentDir: string) => Promise<T>): Promise<T> {
  const prefix = path.join(os.tmpdir(), "openclaw-tools-pdf-");
  const scratchDir = await fs.mkdtemp(prefix);
  // Recursive + force: remove the whole tree, and do not fail on a missing path.
  const cleanup = (): Promise<void> => fs.rm(scratchDir, { recursive: true, force: true });

  try {
    const outcome = await run(scratchDir);
    return outcome;
  } finally {
    await cleanup();
  }
}
|
|
|
|
// Checks that the tool factory registers a tool named "pdf" when
// `agents.defaults.pdfModel` is set in the config.
describe("createOpenClawTools PDF registration", () => {
  it("includes pdf tool when pdfModel is configured", async () => {
    // Minimal config: only the PDF model is set.
    const pdfEnabledConfig: OpenClawConfig = {
      agents: {
        defaults: {
          pdfModel: { primary: "openai/gpt-5-mini" },
        },
      },
    };

    await withTempAgentDir(async (agentDir) => {
      const registered = createOpenClawTools({ config: pdfEnabledConfig, agentDir });
      const hasPdfTool = registered.some(({ name }) => name === "pdf");

      expect(hasPdfTool).toBe(true);
    });
  });
});
|