diff --git a/apps/web/app/api/chat/route.ts b/apps/web/app/api/chat/route.ts index 5c612941d0e..c22b84ceb0d 100644 --- a/apps/web/app/api/chat/route.ts +++ b/apps/web/app/api/chat/route.ts @@ -78,7 +78,19 @@ export async function POST(req: Request) { .map((p) => p.text) .join("\n") ?? ""; - if (!userText.trim()) { + // Extract image file parts for vision-capable models + type FilePart = { type: "file"; mediaType: string; url: string; filename?: string }; + const imageAttachments: Array<{ mediaType: string; data: string }> = + (lastUserMessage?.parts ?? []) + .filter((p): p is FilePart => + (p as FilePart).type === "file" && + typeof (p as FilePart).mediaType === "string" && + (p as FilePart).mediaType.startsWith("image/") && + typeof (p as FilePart).url === "string", + ) + .map((p) => ({ mediaType: p.mediaType, data: p.url })); + + if (!userText.trim() && imageAttachments.length === 0) { return new Response("No message provided", { status: 400 }); } @@ -106,10 +118,22 @@ export async function POST(req: Request) { let agentMessage = userText; const wsPrefix = resolveAgentWorkspacePrefix(); if (wsPrefix) { - agentMessage = userText.replace( + agentMessage = agentMessage.replace( /\[Context: workspace file '([^']+)'\]/, `[Context: workspace file '${wsPrefix}/$1']`, ); + agentMessage = agentMessage.replace( + /\[Attached files: (.+?)\]/, + (_, paths: string) => { + const prefixed = paths + .split(", ") + .map((p: string) => p.trim()) + .filter(Boolean) + .map((p: string) => p.startsWith("/") ? p : `${wsPrefix}/${p}`) + .join(", "); + return `[Attached files: ${prefixed}]`; + }, + ); } const runKey = isSubagentSession && sessionKey ? sessionKey : (sessionId as string); @@ -151,6 +175,7 @@ export async function POST(req: Request) { message: agentMessage, agentSessionId: sessionId, overrideAgentId: effectiveAgentId, + attachments: imageAttachments.length > 0 ? imageAttachments : undefined, }); } catch (err) { return new Response( diff --git a/apps/web/app/api/gateway/chat/route.ts b/apps/web/app/api/gateway/chat/route.ts index 88f2029f7ba..b9c85a53604 100644 --- a/apps/web/app/api/gateway/chat/route.ts +++ b/apps/web/app/api/gateway/chat/route.ts @@ -9,7 +9,11 @@ import { export const runtime = "nodejs"; export async function POST(req: Request) { - const { sessionKey, message }: { sessionKey: string; message: string } = await req.json(); + const { sessionKey, message, attachments }: { + sessionKey: string; + message: string; + attachments?: Array<{ mediaType: string; data: string }>; + } = await req.json(); if (!sessionKey || !message?.trim()) { return new Response("sessionKey and message are required", { status: 400 }); diff --git a/apps/web/app/components/chat-message.tsx b/apps/web/app/components/chat-message.tsx index 03a64bd5a1a..2209fe5de45 100644 --- a/apps/web/app/components/chat-message.tsx +++ b/apps/web/app/components/chat-message.tsx @@ -785,17 +785,46 @@ export const ChatMessage = memo(function ChatMessage({ message, isStreaming, onS .join("\n"); const attachmentInfo = parseAttachments(textContent); + + // Extract inline image parts (FileUIPart with image/* mediaType) + const inlineImages = message.parts.filter( + (p): p is { type: "file"; mediaType: string; url: string; filename?: string } => + (p as Record).type === "file" && + typeof (p as Record).mediaType === "string" && + ((p as Record).mediaType as string).startsWith("image/") && + typeof (p as Record).url === "string", + ); + + const hasImages = inlineImages.length > 0; + const hasPathAttachments = !!attachmentInfo; + const richHtml = userHtmlMap?.get(message.id) ?? userHtmlMap?.get(textContent) ?? userHtmlMap?.get(attachmentInfo?.message ?? ""); + const displayText = attachmentInfo?.message ?? textContent; const bubbleContent = richHtml ?
- :

{attachmentInfo?.message ?? textContent}

; + : displayText ?

{displayText}

: null; - if (attachmentInfo) { + if (hasImages || hasPathAttachments) { return (
- - {(attachmentInfo.message || richHtml) && ( + {hasPathAttachments && } + {hasImages && ( +
+ {inlineImages.map((img, i) => ( +
+ {img.filename +
+ ))} +
+ )} + {(displayText || richHtml) && (
( // Build message with optional attachment prefix let messageText = userText; - // Merge mention paths and attachment paths + // Separate image attachments (have base64 data) from non-image file references + const imageAttachments = currentAttachments.filter( + (f) => f.base64Data && f.mimeType?.startsWith("image/"), + ); + const nonImageAttachments = currentAttachments.filter( + (f) => !f.base64Data || !f.mimeType?.startsWith("image/"), + ); + + // Build file path prefix for non-image attachments and mentions const allFilePaths = [ ...mentionedFiles.map((f) => f.path), - ...currentAttachments.map((f) => f.path), - ]; + ...nonImageAttachments.map((f) => f.path), + ].filter(Boolean); if (allFilePaths.length > 0) { const prefix = `[Attached files: ${allFilePaths.join(", ")}]`; messageText = messageText @@ -1724,6 +1736,16 @@ export const ChatPanel = forwardRef( isFirstFileMessageRef.current = false; } + // Build FileUIPart[] for image attachments + const fileParts = imageAttachments + .filter((f) => f.base64Data && f.mimeType) + .map((f) => ({ + type: "file" as const, + mediaType: f.mimeType!, + url: f.base64Data!, + filename: f.name, + })); + // Store HTML for display and pipe to server via transport userHtmlMapRef.current.set(messageText, html); pendingHtmlRef.current = html; @@ -1731,25 +1753,39 @@ export const ChatPanel = forwardRef( userScrolledAwayRef.current = false; if (gatewaySessionKey) { + const msgParts: UIMessage["parts"] = [ + { type: "text" as const, text: messageText }, + ...fileParts, + ]; const userMsg = { id: `user-${Date.now()}`, role: "user" as const, - parts: [{ type: "text" as const, text: messageText }] as UIMessage["parts"], + parts: msgParts, }; setMessages((prev) => [...prev, userMsg]); try { + const attachments = imageAttachments + .filter((f) => f.base64Data && f.mimeType) + .map((f) => ({ mediaType: f.mimeType!, data: f.base64Data! })); const res = await fetch("/api/gateway/chat", { method: "POST", headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ sessionKey: gatewaySessionKey, message: messageText }), + body: JSON.stringify({ + sessionKey: gatewaySessionKey, + message: messageText, + ...(attachments.length > 0 ? { attachments } : {}), + }), }); if (res.ok && res.body) { await attemptReconnect(gatewaySessionKey, [], { sessionKey: gatewaySessionKey }); } } catch { /* ignore */ } } else { - void sendMessage({ text: messageText }); + void sendMessage({ + text: messageText, + ...(fileParts.length > 0 ? { files: fileParts } : {}), + }); } }, [ @@ -2029,7 +2065,8 @@ export const ChatPanel = forwardRef( }, []); /** Upload native files (e.g. dropped from Finder/Desktop) and attach them. - * Shows files instantly with a local preview, then uploads in the background. */ + * Shows files instantly with a local preview, then uploads in the background. + * For images, also reads the file as a base64 data URL for AI vision. */ const uploadAndAttachNativeFiles = useCallback( (files: FileList) => { const fileArray = Array.from(files); @@ -2041,6 +2078,7 @@ export const ChatPanel = forwardRef( path: "", uploading: true, localUrl: URL.createObjectURL(file), + mimeType: file.type || undefined, })); setAttachedFiles((prev) => [...prev, ...placeholders]); @@ -2050,6 +2088,23 @@ export const ChatPanel = forwardRef( const placeholderId = placeholders[i].id; const localUrl = placeholders[i].localUrl; + // For image files, read as base64 for AI vision + const isImage = file.type.startsWith("image/"); + if (isImage) { + const reader = new FileReader(); + reader.onload = () => { + const dataUrl = reader.result as string; + setAttachedFiles((prev) => + prev.map((f) => + f.id === placeholderId + ? { ...f, base64Data: dataUrl, mimeType: file.type } + : f, + ), + ); + }; + reader.readAsDataURL(file); + } + const form = new FormData(); form.append("file", file); fetch("/api/workspace/upload", { @@ -2059,7 +2114,6 @@ export const ChatPanel = forwardRef( .then((res) => res.ok ? res.json() : null) .then((json: { ok?: boolean; path?: string } | null) => { if (json?.ok && json.path) { - // Replace placeholder with the real uploaded file setAttachedFiles((prev) => prev.map((f) => f.id === placeholderId @@ -2068,7 +2122,6 @@ export const ChatPanel = forwardRef( ), ); } else { - // Upload failed — remove the placeholder setAttachedFiles((prev) => prev.filter((f) => f.id !== placeholderId)); if (localUrl) {URL.revokeObjectURL(localUrl);} } diff --git a/apps/web/lib/active-runs.ts b/apps/web/lib/active-runs.ts index a89facf2382..dfccd9b7513 100644 --- a/apps/web/lib/active-runs.ts +++ b/apps/web/lib/active-runs.ts @@ -551,8 +551,10 @@ export function startRun(params: { agentSessionId?: string; /** Use a specific agent ID instead of the workspace default. */ overrideAgentId?: string; + /** Image attachments to forward to the gateway for vision models. */ + attachments?: Array<{ mediaType: string; data: string }>; }): ActiveRun { - const { sessionId, message, agentSessionId, overrideAgentId } = params; + const { sessionId, message, agentSessionId, overrideAgentId, attachments } = params; const existing = activeRuns.get(sessionId); if (existing?.status === "running") { @@ -566,7 +568,7 @@ export function startRun(params: { ? `agent:${agentId}:web:${agentSessionId}` : undefined; const abortController = new AbortController(); - const child = spawnAgentProcess(message, agentSessionId, overrideAgentId); + const child = spawnAgentProcess(message, agentSessionId, overrideAgentId, attachments); const run: ActiveRun = { sessionId, diff --git a/apps/web/lib/agent-runner.ts b/apps/web/lib/agent-runner.ts index 93b4b429a58..d3d1da70f4f 100644 --- a/apps/web/lib/agent-runner.ts +++ b/apps/web/lib/agent-runner.ts @@ -145,6 +145,8 @@ type SpawnGatewayProcessParams = { sessionKey?: string; afterSeq: number; lane?: string; + /** Image attachments to send alongside the message for vision models. */ + attachments?: Array<{ mediaType: string; data: string }>; }; type BuildConnectParamsOptions = { @@ -801,6 +803,7 @@ class GatewayProcessHandle startRes = await this.client.request("chat.send", { message: msg, ...(sessionKey ? { sessionKey } : {}), + ...(this.params.attachments?.length ? { attachments: this.params.attachments } : {}), idempotencyKey: randomUUID(), deliver: false, }); @@ -1220,11 +1223,13 @@ export async function callGatewayRpc( /** * Start an agent run via the Gateway WebSocket and return a process handle. * @param overrideAgentId - Use a specific agent ID instead of the workspace default. + * @param attachments - Image attachments for vision models. */ export function spawnAgentProcess( message: string, agentSessionId?: string, overrideAgentId?: string, + attachments?: Array<{ mediaType: string; data: string }>, ): AgentProcessHandle { const agentId = overrideAgentId ?? resolveActiveAgentId(); const sessionKey = agentSessionId @@ -1236,6 +1241,7 @@ export function spawnAgentProcess( sessionKey, afterSeq: 0, lane: agentSessionId ? `web:${agentSessionId}` : "web", + attachments, }); }