fix: send image attachments to gateway and prefix attached file paths
Two issues with image handling in chat:

1. Images pasted or dropped into chat were uploaded to disk, but only their file paths were sent as plain text, so the model never received the actual image bytes. Chat now sends base64 image data as `attachments` in the chat.send RPC so vision-capable models can see images directly.

2. Attached file paths (e.g. assets/screenshot.png) were not prefixed with the workspace root, unlike [Context: workspace file '...'] paths, so the agent couldn't resolve relative paths. Now both patterns get the workspace prefix.

Files changed:
- chat-panel.tsx: read images as base64 via FileReader, send as FileUIPart
- chat/route.ts: extract image file parts, prefix attached file paths
- gateway/chat/route.ts: accept attachments in request body
- active-runs.ts: thread attachments through startRun
- agent-runner.ts: forward attachments to chat.send RPC
- chat-message.tsx: render inline image previews in user messages

Made-with: Cursor
This commit is contained in:
parent
c38d1281d4
commit
c73acb731f
@ -78,7 +78,19 @@ export async function POST(req: Request) {
|
||||
.map((p) => p.text)
|
||||
.join("\n") ?? "";
|
||||
|
||||
if (!userText.trim()) {
|
||||
// Extract image file parts for vision-capable models
|
||||
type FilePart = { type: "file"; mediaType: string; url: string; filename?: string };
|
||||
const imageAttachments: Array<{ mediaType: string; data: string }> =
|
||||
(lastUserMessage?.parts ?? [])
|
||||
.filter((p): p is FilePart =>
|
||||
(p as FilePart).type === "file" &&
|
||||
typeof (p as FilePart).mediaType === "string" &&
|
||||
(p as FilePart).mediaType.startsWith("image/") &&
|
||||
typeof (p as FilePart).url === "string",
|
||||
)
|
||||
.map((p) => ({ mediaType: p.mediaType, data: p.url }));
|
||||
|
||||
if (!userText.trim() && imageAttachments.length === 0) {
|
||||
return new Response("No message provided", { status: 400 });
|
||||
}
|
||||
|
||||
@ -106,10 +118,22 @@ export async function POST(req: Request) {
|
||||
let agentMessage = userText;
|
||||
const wsPrefix = resolveAgentWorkspacePrefix();
|
||||
if (wsPrefix) {
|
||||
agentMessage = userText.replace(
|
||||
agentMessage = agentMessage.replace(
|
||||
/\[Context: workspace file '([^']+)'\]/,
|
||||
`[Context: workspace file '${wsPrefix}/$1']`,
|
||||
);
|
||||
agentMessage = agentMessage.replace(
|
||||
/\[Attached files: (.+?)\]/,
|
||||
(_, paths: string) => {
|
||||
const prefixed = paths
|
||||
.split(", ")
|
||||
.map((p: string) => p.trim())
|
||||
.filter(Boolean)
|
||||
.map((p: string) => p.startsWith("/") ? p : `${wsPrefix}/${p}`)
|
||||
.join(", ");
|
||||
return `[Attached files: ${prefixed}]`;
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
const runKey = isSubagentSession && sessionKey ? sessionKey : (sessionId as string);
|
||||
@ -151,6 +175,7 @@ export async function POST(req: Request) {
|
||||
message: agentMessage,
|
||||
agentSessionId: sessionId,
|
||||
overrideAgentId: effectiveAgentId,
|
||||
attachments: imageAttachments.length > 0 ? imageAttachments : undefined,
|
||||
});
|
||||
} catch (err) {
|
||||
return new Response(
|
||||
|
||||
@ -9,7 +9,11 @@ import {
|
||||
export const runtime = "nodejs";
|
||||
|
||||
export async function POST(req: Request) {
|
||||
const { sessionKey, message }: { sessionKey: string; message: string } = await req.json();
|
||||
const { sessionKey, message, attachments }: {
|
||||
sessionKey: string;
|
||||
message: string;
|
||||
attachments?: Array<{ mediaType: string; data: string }>;
|
||||
} = await req.json();
|
||||
|
||||
if (!sessionKey || !message?.trim()) {
|
||||
return new Response("sessionKey and message are required", { status: 400 });
|
||||
|
||||
@ -785,17 +785,46 @@ export const ChatMessage = memo(function ChatMessage({ message, isStreaming, onS
|
||||
.join("\n");
|
||||
|
||||
const attachmentInfo = parseAttachments(textContent);
|
||||
|
||||
// Extract inline image parts (FileUIPart with image/* mediaType)
|
||||
const inlineImages = message.parts.filter(
|
||||
(p): p is { type: "file"; mediaType: string; url: string; filename?: string } =>
|
||||
(p as Record<string, unknown>).type === "file" &&
|
||||
typeof (p as Record<string, unknown>).mediaType === "string" &&
|
||||
((p as Record<string, unknown>).mediaType as string).startsWith("image/") &&
|
||||
typeof (p as Record<string, unknown>).url === "string",
|
||||
);
|
||||
|
||||
const hasImages = inlineImages.length > 0;
|
||||
const hasPathAttachments = !!attachmentInfo;
|
||||
|
||||
const richHtml = userHtmlMap?.get(message.id) ?? userHtmlMap?.get(textContent) ?? userHtmlMap?.get(attachmentInfo?.message ?? "");
|
||||
|
||||
const displayText = attachmentInfo?.message ?? textContent;
|
||||
const bubbleContent = richHtml
|
||||
? <div className="chat-user-html-content" dangerouslySetInnerHTML={{ __html: richHtml }} />
|
||||
: <p className="whitespace-pre-wrap break-words">{attachmentInfo?.message ?? textContent}</p>;
|
||||
: displayText ? <p className="whitespace-pre-wrap break-words">{displayText}</p> : null;
|
||||
|
||||
if (attachmentInfo) {
|
||||
if (hasImages || hasPathAttachments) {
|
||||
return (
|
||||
<div className="flex flex-col items-end gap-1.5 py-2">
|
||||
<AttachedFilesCard paths={attachmentInfo.paths} />
|
||||
{(attachmentInfo.message || richHtml) && (
|
||||
{hasPathAttachments && <AttachedFilesCard paths={attachmentInfo.paths} />}
|
||||
{hasImages && (
|
||||
<div className="flex flex-wrap gap-1.5 justify-end">
|
||||
{inlineImages.map((img, i) => (
|
||||
<div key={i} className="relative rounded-xl overflow-hidden shrink-0">
|
||||
<img
|
||||
src={img.url}
|
||||
alt={img.filename ?? "Attached image"}
|
||||
className="block rounded-xl object-cover"
|
||||
style={{ maxHeight: 200, maxWidth: 240, background: "rgba(0,0,0,0.04)" }}
|
||||
loading="lazy"
|
||||
/>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
{(displayText || richHtml) && (
|
||||
<div
|
||||
className="max-w-[80%] w-fit rounded-2xl rounded-br-sm px-3 py-2 text-sm leading-6 break-words chat-message-font"
|
||||
style={{
|
||||
|
||||
@ -170,6 +170,10 @@ type AttachedFile = {
|
||||
uploading?: boolean;
|
||||
/** Local blob URL for instant preview before upload completes. */
|
||||
localUrl?: string;
|
||||
/** MIME type of the file (e.g. "image/png"). */
|
||||
mimeType?: string;
|
||||
/** Base64 data URL for sending image content to the AI model. */
|
||||
base64Data?: string;
|
||||
};
|
||||
|
||||
function getFileCategory(
|
||||
@ -1706,11 +1710,19 @@ export const ChatPanel = forwardRef<ChatPanelHandle, ChatPanelProps>(
|
||||
// Build message with optional attachment prefix
|
||||
let messageText = userText;
|
||||
|
||||
// Merge mention paths and attachment paths
|
||||
// Separate image attachments (have base64 data) from non-image file references
|
||||
const imageAttachments = currentAttachments.filter(
|
||||
(f) => f.base64Data && f.mimeType?.startsWith("image/"),
|
||||
);
|
||||
const nonImageAttachments = currentAttachments.filter(
|
||||
(f) => !f.base64Data || !f.mimeType?.startsWith("image/"),
|
||||
);
|
||||
|
||||
// Build file path prefix for non-image attachments and mentions
|
||||
const allFilePaths = [
|
||||
...mentionedFiles.map((f) => f.path),
|
||||
...currentAttachments.map((f) => f.path),
|
||||
];
|
||||
...nonImageAttachments.map((f) => f.path),
|
||||
].filter(Boolean);
|
||||
if (allFilePaths.length > 0) {
|
||||
const prefix = `[Attached files: ${allFilePaths.join(", ")}]`;
|
||||
messageText = messageText
|
||||
@ -1724,6 +1736,16 @@ export const ChatPanel = forwardRef<ChatPanelHandle, ChatPanelProps>(
|
||||
isFirstFileMessageRef.current = false;
|
||||
}
|
||||
|
||||
// Build FileUIPart[] for image attachments
|
||||
const fileParts = imageAttachments
|
||||
.filter((f) => f.base64Data && f.mimeType)
|
||||
.map((f) => ({
|
||||
type: "file" as const,
|
||||
mediaType: f.mimeType!,
|
||||
url: f.base64Data!,
|
||||
filename: f.name,
|
||||
}));
|
||||
|
||||
// Store HTML for display and pipe to server via transport
|
||||
userHtmlMapRef.current.set(messageText, html);
|
||||
pendingHtmlRef.current = html;
|
||||
@ -1731,25 +1753,39 @@ export const ChatPanel = forwardRef<ChatPanelHandle, ChatPanelProps>(
|
||||
userScrolledAwayRef.current = false;
|
||||
|
||||
if (gatewaySessionKey) {
|
||||
const msgParts: UIMessage["parts"] = [
|
||||
{ type: "text" as const, text: messageText },
|
||||
...fileParts,
|
||||
];
|
||||
const userMsg = {
|
||||
id: `user-${Date.now()}`,
|
||||
role: "user" as const,
|
||||
parts: [{ type: "text" as const, text: messageText }] as UIMessage["parts"],
|
||||
parts: msgParts,
|
||||
};
|
||||
setMessages((prev) => [...prev, userMsg]);
|
||||
|
||||
try {
|
||||
const attachments = imageAttachments
|
||||
.filter((f) => f.base64Data && f.mimeType)
|
||||
.map((f) => ({ mediaType: f.mimeType!, data: f.base64Data! }));
|
||||
const res = await fetch("/api/gateway/chat", {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({ sessionKey: gatewaySessionKey, message: messageText }),
|
||||
body: JSON.stringify({
|
||||
sessionKey: gatewaySessionKey,
|
||||
message: messageText,
|
||||
...(attachments.length > 0 ? { attachments } : {}),
|
||||
}),
|
||||
});
|
||||
if (res.ok && res.body) {
|
||||
await attemptReconnect(gatewaySessionKey, [], { sessionKey: gatewaySessionKey });
|
||||
}
|
||||
} catch { /* ignore */ }
|
||||
} else {
|
||||
void sendMessage({ text: messageText });
|
||||
void sendMessage({
|
||||
text: messageText,
|
||||
...(fileParts.length > 0 ? { files: fileParts } : {}),
|
||||
});
|
||||
}
|
||||
},
|
||||
[
|
||||
@ -2029,7 +2065,8 @@ export const ChatPanel = forwardRef<ChatPanelHandle, ChatPanelProps>(
|
||||
}, []);
|
||||
|
||||
/** Upload native files (e.g. dropped from Finder/Desktop) and attach them.
|
||||
* Shows files instantly with a local preview, then uploads in the background. */
|
||||
* Shows files instantly with a local preview, then uploads in the background.
|
||||
* For images, also reads the file as a base64 data URL for AI vision. */
|
||||
const uploadAndAttachNativeFiles = useCallback(
|
||||
(files: FileList) => {
|
||||
const fileArray = Array.from(files);
|
||||
@ -2041,6 +2078,7 @@ export const ChatPanel = forwardRef<ChatPanelHandle, ChatPanelProps>(
|
||||
path: "",
|
||||
uploading: true,
|
||||
localUrl: URL.createObjectURL(file),
|
||||
mimeType: file.type || undefined,
|
||||
}));
|
||||
setAttachedFiles((prev) => [...prev, ...placeholders]);
|
||||
|
||||
@ -2050,6 +2088,23 @@ export const ChatPanel = forwardRef<ChatPanelHandle, ChatPanelProps>(
|
||||
const placeholderId = placeholders[i].id;
|
||||
const localUrl = placeholders[i].localUrl;
|
||||
|
||||
// For image files, read as base64 for AI vision
|
||||
const isImage = file.type.startsWith("image/");
|
||||
if (isImage) {
|
||||
const reader = new FileReader();
|
||||
reader.onload = () => {
|
||||
const dataUrl = reader.result as string;
|
||||
setAttachedFiles((prev) =>
|
||||
prev.map((f) =>
|
||||
f.id === placeholderId
|
||||
? { ...f, base64Data: dataUrl, mimeType: file.type }
|
||||
: f,
|
||||
),
|
||||
);
|
||||
};
|
||||
reader.readAsDataURL(file);
|
||||
}
|
||||
|
||||
const form = new FormData();
|
||||
form.append("file", file);
|
||||
fetch("/api/workspace/upload", {
|
||||
@ -2059,7 +2114,6 @@ export const ChatPanel = forwardRef<ChatPanelHandle, ChatPanelProps>(
|
||||
.then((res) => res.ok ? res.json() : null)
|
||||
.then((json: { ok?: boolean; path?: string } | null) => {
|
||||
if (json?.ok && json.path) {
|
||||
// Replace placeholder with the real uploaded file
|
||||
setAttachedFiles((prev) =>
|
||||
prev.map((f) =>
|
||||
f.id === placeholderId
|
||||
@ -2068,7 +2122,6 @@ export const ChatPanel = forwardRef<ChatPanelHandle, ChatPanelProps>(
|
||||
),
|
||||
);
|
||||
} else {
|
||||
// Upload failed — remove the placeholder
|
||||
setAttachedFiles((prev) => prev.filter((f) => f.id !== placeholderId));
|
||||
if (localUrl) {URL.revokeObjectURL(localUrl);}
|
||||
}
|
||||
|
||||
@ -551,8 +551,10 @@ export function startRun(params: {
|
||||
agentSessionId?: string;
|
||||
/** Use a specific agent ID instead of the workspace default. */
|
||||
overrideAgentId?: string;
|
||||
/** Image attachments to forward to the gateway for vision models. */
|
||||
attachments?: Array<{ mediaType: string; data: string }>;
|
||||
}): ActiveRun {
|
||||
const { sessionId, message, agentSessionId, overrideAgentId } = params;
|
||||
const { sessionId, message, agentSessionId, overrideAgentId, attachments } = params;
|
||||
|
||||
const existing = activeRuns.get(sessionId);
|
||||
if (existing?.status === "running") {
|
||||
@ -566,7 +568,7 @@ export function startRun(params: {
|
||||
? `agent:${agentId}:web:${agentSessionId}`
|
||||
: undefined;
|
||||
const abortController = new AbortController();
|
||||
const child = spawnAgentProcess(message, agentSessionId, overrideAgentId);
|
||||
const child = spawnAgentProcess(message, agentSessionId, overrideAgentId, attachments);
|
||||
|
||||
const run: ActiveRun = {
|
||||
sessionId,
|
||||
|
||||
@ -145,6 +145,8 @@ type SpawnGatewayProcessParams = {
|
||||
sessionKey?: string;
|
||||
afterSeq: number;
|
||||
lane?: string;
|
||||
/** Image attachments to send alongside the message for vision models. */
|
||||
attachments?: Array<{ mediaType: string; data: string }>;
|
||||
};
|
||||
|
||||
type BuildConnectParamsOptions = {
|
||||
@ -801,6 +803,7 @@ class GatewayProcessHandle
|
||||
startRes = await this.client.request("chat.send", {
|
||||
message: msg,
|
||||
...(sessionKey ? { sessionKey } : {}),
|
||||
...(this.params.attachments?.length ? { attachments: this.params.attachments } : {}),
|
||||
idempotencyKey: randomUUID(),
|
||||
deliver: false,
|
||||
});
|
||||
@ -1220,11 +1223,13 @@ export async function callGatewayRpc(
|
||||
/**
|
||||
* Start an agent run via the Gateway WebSocket and return a process handle.
|
||||
* @param overrideAgentId - Use a specific agent ID instead of the workspace default.
|
||||
* @param attachments - Image attachments for vision models.
|
||||
*/
|
||||
export function spawnAgentProcess(
|
||||
message: string,
|
||||
agentSessionId?: string,
|
||||
overrideAgentId?: string,
|
||||
attachments?: Array<{ mediaType: string; data: string }>,
|
||||
): AgentProcessHandle {
|
||||
const agentId = overrideAgentId ?? resolveActiveAgentId();
|
||||
const sessionKey = agentSessionId
|
||||
@ -1236,6 +1241,7 @@ export function spawnAgentProcess(
|
||||
sessionKey,
|
||||
afterSeq: 0,
|
||||
lane: agentSessionId ? `web:${agentSessionId}` : "web",
|
||||
attachments,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user