galaxis-agent/agent/utils/multimodal.py

84 lines
2.5 KiB
Python
Raw Normal View History

2026-03-20 14:38:07 +09:00
"""Utilities for building multimodal content blocks."""
from __future__ import annotations
import base64
import logging
import mimetypes
import os
import re
from typing import Any
import httpx
from langchain_core.messages.content import create_image_block
logger = logging.getLogger(__name__)
IMAGE_MARKDOWN_RE = re.compile(r"!\[[^\]]*\]\((https?://[^\s)]+)\)")
IMAGE_URL_RE = re.compile(
r"(https?://[^\s)]+\.(?:png|jpe?g|gif|webp|bmp|tiff)(?:\?[^\s)]+)?)",
re.IGNORECASE,
)
def extract_image_urls(text: str) -> list[str]:
"""Extract image URLs from markdown image syntax and direct image links."""
if not text:
return []
urls: list[str] = []
urls.extend(IMAGE_MARKDOWN_RE.findall(text))
urls.extend(IMAGE_URL_RE.findall(text))
deduped = dedupe_urls(urls)
if deduped:
logger.debug("Extracted %d image URL(s)", len(deduped))
return deduped
async def fetch_image_block(
image_url: str,
client: httpx.AsyncClient,
) -> dict[str, Any] | None:
"""Fetch image bytes and build an image content block."""
try:
logger.debug("Fetching image from %s", image_url)
headers = None
if "uploads.linear.app" in image_url:
linear_api_key = os.environ.get("LINEAR_API_KEY", "")
if linear_api_key:
headers = {"Authorization": linear_api_key}
else:
logger.warning(
"LINEAR_API_KEY not set; cannot authenticate image fetch for %s",
image_url,
)
response = await client.get(image_url, headers=headers)
response.raise_for_status()
content_type = response.headers.get("Content-Type", "").split(";")[0].strip()
if not content_type:
guessed, _ = mimetypes.guess_type(image_url)
if not guessed:
logger.warning(
"Could not determine content type for %s; skipping image",
image_url,
)
return None
content_type = guessed
encoded = base64.b64encode(response.content).decode("ascii")
logger.info(
"Fetched image %s (%s, %d bytes)",
image_url,
content_type,
len(response.content),
)
return create_image_block(base64=encoded, mime_type=content_type)
except Exception:
logger.exception("Failed to fetch image from %s", image_url)
return None
def dedupe_urls(urls: list[str]) -> list[str]:
return list(dict.fromkeys(urls))