# galaxis-agent/agent/utils/multimodal.py

"""Utilities for building multimodal content blocks."""

from __future__ import annotations

import base64
import logging
import mimetypes
import os
import re
from typing import Any

import httpx
from langchain_core.messages.content import create_image_block

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Markdown image syntax ``![alt](url)``; the single capture group is the
# http(s) URL. The closing ``)`` must directly follow the URL, so a target
# carrying a quoted title (``![alt](url "title")``) is not matched.
IMAGE_MARKDOWN_RE = re.compile(r"!\[[^\]]*\]\((https?://[^\s)]+)\)")
# Bare http(s) URL containing one of the listed image extensions, optionally
# followed by a ``?query`` part; matching is case-insensitive. Nothing is
# asserted about the character after the extension, so a match can end
# mid-token (``.../a.pngx`` yields ``.../a.png``).
IMAGE_URL_RE = re.compile(
    r"(https?://[^\s)]+\.(?:png|jpe?g|gif|webp|bmp|tiff)(?:\?[^\s)]+)?)",
    re.IGNORECASE,
)


def extract_image_urls(text: str) -> list[str]:
    """Collect the distinct image URLs referenced in ``text``.

    Markdown image targets (``![alt](url)``) are gathered first, followed by
    bare links that carry a known image extension. Repeated URLs are dropped
    while the first-seen order is kept. Empty input yields an empty list.
    """
    if not text:
        return []

    candidates = [
        *IMAGE_MARKDOWN_RE.findall(text),
        *IMAGE_URL_RE.findall(text),
    ]
    unique_urls = dedupe_urls(candidates)
    if unique_urls:
        logger.debug("Extracted %d image URL(s)", len(unique_urls))
    return unique_urls


async def fetch_image_block(
    image_url: str,
    client: httpx.AsyncClient,
) -> dict[str, Any] | None:
    """Fetch image bytes and build an image content block.

    Args:
        image_url: Absolute http(s) URL of the image to download.
        client: Async HTTP client used to issue the GET request.

    Returns:
        The block produced by ``create_image_block`` (base64-encoded body plus
        MIME type), or ``None`` when the request fails, the server answers
        with an error status, or no ``image/*`` MIME type can be determined.

    Any ``Exception`` raised while fetching or encoding is logged and turned
    into a ``None`` result, so one bad image does not abort the caller.
    """
    try:
        logger.debug("Fetching image from %s", image_url)
        headers = None
        # Decide on the *parsed* host, never on a substring of the URL: a
        # substring test would also match attacker-written URLs such as
        # https://evil.example/uploads.linear.app/x.png and hand them the
        # API key. httpx.URL is the same parser the request itself uses, and
        # it normalizes scheme and host to lowercase.
        target = httpx.URL(image_url)
        if target.host == "uploads.linear.app":
            if target.scheme != "https":
                # Never put the key on the wire in cleartext.
                logger.warning(
                    "Refusing to send LINEAR_API_KEY over non-HTTPS URL %s",
                    image_url,
                )
            else:
                linear_api_key = os.environ.get("LINEAR_API_KEY", "")
                if linear_api_key:
                    headers = {"Authorization": linear_api_key}
                else:
                    logger.warning(
                        "LINEAR_API_KEY not set; cannot authenticate image fetch for %s",
                        image_url,
                    )
        response = await client.get(image_url, headers=headers)
        response.raise_for_status()

        # MIME types are case-insensitive; drop parameters such as charset.
        content_type = (
            response.headers.get("Content-Type", "").split(";")[0].strip().lower()
        )
        # A missing or generic binary type says nothing about the payload, so
        # fall back to the URL's file extension.
        if not content_type or content_type == "application/octet-stream":
            guessed, _ = mimetypes.guess_type(image_url)
            if not guessed:
                logger.warning(
                    "Could not determine content type for %s; skipping image",
                    image_url,
                )
                return None
            content_type = guessed

        # Error or login pages can come back with a 200 status; do not wrap
        # non-image payloads in an image block.
        if not content_type.startswith("image/"):
            logger.warning(
                "Unsupported content type %s for %s; skipping image",
                content_type,
                image_url,
            )
            return None

        payload = response.content
        encoded = base64.b64encode(payload).decode("ascii")
        logger.info(
            "Fetched image %s (%s, %d bytes)",
            image_url,
            content_type,
            len(payload),
        )
        return create_image_block(base64=encoded, mime_type=content_type)
    except Exception:
        # Deliberate best-effort boundary: log with traceback and report None.
        logger.exception("Failed to fetch image from %s", image_url)
        return None


def dedupe_urls(urls: list[str]) -> list[str]:
    """Return ``urls`` without repeats, keeping each URL's first position."""
    seen: set[str] = set()
    unique: list[str] = []
    for url in urls:
        if url in seen:
            continue
        seen.add(url)
        unique.append(url)
    return unique
chore: initial copy from open-swe 2026-03-20 14:38:07 +09:00			`"""Utilities for building multimodal content blocks."""`

			`from __future__ import annotations`

			`import base64`
			`import logging`
			`import mimetypes`
			`import os`
			`import re`
			`from typing import Any`

			`import httpx`
			`from langchain_core.messages.content import create_image_block`

			`logger = logging.getLogger(__name__)`

			`IMAGE_MARKDOWN_RE = re.compile(r"!\[[^\]]*\]\((https?://[^\s)]+)\)")`
			`IMAGE_URL_RE = re.compile(`
			`r"(https?://[^\s)]+\.(?:png\|jpe?g\|gif\|webp\|bmp\|tiff)(?:\?[^\s)]+)?)",`
			`re.IGNORECASE,`
			`)`


			`def extract_image_urls(text: str) -> list[str]:`
			`"""Extract image URLs from markdown image syntax and direct image links."""`
			`if not text:`
			`return []`

			`urls: list[str] = []`
			`urls.extend(IMAGE_MARKDOWN_RE.findall(text))`
			`urls.extend(IMAGE_URL_RE.findall(text))`

			`deduped = dedupe_urls(urls)`
			`if deduped:`
			`logger.debug("Extracted %d image URL(s)", len(deduped))`
			`return deduped`


			`async def fetch_image_block(`
			`image_url: str,`
			`client: httpx.AsyncClient,`
			`) -> dict[str, Any] \| None:`
			`"""Fetch image bytes and build an image content block."""`
			`try:`
			`logger.debug("Fetching image from %s", image_url)`
			`headers = None`
			`if "uploads.linear.app" in image_url:`
			`linear_api_key = os.environ.get("LINEAR_API_KEY", "")`
			`if linear_api_key:`
			`headers = {"Authorization": linear_api_key}`
			`else:`
			`logger.warning(`
			`"LINEAR_API_KEY not set; cannot authenticate image fetch for %s",`
			`image_url,`
			`)`
			`response = await client.get(image_url, headers=headers)`
			`response.raise_for_status()`
			`content_type = response.headers.get("Content-Type", "").split(";")[0].strip()`
			`if not content_type:`
			`guessed, _ = mimetypes.guess_type(image_url)`
			`if not guessed:`
			`logger.warning(`
			`"Could not determine content type for %s; skipping image",`
			`image_url,`
			`)`
			`return None`
			`content_type = guessed`

			`encoded = base64.b64encode(response.content).decode("ascii")`
			`logger.info(`
			`"Fetched image %s (%s, %d bytes)",`
			`image_url,`
			`content_type,`
			`len(response.content),`
			`)`
			`return create_image_block(base64=encoded, mime_type=content_type)`
			`except Exception:`
			`logger.exception("Failed to fetch image from %s", image_url)`
			`return None`


			`def dedupe_urls(urls: list[str]) -> list[str]:`
			`return list(dict.fromkeys(urls))`