"""Utilities for building multimodal content blocks.""" from __future__ import annotations import base64 import logging import mimetypes import os import re from typing import Any import httpx from langchain_core.messages.content import create_image_block logger = logging.getLogger(__name__) IMAGE_MARKDOWN_RE = re.compile(r"!\[[^\]]*\]\((https?://[^\s)]+)\)") IMAGE_URL_RE = re.compile( r"(https?://[^\s)]+\.(?:png|jpe?g|gif|webp|bmp|tiff)(?:\?[^\s)]+)?)", re.IGNORECASE, ) def extract_image_urls(text: str) -> list[str]: """Extract image URLs from markdown image syntax and direct image links.""" if not text: return [] urls: list[str] = [] urls.extend(IMAGE_MARKDOWN_RE.findall(text)) urls.extend(IMAGE_URL_RE.findall(text)) deduped = dedupe_urls(urls) if deduped: logger.debug("Extracted %d image URL(s)", len(deduped)) return deduped async def fetch_image_block( image_url: str, client: httpx.AsyncClient, ) -> dict[str, Any] | None: """Fetch image bytes and build an image content block.""" try: logger.debug("Fetching image from %s", image_url) headers = None if "uploads.linear.app" in image_url: linear_api_key = os.environ.get("LINEAR_API_KEY", "") if linear_api_key: headers = {"Authorization": linear_api_key} else: logger.warning( "LINEAR_API_KEY not set; cannot authenticate image fetch for %s", image_url, ) response = await client.get(image_url, headers=headers) response.raise_for_status() content_type = response.headers.get("Content-Type", "").split(";")[0].strip() if not content_type: guessed, _ = mimetypes.guess_type(image_url) if not guessed: logger.warning( "Could not determine content type for %s; skipping image", image_url, ) return None content_type = guessed encoded = base64.b64encode(response.content).decode("ascii") logger.info( "Fetched image %s (%s, %d bytes)", image_url, content_type, len(response.content), ) return create_image_block(base64=encoded, mime_type=content_type) except Exception: logger.exception("Failed to fetch image from %s", image_url) return None def dedupe_urls(urls: list[str]) -> list[str]: return list(dict.fromkeys(urls))