# galaxis-agent/tests/test_multimodal.py

from __future__ import annotations

from agent.utils.multimodal import extract_image_urls


def test_extract_image_urls_empty() -> None:
    """An empty input string must produce an empty list of URLs."""
    found = extract_image_urls("")

    assert found == []


def test_extract_image_urls_markdown_and_direct_dedupes() -> None:
    """A URL seen in markdown and again as a bare link is reported only once."""
    # The second URL follows a dangling "![" (no "](...)" wrapper) and carries
    # a query string; the first URL is repeated at the end as a plain link.
    sample = (
        "Here is an image ![alt](https://example.com/a.png) and another "
        "![https://example.com/b.JPG?size=large plus a repeat https://example.com/a.png"
    )
    expected = [
        "https://example.com/a.png",
        "https://example.com/b.JPG?size=large",
    ]

    assert extract_image_urls(sample) == expected


def test_extract_image_urls_ignores_non_images() -> None:
    """Links to a PDF or to a path without an extension are not returned."""
    sample = "Not images: https://example.com/file.pdf and https://example.com/noext"
    found = extract_image_urls(sample)

    assert found == []


def test_extract_image_urls_markdown_syntax() -> None:
    """The target of a markdown image (``![alt](url)``) is extracted."""
    image_url = "https://example.com/screenshot.png"
    sample = f"Check out this screenshot: ![Screenshot]({image_url})"

    assert extract_image_urls(sample) == [image_url]


def test_extract_image_urls_direct_links() -> None:
    """Bare image links in running text are returned in order of appearance."""
    first = "https://example.com/photo.jpg"
    second = "https://example.com/image.gif"
    sample = f"Direct link: {first} and another {second}"

    found = extract_image_urls(sample)

    assert found == [first, second]


def test_extract_image_urls_various_formats() -> None:
    """png, jpeg, gif, webp, bmp and tiff links are all recognised, in order."""
    urls = [
        "https://example.com/image.png",
        "https://example.com/photo.jpeg",
        "https://example.com/pic.gif",
        "https://example.com/img.webp",
        "https://example.com/bitmap.bmp",
        "https://example.com/scan.tiff",
    ]
    # Space-separated links after a short prefix; no trailing space at the end.
    sample = "Multiple formats: " + " ".join(urls)

    assert extract_image_urls(sample) == urls


def test_extract_image_urls_with_query_params() -> None:
    """The query string after the image extension is kept in the returned URL."""
    image_url = "https://cdn.example.com/image.png?width=800&height=600"
    sample = f"Image with params: {image_url}"

    assert extract_image_urls(sample) == [image_url]


def test_extract_image_urls_case_insensitive() -> None:
    """Upper- and mixed-case extensions match, and the URL's casing is unchanged."""
    upper = "https://example.com/Image.PNG"
    mixed = "https://example.com/photo.JpEg"
    sample = f"Mixed case: {upper} and {mixed}"

    found = extract_image_urls(sample)

    assert found == [upper, mixed]


def test_extract_image_urls_deduplication() -> None:
    """The same bare link appearing twice is returned a single time."""
    image_url = "https://example.com/image.png"
    sample = f"Same URL twice: {image_url} and again {image_url}"

    assert extract_image_urls(sample) == [image_url]


def test_extract_image_urls_mixed_markdown_and_direct() -> None:
    """Markdown images and bare links in one text are all found, none twice."""
    sample = (
        "Markdown: ![alt text](https://example.com/markdown.png) "
        "and direct: https://example.com/direct.jpg "
        "and another markdown ![](https://example.com/another.gif)"
    )
    expected = {
        "https://example.com/markdown.png",
        "https://example.com/direct.jpg",
        "https://example.com/another.gif",
    }

    found = extract_image_urls(sample)

    # Order is deliberately not checked here: compare as a set, then make
    # sure the list itself carries no duplicates.
    assert set(found) == expected
    assert len(found) == 3