galaxis-agent/tests/test_multimodal.py

99 lines
3.1 KiB
Python
Raw Permalink Normal View History

2026-03-20 14:38:07 +09:00
from __future__ import annotations
from agent.utils.multimodal import extract_image_urls
def test_extract_image_urls_empty() -> None:
    """An empty input string yields an empty list of URLs."""
    found = extract_image_urls("")
    assert found == []
def test_extract_image_urls_markdown_and_direct_dedupes() -> None:
    """A URL given in markdown and repeated as a bare link is reported once.

    ``a.png`` appears first inside markdown image syntax and again later as
    a plain link; the expected list contains it a single time, followed by
    the ``b.JPG`` URL with its query string.
    """
    message = (
        "Here is an image ![alt](https://example.com/a.png) and another "
        "![https://example.com/b.JPG?size=large plus a repeat https://example.com/a.png"
    )
    expected = [
        "https://example.com/a.png",
        "https://example.com/b.JPG?size=large",
    ]
    assert extract_image_urls(message) == expected
def test_extract_image_urls_ignores_non_images() -> None:
    """Links to a ``.pdf`` file or to a path with no extension are not returned."""
    message = "Not images: https://example.com/file.pdf and https://example.com/noext"
    found = extract_image_urls(message)
    assert found == []
def test_extract_image_urls_markdown_syntax() -> None:
    """The target of a markdown image (``![alt](url)``) is extracted."""
    message = "Check out this screenshot: ![Screenshot](https://example.com/screenshot.png)"
    expected = ["https://example.com/screenshot.png"]
    assert extract_image_urls(message) == expected
def test_extract_image_urls_direct_links() -> None:
    """Bare image links in running text are returned in order of appearance."""
    message = "Direct link: https://example.com/photo.jpg and another https://example.com/image.gif"
    expected = [
        "https://example.com/photo.jpg",
        "https://example.com/image.gif",
    ]
    assert extract_image_urls(message) == expected
def test_extract_image_urls_various_formats() -> None:
    """png, jpeg, gif, webp, bmp and tiff links are all recognised."""
    expected = [
        "https://example.com/image.png",
        "https://example.com/photo.jpeg",
        "https://example.com/pic.gif",
        "https://example.com/img.webp",
        "https://example.com/bitmap.bmp",
        "https://example.com/scan.tiff",
    ]
    # The input is the label followed by the URLs separated by single spaces.
    message = "Multiple formats: " + " ".join(expected)
    assert extract_image_urls(message) == expected
def test_extract_image_urls_with_query_params() -> None:
    """A query string following the image extension is kept in the result."""
    url = "https://cdn.example.com/image.png?width=800&height=600"
    message = "Image with params: " + url
    assert extract_image_urls(message) == [url]
def test_extract_image_urls_case_insensitive() -> None:
    """Upper- and mixed-case extensions match, and the original casing is returned."""
    message = "Mixed case: https://example.com/Image.PNG and https://example.com/photo.JpEg"
    expected = [
        "https://example.com/Image.PNG",
        "https://example.com/photo.JpEg",
    ]
    assert extract_image_urls(message) == expected
def test_extract_image_urls_deduplication() -> None:
    """The same bare URL mentioned twice appears only once in the result."""
    message = "Same URL twice: https://example.com/image.png and again https://example.com/image.png"
    found = extract_image_urls(message)
    assert found == ["https://example.com/image.png"]
def test_extract_image_urls_mixed_markdown_and_direct() -> None:
    """Markdown images and bare links in one message are all found.

    The comparison is order-insensitive; the separate length check ensures
    no URL is returned more than once.
    """
    message = (
        "Markdown: ![alt text](https://example.com/markdown.png) "
        "and direct: https://example.com/direct.jpg "
        "and another markdown ![](https://example.com/another.gif)"
    )
    expected_urls = {
        "https://example.com/markdown.png",
        "https://example.com/direct.jpg",
        "https://example.com/another.gif",
    }
    found = extract_image_urls(message)
    assert set(found) == expected_urls
    assert len(found) == 3