99 lines
3.1 KiB
Python
99 lines
3.1 KiB
Python
from __future__ import annotations
|
|
|
|
from agent.utils.multimodal import extract_image_urls
|
|
|
|
|
|
def test_extract_image_urls_empty() -> None:
    """An empty input string yields an empty list of URLs."""
    urls = extract_image_urls("")
    assert urls == []
|
|
|
|
|
|
def test_extract_image_urls_markdown_and_direct_dedupes() -> None:
    """Markdown images plus a repeated bare link yield each URL exactly once.

    The expected list is ordered by first appearance in the text: ``a.png``
    (Markdown image), then ``b.JPG?size=large`` (Markdown image); the trailing
    bare ``a.png`` link is a duplicate and must not be reported again.
    """
    # Both image references must be written in full Markdown image syntax
    # (``![alt](url)``); without it the text does not exercise the Markdown
    # path and the URLs no longer appear in the asserted order.
    text = (
        "Here is an image ![a](https://example.com/a.png) and another "
        "![b](https://example.com/b.JPG?size=large) plus a repeat https://example.com/a.png"
    )

    assert extract_image_urls(text) == [
        "https://example.com/a.png",
        "https://example.com/b.JPG?size=large",
    ]
|
|
|
|
|
|
def test_extract_image_urls_ignores_non_images() -> None:
    """A PDF link and an extension-less link are not reported as images."""
    text = "Not images: https://example.com/file.pdf and https://example.com/noext"
    found = extract_image_urls(text)
    assert found == []
|
|
|
|
|
|
def test_extract_image_urls_markdown_syntax() -> None:
    """A single Markdown image reference yields its target URL."""
    # The input must actually contain the Markdown image (``![alt](url)``);
    # a bare "Check out this screenshot: " has no URL to extract and could
    # never satisfy the assertion below.
    text = "Check out this screenshot: ![screenshot](https://example.com/screenshot.png)"

    assert extract_image_urls(text) == ["https://example.com/screenshot.png"]
|
|
|
|
|
|
def test_extract_image_urls_direct_links() -> None:
    """Two bare image links in running text are returned in order of appearance."""
    text = "Direct link: https://example.com/photo.jpg and another https://example.com/image.gif"
    expected = [
        "https://example.com/photo.jpg",
        "https://example.com/image.gif",
    ]

    found = extract_image_urls(text)

    assert found == expected
|
|
|
|
|
|
def test_extract_image_urls_various_formats() -> None:
    """Links ending in png, jpeg, gif, webp, bmp and tiff are all returned, in order."""
    expected = [
        "https://example.com/image.png",
        "https://example.com/photo.jpeg",
        "https://example.com/pic.gif",
        "https://example.com/img.webp",
        "https://example.com/bitmap.bmp",
        "https://example.com/scan.tiff",
    ]
    # Single-space separated URLs after the prefix, with no trailing space.
    text = "Multiple formats: " + " ".join(expected)

    found = extract_image_urls(text)

    assert found == expected
|
|
|
|
|
|
def test_extract_image_urls_with_query_params() -> None:
    """The query string following the image extension is kept in the result."""
    url = "https://cdn.example.com/image.png?width=800&height=600"
    text = "Image with params: " + url

    found = extract_image_urls(text)

    assert found == [url]
|
|
|
|
|
|
def test_extract_image_urls_case_insensitive() -> None:
    """Mixed-case extensions are recognised and the URLs keep their original casing."""
    text = "Mixed case: https://example.com/Image.PNG and https://example.com/photo.JpEg"
    expected = [
        "https://example.com/Image.PNG",
        "https://example.com/photo.JpEg",
    ]

    found = extract_image_urls(text)

    assert found == expected
|
|
|
|
|
|
def test_extract_image_urls_deduplication() -> None:
    """A URL that occurs twice in the text is reported only once."""
    text = "Same URL twice: https://example.com/image.png and again https://example.com/image.png"
    found = extract_image_urls(text)
    assert found == ["https://example.com/image.png"]
|
|
|
|
|
|
def test_extract_image_urls_mixed_markdown_and_direct() -> None:
    """Markdown images and a bare link in one text are all found, without duplicates.

    Only membership and count are asserted, so the test does not depend on
    the order in which Markdown and bare-link matches are reported.
    """
    # Both Markdown references must be spelled out as ``![alt](url)``;
    # without them only the bare ``direct.jpg`` link is present and the
    # three-URL expectation below cannot hold.
    text = (
        "Markdown: ![alt](https://example.com/markdown.png) "
        "and direct: https://example.com/direct.jpg "
        "and another markdown ![image](https://example.com/another.gif)"
    )

    result = extract_image_urls(text)

    assert set(result) == {
        "https://example.com/markdown.png",
        "https://example.com/direct.jpg",
        "https://example.com/another.gif",
    }
    # The set comparison alone would hide duplicates; the length check does not.
    assert len(result) == 3
|