galaxis-agent/agent/tools/fetch_url.py

51 lines
1.8 KiB
Python
Raw Normal View History

2026-03-20 14:38:07 +09:00
from typing import Any
import requests
from markdownify import markdownify
def fetch_url(url: str, timeout: int = 30) -> dict[str, Any]:
"""Fetch content from a URL and convert HTML to markdown format.
This tool fetches web page content and converts it to clean markdown text,
making it easy to read and process HTML content. After receiving the markdown,
you MUST synthesize the information into a natural, helpful response for the user.
Args:
url: The URL to fetch (must be a valid HTTP/HTTPS URL)
timeout: Request timeout in seconds (default: 30)
Returns:
Dictionary containing:
- success: Whether the request succeeded
- url: The final URL after redirects
- markdown_content: The page content converted to markdown
- status_code: HTTP status code
- content_length: Length of the markdown content in characters
IMPORTANT: After using this tool:
1. Read through the markdown content
2. Extract relevant information that answers the user's question
3. Synthesize this into a clear, natural language response
4. NEVER show the raw markdown to the user unless specifically requested
"""
try:
response = requests.get(
url,
timeout=timeout,
headers={"User-Agent": "Mozilla/5.0 (compatible; DeepAgents/1.0)"},
)
response.raise_for_status()
# Convert HTML content to markdown
markdown_content = markdownify(response.text)
return {
"url": str(response.url),
"markdown_content": markdown_content,
"status_code": response.status_code,
"content_length": len(markdown_content),
}
except requests.exceptions.RequestException as e:
return {"error": f"Fetch URL error: {e!s}", "url": url}