fix(context): detect image MIME type from magic bytes instead of file extension

Feishu downloads images with incorrect extensions (e.g. .jpg for PNG files).
mimetypes.guess_type() relies on the file extension, causing a MIME mismatch
that Anthropic rejects with 'image was specified using image/jpeg but appears
to be image/png'.

Fix: read the first bytes of the image data and detect the real MIME type via
magic bytes (PNG: 0x89PNG, JPEG: 0xFFD8FF, GIF: GIF87a/GIF89a, WEBP: RIFF+WEBP).
Fall back to mimetypes.guess_type() only when magic bytes are inconclusive.
This commit is contained in:
coldxiangyu
2026-03-05 20:18:13 +08:00
parent 7b676962ed
commit 46192fbd2a

View File

@@ -12,6 +12,19 @@ from nanobot.agent.memory import MemoryStore
from nanobot.agent.skills import SkillsLoader
def _detect_image_mime(data: bytes) -> str | None:
"""Detect image MIME type from magic bytes, ignoring file extension."""
if data[:8] == b"\x89PNG\r\n\x1a\n":
return "image/png"
if data[:3] == b"\xff\xd8\xff":
return "image/jpeg"
if data[:6] in (b"GIF87a", b"GIF89a"):
return "image/gif"
if data[:4] == b"RIFF" and data[8:12] == b"WEBP":
return "image/webp"
return None
class ContextBuilder:
"""Builds the context (system prompt + messages) for the agent."""
@@ -136,10 +149,14 @@ Reply directly with text for conversations. Only use the 'message' tool to send
images = []
for path in media:
p = Path(path)
mime, _ = mimetypes.guess_type(path)
if not p.is_file() or not mime or not mime.startswith("image/"):
if not p.is_file():
continue
b64 = base64.b64encode(p.read_bytes()).decode()
raw = p.read_bytes()
# Detect real MIME type from magic bytes; fallback to filename guess
mime = _detect_image_mime(raw) or mimetypes.guess_type(path)[0]
if not mime or not mime.startswith("image/"):
continue
b64 = base64.b64encode(raw).decode()
images.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}})
if not images: