Add media download functionality
This commit is contained in:
@@ -477,21 +477,75 @@ class TelegramChannel(BaseChannel):
|
|||||||
+ ("..." if len(text) > TELEGRAM_REPLY_CONTEXT_MAX_LEN else "")
|
+ ("..." if len(text) > TELEGRAM_REPLY_CONTEXT_MAX_LEN else "")
|
||||||
)
|
)
|
||||||
return f"[Reply to: {truncated}]"
|
return f"[Reply to: {truncated}]"
|
||||||
# Reply has no text/caption; use type placeholder when it has media
|
# Reply has no text/caption; use type placeholder when it has media.
|
||||||
|
# Note: replied-to media is not attached to this message, so the agent won't receive it.
|
||||||
if getattr(reply, "photo", None):
|
if getattr(reply, "photo", None):
|
||||||
return "[Reply to: (image)]"
|
return "[Reply to: (image — not attached)]"
|
||||||
if getattr(reply, "document", None):
|
if getattr(reply, "document", None):
|
||||||
return "[Reply to: (document)]"
|
return "[Reply to: (document — not attached)]"
|
||||||
if getattr(reply, "voice", None):
|
if getattr(reply, "voice", None):
|
||||||
return "[Reply to: (voice)]"
|
return "[Reply to: (voice — not attached)]"
|
||||||
if getattr(reply, "video_note", None) or getattr(reply, "video", None):
|
if getattr(reply, "video_note", None) or getattr(reply, "video", None):
|
||||||
return "[Reply to: (video)]"
|
return "[Reply to: (video — not attached)]"
|
||||||
if getattr(reply, "audio", None):
|
if getattr(reply, "audio", None):
|
||||||
return "[Reply to: (audio)]"
|
return "[Reply to: (audio — not attached)]"
|
||||||
if getattr(reply, "animation", None):
|
if getattr(reply, "animation", None):
|
||||||
return "[Reply to: (animation)]"
|
return "[Reply to: (animation — not attached)]"
|
||||||
return "[Reply to: (no text)]"
|
return "[Reply to: (no text)]"
|
||||||
|
|
||||||
|
async def _download_message_media(
    self, msg, *, add_failure_content: bool = False
) -> tuple[list[str], list[str]]:
    """Download the media attached to a Telegram message, if there is any.

    Handles both the incoming message and a replied-to message. Only the
    first matching attachment is fetched, probing in this order: photo,
    voice, audio, animation, document, video, video note. For photos the
    last entry of ``msg.photo`` is used.

    Args:
        msg: Message-like object. Media attributes are read with ``getattr``
            so a missing attribute is treated the same as ``None``.
        add_failure_content: When true, a failed download produces a
            ``"[<type>: download failed]"`` content part; otherwise a failed
            download produces no content at all.

    Returns:
        ``(media_paths, content_parts)``. Both lists are empty when the
        message carries no media, when ``self._app`` is not set, or when the
        download fails and ``add_failure_content`` is false. On success
        ``media_paths`` holds the saved file path and ``content_parts`` holds
        either ``"[transcription: ...]"`` (voice/audio with a transcription)
        or ``"[<type>: <path>]"``.
    """
    # (message attribute, media type label). ``animation`` is probed before
    # ``document`` because Telegram also populates ``document`` on animation
    # messages for backward compatibility; with the opposite order the
    # animation branch could never be reached for real GIF messages.
    media_sources = (
        ("photo", "image"),
        ("voice", "voice"),
        ("audio", "audio"),
        ("animation", "animation"),
        ("document", "file"),
        ("video", "video"),
        ("video_note", "video"),
    )
    media_file = None
    media_type = None
    for attr_name, label in media_sources:
        candidate = getattr(msg, attr_name, None)
        if candidate:
            # ``photo`` is a sequence of sizes; take the last one.
            media_file = candidate[-1] if attr_name == "photo" else candidate
            media_type = label
            break

    if not media_file or not self._app:
        return [], []

    # Only the fetch/save steps count as a "download"; keep the try narrow so
    # later failures are not misreported as download failures.
    try:
        file = await self._app.bot.get_file(media_file.file_id)
        ext = self._get_extension(
            media_type,
            getattr(media_file, "mime_type", None),
            getattr(media_file, "file_name", None),
        )
        file_path = get_media_dir("telegram") / f"{media_file.file_id[:16]}{ext}"
        await file.download_to_drive(str(file_path))
    except Exception as e:
        # Best effort: a failed download must not abort message handling.
        logger.warning("Failed to download message media: {}", e)
        if add_failure_content:
            return [], [f"[{media_type}: download failed]"]
        return [], []

    path_str = str(file_path)
    if media_type in ("voice", "audio"):
        try:
            transcription = await self.transcribe_audio(file_path)
        except Exception as e:
            # The file is already saved; fall back to the plain path marker
            # instead of discarding the attachment.
            logger.warning("Failed to transcribe {}: {}", media_type, e)
            transcription = None
        if transcription:
            logger.info("Transcribed {}: {}...", media_type, transcription[:50])
            return [path_str], [f"[transcription: {transcription}]"]
    return [path_str], [f"[{media_type}: {path_str}]"]
|
||||||
|
|
||||||
async def _ensure_bot_identity(self) -> tuple[int | None, str | None]:
|
async def _ensure_bot_identity(self) -> tuple[int | None, str | None]:
|
||||||
"""Load bot identity once and reuse it for mention/reply checks."""
|
"""Load bot identity once and reuse it for mention/reply checks."""
|
||||||
if self._bot_user_id is not None or self._bot_username is not None:
|
if self._bot_user_id is not None or self._bot_username is not None:
|
||||||
@@ -612,56 +666,25 @@ class TelegramChannel(BaseChannel):
|
|||||||
if message.caption:
|
if message.caption:
|
||||||
content_parts.append(message.caption)
|
content_parts.append(message.caption)
|
||||||
|
|
||||||
# Handle media files
|
# Download current message media
|
||||||
media_file = None
|
current_media_paths, current_media_parts = await self._download_message_media(
|
||||||
media_type = None
|
message, add_failure_content=True
|
||||||
|
|
||||||
if message.photo:
|
|
||||||
media_file = message.photo[-1] # Largest photo
|
|
||||||
media_type = "image"
|
|
||||||
elif message.voice:
|
|
||||||
media_file = message.voice
|
|
||||||
media_type = "voice"
|
|
||||||
elif message.audio:
|
|
||||||
media_file = message.audio
|
|
||||||
media_type = "audio"
|
|
||||||
elif message.document:
|
|
||||||
media_file = message.document
|
|
||||||
media_type = "file"
|
|
||||||
|
|
||||||
# Download media if present
|
|
||||||
if media_file and self._app:
|
|
||||||
try:
|
|
||||||
file = await self._app.bot.get_file(media_file.file_id)
|
|
||||||
ext = self._get_extension(
|
|
||||||
media_type,
|
|
||||||
getattr(media_file, 'mime_type', None),
|
|
||||||
getattr(media_file, 'file_name', None),
|
|
||||||
)
|
)
|
||||||
media_dir = get_media_dir("telegram")
|
media_paths.extend(current_media_paths)
|
||||||
|
content_parts.extend(current_media_parts)
|
||||||
file_path = media_dir / f"{media_file.file_id[:16]}{ext}"
|
if current_media_paths:
|
||||||
await file.download_to_drive(str(file_path))
|
logger.debug("Downloaded message media to {}", current_media_paths[0])
|
||||||
|
|
||||||
media_paths.append(str(file_path))
|
|
||||||
|
|
||||||
if media_type in ("voice", "audio"):
|
|
||||||
transcription = await self.transcribe_audio(file_path)
|
|
||||||
if transcription:
|
|
||||||
logger.info("Transcribed {}: {}...", media_type, transcription[:50])
|
|
||||||
content_parts.append(f"[transcription: {transcription}]")
|
|
||||||
else:
|
|
||||||
content_parts.append(f"[{media_type}: {file_path}]")
|
|
||||||
else:
|
|
||||||
content_parts.append(f"[{media_type}: {file_path}]")
|
|
||||||
|
|
||||||
logger.debug("Downloaded {} to {}", media_type, file_path)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error("Failed to download media: {}", e)
|
|
||||||
content_parts.append(f"[{media_type}: download failed]")
|
|
||||||
|
|
||||||
|
# Reply context: include replied-to content; if reply has media, try to attach it
|
||||||
|
reply = getattr(message, "reply_to_message", None)
|
||||||
reply_ctx = self._extract_reply_context(message)
|
reply_ctx = self._extract_reply_context(message)
|
||||||
if reply_ctx is not None:
|
if reply_ctx is not None and reply is not None:
|
||||||
|
if "not attached)]" in reply_ctx:
|
||||||
|
reply_media_paths, reply_media_parts = await self._download_message_media(reply)
|
||||||
|
if reply_media_paths and reply_media_parts:
|
||||||
|
reply_ctx = f"[Reply to: {reply_media_parts[0]}]"
|
||||||
|
media_paths = reply_media_paths + media_paths
|
||||||
|
logger.debug("Attached replied-to media: {}", reply_media_paths[0])
|
||||||
content_parts.insert(0, reply_ctx)
|
content_parts.insert(0, reply_ctx)
|
||||||
content = "\n".join(content_parts) if content_parts else "[empty message]"
|
content = "\n".join(content_parts) if content_parts else "[empty message]"
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
|
from pathlib import Path
|
||||||
from types import SimpleNamespace
|
from types import SimpleNamespace
|
||||||
|
from unittest.mock import AsyncMock
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
@@ -43,6 +45,12 @@ class _FakeBot:
|
|||||||
async def send_chat_action(self, **kwargs) -> None:
|
async def send_chat_action(self, **kwargs) -> None:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
async def get_file(self, file_id: str):
    """Hand back a stand-in file object whose download is a no-op.

    The returned object exposes only ``download_to_drive``, an async callable
    that accepts a path and does nothing (used by the reply-to-media tests).
    """

    async def _noop_download(path) -> None:
        return None

    fake_file = SimpleNamespace(download_to_drive=_noop_download)
    return fake_file
|
||||||
|
|
||||||
|
|
||||||
class _FakeApp:
|
class _FakeApp:
|
||||||
def __init__(self, on_start_polling) -> None:
|
def __init__(self, on_start_polling) -> None:
|
||||||
@@ -389,14 +397,14 @@ def test_extract_reply_context_no_text_no_media() -> None:
|
|||||||
|
|
||||||
|
|
||||||
def test_extract_reply_context_reply_to_photo() -> None:
|
def test_extract_reply_context_reply_to_photo() -> None:
|
||||||
"""When reply has photo but no text/caption, return (image) placeholder."""
|
"""When reply has photo but no text/caption, return (image — not attached) placeholder."""
|
||||||
reply = SimpleNamespace(
|
reply = SimpleNamespace(
|
||||||
text=None,
|
text=None,
|
||||||
caption=None,
|
caption=None,
|
||||||
photo=[SimpleNamespace(file_id="x")],
|
photo=[SimpleNamespace(file_id="x")],
|
||||||
)
|
)
|
||||||
message = SimpleNamespace(reply_to_message=reply)
|
message = SimpleNamespace(reply_to_message=reply)
|
||||||
assert TelegramChannel._extract_reply_context(message) == "[Reply to: (image)]"
|
assert TelegramChannel._extract_reply_context(message) == "[Reply to: (image — not attached)]"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -420,3 +428,125 @@ async def test_on_message_includes_reply_context() -> None:
|
|||||||
assert len(handled) == 1
|
assert len(handled) == 1
|
||||||
assert handled[0]["content"].startswith("[Reply to: Hello]")
|
assert handled[0]["content"].startswith("[Reply to: Hello]")
|
||||||
assert "translate this" in handled[0]["content"]
|
assert "translate this" in handled[0]["content"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_download_message_media_returns_path_when_download_succeeds(
    monkeypatch, tmp_path
) -> None:
    """A successful get_file + download yields exactly one path and one content part."""
    telegram_media = tmp_path / "media" / "telegram"
    telegram_media.mkdir(parents=True)

    def _fake_media_dir(channel=None):
        # Mirror the real helper: a per-channel directory when a channel is named.
        return telegram_media if channel else tmp_path / "media"

    monkeypatch.setattr("nanobot.channels.telegram.get_media_dir", _fake_media_dir)

    config = TelegramConfig(enabled=True, token="123:abc", allow_from=["*"])
    channel = TelegramChannel(config, MessageBus())
    channel._app = _FakeApp(lambda: None)
    fake_file = SimpleNamespace(download_to_drive=AsyncMock(return_value=None))
    channel._app.bot.get_file = AsyncMock(return_value=fake_file)

    # Every media attribute other than the photo is explicitly absent.
    absent = dict.fromkeys(
        ("voice", "audio", "document", "video", "video_note", "animation")
    )
    photo_size = SimpleNamespace(file_id="fid123", mime_type="image/jpeg")
    msg = SimpleNamespace(photo=[photo_size], **absent)

    paths, parts = await channel._download_message_media(msg)

    assert len(paths) == 1
    assert len(parts) == 1
    assert "fid123" in paths[0]
    assert "[image:" in parts[0]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_on_message_attaches_reply_to_media_when_available(monkeypatch, tmp_path) -> None:
    """Replying to a media-only message downloads that media and attaches it to the turn."""
    telegram_media = tmp_path / "media" / "telegram"
    telegram_media.mkdir(parents=True)

    def _fake_media_dir(channel=None):
        # Mirror the real helper: a per-channel directory when a channel is named.
        return telegram_media if channel else tmp_path / "media"

    monkeypatch.setattr("nanobot.channels.telegram.get_media_dir", _fake_media_dir)

    config = TelegramConfig(
        enabled=True, token="123:abc", allow_from=["*"], group_policy="open"
    )
    channel = TelegramChannel(config, MessageBus())
    app = _FakeApp(lambda: None)
    fake_file = SimpleNamespace(download_to_drive=AsyncMock(return_value=None))
    app.bot.get_file = AsyncMock(return_value=fake_file)
    channel._app = app

    captured = []

    async def _record(**kwargs) -> None:
        captured.append(kwargs)

    channel._handle_message = _record
    channel._start_typing = lambda _chat_id: None

    # The replied-to message carries only a photo: no text, no caption, no other media.
    absent = dict.fromkeys(
        ("document", "voice", "audio", "video", "video_note", "animation")
    )
    reply_with_photo = SimpleNamespace(
        text=None,
        caption=None,
        photo=[SimpleNamespace(file_id="reply_photo_fid", mime_type="image/jpeg")],
        **absent,
    )
    update = _make_telegram_update(
        text="what is the image?",
        reply_to_message=reply_with_photo,
    )

    await channel._on_message(update, None)

    assert len(captured) == 1
    turn = captured[0]
    assert turn["content"].startswith("[Reply to: [image:")
    assert "what is the image?" in turn["content"]
    assert len(turn["media"]) == 1
    assert "reply_photo_fid" in turn["media"][0]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_on_message_reply_to_media_fallback_when_download_fails() -> None:
    """If the replied-to media cannot be downloaded, the placeholder stays and nothing is attached."""
    config = TelegramConfig(
        enabled=True, token="123:abc", allow_from=["*"], group_policy="open"
    )
    channel = TelegramChannel(config, MessageBus())
    channel._app = _FakeApp(lambda: None)
    # Replacing get_file with None makes the call itself raise, i.e. the download fails.
    channel._app.bot.get_file = None

    captured = []

    async def _record(**kwargs) -> None:
        captured.append(kwargs)

    channel._handle_message = _record
    channel._start_typing = lambda _chat_id: None

    # The replied-to message carries only a photo: no text, no caption, no other media.
    absent = dict.fromkeys(
        ("document", "voice", "audio", "video", "video_note", "animation")
    )
    reply_with_photo = SimpleNamespace(
        text=None,
        caption=None,
        photo=[SimpleNamespace(file_id="x", mime_type="image/jpeg")],
        **absent,
    )
    update = _make_telegram_update(text="what is this?", reply_to_message=reply_with_photo)

    await channel._on_message(update, None)

    assert len(captured) == 1
    turn = captured[0]
    assert "[Reply to: (image — not attached)]" in turn["content"]
    assert "what is this?" in turn["content"]
    assert turn["media"] == []
|
||||||
|
|||||||
Reference in New Issue
Block a user