add reply context extraction for Telegram messages

This commit is contained in:
John Doe
2026-03-12 06:23:02 +07:00
parent 6155a43b8a
commit 1eedee0c40
2 changed files with 124 additions and 2 deletions

View File

@@ -20,6 +20,7 @@ from nanobot.config.schema import TelegramConfig
from nanobot.utils.helpers import split_message from nanobot.utils.helpers import split_message
TELEGRAM_MAX_MESSAGE_LEN = 4000 # Telegram message character limit TELEGRAM_MAX_MESSAGE_LEN = 4000 # Telegram message character limit
# Maximum number of characters of the replied-to text/caption kept in the
# "[Reply to: ...]" prefix; longer content is cut and "..." is appended.
TELEGRAM_REPLY_CONTEXT_MAX_LEN = TELEGRAM_MAX_MESSAGE_LEN
def _strip_md(s: str) -> str: def _strip_md(s: str) -> str:
@@ -451,6 +452,7 @@ class TelegramChannel(BaseChannel):
@staticmethod @staticmethod
def _build_message_metadata(message, user) -> dict: def _build_message_metadata(message, user) -> dict:
"""Build common Telegram inbound metadata payload.""" """Build common Telegram inbound metadata payload."""
reply_to = getattr(message, "reply_to_message", None)
return { return {
"message_id": message.message_id, "message_id": message.message_id,
"user_id": user.id, "user_id": user.id,
@@ -459,8 +461,37 @@ class TelegramChannel(BaseChannel):
"is_group": message.chat.type != "private", "is_group": message.chat.type != "private",
"message_thread_id": getattr(message, "message_thread_id", None), "message_thread_id": getattr(message, "message_thread_id", None),
"is_forum": bool(getattr(message.chat, "is_forum", False)), "is_forum": bool(getattr(message.chat, "is_forum", False)),
"reply_to_message_id": getattr(reply_to, "message_id", None) if reply_to else None,
} }
@staticmethod
def _extract_reply_context(message) -> str | None:
    """Describe the message being replied to as a ``[Reply to: ...]`` prefix.

    Returns ``None`` when ``message`` is not a reply. Otherwise the replied-to
    text (or, failing that, its caption) is used, cut to
    ``TELEGRAM_REPLY_CONTEXT_MAX_LEN`` characters with ``...`` appended when
    anything was dropped. A reply without text/caption yields a media-type
    placeholder, or ``(no text)`` when none of the known media fields is set.
    """
    reply = getattr(message, "reply_to_message", None)
    if not reply:
        return None

    text = getattr(reply, "text", None) or getattr(reply, "caption", None)
    if text:
        limit = TELEGRAM_REPLY_CONTEXT_MAX_LEN
        ellipsis = "..." if len(text) > limit else ""
        return f"[Reply to: {text[:limit]}{ellipsis}]"

    # Reply has no text/caption; fall back to a media-type placeholder.
    # Order matters: the Telegram Bot API also sets ``document`` on animation
    # messages (backward compatibility), so ``animation`` must be probed
    # before ``document`` or GIF replies would be reported as documents.
    media_labels = (
        ("photo", "image"),
        ("animation", "animation"),
        ("document", "document"),
        ("voice", "voice"),
        ("video_note", "video"),
        ("video", "video"),
        ("audio", "audio"),
    )
    for attr, label in media_labels:
        if getattr(reply, attr, None):
            return f"[Reply to: ({label})]"
    return "[Reply to: (no text)]"
async def _ensure_bot_identity(self) -> tuple[int | None, str | None]: async def _ensure_bot_identity(self) -> tuple[int | None, str | None]:
"""Load bot identity once and reuse it for mention/reply checks.""" """Load bot identity once and reuse it for mention/reply checks."""
if self._bot_user_id is not None or self._bot_username is not None: if self._bot_user_id is not None or self._bot_username is not None:
@@ -542,10 +573,14 @@ class TelegramChannel(BaseChannel):
message = update.message message = update.message
user = update.effective_user user = update.effective_user
self._remember_thread_context(message) self._remember_thread_context(message)
reply_ctx = self._extract_reply_context(message)
content = message.text or ""
if reply_ctx:
content = reply_ctx + "\n\n" + content
await self._handle_message( await self._handle_message(
sender_id=self._sender_id(user), sender_id=self._sender_id(user),
chat_id=str(message.chat_id), chat_id=str(message.chat_id),
content=message.text, content=content,
metadata=self._build_message_metadata(message, user), metadata=self._build_message_metadata(message, user),
session_key=self._derive_topic_session_key(message), session_key=self._derive_topic_session_key(message),
) )
@@ -625,6 +660,9 @@ class TelegramChannel(BaseChannel):
logger.error("Failed to download media: {}", e) logger.error("Failed to download media: {}", e)
content_parts.append(f"[{media_type}: download failed]") content_parts.append(f"[{media_type}: download failed]")
reply_ctx = self._extract_reply_context(message)
if reply_ctx is not None:
content_parts.insert(0, reply_ctx)
content = "\n".join(content_parts) if content_parts else "[empty message]" content = "\n".join(content_parts) if content_parts else "[empty message]"
logger.debug("Telegram message from {}: {}...", sender_id, content[:50]) logger.debug("Telegram message from {}: {}...", sender_id, content[:50])

View File

@@ -1,10 +1,11 @@
import asyncio
from types import SimpleNamespace from types import SimpleNamespace
import pytest import pytest
from nanobot.bus.events import OutboundMessage from nanobot.bus.events import OutboundMessage
from nanobot.bus.queue import MessageBus from nanobot.bus.queue import MessageBus
from nanobot.channels.telegram import TelegramChannel from nanobot.channels.telegram import TELEGRAM_REPLY_CONTEXT_MAX_LEN, TelegramChannel
from nanobot.config.schema import TelegramConfig from nanobot.config.schema import TelegramConfig
@@ -336,3 +337,86 @@ async def test_group_policy_open_accepts_plain_group_message() -> None:
assert len(handled) == 1 assert len(handled) == 1
assert channel._app.bot.get_me_calls == 0 assert channel._app.bot.get_me_calls == 0
def test_extract_reply_context_no_reply() -> None:
    """A message that is not a reply produces no reply context."""
    not_a_reply = SimpleNamespace(reply_to_message=None)
    context = TelegramChannel._extract_reply_context(not_a_reply)
    assert context is None
def test_extract_reply_context_with_text() -> None:
    """The replied-to text is wrapped in the ``[Reply to: ...]`` prefix."""
    message = SimpleNamespace(
        reply_to_message=SimpleNamespace(text="Hello world", caption=None),
    )
    expected = "[Reply to: Hello world]"
    assert TelegramChannel._extract_reply_context(message) == expected
def test_extract_reply_context_with_caption_only() -> None:
    """With no text on the replied-to message, its caption is used instead."""
    message = SimpleNamespace(
        reply_to_message=SimpleNamespace(text=None, caption="Photo caption"),
    )
    expected = "[Reply to: Photo caption]"
    assert TelegramChannel._extract_reply_context(message) == expected
def test_extract_reply_context_truncation() -> None:
    """Over-long reply text is cut at TELEGRAM_REPLY_CONTEXT_MAX_LEN plus an ellipsis."""
    oversized = "x" * (TELEGRAM_REPLY_CONTEXT_MAX_LEN + 100)
    message = SimpleNamespace(
        reply_to_message=SimpleNamespace(text=oversized, caption=None),
    )

    result = TelegramChannel._extract_reply_context(message)

    prefix, suffix = "[Reply to: ", "...]"
    assert result is not None
    assert result.startswith(prefix)
    assert result.endswith(suffix)
    # Wrapper characters + the kept slice of text; nothing else may be present.
    assert len(result) == len(prefix) + TELEGRAM_REPLY_CONTEXT_MAX_LEN + len(suffix)
def test_extract_reply_context_no_text_no_media() -> None:
    """A reply with neither text/caption nor any media gets the (no text) placeholder."""
    empty_fields = (
        "text",
        "caption",
        "photo",
        "document",
        "voice",
        "video_note",
        "video",
        "audio",
        "animation",
    )
    # Every probed attribute exists on the reply but is explicitly None.
    reply = SimpleNamespace(**dict.fromkeys(empty_fields))
    message = SimpleNamespace(reply_to_message=reply)
    expected = "[Reply to: (no text)]"
    assert TelegramChannel._extract_reply_context(message) == expected
def test_extract_reply_context_reply_to_photo() -> None:
    """A photo reply without text/caption is reported with the (image) placeholder."""
    photo_sizes = [SimpleNamespace(file_id="x")]
    reply = SimpleNamespace(text=None, caption=None, photo=photo_sizes)
    message = SimpleNamespace(reply_to_message=reply)
    expected = "[Reply to: (image)]"
    assert TelegramChannel._extract_reply_context(message) == expected
@pytest.mark.asyncio
async def test_on_message_includes_reply_context() -> None:
    """Replying to a message puts the reply context at the start of the handled content."""
    config = TelegramConfig(
        enabled=True,
        token="123:abc",
        allow_from=["*"],
        group_policy="open",
    )
    channel = TelegramChannel(config, MessageBus())
    channel._app = _FakeApp(lambda: None)
    channel._start_typing = lambda _chat_id: None

    # Record the keyword arguments _on_message hands to _handle_message.
    captured = []

    async def record_call(**kwargs) -> None:
        captured.append(kwargs)

    channel._handle_message = record_call

    replied_to = SimpleNamespace(
        text="Hello",
        message_id=2,
        from_user=SimpleNamespace(id=1),
    )
    update = _make_telegram_update(text="translate this", reply_to_message=replied_to)
    await channel._on_message(update, None)

    assert len(captured) == 1
    content = captured[0]["content"]
    assert content.startswith("[Reply to: Hello]")
    assert "translate this" in content