Merge remote-tracking branch 'origin/main'

# Conflicts:
#	README.md
#	nanobot/agent/context.py
#	nanobot/agent/loop.py
#	nanobot/channels/telegram.py
2026-03-19 00:42:43 +08:00
parent 61dcdffbbe 214bf66a29
commit 49fbd5c15c
13 changed files with 548 additions and 73 deletions
--- a/nanobot/agent/context.py
+++ b/nanobot/agent/context.py
@@ -171,6 +171,7 @@ Reply directly with text for conversations. Only use the 'message' tool to send
        chat_id: str | None = None,
        persona: str | None = None,
        language: str | None = None,
+        current_role: str = "user",
    ) -> list[dict[str, Any]]:
        """Build the complete message list for an LLM call."""
        runtime_ctx = self._build_runtime_context(channel, chat_id)
@@ -186,7 +187,7 @@ Reply directly with text for conversations. Only use the 'message' tool to send
        return [
            {"role": "system", "content": self.build_system_prompt(skill_names, persona=persona, language=language)},
            *history,
-            {"role": "user", "content": merged},
+            {"role": current_role, "content": merged},
        ]

    def _build_user_content(self, text: str, media: list[str] | None) -> str | list[dict[str, Any]]:
@@ -205,7 +206,11 @@ Reply directly with text for conversations. Only use the 'message' tool to send
            if not mime or not mime.startswith("image/"):
                continue
            b64 = base64.b64encode(raw).decode()
-            images.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}})
+            images.append({
+                "type": "image_url",
+                "image_url": {"url": f"data:{mime};base64,{b64}"},
+                "_meta": {"path": str(p)},
+            })

        if not images:
            return text
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -765,6 +765,8 @@ class AgentLoop:
            await self.memory_consolidator.maybe_consolidate_by_tokens(session)
            self._set_tool_context(channel, chat_id, msg.metadata.get("message_id"))
            history = session.get_history(max_messages=0)
+            # Subagent results should be assistant role, other system messages use user role
+            current_role = "assistant" if msg.sender_id == "subagent" else "user"
            messages = self.context.build_messages(
                history=history,
                current_message=msg.content,
@@ -772,6 +774,7 @@ class AgentLoop:
                chat_id=chat_id,
                persona=persona,
                language=language,
+                current_role=current_role,
            )
            final_content, _, all_msgs = await self._run_agent_loop(messages)
            self._save_turn(session, all_msgs, 1 + len(history))
@@ -883,7 +886,9 @@ class AgentLoop:
                            continue  # Strip runtime context from multimodal messages
                        if (c.get("type") == "image_url"
                                and c.get("image_url", {}).get("url", "").startswith("data:image/")):
-                            filtered.append({"type": "text", "text": "[image]"})
+                            path = (c.get("_meta") or {}).get("path", "")
+                            placeholder = f"[image: {path}]" if path else "[image]"
+                            filtered.append({"type": "text", "text": placeholder})
                        else:
                            filtered.append(c)
                    if not filtered:
--- a/nanobot/agent/tools/cron.py
+++ b/nanobot/agent/tools/cron.py
@@ -1,11 +1,12 @@
 """Cron tool for scheduling reminders and tasks."""

 from contextvars import ContextVar
+from datetime import datetime, timezone
 from typing import Any

 from nanobot.agent.tools.base import Tool
 from nanobot.cron.service import CronService
-from nanobot.cron.types import CronSchedule
+from nanobot.cron.types import CronJobState, CronSchedule


 class CronTool(Tool):
@@ -143,11 +144,51 @@ class CronTool(Tool):
        )
        return f"Created job '{job.name}' (id: {job.id})"

+    @staticmethod
+    def _format_timing(schedule: CronSchedule) -> str:
+        """Format schedule as a human-readable timing string."""
+        if schedule.kind == "cron":
+            tz = f" ({schedule.tz})" if schedule.tz else ""
+            return f"cron: {schedule.expr}{tz}"
+        if schedule.kind == "every" and schedule.every_ms:
+            ms = schedule.every_ms
+            if ms % 3_600_000 == 0:
+                return f"every {ms // 3_600_000}h"
+            if ms % 60_000 == 0:
+                return f"every {ms // 60_000}m"
+            if ms % 1000 == 0:
+                return f"every {ms // 1000}s"
+            return f"every {ms}ms"
+        if schedule.kind == "at" and schedule.at_ms:
+            dt = datetime.fromtimestamp(schedule.at_ms / 1000, tz=timezone.utc)
+            return f"at {dt.isoformat()}"
+        return schedule.kind
+
+    @staticmethod
+    def _format_state(state: CronJobState) -> list[str]:
+        """Format job run state as display lines."""
+        lines: list[str] = []
+        if state.last_run_at_ms:
+            last_dt = datetime.fromtimestamp(state.last_run_at_ms / 1000, tz=timezone.utc)
+            info = f"  Last run: {last_dt.isoformat()} — {state.last_status or 'unknown'}"
+            if state.last_error:
+                info += f" ({state.last_error})"
+            lines.append(info)
+        if state.next_run_at_ms:
+            next_dt = datetime.fromtimestamp(state.next_run_at_ms / 1000, tz=timezone.utc)
+            lines.append(f"  Next run: {next_dt.isoformat()}")
+        return lines
+
    def _list_jobs(self) -> str:
        jobs = self._cron.list_jobs()
        if not jobs:
            return "No scheduled jobs."
-        lines = [f"- {j.name} (id: {j.id}, {j.schedule.kind})" for j in jobs]
+        lines = []
+        for j in jobs:
+            timing = self._format_timing(j.schedule)
+            parts = [f"- {j.name} (id: {j.id}, {timing})"]
+            parts.extend(self._format_state(j.state))
+            lines.append("\n".join(parts))
        return "Scheduled jobs:\n" + "\n".join(lines)

    def _remove_job(self, job_id: str | None) -> str:
--- a/nanobot/channels/telegram.py
+++ b/nanobot/channels/telegram.py
@@ -23,6 +23,7 @@ from nanobot.bus.queue import MessageBus
 from nanobot.channels.base import BaseChannel
 from nanobot.config.paths import get_media_dir
 from nanobot.config.schema import TelegramConfig, TelegramInstanceConfig
+from nanobot.security.network import validate_url_target
 from nanobot.utils.helpers import split_message

 TELEGRAM_MAX_MESSAGE_LEN = 4000  # Telegram message character limit
@@ -312,6 +313,10 @@ class TelegramChannel(BaseChannel):
            return "audio"
        return "document"

+    @staticmethod
+    def _is_remote_media_url(path: str) -> bool:
+        return path.startswith(("http://", "https://"))
+
    async def send(self, msg: OutboundMessage) -> None:
        """Send a message through Telegram."""
        if not self._app:
@@ -353,7 +358,21 @@ class TelegramChannel(BaseChannel):
                    "audio": self._app.bot.send_audio,
                }.get(media_type, self._app.bot.send_document)
                param = "photo" if media_type == "photo" else media_type if media_type in ("voice", "audio") else "document"
-                with open(media_path, 'rb') as f:
+
+                # Telegram Bot API accepts HTTP(S) URLs directly for media params.
+                if self._is_remote_media_url(media_path):
+                    ok, error = validate_url_target(media_path)
+                    if not ok:
+                        raise ValueError(f"unsafe media URL: {error}")
+                    await sender(
+                        chat_id=chat_id,
+                        **{param: media_path},
+                        reply_parameters=reply_params,
+                        **thread_kwargs,
+                    )
+                    continue
+
+                with open(media_path, "rb") as f:
                    await sender(
                        chat_id=chat_id,
                        **{param: f},
--- a/nanobot/providers/__init__.py
+++ b/nanobot/providers/__init__.py
@@ -1,8 +1,30 @@
 """LLM provider abstraction module."""

+from __future__ import annotations
+
+from importlib import import_module
+from typing import TYPE_CHECKING
+
 from nanobot.providers.base import LLMProvider, LLMResponse
-from nanobot.providers.litellm_provider import LiteLLMProvider
-from nanobot.providers.openai_codex_provider import OpenAICodexProvider
-from nanobot.providers.azure_openai_provider import AzureOpenAIProvider

 __all__ = ["LLMProvider", "LLMResponse", "LiteLLMProvider", "OpenAICodexProvider", "AzureOpenAIProvider"]
+
+_LAZY_IMPORTS = {
+    "LiteLLMProvider": ".litellm_provider",
+    "OpenAICodexProvider": ".openai_codex_provider",
+    "AzureOpenAIProvider": ".azure_openai_provider",
+}
+
+if TYPE_CHECKING:
+    from nanobot.providers.azure_openai_provider import AzureOpenAIProvider
+    from nanobot.providers.litellm_provider import LiteLLMProvider
+    from nanobot.providers.openai_codex_provider import OpenAICodexProvider
+
+
+def __getattr__(name: str):
+    """Lazily expose provider implementations without importing all backends up front."""
+    module_name = _LAZY_IMPORTS.get(name)
+    if module_name is None:
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+    module = import_module(module_name, __name__)
+    return getattr(module, name)
--- a/nanobot/providers/base.py
+++ b/nanobot/providers/base.py
@@ -89,14 +89,6 @@ class LLMProvider(ABC):
        "server error",
        "temporarily unavailable",
    )
-    _IMAGE_UNSUPPORTED_MARKERS = (
-        "image_url is only supported",
-        "does not support image",
-        "images are not supported",
-        "image input is not supported",
-        "image_url is not supported",
-        "unsupported image input",
-    )

    _SENTINEL = object()

@@ -107,11 +99,7 @@ class LLMProvider(ABC):

    @staticmethod
    def _sanitize_empty_content(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
-        """Replace empty text content that causes provider 400 errors.
-
-        Empty content can appear when MCP tools return nothing. Most providers
-        reject empty-string content or empty text blocks in list content.
-        """
+        """Sanitize message content: fix empty blocks, strip internal _meta fields."""
        result: list[dict[str, Any]] = []
        for msg in messages:
            content = msg.get("content")
@@ -123,18 +111,25 @@ class LLMProvider(ABC):
                continue

            if isinstance(content, list):
-                filtered = [
-                    item for item in content
-                    if not (
+                new_items: list[Any] = []
+                changed = False
+                for item in content:
+                    if (
                        isinstance(item, dict)
                        and item.get("type") in ("text", "input_text", "output_text")
                        and not item.get("text")
-                    )
-                ]
-                if len(filtered) != len(content):
+                    ):
+                        changed = True
+                        continue
+                    if isinstance(item, dict) and "_meta" in item:
+                        new_items.append({k: v for k, v in item.items() if k != "_meta"})
+                        changed = True
+                    else:
+                        new_items.append(item)
+                if changed:
                    clean = dict(msg)
-                    if filtered:
-                        clean["content"] = filtered
+                    if new_items:
+                        clean["content"] = new_items
                    elif msg.get("role") == "assistant" and msg.get("tool_calls"):
                        clean["content"] = None
                    else:
@@ -197,11 +192,6 @@ class LLMProvider(ABC):
        err = (content or "").lower()
        return any(marker in err for marker in cls._TRANSIENT_ERROR_MARKERS)

-    @classmethod
-    def _is_image_unsupported_error(cls, content: str | None) -> bool:
-        err = (content or "").lower()
-        return any(marker in err for marker in cls._IMAGE_UNSUPPORTED_MARKERS)
-
    @staticmethod
    def _strip_image_content(messages: list[dict[str, Any]]) -> list[dict[str, Any]] | None:
        """Replace image_url blocks with text placeholder. Returns None if no images found."""
@@ -213,7 +203,9 @@ class LLMProvider(ABC):
                new_content = []
                for b in content:
                    if isinstance(b, dict) and b.get("type") == "image_url":
-                        new_content.append({"type": "text", "text": "[image omitted]"})
+                        path = (b.get("_meta") or {}).get("path", "")
+                        placeholder = f"[image: {path}]" if path else "[image omitted]"
+                        new_content.append({"type": "text", "text": placeholder})
                        found = True
                    else:
                        new_content.append(b)
@@ -267,11 +259,10 @@ class LLMProvider(ABC):
                return response

            if not self._is_transient_error(response.content):
-                if self._is_image_unsupported_error(response.content):
-                    stripped = self._strip_image_content(messages)
-                    if stripped is not None:
-                        logger.warning("Model does not support image input, retrying without images")
-                        return await self._safe_chat(**{**kw, "messages": stripped})
+                stripped = self._strip_image_content(messages)
+                if stripped is not None:
+                    logger.warning("Non-transient LLM error with image content, retrying without images")
+                    return await self._safe_chat(**{**kw, "messages": stripped})
                return response

            logger.warning(