Merge remote-tracking branch 'origin/main'

# Conflicts:
#	nanobot/agent/context.py
#	nanobot/agent/loop.py
#	nanobot/agent/tools/web.py
#	nanobot/channels/telegram.py
#	nanobot/cli/commands.py
#	tests/test_commands.py
#	tests/test_config_migration.py
#	tests/test_telegram_channel.py
2026-03-23 09:39:17 +08:00
parent e9b8bee78f 5fd66cae5c
commit b1a08f3bb9
42 changed files with 2974 additions and 152 deletions
--- a/nanobot/providers/custom_provider.py
+++ b/nanobot/providers/custom_provider.py
@@ -51,6 +51,12 @@ class CustomProvider(LLMProvider):
        try:
            return self._parse(await self._client.chat.completions.create(**kwargs))
        except Exception as e:
+            # JSONDecodeError.doc / APIError.response.text may carry the raw body
+            # (e.g. "unsupported model: xxx") which is far more useful than the
+            # generic "Expecting value …" message.  Truncate to avoid huge HTML pages.
+            body = getattr(e, "doc", None) or getattr(getattr(e, "response", None), "text", None)
+            if body and body.strip():
+                return LLMResponse(content=f"Error: {body.strip()[:500]}", finish_reason="error")
            return LLMResponse(content=f"Error: {e}", finish_reason="error")

    def _parse(self, response: Any) -> LLMResponse:
--- a/nanobot/providers/litellm_provider.py
+++ b/nanobot/providers/litellm_provider.py
@@ -128,24 +128,40 @@ class LiteLLMProvider(LLMProvider):
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]] | None,
    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
-        """Return copies of messages and tools with cache_control injected."""
-        new_messages = []
-        for msg in messages:
-            if msg.get("role") == "system":
-                content = msg["content"]
-                if isinstance(content, str):
-                    new_content = [{"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}]
-                else:
-                    new_content = list(content)
-                    new_content[-1] = {**new_content[-1], "cache_control": {"type": "ephemeral"}}
-                new_messages.append({**msg, "content": new_content})
-            else:
-                new_messages.append(msg)
+        """Return copies of messages and tools with cache_control injected.
+
+        Two message breakpoints are placed, plus one on the last tool (if any):
+        1. System message, when it is the first message — caches the static system prompt
+        2. Second-to-last message, when there are 3+ messages — caches the history prefix
+        This maximises cache hits across multi-turn conversations.
+        """
+        cache_marker = {"type": "ephemeral"}
+        new_messages = list(messages)
+
+        def _mark(msg: dict[str, Any]) -> dict[str, Any]:
+            content = msg.get("content")
+            if isinstance(content, str):
+                return {**msg, "content": [
+                    {"type": "text", "text": content, "cache_control": cache_marker}
+                ]}
+            elif isinstance(content, list) and content:
+                new_content = list(content)
+                new_content[-1] = {**new_content[-1], "cache_control": cache_marker}
+                return {**msg, "content": new_content}
+            return msg
+
+        # Breakpoint 1: system message
+        if new_messages and new_messages[0].get("role") == "system":
+            new_messages[0] = _mark(new_messages[0])
+
+        # Breakpoint 2: second-to-last message (caches conversation history prefix)
+        if len(new_messages) >= 3:
+            new_messages[-2] = _mark(new_messages[-2])

        new_tools = tools
        if tools:
            new_tools = list(tools)
-            new_tools[-1] = {**new_tools[-1], "cache_control": {"type": "ephemeral"}}
+            new_tools[-1] = {**new_tools[-1], "cache_control": cache_marker}

        return new_messages, new_tools