Merge PR #1109: perf: optimize prompt cache hit rate for Anthropic models

perf: optimize prompt cache hit rate for Anthropic models
2026-03-22 14:23:41 +08:00
parent 1c71489121 931cec3908
commit 5fd66cae5c
1 changed file with 30 additions and 14 deletions
--- a/nanobot/providers/litellm_provider.py
+++ b/nanobot/providers/litellm_provider.py
@@ -129,24 +129,40 @@ class LiteLLMProvider(LLMProvider):
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]] | None,
    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
-        """Return copies of messages and tools with cache_control injected."""
+        """Return copies of messages and tools with cache_control injected.
-        new_messages = []
+
-        for msg in messages:
+        Two breakpoints are placed:
-            if msg.get("role") == "system":
+        1. System message — caches the static system prompt
-                content = msg["content"]
+        2. Second-to-last message — caches the conversation history prefix
        This maximises cache hits across multi-turn conversations.
        """
        cache_marker = {"type": "ephemeral"}
        new_messages = list(messages)
        def _mark(msg: dict[str, Any]) -> dict[str, Any]:
            content = msg.get("content")
            if isinstance(content, str):
-                    new_content = [{"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}]
+                return {**msg, "content": [
-                else:
+                    {"type": "text", "text": content, "cache_control": cache_marker}
                ]}
            elif isinstance(content, list) and content:
                new_content = list(content)
-                    new_content[-1] = {**new_content[-1], "cache_control": {"type": "ephemeral"}}
+                new_content[-1] = {**new_content[-1], "cache_control": cache_marker}
-                new_messages.append({**msg, "content": new_content})
+                return {**msg, "content": new_content}
-            else:
+            return msg
-                new_messages.append(msg)
+
        # Breakpoint 1: system message
        if new_messages and new_messages[0].get("role") == "system":
            new_messages[0] = _mark(new_messages[0])
        # Breakpoint 2: second-to-last message (caches conversation history prefix)
        if len(new_messages) >= 3:
            new_messages[-2] = _mark(new_messages[-2])
        new_tools = tools
        if tools:
            new_tools = list(tools)
-            new_tools[-1] = {**new_tools[-1], "cache_control": {"type": "ephemeral"}}
+            new_tools[-1] = {**new_tools[-1], "cache_control": cache_marker}
        return new_messages, new_tools