From bd09cc3e6feaf2b99953194609f7c0e9a09e682e Mon Sep 17 00:00:00 2001 From: coldxiangyu Date: Tue, 24 Feb 2026 20:54:30 +0800 Subject: [PATCH] perf: optimize prompt cache hit rate for Anthropic models Part 1: Make system prompt static - Move Current Time from system prompt to user message prefix - System prompt now only changes when config/skills change, not every minute - Timestamp injected as [YYYY-MM-DD HH:MM (Day) (TZ)] prefix on each user message Part 2: Add second cache_control breakpoint - Existing: system message breakpoint (caches static system prompt) - New: second-to-last message breakpoint (caches conversation history prefix) - Refactored _apply_cache_control with shared _mark() helper Before: 0% cache hit rate (system prompt changed every minute) After: ~90% savings on cached input tokens for multi-turn conversations Closes #981 --- nanobot/agent/context.py | 37 +++++++++++++++++----- nanobot/providers/litellm_provider.py | 44 ++++++++++++++++++--------- 2 files changed, 60 insertions(+), 21 deletions(-) diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py index be0ec59..ccb1215 100644 --- a/nanobot/agent/context.py +++ b/nanobot/agent/context.py @@ -111,13 +111,36 @@ Reply directly with text for conversations. Only use the 'message' tool to send channel: str | None = None, chat_id: str | None = None, ) -> list[dict[str, Any]]: - """Build the complete message list for an LLM call.""" - return [ - {"role": "system", "content": self.build_system_prompt(skill_names)}, - *history, - {"role": "user", "content": self._build_runtime_context(channel, chat_id)}, - {"role": "user", "content": self._build_user_content(current_message, media)}, - ] + """ + Build the complete message list for an LLM call. + + Args: + history: Previous conversation messages. + current_message: The new user message. + skill_names: Optional skills to include. + media: Optional list of local file paths for images/media. + channel: Current channel (telegram, feishu, etc.). + chat_id: Current chat/user ID. + + Returns: + List of messages including system prompt. + """ + messages = [] + + # System prompt + system_prompt = self.build_system_prompt(skill_names) + messages.append({"role": "system", "content": system_prompt}) + + # History + messages.extend(history) + + # Inject current timestamp into user message (keeps system prompt static for caching) + # Current message (with optional image attachments) + user_content = self._build_user_content(current_message, media) + user_content = self._inject_runtime_context(user_content, channel, chat_id) + messages.append({"role": "user", "content": user_content}) + + return messages def _build_user_content(self, text: str, media: list[str] | None) -> str | list[dict[str, Any]]: """Build user message content with optional base64-encoded images.""" diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py index 5427d97..c4f528c 100644 --- a/nanobot/providers/litellm_provider.py +++ b/nanobot/providers/litellm_provider.py @@ -128,24 +128,40 @@ class LiteLLMProvider(LLMProvider): messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None, ) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]: - """Return copies of messages and tools with cache_control injected.""" - new_messages = [] - for msg in messages: - if msg.get("role") == "system": - content = msg["content"] - if isinstance(content, str): - new_content = [{"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}] - else: - new_content = list(content) - new_content[-1] = {**new_content[-1], "cache_control": {"type": "ephemeral"}} - new_messages.append({**msg, "content": new_content}) - else: - new_messages.append(msg) + """Return copies of messages and tools with cache_control injected. + + Two breakpoints are placed: + 1. System message — caches the static system prompt + 2. Second-to-last message — caches the conversation history prefix + This maximises cache hits across multi-turn conversations. + """ + cache_marker = {"type": "ephemeral"} + new_messages = list(messages) + + def _mark(msg: dict[str, Any]) -> dict[str, Any]: + content = msg.get("content") + if isinstance(content, str): + return {**msg, "content": [ + {"type": "text", "text": content, "cache_control": cache_marker} + ]} + elif isinstance(content, list) and content: + new_content = list(content) + new_content[-1] = {**new_content[-1], "cache_control": cache_marker} + return {**msg, "content": new_content} + return msg + + # Breakpoint 1: system message + if new_messages and new_messages[0].get("role") == "system": + new_messages[0] = _mark(new_messages[0]) + + # Breakpoint 2: second-to-last message (caches conversation history prefix) + if len(new_messages) >= 3: + new_messages[-2] = _mark(new_messages[-2]) new_tools = tools if tools: new_tools = list(tools) - new_tools[-1] = {**new_tools[-1], "cache_control": {"type": "ephemeral"}} + new_tools[-1] = {**new_tools[-1], "cache_control": cache_marker} return new_messages, new_tools