diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py index d14e4c0..20c3d25 100644 --- a/nanobot/providers/litellm_provider.py +++ b/nanobot/providers/litellm_provider.py @@ -129,24 +129,40 @@ class LiteLLMProvider(LLMProvider): messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None, ) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]: - """Return copies of messages and tools with cache_control injected.""" - new_messages = [] - for msg in messages: - if msg.get("role") == "system": - content = msg["content"] - if isinstance(content, str): - new_content = [{"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}] - else: - new_content = list(content) - new_content[-1] = {**new_content[-1], "cache_control": {"type": "ephemeral"}} - new_messages.append({**msg, "content": new_content}) - else: - new_messages.append(msg) + """Return copies of messages and tools with cache_control injected. + + Two breakpoints are placed: + 1. System message — caches the static system prompt + 2. Second-to-last message — caches the conversation history prefix + This maximises cache hits across multi-turn conversations. + """ + cache_marker = {"type": "ephemeral"} + new_messages = list(messages) + + def _mark(msg: dict[str, Any]) -> dict[str, Any]: + content = msg.get("content") + if isinstance(content, str): + return {**msg, "content": [ + {"type": "text", "text": content, "cache_control": cache_marker} + ]} + elif isinstance(content, list) and content: + new_content = list(content) + new_content[-1] = {**new_content[-1], "cache_control": cache_marker} + return {**msg, "content": new_content} + return msg + + # Breakpoint 1: system message + if new_messages and new_messages[0].get("role") == "system": + new_messages[0] = _mark(new_messages[0]) + + # Breakpoint 2: second-to-last message (caches conversation history prefix) + if len(new_messages) >= 3: + new_messages[-2] = _mark(new_messages[-2]) new_tools = tools if tools: new_tools = list(tools) - new_tools[-1] = {**new_tools[-1], "cache_control": {"type": "ephemeral"}} + new_tools[-1] = {**new_tools[-1], "cache_control": cache_marker} return new_messages, new_tools