From 3b4763b3f989c00da674933f459730717ea7385a Mon Sep 17 00:00:00 2001
From: tercerapersona
Date: Thu, 19 Feb 2026 11:05:22 -0300
Subject: [PATCH 1/2] feat: add Anthropic prompt caching via cache_control

Inject cache_control: {"type": "ephemeral"} on the system message and last
tool definition for providers that support prompt caching. Adds
supports_prompt_caching flag to ProviderSpec (enabled for Anthropic only)
and skips caching when routing through a gateway.

Co-Authored-By: Claude Sonnet 4.6
---
 nanobot/providers/litellm_provider.py | 43 +++++++++++++++++++++++++--
 nanobot/providers/registry.py         |  4 +++
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py
index 8cc4e35..950a138 100644
--- a/nanobot/providers/litellm_provider.py
+++ b/nanobot/providers/litellm_provider.py
@@ -93,6 +93,41 @@ class LiteLLMProvider(LLMProvider):
 
         return model
 
+    def _supports_cache_control(self, model: str) -> bool:
+        """Return True when the provider supports cache_control on content blocks."""
+        if self._gateway is not None:
+            return False
+        spec = find_by_model(model)
+        return spec is not None and spec.supports_prompt_caching
+
+    def _apply_cache_control(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]] | None,
+    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
+        """Return copies of messages and tools with cache_control injected."""
+        # Transform the system message
+        new_messages = []
+        for msg in messages:
+            if msg.get("role") == "system":
+                content = msg["content"]
+                if isinstance(content, str):
+                    new_content = [{"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}]
+                else:
+                    new_content = list(content)
+                    new_content[-1] = {**new_content[-1], "cache_control": {"type": "ephemeral"}}
+                new_messages.append({**msg, "content": new_content})
+            else:
+                new_messages.append(msg)
+
+        # Add cache_control to the last tool definition
+        new_tools = tools
+        if tools:
+            new_tools = list(tools)
+            new_tools[-1] = {**new_tools[-1], "cache_control": {"type": "ephemeral"}}
+
+        return new_messages, new_tools
+
     def _apply_model_overrides(self, model: str, kwargs: dict[str, Any]) -> None:
         """Apply model-specific parameter overrides from the registry."""
         model_lower = model.lower()
@@ -124,8 +159,12 @@
         Returns:
             LLMResponse with content and/or tool calls.
         """
-        model = self._resolve_model(model or self.default_model)
-
+        original_model = model or self.default_model
+        model = self._resolve_model(original_model)
+
+        if self._supports_cache_control(original_model):
+            messages, tools = self._apply_cache_control(messages, tools)
+
         # Clamp max_tokens to at least 1 — negative or zero values cause
         # LiteLLM to reject the request with "max_tokens must be at least 1".
         max_tokens = max(1, max_tokens)
diff --git a/nanobot/providers/registry.py b/nanobot/providers/registry.py
index 49b735c..2584e0e 100644
--- a/nanobot/providers/registry.py
+++ b/nanobot/providers/registry.py
@@ -57,6 +57,9 @@ class ProviderSpec:
     # Direct providers bypass LiteLLM entirely (e.g., CustomProvider)
     is_direct: bool = False
 
+    # Provider supports cache_control on content blocks (e.g. Anthropic prompt caching)
+    supports_prompt_caching: bool = False
+
     @property
     def label(self) -> str:
         return self.display_name or self.name.title()
@@ -155,6 +158,7 @@ PROVIDERS: tuple[ProviderSpec, ...] = (
         default_api_base="",
         strip_model_prefix=False,
         model_overrides=(),
+        supports_prompt_caching=True,
     ),
 
     # OpenAI: LiteLLM recognizes "gpt-*" natively, no prefix needed.
From 9ffae47c13862c0942b16016e67725af199a5ace Mon Sep 17 00:00:00 2001
From: Re-bin
Date: Fri, 20 Feb 2026 08:21:02 +0000
Subject: [PATCH 2/2] refactor(litellm): remove redundant comments in
 cache_control methods

---
 nanobot/providers/litellm_provider.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py
index 08c2f53..66751ed 100644
--- a/nanobot/providers/litellm_provider.py
+++ b/nanobot/providers/litellm_provider.py
@@ -117,7 +117,6 @@
         tools: list[dict[str, Any]] | None,
     ) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
         """Return copies of messages and tools with cache_control injected."""
-        # Transform the system message
         new_messages = []
        for msg in messages:
             if msg.get("role") == "system":
@@ -131,7 +130,6 @@
             else:
                 new_messages.append(msg)
 
-        # Add cache_control to the last tool definition
         new_tools = tools
         if tools:
             new_tools = list(tools)