diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py
index 3fec618..66751ed 100644
--- a/nanobot/providers/litellm_provider.py
+++ b/nanobot/providers/litellm_provider.py
@@ -104,6 +104,40 @@ class LiteLLMProvider(LLMProvider):
             return model
         return f"{canonical_prefix}/{remainder}"
 
+    def _supports_cache_control(self, model: str) -> bool:
+        """Return True when the provider supports cache_control on content blocks."""
+        if self._gateway is not None:
+            return False
+        spec = find_by_model(model)
+        return spec is not None and spec.supports_prompt_caching
+
+    def _apply_cache_control(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]] | None,
+    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
+        """Return copies of messages and tools with cache_control injected."""
+        new_messages = []
+        for msg in messages:
+            if msg.get("role") == "system":
+                content = msg["content"]
+                if isinstance(content, str):
+                    new_content = [{"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}]
+                else:
+                    new_content = list(content)
+                    if new_content:
+                        new_content[-1] = {**new_content[-1], "cache_control": {"type": "ephemeral"}}
+                new_messages.append({**msg, "content": new_content})
+            else:
+                new_messages.append(msg)
+
+        new_tools = tools
+        if tools:
+            new_tools = list(tools)
+            new_tools[-1] = {**new_tools[-1], "cache_control": {"type": "ephemeral"}}
+
+        return new_messages, new_tools
+
     def _apply_model_overrides(self, model: str, kwargs: dict[str, Any]) -> None:
         """Apply model-specific parameter overrides from the registry."""
         model_lower = model.lower()
@@ -135,8 +169,12 @@ class LiteLLMProvider(LLMProvider):
         Returns:
             LLMResponse with content and/or tool calls.
         """
-        model = self._resolve_model(model or self.default_model)
-
+        original_model = model or self.default_model
+        model = self._resolve_model(original_model)
+
+        if self._supports_cache_control(original_model):
+            messages, tools = self._apply_cache_control(messages, tools)
+
         # Clamp max_tokens to at least 1 — negative or zero values cause
         # LiteLLM to reject the request with "max_tokens must be at least 1".
         max_tokens = max(1, max_tokens)
diff --git a/nanobot/providers/registry.py b/nanobot/providers/registry.py
index 3071793..a9214ca 100644
--- a/nanobot/providers/registry.py
+++ b/nanobot/providers/registry.py
@@ -57,6 +57,9 @@ class ProviderSpec:
     # Direct providers bypass LiteLLM entirely (e.g., CustomProvider)
     is_direct: bool = False
 
+    # Provider supports cache_control on content blocks (e.g. Anthropic prompt caching)
+    supports_prompt_caching: bool = False
+
     @property
     def label(self) -> str:
         return self.display_name or self.name.title()
@@ -155,6 +158,7 @@ PROVIDERS: tuple[ProviderSpec, ...] = (
         default_api_base="",
         strip_model_prefix=False,
         model_overrides=(),
+        supports_prompt_caching=True,
     ),
     # OpenAI: LiteLLM recognizes "gpt-*" natively, no prefix needed.