Merge PR #854: add Anthropic prompt caching via cache_control
This commit is contained in:
@@ -104,6 +104,39 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
return model
|
return model
|
||||||
return f"{canonical_prefix}/{remainder}"
|
return f"{canonical_prefix}/{remainder}"
|
||||||
|
|
||||||
|
def _supports_cache_control(self, model: str) -> bool:
|
||||||
|
"""Return True when the provider supports cache_control on content blocks."""
|
||||||
|
if self._gateway is not None:
|
||||||
|
return False
|
||||||
|
spec = find_by_model(model)
|
||||||
|
return spec is not None and spec.supports_prompt_caching
|
||||||
|
|
||||||
|
def _apply_cache_control(
|
||||||
|
self,
|
||||||
|
messages: list[dict[str, Any]],
|
||||||
|
tools: list[dict[str, Any]] | None,
|
||||||
|
) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
|
||||||
|
"""Return copies of messages and tools with cache_control injected."""
|
||||||
|
new_messages = []
|
||||||
|
for msg in messages:
|
||||||
|
if msg.get("role") == "system":
|
||||||
|
content = msg["content"]
|
||||||
|
if isinstance(content, str):
|
||||||
|
new_content = [{"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}]
|
||||||
|
else:
|
||||||
|
new_content = list(content)
|
||||||
|
new_content[-1] = {**new_content[-1], "cache_control": {"type": "ephemeral"}}
|
||||||
|
new_messages.append({**msg, "content": new_content})
|
||||||
|
else:
|
||||||
|
new_messages.append(msg)
|
||||||
|
|
||||||
|
new_tools = tools
|
||||||
|
if tools:
|
||||||
|
new_tools = list(tools)
|
||||||
|
new_tools[-1] = {**new_tools[-1], "cache_control": {"type": "ephemeral"}}
|
||||||
|
|
||||||
|
return new_messages, new_tools
|
||||||
|
|
||||||
def _apply_model_overrides(self, model: str, kwargs: dict[str, Any]) -> None:
|
def _apply_model_overrides(self, model: str, kwargs: dict[str, Any]) -> None:
|
||||||
"""Apply model-specific parameter overrides from the registry."""
|
"""Apply model-specific parameter overrides from the registry."""
|
||||||
model_lower = model.lower()
|
model_lower = model.lower()
|
||||||
@@ -135,8 +168,12 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
Returns:
|
Returns:
|
||||||
LLMResponse with content and/or tool calls.
|
LLMResponse with content and/or tool calls.
|
||||||
"""
|
"""
|
||||||
model = self._resolve_model(model or self.default_model)
|
original_model = model or self.default_model
|
||||||
|
model = self._resolve_model(original_model)
|
||||||
|
|
||||||
|
if self._supports_cache_control(original_model):
|
||||||
|
messages, tools = self._apply_cache_control(messages, tools)
|
||||||
|
|
||||||
# Clamp max_tokens to at least 1 — negative or zero values cause
|
# Clamp max_tokens to at least 1 — negative or zero values cause
|
||||||
# LiteLLM to reject the request with "max_tokens must be at least 1".
|
# LiteLLM to reject the request with "max_tokens must be at least 1".
|
||||||
max_tokens = max(1, max_tokens)
|
max_tokens = max(1, max_tokens)
|
||||||
|
|||||||
@@ -57,6 +57,9 @@ class ProviderSpec:
|
|||||||
# Direct providers bypass LiteLLM entirely (e.g., CustomProvider)
|
# Direct providers bypass LiteLLM entirely (e.g., CustomProvider)
|
||||||
is_direct: bool = False
|
is_direct: bool = False
|
||||||
|
|
||||||
|
# Provider supports cache_control on content blocks (e.g. Anthropic prompt caching)
|
||||||
|
supports_prompt_caching: bool = False
|
||||||
|
|
||||||
@property
def label(self) -> str:
    """Human-readable provider name, falling back to the title-cased id."""
    if self.display_name:
        return self.display_name
    return self.name.title()
|
||||||
@@ -155,6 +158,7 @@ PROVIDERS: tuple[ProviderSpec, ...] = (
|
|||||||
default_api_base="",
|
default_api_base="",
|
||||||
strip_model_prefix=False,
|
strip_model_prefix=False,
|
||||||
model_overrides=(),
|
model_overrides=(),
|
||||||
|
supports_prompt_caching=True,
|
||||||
),
|
),
|
||||||
|
|
||||||
# OpenAI: LiteLLM recognizes "gpt-*" natively, no prefix needed.
|
# OpenAI: LiteLLM recognizes "gpt-*" natively, no prefix needed.
|
||||||
|
|||||||
Reference in New Issue
Block a user