Merge PR #1109: perf: optimize prompt cache hit rate for Anthropic models

perf: optimize prompt cache hit rate for Anthropic models
This commit is contained in:
Xubin Ren
2026-03-22 14:23:41 +08:00
committed by GitHub

View File

@@ -129,24 +129,40 @@ class LiteLLMProvider(LLMProvider):
messages: list[dict[str, Any]], messages: list[dict[str, Any]],
tools: list[dict[str, Any]] | None, tools: list[dict[str, Any]] | None,
) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]: ) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
"""Return copies of messages and tools with cache_control injected.""" """Return copies of messages and tools with cache_control injected.
new_messages = []
for msg in messages: Two breakpoints are placed:
if msg.get("role") == "system": 1. System message — caches the static system prompt
content = msg["content"] 2. Second-to-last message — caches the conversation history prefix
This maximises cache hits across multi-turn conversations.
"""
cache_marker = {"type": "ephemeral"}
new_messages = list(messages)
def _mark(msg: dict[str, Any]) -> dict[str, Any]:
content = msg.get("content")
if isinstance(content, str): if isinstance(content, str):
new_content = [{"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}] return {**msg, "content": [
else: {"type": "text", "text": content, "cache_control": cache_marker}
]}
elif isinstance(content, list) and content:
new_content = list(content) new_content = list(content)
new_content[-1] = {**new_content[-1], "cache_control": {"type": "ephemeral"}} new_content[-1] = {**new_content[-1], "cache_control": cache_marker}
new_messages.append({**msg, "content": new_content}) return {**msg, "content": new_content}
else: return msg
new_messages.append(msg)
# Breakpoint 1: system message
if new_messages and new_messages[0].get("role") == "system":
new_messages[0] = _mark(new_messages[0])
# Breakpoint 2: second-to-last message (caches conversation history prefix)
if len(new_messages) >= 3:
new_messages[-2] = _mark(new_messages[-2])
new_tools = tools new_tools = tools
if tools: if tools:
new_tools = list(tools) new_tools = list(tools)
new_tools[-1] = {**new_tools[-1], "cache_control": {"type": "ephemeral"}} new_tools[-1] = {**new_tools[-1], "cache_control": cache_marker}
return new_messages, new_tools return new_messages, new_tools