Merge branch 'main' into pr-812

2026-02-20 08:42:31 +00:00
parent 1663517998 72a622aea1
commit 82a318759f
32 changed files with 602 additions and 308 deletions
--- a/nanobot/providers/litellm_provider.py
+++ b/nanobot/providers/litellm_provider.py
@@ -88,11 +88,55 @@ class LiteLLMProvider(LLMProvider):
        # Standard mode: auto-prefix for known providers
        spec = find_by_model(model)
        if spec and spec.litellm_prefix:
+            model = self._canonicalize_explicit_prefix(model, spec.name, spec.litellm_prefix)
            if not any(model.startswith(s) for s in spec.skip_prefixes):
                model = f"{spec.litellm_prefix}/{model}"
-        
+
        return model
+
+    @staticmethod
+    def _canonicalize_explicit_prefix(model: str, spec_name: str, canonical_prefix: str) -> str:
+        """Rewrite an explicit provider prefix to its canonical form.
+
+        The text before the first "/" in `model` is lower-cased and has "-"
+        replaced by "_". When that equals `spec_name`, the prefix is swapped
+        for `canonical_prefix` (e.g. `github-copilot/...`). Every other input,
+        including a model name without any "/", is returned unchanged.
+        """
+        head, slash, tail = model.partition("/")
+        # No separator means there is no explicit prefix to normalize.
+        if not slash:
+            return model
+        if head.lower().replace("-", "_") == spec_name:
+            return f"{canonical_prefix}/{tail}"
+        return model
    
+    def _supports_cache_control(self, model: str) -> bool:
+        """Report whether cache_control may be attached to content blocks.
+
+        Always False when a gateway is configured. Otherwise the result is the
+        `supports_prompt_caching` flag of the spec that `find_by_model` matches
+        for `model`, or False when no spec matches.
+        """
+        if self._gateway is None:
+            matched = find_by_model(model)
+            if matched is not None:
+                return matched.supports_prompt_caching
+        return False
+
+    def _apply_cache_control(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]] | None,
+    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
+        """Return copies of messages and tools with cache_control injected.
+
+        An ephemeral cache_control marker is attached to:
+
+        * every system message: string content is wrapped in a single text
+          block carrying the marker; list content gets the marker on a copy
+          of its last block;
+        * the last entry of `tools`, when `tools` is non-empty.
+
+        The input lists and the dicts that receive a marker are copied, never
+        mutated. Non-system messages are reused as-is in the returned list.
+
+        System messages with nothing to attach the marker to (missing, None or
+        empty content, or a last block that is not a dict) are passed through
+        unchanged instead of raising.
+
+        Args:
+            messages: Chat messages to send.
+            tools: Tool definitions, or None.
+
+        Returns:
+            A `(messages, tools)` tuple with the markers applied.
+        """
+        new_messages = []
+        for msg in messages:
+            if msg.get("role") != "system":
+                new_messages.append(msg)
+                continue
+
+            # .get() so a system message without a "content" key cannot raise.
+            content = msg.get("content")
+            if isinstance(content, str):
+                new_content = [{"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}]
+            elif isinstance(content, (list, tuple)) and content and isinstance(content[-1], dict):
+                new_content = list(content)
+                new_content[-1] = {**new_content[-1], "cache_control": {"type": "ephemeral"}}
+            else:
+                # No block to carry the marker; keep the message untouched.
+                new_messages.append(msg)
+                continue
+            new_messages.append({**msg, "content": new_content})
+
+        new_tools = tools
+        if tools:
+            # Only the final tool definition carries the marker.
+            new_tools = list(tools)
+            new_tools[-1] = {**new_tools[-1], "cache_control": {"type": "ephemeral"}}
+
+        return new_messages, new_tools
+
    def _apply_model_overrides(self, model: str, kwargs: dict[str, Any]) -> None:
        """Apply model-specific parameter overrides from the registry."""
        model_lower = model.lower()
@@ -124,8 +168,12 @@ class LiteLLMProvider(LLMProvider):
        Returns:
            LLMResponse with content and/or tool calls.
        """
-        model = self._resolve_model(model or self.default_model)
-        
+        original_model = model or self.default_model
+        model = self._resolve_model(original_model)
+
+        if self._supports_cache_control(original_model):
+            messages, tools = self._apply_cache_control(messages, tools)
+
        # Clamp max_tokens to at least 1 — negative or zero values cause
        # LiteLLM to reject the request with "max_tokens must be at least 1".
        max_tokens = max(1, max_tokens)
--- a/nanobot/providers/openai_codex_provider.py
+++ b/nanobot/providers/openai_codex_provider.py
@@ -80,7 +80,7 @@ class OpenAICodexProvider(LLMProvider):


 def _strip_model_prefix(model: str) -> str:
+    # Drop a leading "openai-codex/" or "openai_codex/" prefix and return the
+    # rest of the name; models without either prefix are returned unchanged.
-    if model.startswith("openai-codex/"):
+    if model.startswith("openai-codex/") or model.startswith("openai_codex/"):
        return model.split("/", 1)[1]
    return model

@@ -176,7 +176,7 @@ def _convert_messages(messages: list[dict[str, Any]]) -> tuple[str, list[dict[st

        if role == "tool":
            call_id, _ = _split_tool_call_id(msg.get("tool_call_id"))
-            output_text = content if isinstance(content, str) else json.dumps(content)
+            output_text = content if isinstance(content, str) else json.dumps(content, ensure_ascii=False)
            input_items.append(
                {
                    "type": "function_call_output",
--- a/nanobot/providers/registry.py
+++ b/nanobot/providers/registry.py
@@ -57,6 +57,9 @@ class ProviderSpec:
    # Direct providers bypass LiteLLM entirely (e.g., CustomProvider)
    is_direct: bool = False

+    # Provider supports cache_control on content blocks (e.g. Anthropic prompt caching)
+    supports_prompt_caching: bool = False
+
    @property
    def label(self) -> str:
        return self.display_name or self.name.title()
@@ -173,6 +176,7 @@ PROVIDERS: tuple[ProviderSpec, ...] = (
        default_api_base="",
        strip_model_prefix=False,
        model_overrides=(),
+        supports_prompt_caching=True,
    ),

    # OpenAI: LiteLLM recognizes "gpt-*" natively, no prefix needed.
@@ -402,10 +406,18 @@ def find_by_model(model: str) -> ProviderSpec | None:
    """Match a standard provider by model-name keyword (case-insensitive).
    Skips gateways/local — those are matched by api_key/api_base instead."""
    model_lower = model.lower()
-    for spec in PROVIDERS:
-        if spec.is_gateway or spec.is_local:
-            continue
-        if any(kw in model_lower for kw in spec.keywords):
+    model_normalized = model_lower.replace("-", "_")
+    model_prefix = model_lower.split("/", 1)[0] if "/" in model_lower else ""
+    normalized_prefix = model_prefix.replace("-", "_")
+    std_specs = [s for s in PROVIDERS if not s.is_gateway and not s.is_local]
+
+    # Prefer explicit provider prefix — prevents `github-copilot/...codex` matching openai_codex.
+    for spec in std_specs:
+        if model_prefix and normalized_prefix == spec.name:
+            return spec
+
+    for spec in std_specs:
+        if any(kw in model_lower or kw.replace("-", "_") in model_normalized for kw in spec.keywords):
            return spec
    return None

--- a/nanobot/providers/transcription.py
+++ b/nanobot/providers/transcription.py
@@ -35,7 +35,7 @@ class GroqTranscriptionProvider:
        
        path = Path(file_path)
        if not path.exists():
-            logger.error(f"Audio file not found: {file_path}")
+            logger.error("Audio file not found: {}", file_path)
            return ""
        
        try:
@@ -61,5 +61,5 @@ class GroqTranscriptionProvider:
                    return data.get("text", "")
                    
        except Exception as e:
-            logger.error(f"Groq transcription error: {e}")
+            logger.error("Groq transcription error: {}", e)
            return ""