fix(openrouter): revert custom_llm_provider, always apply gateway prefix

2026-03-15 02:43:50 +00:00
parent 350d110fb9
commit 196e0ddbb6
3 changed files with 61 additions and 20 deletions
--- a/nanobot/providers/litellm_provider.py
+++ b/nanobot/providers/litellm_provider.py
@@ -91,11 +91,10 @@ class LiteLLMProvider(LLMProvider):
    def _resolve_model(self, model: str) -> str:
        """Resolve model name by applying provider/gateway prefixes."""
        if self._gateway:
-            # Gateway mode: apply gateway prefix, skip provider-specific prefixes
            prefix = self._gateway.litellm_prefix
            if self._gateway.strip_model_prefix:
                model = model.split("/")[-1]
-            if prefix and not model.startswith(f"{prefix}/"):
+            if prefix:
                model = f"{prefix}/{model}"
            return model

--- a/nanobot/providers/registry.py
+++ b/nanobot/providers/registry.py
@@ -98,7 +98,7 @@ PROVIDERS: tuple[ProviderSpec, ...] = (
        keywords=("openrouter",),
        env_key="OPENROUTER_API_KEY",
        display_name="OpenRouter",
-        litellm_prefix="",  # routing handled by custom_llm_provider kwarg; no prefix needed
+        litellm_prefix="openrouter",  # anthropic/claude-3 → openrouter/anthropic/claude-3
        skip_prefixes=(),
        env_extras=(),
        is_gateway=True,
@@ -107,7 +107,6 @@ PROVIDERS: tuple[ProviderSpec, ...] = (
        detect_by_base_keyword="openrouter",
        default_api_base="https://openrouter.ai/api/v1",
        strip_model_prefix=False,
-        litellm_kwargs={"custom_llm_provider": "openrouter"},
        model_overrides=(),
        supports_prompt_caching=True,
    ),
--- a/tests/test_litellm_kwargs.py
+++ b/tests/test_litellm_kwargs.py
@@ -1,4 +1,10 @@
-"""Regression tests for PR #2026 — litellm_kwargs injection from ProviderSpec."""
+"""Regression tests for PR #2026 — litellm_kwargs injection from ProviderSpec.
+
+Validates that:
+- OpenRouter uses litellm_prefix (NOT custom_llm_provider) to avoid LiteLLM double-prefixing.
+- The litellm_kwargs mechanism works correctly for providers that declare it.
+- Non-gateway providers are unaffected.
+"""

 from __future__ import annotations

@@ -9,6 +15,7 @@ from unittest.mock import AsyncMock, patch
 import pytest

 from nanobot.providers.litellm_provider import LiteLLMProvider
+from nanobot.providers.registry import find_by_name


 def _fake_response(content: str = "ok") -> SimpleNamespace:
@@ -24,9 +31,23 @@ def _fake_response(content: str = "ok") -> SimpleNamespace:
    return SimpleNamespace(choices=[choice], usage=usage)


+def test_openrouter_spec_uses_prefix_not_custom_llm_provider() -> None:
+    """OpenRouter must rely on litellm_prefix, not custom_llm_provider kwarg.
+
+    LiteLLM internally adds a provider/ prefix when custom_llm_provider is set,
+    which double-prefixes models (openrouter/anthropic/model) and breaks the API.
+    """
+    spec = find_by_name("openrouter")
+    assert spec is not None
+    assert spec.litellm_prefix == "openrouter"
+    assert "custom_llm_provider" not in spec.litellm_kwargs, (
+        "custom_llm_provider causes LiteLLM to double-prefix the model name"
+    )
+
+
@pytest.mark.asyncio
-async def test_openrouter_injects_litellm_kwargs() -> None:
-    """OpenRouter gateway must inject custom_llm_provider into acompletion call."""
+async def test_openrouter_prefixes_model_correctly() -> None:
+    """OpenRouter should prefix model as openrouter/vendor/model for LiteLLM routing."""
    mock_acompletion = AsyncMock(return_value=_fake_response())

    with patch("nanobot.providers.litellm_provider.acompletion", mock_acompletion):
@@ -42,16 +63,14 @@ async def test_openrouter_injects_litellm_kwargs() -> None:
        )

    call_kwargs = mock_acompletion.call_args.kwargs
-    assert call_kwargs.get("custom_llm_provider") == "openrouter", (
-        "OpenRouter gateway should pass custom_llm_provider='openrouter' to acompletion"
-    )
-    assert call_kwargs["model"] == "anthropic/claude-sonnet-4-5", (
-        "Model name must NOT get an 'openrouter/' prefix — routing is via custom_llm_provider"
+    assert call_kwargs["model"] == "openrouter/anthropic/claude-sonnet-4-5", (
+        "LiteLLM needs openrouter/ prefix to detect the provider and strip it before API call"
    )
+    assert "custom_llm_provider" not in call_kwargs


@pytest.mark.asyncio
-async def test_non_gateway_provider_does_not_inject_litellm_kwargs() -> None:
+async def test_non_gateway_provider_no_extra_kwargs() -> None:
    """Standard (non-gateway) providers must NOT inject any litellm_kwargs."""
    mock_acompletion = AsyncMock(return_value=_fake_response())

@@ -89,9 +108,7 @@ async def test_gateway_without_litellm_kwargs_injects_nothing_extra() -> None:
        )

    call_kwargs = mock_acompletion.call_args.kwargs
-    assert "custom_llm_provider" not in call_kwargs, (
-        "AiHubMix gateway has no litellm_kwargs, should not add custom_llm_provider"
-    )
+    assert "custom_llm_provider" not in call_kwargs


@pytest.mark.asyncio
@@ -110,9 +127,35 @@ async def test_openrouter_autodetect_by_key_prefix() -> None:
        )

    call_kwargs = mock_acompletion.call_args.kwargs
-    assert call_kwargs.get("custom_llm_provider") == "openrouter", (
-        "Auto-detected OpenRouter (by sk-or- prefix) should still inject custom_llm_provider"
+    assert call_kwargs["model"] == "openrouter/anthropic/claude-sonnet-4-5", (
+        "Auto-detected OpenRouter should prefix model for LiteLLM routing"
    )
-    assert call_kwargs["model"] == "anthropic/claude-sonnet-4-5", (
-        "Auto-detected OpenRouter must preserve native model name without openrouter/ prefix"
+
+
+@pytest.mark.asyncio
+async def test_openrouter_native_model_id_gets_double_prefixed() -> None:
+    """Models like openrouter/free must be double-prefixed so LiteLLM strips one layer.
+
+    openrouter/free is an actual OpenRouter model ID.  LiteLLM strips the first
+    openrouter/ for routing, so we must send openrouter/openrouter/free to ensure
+    the API receives openrouter/free.
+    """
+    mock_acompletion = AsyncMock(return_value=_fake_response())
+
+    with patch("nanobot.providers.litellm_provider.acompletion", mock_acompletion):
+        provider = LiteLLMProvider(
+            api_key="sk-or-test-key",
+            api_base="https://openrouter.ai/api/v1",
+            default_model="openrouter/free",
+            provider_name="openrouter",
+        )
+        await provider.chat(
+            messages=[{"role": "user", "content": "hello"}],
+            model="openrouter/free",
+        )
+
+    call_kwargs = mock_acompletion.call_args.kwargs
+    assert call_kwargs["model"] == "openrouter/openrouter/free", (
+        "openrouter/free must become openrouter/openrouter/free — "
+        "LiteLLM strips one layer so the API receives openrouter/free"
    )