Merge branch 'HKUDS:main' into feat-volcengine-tuning

2026-03-05 14:14:33 +08:00
parent cf2ed8a6a0 fb74281434
commit 80bf5e55f1
55 changed files with 1640 additions and 1217 deletions
--- a/nanobot/providers/base.py
+++ b/nanobot/providers/base.py
@@ -21,6 +21,7 @@ class LLMResponse:
    finish_reason: str = "stop"
    usage: dict[str, int] = field(default_factory=dict)
    reasoning_content: str | None = None  # Kimi, DeepSeek-R1 etc.
+    thinking_blocks: list[dict] | None = None  # Anthropic extended thinking
    
    @property
    def has_tool_calls(self) -> bool:
@@ -35,7 +36,7 @@ class LLMProvider(ABC):
    Implementations should handle the specifics of each provider's API
    while maintaining a consistent interface.
    """
-    
+
    def __init__(self, api_key: str | None = None, api_base: str | None = None):
        self.api_key = api_key
        self.api_base = api_base
@@ -77,9 +78,15 @@ class LLMProvider(ABC):
                    result.append(clean)
                    continue

+            if isinstance(content, dict):
+                clean = dict(msg)
+                clean["content"] = [content]
+                result.append(clean)
+                continue
+
            result.append(msg)
        return result
-    
+
    @abstractmethod
    async def chat(
        self,
@@ -88,6 +95,7 @@ class LLMProvider(ABC):
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
+        reasoning_effort: str | None = None,
    ) -> LLMResponse:
        """
        Send a chat completion request.
@@ -103,7 +111,7 @@ class LLMProvider(ABC):
            LLMResponse with content and/or tool calls.
        """
        pass
-    
+
    @abstractmethod
    def get_default_model(self) -> str:
        """Get the default model for this provider."""
--- a/nanobot/providers/custom_provider.py
+++ b/nanobot/providers/custom_provider.py
@@ -18,13 +18,16 @@ class CustomProvider(LLMProvider):
        self._client = AsyncOpenAI(api_key=api_key, base_url=api_base)

    async def chat(self, messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None,
-                   model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7) -> LLMResponse:
+                   model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7,
+                   reasoning_effort: str | None = None) -> LLMResponse:
        kwargs: dict[str, Any] = {
            "model": model or self.default_model,
            "messages": self._sanitize_empty_content(messages),
            "max_tokens": max(1, max_tokens),
            "temperature": temperature,
        }
+        if reasoning_effort:
+            kwargs["reasoning_effort"] = reasoning_effort
        if tools:
            kwargs.update(tools=tools, tool_choice="auto")
        try:
--- a/nanobot/providers/litellm_provider.py
+++ b/nanobot/providers/litellm_provider.py
@@ -1,20 +1,25 @@
 """LiteLLM provider implementation for multi-provider support."""

-import json
-import json_repair
 import os
+import secrets
+import string
 from typing import Any

+import json_repair
 import litellm
 from litellm import acompletion

 from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
 from nanobot.providers.registry import find_by_model, find_gateway

-
-# Standard OpenAI chat-completion message keys plus reasoning_content for
-# thinking-enabled models (Kimi k2.5, DeepSeek-R1, etc.).
+# Standard chat-completion message keys, plus reasoning_content for thinking-enabled models.
 _ALLOWED_MSG_KEYS = frozenset({"role", "content", "tool_calls", "tool_call_id", "name", "reasoning_content"})
+_ANTHROPIC_EXTRA_KEYS = frozenset({"thinking_blocks"})
+_ALNUM = string.ascii_letters + string.digits
+
+def _short_tool_id() -> str:
+    """Generate a 9-char alphanumeric ID compatible with all providers (incl. Mistral)."""
+    return "".join(secrets.choice(_ALNUM) for _ in range(9))


 class LiteLLMProvider(LLMProvider):
@@ -25,10 +30,10 @@ class LiteLLMProvider(LLMProvider):
    a unified interface.  Provider-specific logic is driven by the registry
    (see providers/registry.py) — no if-elif chains needed here.
    """
-    
+
    def __init__(
-        self, 
-        api_key: str | None = None, 
+        self,
+        api_key: str | None = None,
        api_base: str | None = None,
        default_model: str = "anthropic/claude-opus-4-5",
        extra_headers: dict[str, str] | None = None,
@@ -37,24 +42,24 @@ class LiteLLMProvider(LLMProvider):
        super().__init__(api_key, api_base)
        self.default_model = default_model
        self.extra_headers = extra_headers or {}
-        
+
        # Detect gateway / local deployment.
        # provider_name (from config key) is the primary signal;
        # api_key / api_base are fallback for auto-detection.
        self._gateway = find_gateway(provider_name, api_key, api_base)
-        
+
        # Configure environment variables
        if api_key:
            self._setup_env(api_key, api_base, default_model)
-        
+
        if api_base:
            litellm.api_base = api_base
-        
+
        # Disable LiteLLM logging noise
        litellm.suppress_debug_info = True
        # Drop unsupported parameters for providers (e.g., gpt-5 rejects some params)
        litellm.drop_params = True
-    
+
    def _setup_env(self, api_key: str, api_base: str | None, model: str) -> None:
        """Set environment variables based on detected provider."""
        spec = self._gateway or find_by_model(model)
@@ -78,7 +83,7 @@ class LiteLLMProvider(LLMProvider):
            resolved = env_val.replace("{api_key}", api_key)
            resolved = resolved.replace("{api_base}", effective_base)
            os.environ.setdefault(env_name, resolved)
-    
+
    def _resolve_model(self, model: str) -> str:
        """Resolve model name by applying provider/gateway prefixes."""
        if self._gateway:
@@ -89,7 +94,7 @@ class LiteLLMProvider(LLMProvider):
            if prefix and not model.startswith(f"{prefix}/"):
                model = f"{prefix}/{model}"
            return model
-        
+
        # Standard mode: auto-prefix for known providers
        spec = find_by_model(model)
        if spec and spec.litellm_prefix:
@@ -108,7 +113,7 @@ class LiteLLMProvider(LLMProvider):
        if prefix.lower().replace("-", "_") != spec_name:
            return model
        return f"{canonical_prefix}/{remainder}"
-    
+
    def _supports_cache_control(self, model: str) -> bool:
        """Return True when the provider supports cache_control on content blocks."""
        if self._gateway is not None:
@@ -151,13 +156,22 @@ class LiteLLMProvider(LLMProvider):
                if pattern in model_lower:
                    kwargs.update(overrides)
                    return
-    
+
    @staticmethod
-    def _sanitize_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    def _extra_msg_keys(original_model: str, resolved_model: str) -> frozenset[str]:
+        """Return provider-specific extra keys to preserve in request messages."""
+        spec = find_by_model(original_model) or find_by_model(resolved_model)
+        if (spec and spec.name == "anthropic") or "claude" in original_model.lower() or resolved_model.startswith("anthropic/"):
+            return _ANTHROPIC_EXTRA_KEYS
+        return frozenset()
+
+    @staticmethod
+    def _sanitize_messages(messages: list[dict[str, Any]], extra_keys: frozenset[str] = frozenset()) -> list[dict[str, Any]]:
        """Strip non-standard keys and ensure assistant messages have a content key."""
+        allowed = _ALLOWED_MSG_KEYS | extra_keys
        sanitized = []
        for msg in messages:
-            clean = {k: v for k, v in msg.items() if k in _ALLOWED_MSG_KEYS}
+            clean = {k: v for k, v in msg.items() if k in allowed}
            # Strict providers require "content" even when assistant only has tool_calls
            if clean.get("role") == "assistant" and "content" not in clean:
                clean["content"] = None
@@ -171,22 +185,24 @@ class LiteLLMProvider(LLMProvider):
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
+        reasoning_effort: str | None = None,
    ) -> LLMResponse:
        """
        Send a chat completion request via LiteLLM.
-        
+
        Args:
            messages: List of message dicts with 'role' and 'content'.
            tools: Optional list of tool definitions in OpenAI format.
            model: Model identifier (e.g., 'anthropic/claude-sonnet-4-5').
            max_tokens: Maximum tokens in response.
            temperature: Sampling temperature.
-        
+
        Returns:
            LLMResponse with content and/or tool calls.
        """
        original_model = model or self.default_model
        model = self._resolve_model(original_model)
+        extra_msg_keys = self._extra_msg_keys(original_model, model)

        if self._supports_cache_control(original_model):
            messages, tools = self._apply_cache_control(messages, tools)
@@ -194,33 +210,37 @@ class LiteLLMProvider(LLMProvider):
        # Clamp max_tokens to at least 1 — negative or zero values cause
        # LiteLLM to reject the request with "max_tokens must be at least 1".
        max_tokens = max(1, max_tokens)
-        
+
        kwargs: dict[str, Any] = {
            "model": model,
-            "messages": self._sanitize_messages(self._sanitize_empty_content(messages)),
+            "messages": self._sanitize_messages(self._sanitize_empty_content(messages), extra_keys=extra_msg_keys),
            "max_tokens": max_tokens,
            "temperature": temperature,
        }
-        
+
        # Apply model-specific overrides (e.g. kimi-k2.5 temperature)
        self._apply_model_overrides(model, kwargs)
-        
+
        # Pass api_key directly — more reliable than env vars alone
        if self.api_key:
            kwargs["api_key"] = self.api_key
-        
+
        # Pass api_base for custom endpoints
        if self.api_base:
            kwargs["api_base"] = self.api_base
-        
+
        # Pass extra headers (e.g. APP-Code for AiHubMix)
        if self.extra_headers:
            kwargs["extra_headers"] = self.extra_headers
        
+        if reasoning_effort:
+            kwargs["reasoning_effort"] = reasoning_effort
+            kwargs["drop_params"] = True
+        
        if tools:
            kwargs["tools"] = tools
            kwargs["tool_choice"] = "auto"
-        
+
        try:
            response = await acompletion(**kwargs)
            return self._parse_response(response)
@@ -230,12 +250,12 @@ class LiteLLMProvider(LLMProvider):
                content=f"Error calling LLM: {str(e)}",
                finish_reason="error",
            )
-    
+
    def _parse_response(self, response: Any) -> LLMResponse:
        """Parse LiteLLM response into our standard format."""
        choice = response.choices[0]
        message = choice.message
-        
+
        tool_calls = []
        if hasattr(message, "tool_calls") and message.tool_calls:
            for tc in message.tool_calls:
@@ -243,13 +263,13 @@ class LiteLLMProvider(LLMProvider):
                args = tc.function.arguments
                if isinstance(args, str):
                    args = json_repair.loads(args)
-                
+
                tool_calls.append(ToolCallRequest(
-                    id=tc.id,
+                    id=_short_tool_id(),
                    name=tc.function.name,
                    arguments=args,
                ))
-        
+
        usage = {}
        if hasattr(response, "usage") and response.usage:
            usage = {
@@ -257,8 +277,9 @@ class LiteLLMProvider(LLMProvider):
                "completion_tokens": response.usage.completion_tokens,
                "total_tokens": response.usage.total_tokens,
            }
-        
+
        reasoning_content = getattr(message, "reasoning_content", None) or None
+        thinking_blocks = getattr(message, "thinking_blocks", None) or None
        
        return LLMResponse(
            content=message.content,
@@ -266,8 +287,9 @@ class LiteLLMProvider(LLMProvider):
            finish_reason=choice.finish_reason or "stop",
            usage=usage,
            reasoning_content=reasoning_content,
+            thinking_blocks=thinking_blocks,
        )
-    
+
    def get_default_model(self) -> str:
        """Get the default model."""
        return self.default_model
--- a/nanobot/providers/openai_codex_provider.py
+++ b/nanobot/providers/openai_codex_provider.py
@@ -9,8 +9,8 @@ from typing import Any, AsyncGenerator

 import httpx
 from loguru import logger
-
 from oauth_cli_kit import get_token as get_codex_token
+
 from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest

 DEFAULT_CODEX_URL = "https://chatgpt.com/backend-api/codex/responses"
@@ -31,6 +31,7 @@ class OpenAICodexProvider(LLMProvider):
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
+        reasoning_effort: str | None = None,
    ) -> LLMResponse:
        model = model or self.default_model
        system_prompt, input_items = _convert_messages(messages)
@@ -51,6 +52,9 @@ class OpenAICodexProvider(LLMProvider):
            "parallel_tool_calls": True,
        }

+        if reasoning_effort:
+            body["reasoning"] = {"effort": reasoning_effort}
+
        if tools:
            body["tools"] = _convert_tools(tools)

--- a/nanobot/providers/registry.py
+++ b/nanobot/providers/registry.py
@@ -255,7 +255,7 @@ PROVIDERS: tuple[ProviderSpec, ...] = (
    # OpenAI Codex: uses OAuth, not API key.
    ProviderSpec(
        name="openai_codex",
-        keywords=("openai-codex", "codex"),
+        keywords=("openai-codex",),
        env_key="",                         # OAuth-based, no API key
        display_name="OpenAI Codex",
        litellm_prefix="",                  # Not routed through LiteLLM
--- a/nanobot/providers/transcription.py
+++ b/nanobot/providers/transcription.py
@@ -2,7 +2,6 @@

 import os
 from pathlib import Path
-from typing import Any

 import httpx
 from loguru import logger
@@ -11,33 +10,33 @@ from loguru import logger
 class GroqTranscriptionProvider:
    """
    Voice transcription provider using Groq's Whisper API.
-    
+
    Groq offers extremely fast transcription with a generous free tier.
    """
-    
+
    def __init__(self, api_key: str | None = None):
        self.api_key = api_key or os.environ.get("GROQ_API_KEY")
        self.api_url = "https://api.groq.com/openai/v1/audio/transcriptions"
-    
+
    async def transcribe(self, file_path: str | Path) -> str:
        """
        Transcribe an audio file using Groq.
-        
+
        Args:
            file_path: Path to the audio file.
-            
+
        Returns:
            Transcribed text.
        """
        if not self.api_key:
            logger.warning("Groq API key not configured for transcription")
            return ""
-        
+
        path = Path(file_path)
        if not path.exists():
            logger.error("Audio file not found: {}", file_path)
            return ""
-        
+
        try:
            async with httpx.AsyncClient() as client:
                with open(path, "rb") as f:
@@ -48,18 +47,18 @@ class GroqTranscriptionProvider:
                    headers = {
                        "Authorization": f"Bearer {self.api_key}",
                    }
-                    
+
                    response = await client.post(
                        self.api_url,
                        headers=headers,
                        files=files,
                        timeout=60.0
                    )
-                    
+
                    response.raise_for_status()
                    data = response.json()
                    return data.get("text", "")
-                    
+
        except Exception as e:
            logger.error("Groq transcription error: {}", e)
            return ""