Merge branch 'HKUDS:main' into feat-volcengine-tuning

This commit is contained in:
gaoyiman
2026-03-05 14:14:33 +08:00
committed by GitHub
55 changed files with 1640 additions and 1217 deletions

View File

@@ -21,6 +21,7 @@ class LLMResponse:
finish_reason: str = "stop"
usage: dict[str, int] = field(default_factory=dict)
reasoning_content: str | None = None # Kimi, DeepSeek-R1 etc.
thinking_blocks: list[dict] | None = None # Anthropic extended thinking
@property
def has_tool_calls(self) -> bool:
@@ -35,7 +36,7 @@ class LLMProvider(ABC):
Implementations should handle the specifics of each provider's API
while maintaining a consistent interface.
"""
def __init__(self, api_key: str | None = None, api_base: str | None = None):
self.api_key = api_key
self.api_base = api_base
@@ -77,9 +78,15 @@ class LLMProvider(ABC):
result.append(clean)
continue
if isinstance(content, dict):
clean = dict(msg)
clean["content"] = [content]
result.append(clean)
continue
result.append(msg)
return result
@abstractmethod
async def chat(
self,
@@ -88,6 +95,7 @@ class LLMProvider(ABC):
model: str | None = None,
max_tokens: int = 4096,
temperature: float = 0.7,
reasoning_effort: str | None = None,
) -> LLMResponse:
"""
Send a chat completion request.
@@ -103,7 +111,7 @@ class LLMProvider(ABC):
LLMResponse with content and/or tool calls.
"""
pass
@abstractmethod
def get_default_model(self) -> str:
"""Get the default model for this provider."""

View File

@@ -18,13 +18,16 @@ class CustomProvider(LLMProvider):
self._client = AsyncOpenAI(api_key=api_key, base_url=api_base)
async def chat(self, messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None,
model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7) -> LLMResponse:
model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7,
reasoning_effort: str | None = None) -> LLMResponse:
kwargs: dict[str, Any] = {
"model": model or self.default_model,
"messages": self._sanitize_empty_content(messages),
"max_tokens": max(1, max_tokens),
"temperature": temperature,
}
if reasoning_effort:
kwargs["reasoning_effort"] = reasoning_effort
if tools:
kwargs.update(tools=tools, tool_choice="auto")
try:

View File

@@ -1,20 +1,25 @@
"""LiteLLM provider implementation for multi-provider support."""
import json
import json_repair
import os
import secrets
import string
from typing import Any
import json_repair
import litellm
from litellm import acompletion
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
from nanobot.providers.registry import find_by_model, find_gateway
# Standard OpenAI chat-completion message keys plus reasoning_content for
# thinking-enabled models (Kimi k2.5, DeepSeek-R1, etc.).
# Standard chat-completion message keys.
_ALLOWED_MSG_KEYS = frozenset({"role", "content", "tool_calls", "tool_call_id", "name", "reasoning_content"})
_ANTHROPIC_EXTRA_KEYS = frozenset({"thinking_blocks"})
_ALNUM = string.ascii_letters + string.digits
def _short_tool_id() -> str:
"""Generate a 9-char alphanumeric ID compatible with all providers (incl. Mistral)."""
return "".join(secrets.choice(_ALNUM) for _ in range(9))
class LiteLLMProvider(LLMProvider):
@@ -25,10 +30,10 @@ class LiteLLMProvider(LLMProvider):
a unified interface. Provider-specific logic is driven by the registry
(see providers/registry.py) — no if-elif chains needed here.
"""
def __init__(
self,
api_key: str | None = None,
self,
api_key: str | None = None,
api_base: str | None = None,
default_model: str = "anthropic/claude-opus-4-5",
extra_headers: dict[str, str] | None = None,
@@ -37,24 +42,24 @@ class LiteLLMProvider(LLMProvider):
super().__init__(api_key, api_base)
self.default_model = default_model
self.extra_headers = extra_headers or {}
# Detect gateway / local deployment.
# provider_name (from config key) is the primary signal;
# api_key / api_base are fallback for auto-detection.
self._gateway = find_gateway(provider_name, api_key, api_base)
# Configure environment variables
if api_key:
self._setup_env(api_key, api_base, default_model)
if api_base:
litellm.api_base = api_base
# Disable LiteLLM logging noise
litellm.suppress_debug_info = True
# Drop unsupported parameters for providers (e.g., gpt-5 rejects some params)
litellm.drop_params = True
def _setup_env(self, api_key: str, api_base: str | None, model: str) -> None:
"""Set environment variables based on detected provider."""
spec = self._gateway or find_by_model(model)
@@ -78,7 +83,7 @@ class LiteLLMProvider(LLMProvider):
resolved = env_val.replace("{api_key}", api_key)
resolved = resolved.replace("{api_base}", effective_base)
os.environ.setdefault(env_name, resolved)
def _resolve_model(self, model: str) -> str:
"""Resolve model name by applying provider/gateway prefixes."""
if self._gateway:
@@ -89,7 +94,7 @@ class LiteLLMProvider(LLMProvider):
if prefix and not model.startswith(f"{prefix}/"):
model = f"{prefix}/{model}"
return model
# Standard mode: auto-prefix for known providers
spec = find_by_model(model)
if spec and spec.litellm_prefix:
@@ -108,7 +113,7 @@ class LiteLLMProvider(LLMProvider):
if prefix.lower().replace("-", "_") != spec_name:
return model
return f"{canonical_prefix}/{remainder}"
def _supports_cache_control(self, model: str) -> bool:
"""Return True when the provider supports cache_control on content blocks."""
if self._gateway is not None:
@@ -151,13 +156,22 @@ class LiteLLMProvider(LLMProvider):
if pattern in model_lower:
kwargs.update(overrides)
return
@staticmethod
def _sanitize_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
def _extra_msg_keys(original_model: str, resolved_model: str) -> frozenset[str]:
"""Return provider-specific extra keys to preserve in request messages."""
spec = find_by_model(original_model) or find_by_model(resolved_model)
if (spec and spec.name == "anthropic") or "claude" in original_model.lower() or resolved_model.startswith("anthropic/"):
return _ANTHROPIC_EXTRA_KEYS
return frozenset()
@staticmethod
def _sanitize_messages(messages: list[dict[str, Any]], extra_keys: frozenset[str] = frozenset()) -> list[dict[str, Any]]:
"""Strip non-standard keys and ensure assistant messages have a content key."""
allowed = _ALLOWED_MSG_KEYS | extra_keys
sanitized = []
for msg in messages:
clean = {k: v for k, v in msg.items() if k in _ALLOWED_MSG_KEYS}
clean = {k: v for k, v in msg.items() if k in allowed}
# Strict providers require "content" even when assistant only has tool_calls
if clean.get("role") == "assistant" and "content" not in clean:
clean["content"] = None
@@ -171,22 +185,24 @@ class LiteLLMProvider(LLMProvider):
model: str | None = None,
max_tokens: int = 4096,
temperature: float = 0.7,
reasoning_effort: str | None = None,
) -> LLMResponse:
"""
Send a chat completion request via LiteLLM.
Args:
messages: List of message dicts with 'role' and 'content'.
tools: Optional list of tool definitions in OpenAI format.
model: Model identifier (e.g., 'anthropic/claude-sonnet-4-5').
max_tokens: Maximum tokens in response.
temperature: Sampling temperature.
Returns:
LLMResponse with content and/or tool calls.
"""
original_model = model or self.default_model
model = self._resolve_model(original_model)
extra_msg_keys = self._extra_msg_keys(original_model, model)
if self._supports_cache_control(original_model):
messages, tools = self._apply_cache_control(messages, tools)
@@ -194,33 +210,37 @@ class LiteLLMProvider(LLMProvider):
# Clamp max_tokens to at least 1 — negative or zero values cause
# LiteLLM to reject the request with "max_tokens must be at least 1".
max_tokens = max(1, max_tokens)
kwargs: dict[str, Any] = {
"model": model,
"messages": self._sanitize_messages(self._sanitize_empty_content(messages)),
"messages": self._sanitize_messages(self._sanitize_empty_content(messages), extra_keys=extra_msg_keys),
"max_tokens": max_tokens,
"temperature": temperature,
}
# Apply model-specific overrides (e.g. kimi-k2.5 temperature)
self._apply_model_overrides(model, kwargs)
# Pass api_key directly — more reliable than env vars alone
if self.api_key:
kwargs["api_key"] = self.api_key
# Pass api_base for custom endpoints
if self.api_base:
kwargs["api_base"] = self.api_base
# Pass extra headers (e.g. APP-Code for AiHubMix)
if self.extra_headers:
kwargs["extra_headers"] = self.extra_headers
if reasoning_effort:
kwargs["reasoning_effort"] = reasoning_effort
kwargs["drop_params"] = True
if tools:
kwargs["tools"] = tools
kwargs["tool_choice"] = "auto"
try:
response = await acompletion(**kwargs)
return self._parse_response(response)
@@ -230,12 +250,12 @@ class LiteLLMProvider(LLMProvider):
content=f"Error calling LLM: {str(e)}",
finish_reason="error",
)
def _parse_response(self, response: Any) -> LLMResponse:
"""Parse LiteLLM response into our standard format."""
choice = response.choices[0]
message = choice.message
tool_calls = []
if hasattr(message, "tool_calls") and message.tool_calls:
for tc in message.tool_calls:
@@ -243,13 +263,13 @@ class LiteLLMProvider(LLMProvider):
args = tc.function.arguments
if isinstance(args, str):
args = json_repair.loads(args)
tool_calls.append(ToolCallRequest(
id=tc.id,
id=_short_tool_id(),
name=tc.function.name,
arguments=args,
))
usage = {}
if hasattr(response, "usage") and response.usage:
usage = {
@@ -257,8 +277,9 @@ class LiteLLMProvider(LLMProvider):
"completion_tokens": response.usage.completion_tokens,
"total_tokens": response.usage.total_tokens,
}
reasoning_content = getattr(message, "reasoning_content", None) or None
thinking_blocks = getattr(message, "thinking_blocks", None) or None
return LLMResponse(
content=message.content,
@@ -266,8 +287,9 @@ class LiteLLMProvider(LLMProvider):
finish_reason=choice.finish_reason or "stop",
usage=usage,
reasoning_content=reasoning_content,
thinking_blocks=thinking_blocks,
)
def get_default_model(self) -> str:
"""Get the default model."""
return self.default_model

View File

@@ -9,8 +9,8 @@ from typing import Any, AsyncGenerator
import httpx
from loguru import logger
from oauth_cli_kit import get_token as get_codex_token
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
DEFAULT_CODEX_URL = "https://chatgpt.com/backend-api/codex/responses"
@@ -31,6 +31,7 @@ class OpenAICodexProvider(LLMProvider):
model: str | None = None,
max_tokens: int = 4096,
temperature: float = 0.7,
reasoning_effort: str | None = None,
) -> LLMResponse:
model = model or self.default_model
system_prompt, input_items = _convert_messages(messages)
@@ -51,6 +52,9 @@ class OpenAICodexProvider(LLMProvider):
"parallel_tool_calls": True,
}
if reasoning_effort:
body["reasoning"] = {"effort": reasoning_effort}
if tools:
body["tools"] = _convert_tools(tools)

View File

@@ -255,7 +255,7 @@ PROVIDERS: tuple[ProviderSpec, ...] = (
# OpenAI Codex: uses OAuth, not API key.
ProviderSpec(
name="openai_codex",
keywords=("openai-codex", "codex"),
keywords=("openai-codex",),
env_key="", # OAuth-based, no API key
display_name="OpenAI Codex",
litellm_prefix="", # Not routed through LiteLLM

View File

@@ -2,7 +2,6 @@
import os
from pathlib import Path
from typing import Any
import httpx
from loguru import logger
@@ -11,33 +10,33 @@ from loguru import logger
class GroqTranscriptionProvider:
"""
Voice transcription provider using Groq's Whisper API.
Groq offers extremely fast transcription with a generous free tier.
"""
def __init__(self, api_key: str | None = None):
self.api_key = api_key or os.environ.get("GROQ_API_KEY")
self.api_url = "https://api.groq.com/openai/v1/audio/transcriptions"
async def transcribe(self, file_path: str | Path) -> str:
"""
Transcribe an audio file using Groq.
Args:
file_path: Path to the audio file.
Returns:
Transcribed text.
"""
if not self.api_key:
logger.warning("Groq API key not configured for transcription")
return ""
path = Path(file_path)
if not path.exists():
logger.error("Audio file not found: {}", file_path)
return ""
try:
async with httpx.AsyncClient() as client:
with open(path, "rb") as f:
@@ -48,18 +47,18 @@ class GroqTranscriptionProvider:
headers = {
"Authorization": f"Bearer {self.api_key}",
}
response = await client.post(
self.api_url,
headers=headers,
files=files,
timeout=60.0
)
response.raise_for_status()
data = response.json()
return data.get("text", "")
except Exception as e:
logger.error("Groq transcription error: {}", e)
return ""