merge: upstream/main into feat/add-siliconflow-provider, resolve schema conflict

- Keep siliconflow in ProvidersConfig
- Keep openai_codex and github_copilot from upstream/main

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-18 10:50:15 +08:00
parent 66cd21e6ec 95fead24e0
commit e5e5f02e73
25 changed files with 1228 additions and 223 deletions
--- a/nanobot/providers/__init__.py
+++ b/nanobot/providers/__init__.py
@@ -2,5 +2,6 @@

 from nanobot.providers.base import LLMProvider, LLMResponse
 from nanobot.providers.litellm_provider import LiteLLMProvider
+from nanobot.providers.openai_codex_provider import OpenAICodexProvider

-__all__ = ["LLMProvider", "LLMResponse", "LiteLLMProvider"]
+__all__ = ["LLMProvider", "LLMResponse", "LiteLLMProvider", "OpenAICodexProvider"]
--- a/nanobot/providers/custom_provider.py
+++ b/nanobot/providers/custom_provider.py
@@ -0,0 +1,47 @@
+"""Direct OpenAI-compatible provider — bypasses LiteLLM."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import json_repair
+from openai import AsyncOpenAI
+
+from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
+
+
+class CustomProvider(LLMProvider):
+    """LLM provider backed by an ``AsyncOpenAI`` client pointed at ``api_base``."""
+
+    def __init__(
+        self,
+        api_key: str = "no-key",
+        api_base: str = "http://localhost:8000/v1",
+        default_model: str = "default",
+    ):
+        """Remember the default model and build the client for ``api_base``."""
+        super().__init__(api_key, api_base)
+        self.default_model = default_model
+        self._client = AsyncOpenAI(api_key=api_key, base_url=api_base)
+
+    async def chat(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]] | None = None,
+        model: str | None = None,
+        max_tokens: int = 4096,
+        temperature: float = 0.7,
+    ) -> LLMResponse:
+        """Request a chat completion and convert it to an ``LLMResponse``.
+
+        ``max_tokens`` is raised to at least 1, and ``tool_choice="auto"`` is
+        sent only when ``tools`` is non-empty.  Any exception is reported as a
+        response with ``finish_reason="error"`` instead of being raised.
+        """
+        request: dict[str, Any] = {
+            "model": model or self.default_model,
+            "messages": messages,
+            "max_tokens": max(1, max_tokens),
+            "temperature": temperature,
+        }
+        if tools:
+            request["tools"] = tools
+            request["tool_choice"] = "auto"
+        try:
+            completion = await self._client.chat.completions.create(**request)
+            return self._parse(completion)
+        except Exception as e:
+            return LLMResponse(content=f"Error: {e}", finish_reason="error")
+
+    def _parse(self, response: Any) -> LLMResponse:
+        """Build an ``LLMResponse`` from the first choice of ``response``."""
+        choice = response.choices[0]
+        message = choice.message
+
+        tool_calls = []
+        for call in message.tool_calls or []:
+            call_id = call.id
+            call_name = call.function.name
+            arguments = call.function.arguments
+            # String arguments are decoded with json_repair; anything else is
+            # passed through untouched.
+            if isinstance(arguments, str):
+                arguments = json_repair.loads(arguments)
+            tool_calls.append(ToolCallRequest(id=call_id, name=call_name, arguments=arguments))
+
+        usage = response.usage
+        if usage:
+            usage_stats = {
+                "prompt_tokens": usage.prompt_tokens,
+                "completion_tokens": usage.completion_tokens,
+                "total_tokens": usage.total_tokens,
+            }
+        else:
+            usage_stats = {}
+
+        return LLMResponse(
+            content=message.content,
+            tool_calls=tool_calls,
+            finish_reason=choice.finish_reason or "stop",
+            usage=usage_stats,
+            reasoning_content=getattr(message, "reasoning_content", None),
+        )
+
+    def get_default_model(self) -> str:
+        """Return the model name used when ``chat`` is called without one."""
+        return self.default_model
--- a/nanobot/providers/litellm_provider.py
+++ b/nanobot/providers/litellm_provider.py
@@ -1,6 +1,7 @@
 """LiteLLM provider implementation for multi-provider support."""

 import json
+import json_repair
 import os
 from typing import Any

@@ -54,6 +55,9 @@ class LiteLLMProvider(LLMProvider):
        spec = self._gateway or find_by_model(model)
        if not spec:
            return
+        if not spec.env_key:
+            # OAuth/provider-only specs (for example: openai_codex)
+            return

        # Gateway/local overrides existing env; standard provider doesn't
        if self._gateway:
@@ -122,6 +126,10 @@ class LiteLLMProvider(LLMProvider):
        """
        model = self._resolve_model(model or self.default_model)
        
+        # Clamp max_tokens to at least 1 — negative or zero values cause
+        # LiteLLM to reject the request with "max_tokens must be at least 1".
+        max_tokens = max(1, max_tokens)
+        
        kwargs: dict[str, Any] = {
            "model": model,
            "messages": messages,
@@ -169,10 +177,7 @@ class LiteLLMProvider(LLMProvider):
                # Parse arguments from JSON string if needed
                args = tc.function.arguments
                if isinstance(args, str):
-                    try:
-                        args = json.loads(args)
-                    except json.JSONDecodeError:
-                        args = {"raw": args}
+                    args = json_repair.loads(args)
                
                tool_calls.append(ToolCallRequest(
                    id=tc.id,
--- a/nanobot/providers/openai_codex_provider.py
+++ b/nanobot/providers/openai_codex_provider.py
@@ -0,0 +1,312 @@
+"""OpenAI Codex Responses Provider."""
+
+from __future__ import annotations
+
+import asyncio
+import hashlib
+import json
+from typing import Any, AsyncGenerator
+
+import httpx
+from loguru import logger
+
+from oauth_cli_kit import get_token as get_codex_token
+from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
+
+DEFAULT_CODEX_URL = "https://chatgpt.com/backend-api/codex/responses"
+DEFAULT_ORIGINATOR = "nanobot"
+
+
+class OpenAICodexProvider(LLMProvider):
+    """Use Codex OAuth to call the Responses API."""
+
+    def __init__(self, default_model: str = "openai-codex/gpt-5.1-codex"):
+        """Store the default model; no API key or base URL is passed to the base class."""
+        super().__init__(api_key=None, api_base=None)
+        self.default_model = default_model
+
+    async def chat(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]] | None = None,
+        model: str | None = None,
+        max_tokens: int = 4096,
+        temperature: float = 0.7,
+    ) -> LLMResponse:
+        """Send ``messages`` to the Codex endpoint and return the streamed result.
+
+        Args:
+            messages: Chat messages; converted by ``_convert_messages``.
+            tools: Optional tool definitions; converted by ``_convert_tools``.
+            model: Model name; falls back to ``self.default_model``.
+            max_tokens: Accepted for interface compatibility; not included in
+                the request body built here.
+            temperature: Accepted for interface compatibility; not included in
+                the request body built here.
+
+        Returns:
+            An ``LLMResponse`` with the content, tool calls and finish reason.
+            Failures during the request are returned as a response with
+            ``finish_reason="error"``.  Message conversion and token retrieval
+            run before the ``try`` block, so their exceptions propagate.
+        """
+        model = model or self.default_model
+        system_prompt, input_items = _convert_messages(messages)
+
+        # The token is fetched on every call, in a worker thread — presumably
+        # because get_codex_token blocks; confirm against oauth_cli_kit.
+        token = await asyncio.to_thread(get_codex_token)
+        headers = _build_headers(token.account_id, token.access)
+
+        body: dict[str, Any] = {
+            "model": _strip_model_prefix(model),
+            "store": False,
+            "stream": True,
+            "instructions": system_prompt,
+            "input": input_items,
+            "text": {"verbosity": "medium"},
+            "include": ["reasoning.encrypted_content"],
+            "prompt_cache_key": _prompt_cache_key(messages),
+            "tool_choice": "auto",
+            "parallel_tool_calls": True,
+        }
+
+        if tools:
+            body["tools"] = _convert_tools(tools)
+
+        url = DEFAULT_CODEX_URL
+
+        try:
+            try:
+                content, tool_calls, finish_reason = await _request_codex(url, headers, body, verify=True)
+            except Exception as e:
+                # Only certificate-verification failures trigger the retry;
+                # every other error goes to the outer handler.
+                if "CERTIFICATE_VERIFY_FAILED" not in str(e):
+                    raise
+                # NOTE(review): the retry resends the same headers, including the
+                # OAuth bearer token, with TLS certificate verification disabled.
+                # That exposes the token to interception; confirm this fallback
+                # is an accepted trade-off.
+                logger.warning("SSL certificate verification failed for Codex API; retrying with verify=False")
+                content, tool_calls, finish_reason = await _request_codex(url, headers, body, verify=False)
+            return LLMResponse(
+                content=content,
+                tool_calls=tool_calls,
+                finish_reason=finish_reason,
+            )
+        except Exception as e:
+            # Request failures are reported in-band rather than raised.
+            return LLMResponse(
+                content=f"Error calling Codex: {str(e)}",
+                finish_reason="error",
+            )
+
+    def get_default_model(self) -> str:
+        """Return the model name used when ``chat`` is called without one."""
+        return self.default_model
+
+
+def _strip_model_prefix(model: str) -> str:
+    """Return *model* without its leading Codex provider prefix.
+
+    Both spellings are accepted: ``openai-codex/`` (the default model's
+    prefix) and ``openai_codex/`` (the provider's registry name).  Only the
+    first prefix is removed, so any further ``/`` in the model name is kept.
+    Names without a recognised prefix are returned unchanged.
+    """
+    for prefix in ("openai-codex/", "openai_codex/"):
+        if model.startswith(prefix):
+            return model[len(prefix):]
+    return model
+
+
+def _build_headers(account_id: str, token: str) -> dict[str, str]:
+    """Return the HTTP headers for a Codex request authorised with *token*."""
+    headers: dict[str, str] = {
+        "Authorization": "Bearer {}".format(token),
+        "chatgpt-account-id": account_id,
+    }
+    # The remaining headers are the same for every request.
+    headers.update(
+        {
+            "OpenAI-Beta": "responses=experimental",
+            "originator": DEFAULT_ORIGINATOR,
+            "User-Agent": "nanobot (python)",
+            "accept": "text/event-stream",
+            "content-type": "application/json",
+        }
+    )
+    return headers
+
+
+async def _request_codex(
+    url: str,
+    headers: dict[str, str],
+    body: dict[str, Any],
+    verify: bool,
+) -> tuple[str, list[ToolCallRequest], str]:
+    """POST *body* to *url* as a streaming request and parse the event stream.
+
+    Returns whatever ``_consume_sse`` produces for the response.  Raises
+    ``RuntimeError`` with a message from ``_friendly_error`` when the status
+    code is not 200.
+    """
+    http_client = httpx.AsyncClient(timeout=60.0, verify=verify)
+    async with http_client as client:
+        request_stream = client.stream("POST", url, headers=headers, json=body)
+        async with request_stream as response:
+            if response.status_code == 200:
+                return await _consume_sse(response)
+            # Read the whole error body so it can be included in the message.
+            raw_body = await response.aread()
+            message = _friendly_error(response.status_code, raw_body.decode("utf-8", "ignore"))
+            raise RuntimeError(message)
+
+
+def _convert_tools(tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Flatten OpenAI function-calling tool specs into the Codex tool format.
+
+    A spec with ``type == "function"`` is read from its nested ``function``
+    mapping; any other spec is read as-is.  Specs without a name are dropped,
+    a missing description becomes ``""`` and non-dict parameters become ``{}``.
+    """
+    flat_tools: list[dict[str, Any]] = []
+    for spec in tools:
+        if spec.get("type") == "function":
+            definition = spec.get("function") or {}
+        else:
+            definition = spec
+
+        tool_name = definition.get("name")
+        if tool_name:
+            schema = definition.get("parameters") or {}
+            if not isinstance(schema, dict):
+                schema = {}
+            flat_tools.append(
+                {
+                    "type": "function",
+                    "name": tool_name,
+                    "description": definition.get("description") or "",
+                    "parameters": schema,
+                }
+            )
+    return flat_tools
+
+
+def _convert_messages(messages: list[dict[str, Any]]) -> tuple[str, list[dict[str, Any]]]:
+    """Split chat messages into a system prompt and a list of request input items.
+
+    Returns ``(system_prompt, input_items)``.  The system prompt is taken from
+    the last ``system`` message (``""`` when its content is not a string).
+    ``user``, ``assistant`` and ``tool`` messages are appended to
+    ``input_items`` in order; messages with any other role are ignored.
+    """
+    system_prompt = ""
+    input_items: list[dict[str, Any]] = []
+
+    for idx, msg in enumerate(messages):
+        role = msg.get("role")
+        content = msg.get("content")
+
+        if role == "system":
+            # A later system message replaces an earlier one.
+            system_prompt = content if isinstance(content, str) else ""
+            continue
+
+        if role == "user":
+            input_items.append(_convert_user_message(content))
+            continue
+
+        if role == "assistant":
+            # Handle text first.
+            # Only non-empty string content is emitted; the item id is derived
+            # from the message's position in ``messages``.
+            if isinstance(content, str) and content:
+                input_items.append(
+                    {
+                        "type": "message",
+                        "role": "assistant",
+                        "content": [{"type": "output_text", "text": content}],
+                        "status": "completed",
+                        "id": f"msg_{idx}",
+                    }
+                )
+            # Then handle tool calls.
+            for tool_call in msg.get("tool_calls", []) or []:
+                fn = tool_call.get("function") or {}
+                # The stored id is split into a call id and an item id; empty
+                # parts fall back to ids derived from the message index.
+                call_id, item_id = _split_tool_call_id(tool_call.get("id"))
+                call_id = call_id or f"call_{idx}"
+                item_id = item_id or f"fc_{idx}"
+                input_items.append(
+                    {
+                        "type": "function_call",
+                        "id": item_id,
+                        "call_id": call_id,
+                        "name": fn.get("name"),
+                        "arguments": fn.get("arguments") or "{}",
+                    }
+                )
+            continue
+
+        if role == "tool":
+            # Only the call-id part of the stored id is used for tool output.
+            call_id, _ = _split_tool_call_id(msg.get("tool_call_id"))
+            # Non-string tool output is JSON-encoded.
+            output_text = content if isinstance(content, str) else json.dumps(content)
+            input_items.append(
+                {
+                    "type": "function_call_output",
+                    "call_id": call_id,
+                    "output": output_text,
+                }
+            )
+            continue
+
+    return system_prompt, input_items
+
+
+def _convert_user_message(content: Any) -> dict[str, Any]:
+    """Convert user message content into a ``user`` input item.
+
+    A string becomes a single ``input_text`` part.  A list is converted part
+    by part: ``text`` entries become ``input_text`` and ``image_url`` entries
+    with a URL become ``input_image``; everything else is skipped.  When
+    nothing usable remains, a single empty ``input_text`` part is returned.
+    """
+    if isinstance(content, str):
+        parts: list[dict[str, Any]] = [{"type": "input_text", "text": content}]
+    else:
+        parts = []
+        if isinstance(content, list):
+            for entry in content:
+                if not isinstance(entry, dict):
+                    continue
+                kind = entry.get("type")
+                if kind == "text":
+                    parts.append({"type": "input_text", "text": entry.get("text", "")})
+                elif kind == "image_url":
+                    image_url = (entry.get("image_url") or {}).get("url")
+                    if image_url:
+                        parts.append({"type": "input_image", "image_url": image_url, "detail": "auto"})
+        if not parts:
+            parts = [{"type": "input_text", "text": ""}]
+    return {"role": "user", "content": parts}
+
+
+def _split_tool_call_id(tool_call_id: Any) -> tuple[str, str | None]:
+    """Split a ``"call_id|item_id"`` string into its two parts.
+
+    Returns ``(call_id, item_id)``.  ``item_id`` is ``None`` when there is no
+    ``|`` or nothing follows it.  Anything that is not a non-empty string
+    yields ``("call_0", None)``.
+    """
+    if not isinstance(tool_call_id, str) or not tool_call_id:
+        return "call_0", None
+    # Split on the first "|" only; later ones stay in the item id.
+    call_id, separator, item_id = tool_call_id.partition("|")
+    if not separator:
+        return tool_call_id, None
+    return call_id, item_id or None
+
+
+def _prompt_cache_key(messages: list[dict[str, Any]]) -> str:
+    """Return the SHA-256 hex digest of *messages* serialised as JSON.
+
+    Keys are sorted and non-ASCII characters escaped, so the digest does not
+    depend on dict key order.
+    """
+    digest = hashlib.sha256()
+    serialized = json.dumps(messages, sort_keys=True, ensure_ascii=True)
+    digest.update(serialized.encode("utf-8"))
+    return digest.hexdigest()
+
+
+async def _iter_sse(response: httpx.Response) -> AsyncGenerator[dict[str, Any], None]:
+    """Yield the JSON object carried by each server-sent event in *response*.
+
+    Lines are collected until a blank line ends the event.  Only ``data:``
+    lines are used; multiple ``data:`` lines are joined with newlines before
+    decoding.  Events are skipped when they have no data, carry ``[DONE]``,
+    contain invalid JSON, or decode to anything other than a JSON object, so
+    callers can rely on every yielded value being a ``dict``.
+
+    An event that is not terminated by a blank line before the stream ends is
+    never emitted.
+    """
+    pending: list[str] = []
+    async for line in response.aiter_lines():
+        if line != "":
+            pending.append(line)
+            continue
+
+        # Blank line: the buffered lines form one complete event.
+        data_lines = [entry[5:].strip() for entry in pending if entry.startswith("data:")]
+        pending = []
+        if not data_lines:
+            continue
+        data = "\n".join(data_lines).strip()
+        if not data or data == "[DONE]":
+            continue
+        try:
+            event = json.loads(data)
+        except json.JSONDecodeError:
+            # A malformed event must not abort the rest of the stream.
+            continue
+        # Valid JSON that is not an object (null, list, string, number) would
+        # break consumers that call ``event.get``; skip it like bad JSON.
+        if isinstance(event, dict):
+            yield event
+
+
+async def _consume_sse(response: httpx.Response) -> tuple[str, list[ToolCallRequest], str]:
+    """Read the event stream and collect text, tool calls and the finish reason.
+
+    Returns ``(content, tool_calls, finish_reason)``.  ``content`` is the
+    concatenation of all text deltas, ``tool_calls`` holds one entry per
+    finished function-call item, and ``finish_reason`` defaults to ``"stop"``
+    unless a ``response.completed`` event supplies a status.
+
+    Raises:
+        RuntimeError: when an ``error`` or ``response.failed`` event arrives.
+    """
+    content = ""
+    tool_calls: list[ToolCallRequest] = []
+    # In-progress function calls, keyed by call_id.
+    tool_call_buffers: dict[str, dict[str, Any]] = {}
+    finish_reason = "stop"
+
+    async for event in _iter_sse(response):
+        event_type = event.get("type")
+        if event_type == "response.output_item.added":
+            # A new function-call item opens a buffer; other item types are ignored.
+            item = event.get("item") or {}
+            if item.get("type") == "function_call":
+                call_id = item.get("call_id")
+                if not call_id:
+                    continue
+                tool_call_buffers[call_id] = {
+                    "id": item.get("id") or "fc_0",
+                    "name": item.get("name"),
+                    "arguments": item.get("arguments") or "",
+                }
+        elif event_type == "response.output_text.delta":
+            content += event.get("delta") or ""
+        elif event_type == "response.function_call_arguments.delta":
+            # Argument fragments are appended only for calls with an open buffer.
+            call_id = event.get("call_id")
+            if call_id and call_id in tool_call_buffers:
+                tool_call_buffers[call_id]["arguments"] += event.get("delta") or ""
+        elif event_type == "response.function_call_arguments.done":
+            # The final arguments replace whatever the deltas accumulated.
+            call_id = event.get("call_id")
+            if call_id and call_id in tool_call_buffers:
+                tool_call_buffers[call_id]["arguments"] = event.get("arguments") or ""
+        elif event_type == "response.output_item.done":
+            item = event.get("item") or {}
+            if item.get("type") == "function_call":
+                call_id = item.get("call_id")
+                if not call_id:
+                    continue
+                buf = tool_call_buffers.get(call_id) or {}
+                # Buffered arguments win over the item's own; "{}" is the last resort.
+                args_raw = buf.get("arguments") or item.get("arguments") or "{}"
+                try:
+                    args = json.loads(args_raw)
+                except Exception:
+                    # Undecodable arguments are kept under a "raw" key.
+                    args = {"raw": args_raw}
+                tool_calls.append(
+                    ToolCallRequest(
+                        # The id packs "call_id|item_id", the format that
+                        # _split_tool_call_id takes apart again.
+                        id=f"{call_id}|{buf.get('id') or item.get('id') or 'fc_0'}",
+                        name=buf.get("name") or item.get("name"),
+                        arguments=args,
+                    )
+                )
+        elif event_type == "response.completed":
+            status = (event.get("response") or {}).get("status")
+            finish_reason = _map_finish_reason(status)
+        elif event_type in {"error", "response.failed"}:
+            # NOTE(review): the event payload is discarded, so the raised error
+            # carries no detail about why the response failed.
+            raise RuntimeError("Codex response failed")
+
+    return content, tool_calls, finish_reason
+
+
+# Response status -> finish reason reported on the LLMResponse.
+_FINISH_REASON_MAP = {
+    "completed": "stop",
+    "incomplete": "length",
+    "failed": "error",
+    "cancelled": "error",
+}
+
+
+def _map_finish_reason(status: str | None) -> str:
+    """Translate a response *status* into a finish reason.
+
+    A missing or empty status is treated as ``"completed"``; a status that is
+    not in the table maps to ``"stop"``.
+    """
+    key = status if status else "completed"
+    return _FINISH_REASON_MAP.get(key, "stop")
+
+
+def _friendly_error(status_code: int, raw: str) -> str:
+    """Return the error message for a failed request.
+
+    Status 429 gets a fixed quota/rate-limit message; every other status is
+    reported as ``HTTP <code>: <raw body>``.
+    """
+    if status_code != 429:
+        return f"HTTP {status_code}: {raw}"
+    return "ChatGPT usage quota exceeded or rate limit triggered. Please try again later."
--- a/nanobot/providers/registry.py
+++ b/nanobot/providers/registry.py
@@ -51,6 +51,12 @@ class ProviderSpec:
    # per-model param overrides, e.g. (("kimi-k2.5", {"temperature": 1.0}),)
    model_overrides: tuple[tuple[str, dict[str, Any]], ...] = ()

+    # OAuth-based providers (e.g., OpenAI Codex) don't use API keys
+    is_oauth: bool = False                   # if True, uses OAuth flow instead of API key
+
+    # Direct providers bypass LiteLLM entirely (e.g., CustomProvider)
+    is_direct: bool = False
+
    @property
    def label(self) -> str:
        return self.display_name or self.name.title()
@@ -62,18 +68,14 @@ class ProviderSpec:

 PROVIDERS: tuple[ProviderSpec, ...] = (

-    # === Custom (user-provided OpenAI-compatible endpoint) =================
-    # No auto-detection — only activates when user explicitly configures "custom".
-
+    # === Custom (direct OpenAI-compatible endpoint, bypasses LiteLLM) ======
    ProviderSpec(
        name="custom",
        keywords=(),
-        env_key="OPENAI_API_KEY",
+        env_key="",
        display_name="Custom",
-        litellm_prefix="openai",
-        skip_prefixes=("openai/",),
-        is_gateway=True,
-        strip_model_prefix=True,
+        litellm_prefix="",
+        is_direct=True,
    ),

    # === Gateways (detected by api_key / api_base, not model name) =========
@@ -176,6 +178,44 @@ PROVIDERS: tuple[ProviderSpec, ...] = (
        model_overrides=(),
    ),

+    # OpenAI Codex: uses OAuth, not API key.
+    ProviderSpec(
+        name="openai_codex",
+        keywords=("openai-codex", "codex"),
+        env_key="",                         # OAuth-based, no API key
+        display_name="OpenAI Codex",
+        litellm_prefix="",                  # Not routed through LiteLLM
+        skip_prefixes=(),
+        env_extras=(),
+        is_gateway=False,
+        is_local=False,
+        detect_by_key_prefix="",
+        detect_by_base_keyword="codex",
+        default_api_base="https://chatgpt.com/backend-api",
+        strip_model_prefix=False,
+        model_overrides=(),
+        is_oauth=True,                      # OAuth-based authentication
+    ),
+
+    # Github Copilot: uses OAuth, not API key.
+    ProviderSpec(
+        name="github_copilot",
+        keywords=("github_copilot", "copilot"),
+        env_key="",                         # OAuth-based, no API key
+        display_name="Github Copilot",
+        litellm_prefix="github_copilot",   # github_copilot/model → github_copilot/model
+        skip_prefixes=("github_copilot/",),
+        env_extras=(),
+        is_gateway=False,
+        is_local=False,
+        detect_by_key_prefix="",
+        detect_by_base_keyword="",
+        default_api_base="",
+        strip_model_prefix=False,
+        model_overrides=(),
+        is_oauth=True,                      # OAuth-based authentication
+    ),
+
    # DeepSeek: needs "deepseek/" prefix for LiteLLM routing.
    ProviderSpec(
        name="deepseek",