From a08aae93e6c8ac2b68a4a8d566899b95fd414844 Mon Sep 17 00:00:00 2001
From: hcanyz
Date: Thu, 5 Mar 2026 11:33:20 +0800
Subject: [PATCH 01/15] fix: avoid importing litellm when LiteLLMProvider is not used

Importing litellm eagerly triggers its remote model-cost-map fetch at
startup, so this warning appeared even when LiteLLMProvider was never
used:

LiteLLM:WARNING: get_model_cost_map.py:213 - LiteLLM: Failed to fetch
remote model cost map from
https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json:
The read operation timed out. Falling back to local backup.
---
 nanobot/cli/commands.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index b75a2bc..2597928 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -200,8 +200,6 @@ def onboard():

 def _make_provider(config: Config):
     """Create the appropriate LLM provider from config."""
-    from nanobot.providers.custom_provider import CustomProvider
-    from nanobot.providers.litellm_provider import LiteLLMProvider
     from nanobot.providers.openai_codex_provider import OpenAICodexProvider

     model = config.agents.defaults.model
@@ -213,6 +211,7 @@ def _make_provider(config: Config):
         return OpenAICodexProvider(default_model=model)

     # Custom: direct OpenAI-compatible endpoint, bypasses LiteLLM
+    from nanobot.providers.custom_provider import CustomProvider
     if provider_name == "custom":
         return CustomProvider(
             api_key=p.api_key if p else "no-key",
@@ -220,6 +219,7 @@ def _make_provider(config: Config):
             default_model=model,
         )

+    from nanobot.providers.litellm_provider import LiteLLMProvider
     from nanobot.providers.registry import find_by_name
     spec = find_by_name(provider_name)
     if not model.startswith("bedrock/") and not (p and p.api_key) and not (spec and spec.is_oauth):

From 5cc3c032450f5a97c1ebf9bf153974ad00ddc725 Mon Sep 17 00:00:00 2001
From: Peixian Gong
Date: Thu, 5 Mar 2026 15:15:37 +0800
Subject: [PATCH 02/15] fix: merge tool_calls from multiple choices in LiteLLM response

GitHub Copilot's API returns tool_calls split across multiple choices:
- choices[0]: content only (tool_calls=null)
- choices[1]: tool_calls only (content=null)

The existing _parse_response only inspected choices[0], so tool_calls
were silently lost, causing the agent to never execute tools when using
github_copilot/ models.

This fix scans all choices and merges tool_calls + content, so providers
that return multi-choice responses work correctly. Single-choice
providers (OpenAI, Anthropic, etc.) are unaffected since the loop over
one choice is equivalent to the original code.
---
 nanobot/providers/litellm_provider.py | 44 +++++++++++++++++++--------
 1 file changed, 31 insertions(+), 13 deletions(-)

diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py
index d8d8ace..a1819a2 100644
--- a/nanobot/providers/litellm_provider.py
+++ b/nanobot/providers/litellm_provider.py
@@ -8,6 +8,7 @@ from typing import Any
 import json_repair
 import litellm
 from litellm import acompletion
+from loguru import logger

 from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
 from nanobot.providers.registry import find_by_model, find_gateway
@@ -255,20 +256,37 @@ class LiteLLMProvider(LLMProvider):
         """Parse LiteLLM response into our standard format."""
         choice = response.choices[0]
         message = choice.message
+        content = message.content
+        finish_reason = choice.finish_reason
+
+        # Some providers (e.g. GitHub Copilot) split content and tool_calls
+        # across multiple choices. Merge them so tool_calls are not lost.
+        raw_tool_calls = []
+        for ch in response.choices:
+            msg = ch.message
+            if hasattr(msg, "tool_calls") and msg.tool_calls:
+                raw_tool_calls.extend(msg.tool_calls)
+            if ch.finish_reason in ("tool_calls", "stop"):
+                finish_reason = ch.finish_reason
+            if not content and msg.content:
+                content = msg.content
+
+        if len(response.choices) > 1:
+            logger.debug("LiteLLM response has {} choices, merged {} tool_calls",
+                         len(response.choices), len(raw_tool_calls))

         tool_calls = []
-        if hasattr(message, "tool_calls") and message.tool_calls:
-            for tc in message.tool_calls:
-                # Parse arguments from JSON string if needed
-                args = tc.function.arguments
-                if isinstance(args, str):
-                    args = json_repair.loads(args)
+        for tc in raw_tool_calls:
+            # Parse arguments from JSON string if needed
+            args = tc.function.arguments
+            if isinstance(args, str):
+                args = json_repair.loads(args)

-                tool_calls.append(ToolCallRequest(
-                    id=_short_tool_id(),
-                    name=tc.function.name,
-                    arguments=args,
-                ))
+            tool_calls.append(ToolCallRequest(
+                id=_short_tool_id(),
+                name=tc.function.name,
+                arguments=args,
+            ))

         usage = {}
         if hasattr(response, "usage") and response.usage:
@@ -282,9 +300,9 @@ class LiteLLMProvider(LLMProvider):
         thinking_blocks = getattr(message, "thinking_blocks", None) or None

         return LLMResponse(
-            content=message.content,
+            content=content,
             tool_calls=tool_calls,
-            finish_reason=choice.finish_reason or "stop",
+            finish_reason=finish_reason or "stop",
             usage=usage,
             reasoning_content=reasoning_content,
             thinking_blocks=thinking_blocks,

From cf3e7e3f38325224dcb342af448ecd17c11d1d13 Mon Sep 17 00:00:00 2001
From: ouyangwulin
Date: Thu, 5 Mar 2026 16:54:15 +0800
Subject: [PATCH 03/15] feat: Add Alibaba Cloud Coding Plan API support

Add dashscope_coding_plan provider to registry with OpenAI-compatible
endpoint for BaiLian coding assistance.

- Supports API key detection by 'sk-sp-' prefix pattern
- Adds provider config schema entry for proper loading
- Updates documentation with configuration instructions
- Fixes duplicate MatrixConfig class issue in schema
- Follows existing nanobot provider patterns for consistency
---
 README.md                     |   1 +
 nanobot/config/schema.py      |  62 +++++++++++-----
 nanobot/providers/registry.py | 130 ++++++++++++++++------------------
 3 files changed, 109 insertions(+), 84 deletions(-)

diff --git a/README.md b/README.md
index 33cdeee..2977ccb 100644
--- a/README.md
+++ b/README.md
@@ -656,6 +656,7 @@ Config file: `~/.nanobot/config.json`
 > [!TIP]
 > - **Groq** provides free voice transcription via Whisper. If configured, Telegram voice messages will be automatically transcribed.
 > - **Zhipu Coding Plan**: If you're on Zhipu's coding plan, set `"apiBase": "https://open.bigmodel.cn/api/coding/paas/v4"` in your zhipu provider config.
+> - **Alibaba Cloud Coding Plan**: If you're on the Alibaba Cloud Coding Plan (BaiLian coding assistance), add a `dashscope_coding_plan` provider entry with an API key starting with `sk-sp-` to your config. This provider uses the OpenAI-compatible endpoint `https://coding.dashscope.aliyuncs.com/v1`.
 > - **MiniMax (Mainland China)**: If your API key is from MiniMax's mainland China platform (minimaxi.com), set `"apiBase": "https://api.minimaxi.com/v1"` in your minimax provider config.
 > - **VolcEngine Coding Plan**: If you're on VolcEngine's coding plan, set `"apiBase": "https://ark.cn-beijing.volces.com/api/coding/v3"` in your volcengine provider config.
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index 61a7bd2..538fab8 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -29,7 +29,9 @@ class TelegramConfig(Base): enabled: bool = False token: str = "" # Bot token from @BotFather allow_from: list[str] = Field(default_factory=list) # Allowed user IDs or usernames - proxy: str | None = None # HTTP/SOCKS5 proxy URL, e.g. "http://127.0.0.1:7890" or "socks5://127.0.0.1:1080" + proxy: str | None = ( + None # HTTP/SOCKS5 proxy URL, e.g. "http://127.0.0.1:7890" or "socks5://127.0.0.1:1080" + ) reply_to_message: bool = False # If true, bot replies quote the original message @@ -42,7 +44,9 @@ class FeishuConfig(Base): encrypt_key: str = "" # Encrypt Key for event subscription (optional) verification_token: str = "" # Verification Token for event subscription (optional) allow_from: list[str] = Field(default_factory=list) # Allowed user open_ids - react_emoji: str = "THUMBSUP" # Emoji type for message reactions (e.g. THUMBSUP, OK, DONE, SMILE) + react_emoji: str = ( + "THUMBSUP" # Emoji type for message reactions (e.g. THUMBSUP, OK, DONE, SMILE) + ) class DingTalkConfig(Base): @@ -72,9 +76,13 @@ class MatrixConfig(Base): access_token: str = "" user_id: str = "" # @bot:matrix.org device_id: str = "" - e2ee_enabled: bool = True # Enable Matrix E2EE support (encryption + encrypted room handling). - sync_stop_grace_seconds: int = 2 # Max seconds to wait for sync_forever to stop gracefully before cancellation fallback. - max_media_bytes: int = 20 * 1024 * 1024 # Max attachment size accepted for Matrix media handling (inbound + outbound). + e2ee_enabled: bool = True # Enable Matrix E2EE support (encryption + encrypted room handling). + sync_stop_grace_seconds: int = ( + 2 # Max seconds to wait for sync_forever to stop gracefully before cancellation fallback. + ) + max_media_bytes: int = ( + 20 * 1024 * 1024 + ) # Max attachment size accepted for Matrix media handling (inbound + outbound). allow_from: list[str] = Field(default_factory=list) group_policy: Literal["open", "mention", "allowlist"] = "open" group_allow_from: list[str] = Field(default_factory=list) @@ -105,7 +113,9 @@ class EmailConfig(Base): from_address: str = "" # Behavior - auto_reply_enabled: bool = True # If false, inbound email is read but no automatic reply is sent + auto_reply_enabled: bool = ( + True # If false, inbound email is read but no automatic reply is sent + ) poll_interval_seconds: int = 30 mark_seen: bool = True max_body_chars: int = 12000 @@ -183,27 +193,32 @@ class QQConfig(Base): enabled: bool = False app_id: str = "" # 机器人 ID (AppID) from q.qq.com secret: str = "" # 机器人密钥 (AppSecret) from q.qq.com - allow_from: list[str] = Field(default_factory=list) # Allowed user openids (empty = public access) + allow_from: list[str] = Field( + default_factory=list + ) # Allowed user openids (empty = public access) + class MatrixConfig(Base): """Matrix (Element) channel configuration.""" + enabled: bool = False homeserver: str = "https://matrix.org" access_token: str = "" - user_id: str = "" # e.g. @bot:matrix.org + user_id: str = "" # e.g. 
@bot:matrix.org device_id: str = "" - e2ee_enabled: bool = True # end-to-end encryption support - sync_stop_grace_seconds: int = 2 # graceful sync_forever shutdown timeout - max_media_bytes: int = 20 * 1024 * 1024 # inbound + outbound attachment limit + e2ee_enabled: bool = True # end-to-end encryption support + sync_stop_grace_seconds: int = 2 # graceful sync_forever shutdown timeout + max_media_bytes: int = 20 * 1024 * 1024 # inbound + outbound attachment limit allow_from: list[str] = Field(default_factory=list) group_policy: Literal["open", "mention", "allowlist"] = "open" group_allow_from: list[str] = Field(default_factory=list) allow_room_mentions: bool = False + class ChannelsConfig(Base): """Configuration for chat channels.""" - send_progress: bool = True # stream agent's text progress to the channel + send_progress: bool = True # stream agent's text progress to the channel send_tool_hints: bool = False # stream tool-call hints (e.g. read_file("…")) whatsapp: WhatsAppConfig = Field(default_factory=WhatsAppConfig) telegram: TelegramConfig = Field(default_factory=TelegramConfig) @@ -222,7 +237,9 @@ class AgentDefaults(Base): workspace: str = "~/.nanobot/workspace" model: str = "anthropic/claude-opus-4-5" - provider: str = "auto" # Provider name (e.g. "anthropic", "openrouter") or "auto" for auto-detection + provider: str = ( + "auto" # Provider name (e.g. "anthropic", "openrouter") or "auto" for auto-detection + ) max_tokens: int = 8192 temperature: float = 0.1 max_tool_iterations: int = 40 @@ -255,13 +272,20 @@ class ProvidersConfig(Base): groq: ProviderConfig = Field(default_factory=ProviderConfig) zhipu: ProviderConfig = Field(default_factory=ProviderConfig) dashscope: ProviderConfig = Field(default_factory=ProviderConfig) # 阿里云通义千问 + dashscope_coding_plan: ProviderConfig = Field( + default_factory=ProviderConfig + ) # 阿里云百炼Coding Plan vllm: ProviderConfig = Field(default_factory=ProviderConfig) gemini: ProviderConfig = Field(default_factory=ProviderConfig) moonshot: ProviderConfig = Field(default_factory=ProviderConfig) minimax: ProviderConfig = Field(default_factory=ProviderConfig) aihubmix: ProviderConfig = Field(default_factory=ProviderConfig) # AiHubMix API gateway - siliconflow: ProviderConfig = Field(default_factory=ProviderConfig) # SiliconFlow (硅基流动) API gateway - volcengine: ProviderConfig = Field(default_factory=ProviderConfig) # VolcEngine (火山引擎) API gateway + siliconflow: ProviderConfig = Field( + default_factory=ProviderConfig + ) # SiliconFlow (硅基流动) API gateway + volcengine: ProviderConfig = Field( + default_factory=ProviderConfig + ) # VolcEngine (火山引擎) API gateway openai_codex: ProviderConfig = Field(default_factory=ProviderConfig) # OpenAI Codex (OAuth) github_copilot: ProviderConfig = Field(default_factory=ProviderConfig) # Github Copilot (OAuth) @@ -291,7 +315,9 @@ class WebSearchConfig(Base): class WebToolsConfig(Base): """Web tools configuration.""" - proxy: str | None = None # HTTP/SOCKS5 proxy URL, e.g. "http://127.0.0.1:7890" or "socks5://127.0.0.1:1080" + proxy: str | None = ( + None # HTTP/SOCKS5 proxy URL, e.g. 
"http://127.0.0.1:7890" or "socks5://127.0.0.1:1080" + ) search: WebSearchConfig = Field(default_factory=WebSearchConfig) @@ -336,7 +362,9 @@ class Config(BaseSettings): """Get expanded workspace path.""" return Path(self.agents.defaults.workspace).expanduser() - def _match_provider(self, model: str | None = None) -> tuple["ProviderConfig | None", str | None]: + def _match_provider( + self, model: str | None = None + ) -> tuple["ProviderConfig | None", str | None]: """Match provider config and its registry name. Returns (config, spec_name).""" from nanobot.providers.registry import PROVIDERS diff --git a/nanobot/providers/registry.py b/nanobot/providers/registry.py index df915b7..da04cd7 100644 --- a/nanobot/providers/registry.py +++ b/nanobot/providers/registry.py @@ -26,33 +26,33 @@ class ProviderSpec: """ # identity - name: str # config field name, e.g. "dashscope" - keywords: tuple[str, ...] # model-name keywords for matching (lowercase) - env_key: str # LiteLLM env var, e.g. "DASHSCOPE_API_KEY" - display_name: str = "" # shown in `nanobot status` + name: str # config field name, e.g. "dashscope" + keywords: tuple[str, ...] # model-name keywords for matching (lowercase) + env_key: str # LiteLLM env var, e.g. "DASHSCOPE_API_KEY" + display_name: str = "" # shown in `nanobot status` # model prefixing - litellm_prefix: str = "" # "dashscope" → model becomes "dashscope/{model}" - skip_prefixes: tuple[str, ...] = () # don't prefix if model already starts with these + litellm_prefix: str = "" # "dashscope" → model becomes "dashscope/{model}" + skip_prefixes: tuple[str, ...] = () # don't prefix if model already starts with these # extra env vars, e.g. (("ZHIPUAI_API_KEY", "{api_key}"),) env_extras: tuple[tuple[str, str], ...] = () # gateway / local detection - is_gateway: bool = False # routes any model (OpenRouter, AiHubMix) - is_local: bool = False # local deployment (vLLM, Ollama) - detect_by_key_prefix: str = "" # match api_key prefix, e.g. "sk-or-" - detect_by_base_keyword: str = "" # match substring in api_base URL - default_api_base: str = "" # fallback base URL + is_gateway: bool = False # routes any model (OpenRouter, AiHubMix) + is_local: bool = False # local deployment (vLLM, Ollama) + detect_by_key_prefix: str = "" # match api_key prefix, e.g. "sk-or-" + detect_by_base_keyword: str = "" # match substring in api_base URL + default_api_base: str = "" # fallback base URL # gateway behavior - strip_model_prefix: bool = False # strip "provider/" before re-prefixing + strip_model_prefix: bool = False # strip "provider/" before re-prefixing # per-model param overrides, e.g. (("kimi-k2.5", {"temperature": 1.0}),) model_overrides: tuple[tuple[str, dict[str, Any]], ...] = () # OAuth-based providers (e.g., OpenAI Codex) don't use API keys - is_oauth: bool = False # if True, uses OAuth flow instead of API key + is_oauth: bool = False # if True, uses OAuth flow instead of API key # Direct providers bypass LiteLLM entirely (e.g., CustomProvider) is_direct: bool = False @@ -70,7 +70,6 @@ class ProviderSpec: # --------------------------------------------------------------------------- PROVIDERS: tuple[ProviderSpec, ...] = ( - # === Custom (direct OpenAI-compatible endpoint, bypasses LiteLLM) ====== ProviderSpec( name="custom", @@ -80,17 +79,15 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( litellm_prefix="", is_direct=True, ), - # === Gateways (detected by api_key / api_base, not model name) ========= # Gateways can route any model, so they win in fallback. 
- # OpenRouter: global gateway, keys start with "sk-or-" ProviderSpec( name="openrouter", keywords=("openrouter",), env_key="OPENROUTER_API_KEY", display_name="OpenRouter", - litellm_prefix="openrouter", # claude-3 → openrouter/claude-3 + litellm_prefix="openrouter", # claude-3 → openrouter/claude-3 skip_prefixes=(), env_extras=(), is_gateway=True, @@ -102,16 +99,15 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( model_overrides=(), supports_prompt_caching=True, ), - # AiHubMix: global gateway, OpenAI-compatible interface. # strip_model_prefix=True: it doesn't understand "anthropic/claude-3", # so we strip to bare "claude-3" then re-prefix as "openai/claude-3". ProviderSpec( name="aihubmix", keywords=("aihubmix",), - env_key="OPENAI_API_KEY", # OpenAI-compatible + env_key="OPENAI_API_KEY", # OpenAI-compatible display_name="AiHubMix", - litellm_prefix="openai", # → openai/{model} + litellm_prefix="openai", # → openai/{model} skip_prefixes=(), env_extras=(), is_gateway=True, @@ -119,10 +115,9 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( detect_by_key_prefix="", detect_by_base_keyword="aihubmix", default_api_base="https://aihubmix.com/v1", - strip_model_prefix=True, # anthropic/claude-3 → claude-3 → openai/claude-3 + strip_model_prefix=True, # anthropic/claude-3 → claude-3 → openai/claude-3 model_overrides=(), ), - # SiliconFlow (硅基流动): OpenAI-compatible gateway, model names keep org prefix ProviderSpec( name="siliconflow", @@ -140,7 +135,6 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( strip_model_prefix=False, model_overrides=(), ), - # VolcEngine (火山引擎): OpenAI-compatible gateway ProviderSpec( name="volcengine", @@ -158,9 +152,7 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( strip_model_prefix=False, model_overrides=(), ), - # === Standard providers (matched by model-name keywords) =============== - # Anthropic: LiteLLM recognizes "claude-*" natively, no prefix needed. ProviderSpec( name="anthropic", @@ -179,7 +171,6 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( model_overrides=(), supports_prompt_caching=True, ), - # OpenAI: LiteLLM recognizes "gpt-*" natively, no prefix needed. ProviderSpec( name="openai", @@ -197,14 +188,13 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( strip_model_prefix=False, model_overrides=(), ), - # OpenAI Codex: uses OAuth, not API key. ProviderSpec( name="openai_codex", keywords=("openai-codex",), - env_key="", # OAuth-based, no API key + env_key="", # OAuth-based, no API key display_name="OpenAI Codex", - litellm_prefix="", # Not routed through LiteLLM + litellm_prefix="", # Not routed through LiteLLM skip_prefixes=(), env_extras=(), is_gateway=False, @@ -214,16 +204,15 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( default_api_base="https://chatgpt.com/backend-api", strip_model_prefix=False, model_overrides=(), - is_oauth=True, # OAuth-based authentication + is_oauth=True, # OAuth-based authentication ), - # Github Copilot: uses OAuth, not API key. ProviderSpec( name="github_copilot", keywords=("github_copilot", "copilot"), - env_key="", # OAuth-based, no API key + env_key="", # OAuth-based, no API key display_name="Github Copilot", - litellm_prefix="github_copilot", # github_copilot/model → github_copilot/model + litellm_prefix="github_copilot", # github_copilot/model → github_copilot/model skip_prefixes=("github_copilot/",), env_extras=(), is_gateway=False, @@ -233,17 +222,16 @@ PROVIDERS: tuple[ProviderSpec, ...] 
= ( default_api_base="", strip_model_prefix=False, model_overrides=(), - is_oauth=True, # OAuth-based authentication + is_oauth=True, # OAuth-based authentication ), - # DeepSeek: needs "deepseek/" prefix for LiteLLM routing. ProviderSpec( name="deepseek", keywords=("deepseek",), env_key="DEEPSEEK_API_KEY", display_name="DeepSeek", - litellm_prefix="deepseek", # deepseek-chat → deepseek/deepseek-chat - skip_prefixes=("deepseek/",), # avoid double-prefix + litellm_prefix="deepseek", # deepseek-chat → deepseek/deepseek-chat + skip_prefixes=("deepseek/",), # avoid double-prefix env_extras=(), is_gateway=False, is_local=False, @@ -253,15 +241,14 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( strip_model_prefix=False, model_overrides=(), ), - # Gemini: needs "gemini/" prefix for LiteLLM. ProviderSpec( name="gemini", keywords=("gemini",), env_key="GEMINI_API_KEY", display_name="Gemini", - litellm_prefix="gemini", # gemini-pro → gemini/gemini-pro - skip_prefixes=("gemini/",), # avoid double-prefix + litellm_prefix="gemini", # gemini-pro → gemini/gemini-pro + skip_prefixes=("gemini/",), # avoid double-prefix env_extras=(), is_gateway=False, is_local=False, @@ -271,7 +258,6 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( strip_model_prefix=False, model_overrides=(), ), - # Zhipu: LiteLLM uses "zai/" prefix. # Also mirrors key to ZHIPUAI_API_KEY (some LiteLLM paths check that). # skip_prefixes: don't add "zai/" when already routed via gateway. @@ -280,11 +266,9 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( keywords=("zhipu", "glm", "zai"), env_key="ZAI_API_KEY", display_name="Zhipu AI", - litellm_prefix="zai", # glm-4 → zai/glm-4 + litellm_prefix="zai", # glm-4 → zai/glm-4 skip_prefixes=("zhipu/", "zai/", "openrouter/", "hosted_vllm/"), - env_extras=( - ("ZHIPUAI_API_KEY", "{api_key}"), - ), + env_extras=(("ZHIPUAI_API_KEY", "{api_key}"),), is_gateway=False, is_local=False, detect_by_key_prefix="", @@ -293,14 +277,13 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( strip_model_prefix=False, model_overrides=(), ), - # DashScope: Qwen models, needs "dashscope/" prefix. ProviderSpec( name="dashscope", keywords=("qwen", "dashscope"), env_key="DASHSCOPE_API_KEY", display_name="DashScope", - litellm_prefix="dashscope", # qwen-max → dashscope/qwen-max + litellm_prefix="dashscope", # qwen-max → dashscope/qwen-max skip_prefixes=("dashscope/", "openrouter/"), env_extras=(), is_gateway=False, @@ -311,7 +294,6 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( strip_model_prefix=False, model_overrides=(), ), - # Moonshot: Kimi models, needs "moonshot/" prefix. # LiteLLM requires MOONSHOT_API_BASE env var to find the endpoint. # Kimi K2.5 API enforces temperature >= 1.0. @@ -320,22 +302,17 @@ PROVIDERS: tuple[ProviderSpec, ...] 
= ( keywords=("moonshot", "kimi"), env_key="MOONSHOT_API_KEY", display_name="Moonshot", - litellm_prefix="moonshot", # kimi-k2.5 → moonshot/kimi-k2.5 + litellm_prefix="moonshot", # kimi-k2.5 → moonshot/kimi-k2.5 skip_prefixes=("moonshot/", "openrouter/"), - env_extras=( - ("MOONSHOT_API_BASE", "{api_base}"), - ), + env_extras=(("MOONSHOT_API_BASE", "{api_base}"),), is_gateway=False, is_local=False, detect_by_key_prefix="", detect_by_base_keyword="", - default_api_base="https://api.moonshot.ai/v1", # intl; use api.moonshot.cn for China + default_api_base="https://api.moonshot.ai/v1", # intl; use api.moonshot.cn for China strip_model_prefix=False, - model_overrides=( - ("kimi-k2.5", {"temperature": 1.0}), - ), + model_overrides=(("kimi-k2.5", {"temperature": 1.0}),), ), - # MiniMax: needs "minimax/" prefix for LiteLLM routing. # Uses OpenAI-compatible API at api.minimax.io/v1. ProviderSpec( @@ -343,7 +320,7 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( keywords=("minimax",), env_key="MINIMAX_API_KEY", display_name="MiniMax", - litellm_prefix="minimax", # MiniMax-M2.1 → minimax/MiniMax-M2.1 + litellm_prefix="minimax", # MiniMax-M2.1 → minimax/MiniMax-M2.1 skip_prefixes=("minimax/", "openrouter/"), env_extras=(), is_gateway=False, @@ -354,9 +331,7 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( strip_model_prefix=False, model_overrides=(), ), - # === Local deployment (matched by config key, NOT by api_base) ========= - # vLLM / any OpenAI-compatible local server. # Detected when config key is "vllm" (provider_name="vllm"). ProviderSpec( @@ -364,20 +339,38 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( keywords=("vllm",), env_key="HOSTED_VLLM_API_KEY", display_name="vLLM/Local", - litellm_prefix="hosted_vllm", # Llama-3-8B → hosted_vllm/Llama-3-8B + litellm_prefix="hosted_vllm", # Llama-3-8B → hosted_vllm/Llama-3-8B skip_prefixes=(), env_extras=(), is_gateway=False, is_local=True, detect_by_key_prefix="", detect_by_base_keyword="", - default_api_base="", # user must provide in config + default_api_base="", # user must provide in config + strip_model_prefix=False, + model_overrides=(), + ), + # === Coding Plan Gateway Providers ===================================== + # Alibaba Cloud Coding Plan: OpenAI-compatible gateway for coding assistance. + # Uses special API key format starting with "sk-sp-" to distinguish it + # from regular dashscope keys. Uses the OpenAI-compatible endpoint. + ProviderSpec( + name="dashscope_coding_plan", + keywords=("dashscope-coding-plan", "coding-plan", "aliyun-coding", "bailian-coding"), + env_key="DASHSCOPE_CODING_PLAN_API_KEY", + display_name="Alibaba Cloud Coding Plan", + litellm_prefix="dashscope", # → dashscope/{model} + skip_prefixes=("dashscope/", "openrouter/"), + env_extras=(), + is_gateway=True, + is_local=False, + detect_by_key_prefix="sk-sp-", # coding plan API keys start with "sk-sp-" + detect_by_base_keyword="coding.dashscope", + default_api_base="https://coding.dashscope.aliyuncs.com/v1", strip_model_prefix=False, model_overrides=(), ), - # === Auxiliary (not a primary LLM provider) ============================ - # Groq: mainly used for Whisper voice transcription, also usable for LLM. # Needs "groq/" prefix for LiteLLM routing. Placed last — it rarely wins fallback. ProviderSpec( @@ -385,8 +378,8 @@ PROVIDERS: tuple[ProviderSpec, ...] 
= ( keywords=("groq",), env_key="GROQ_API_KEY", display_name="Groq", - litellm_prefix="groq", # llama3-8b-8192 → groq/llama3-8b-8192 - skip_prefixes=("groq/",), # avoid double-prefix + litellm_prefix="groq", # llama3-8b-8192 → groq/llama3-8b-8192 + skip_prefixes=("groq/",), # avoid double-prefix env_extras=(), is_gateway=False, is_local=False, @@ -403,6 +396,7 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( # Lookup helpers # --------------------------------------------------------------------------- + def find_by_model(model: str) -> ProviderSpec | None: """Match a standard provider by model-name keyword (case-insensitive). Skips gateways/local — those are matched by api_key/api_base instead.""" @@ -418,7 +412,9 @@ def find_by_model(model: str) -> ProviderSpec | None: return spec for spec in std_specs: - if any(kw in model_lower or kw.replace("-", "_") in model_normalized for kw in spec.keywords): + if any( + kw in model_lower or kw.replace("-", "_") in model_normalized for kw in spec.keywords + ): return spec return None From 323e5f22cc1be888e2b6f291233d9a96a97edd6c Mon Sep 17 00:00:00 2001 From: suger-m Date: Thu, 5 Mar 2026 11:14:04 +0800 Subject: [PATCH 04/15] refactor(channels): extract split_message utility to reduce code duplication Extract the _split_message function from discord.py and telegram.py into a shared utility function in utils/helpers.py. Changes: - Add split_message() to nanobot/utils/helpers.py with configurable max_len - Update Discord channel to use shared utility (2000 char limit) - Update Telegram channel to use shared utility (4000 char limit) - Remove duplicate implementations from both channels Benefits: - Reduces code duplication - Centralizes message splitting logic for easier maintenance - Makes the function reusable for future channels The function splits content into chunks within max_len, preferring to break at newlines or spaces rather than mid-word. 
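
A quick sketch of the helper's behavior (the sample text and limit are
illustrative, not taken from the channels):

    from nanobot.utils.helpers import split_message

    text = "line one\n" + "x" * 2500           # 2509 chars, over the limit
    chunks = split_message(text, max_len=2000)
    # Splits at the newline first, then hard-splits the unbroken run:
    # ["line one", "xxx...x" (2000 chars), "xxx...x" (500 chars)]
    assert all(len(c) <= 2000 for c in chunks)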
--- nanobot/channels/discord.py | 25 ++----------------------- nanobot/channels/telegram.py | 25 ++++--------------------- nanobot/utils/helpers.py | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 44 deletions(-) diff --git a/nanobot/channels/discord.py b/nanobot/channels/discord.py index 57e5922..4368540 100644 --- a/nanobot/channels/discord.py +++ b/nanobot/channels/discord.py @@ -13,34 +13,13 @@ from nanobot.bus.events import OutboundMessage from nanobot.bus.queue import MessageBus from nanobot.channels.base import BaseChannel from nanobot.config.schema import DiscordConfig +from nanobot.utils.helpers import split_message DISCORD_API_BASE = "https://discord.com/api/v10" MAX_ATTACHMENT_BYTES = 20 * 1024 * 1024 # 20MB MAX_MESSAGE_LEN = 2000 # Discord message character limit -def _split_message(content: str, max_len: int = MAX_MESSAGE_LEN) -> list[str]: - """Split content into chunks within max_len, preferring line breaks.""" - if not content: - return [] - if len(content) <= max_len: - return [content] - chunks: list[str] = [] - while content: - if len(content) <= max_len: - chunks.append(content) - break - cut = content[:max_len] - pos = cut.rfind('\n') - if pos <= 0: - pos = cut.rfind(' ') - if pos <= 0: - pos = max_len - chunks.append(content[:pos]) - content = content[pos:].lstrip() - return chunks - - class DiscordChannel(BaseChannel): """Discord channel using Gateway websocket.""" @@ -104,7 +83,7 @@ class DiscordChannel(BaseChannel): headers = {"Authorization": f"Bot {self.config.token}"} try: - chunks = _split_message(msg.content or "") + chunks = split_message(msg.content or "", MAX_MESSAGE_LEN) if not chunks: return diff --git a/nanobot/channels/telegram.py b/nanobot/channels/telegram.py index c290535..2742181 100644 --- a/nanobot/channels/telegram.py +++ b/nanobot/channels/telegram.py @@ -14,6 +14,9 @@ from nanobot.bus.events import OutboundMessage from nanobot.bus.queue import MessageBus from nanobot.channels.base import BaseChannel from nanobot.config.schema import TelegramConfig +from nanobot.utils.helpers import split_message + +TELEGRAM_MAX_MESSAGE_LEN = 4000 # Telegram message character limit def _markdown_to_telegram_html(text: str) -> str: @@ -79,26 +82,6 @@ def _markdown_to_telegram_html(text: str) -> str: return text -def _split_message(content: str, max_len: int = 4000) -> list[str]: - """Split content into chunks within max_len, preferring line breaks.""" - if len(content) <= max_len: - return [content] - chunks: list[str] = [] - while content: - if len(content) <= max_len: - chunks.append(content) - break - cut = content[:max_len] - pos = cut.rfind('\n') - if pos == -1: - pos = cut.rfind(' ') - if pos == -1: - pos = max_len - chunks.append(content[:pos]) - content = content[pos:].lstrip() - return chunks - - class TelegramChannel(BaseChannel): """ Telegram channel using long polling. 
@@ -269,7 +252,7 @@

         # Send text content
         if msg.content and msg.content != "[empty message]":
-            for chunk in _split_message(msg.content):
+            for chunk in split_message(msg.content, TELEGRAM_MAX_MESSAGE_LEN):
                 try:
                     html = _markdown_to_telegram_html(chunk)
                     await self._app.bot.send_message(
diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py
index 3a8c802..8fd81f9 100644
--- a/nanobot/utils/helpers.py
+++ b/nanobot/utils/helpers.py
@@ -34,6 +34,38 @@ def safe_filename(name: str) -> str:
     return _UNSAFE_CHARS.sub("_", name).strip()


+def split_message(content: str, max_len: int = 2000) -> list[str]:
+    """
+    Split content into chunks within max_len, preferring line breaks.
+
+    Args:
+        content: The text content to split.
+        max_len: Maximum length per chunk (default 2000 for Discord compatibility).
+
+    Returns:
+        List of message chunks, each within max_len.
+    """
+    if not content:
+        return []
+    if len(content) <= max_len:
+        return [content]
+    chunks: list[str] = []
+    while content:
+        if len(content) <= max_len:
+            chunks.append(content)
+            break
+        cut = content[:max_len]
+        # Try to break at newline first, then space, then hard break
+        pos = cut.rfind('\n')
+        if pos <= 0:
+            pos = cut.rfind(' ')
+        if pos <= 0:
+            pos = max_len
+        chunks.append(content[:pos])
+        content = content[pos:].lstrip()
+    return chunks
+
+
 def sync_workspace_templates(workspace: Path, silent: bool = False) -> list[str]:
     """Sync bundled templates to workspace. Only creates missing files."""
     from importlib.resources import files as pkg_files

From 6770a6e7e9dbebd5ca3ed490a5dd0b30b7c3d7a3 Mon Sep 17 00:00:00 2001
From: ouyangwulin
Date: Thu, 5 Mar 2026 17:34:36 +0800
Subject: [PATCH 05/15] feat: support Aliyun coding plan

---
 nanobot/config/schema.py      | 34 ++++++++++++++++------------------
 nanobot/providers/registry.py |  6 +++---
 2 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index 538fab8..15cf2b4 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -5,7 +5,7 @@ from typing import Literal

 from pydantic import BaseModel, ConfigDict, Field
 from pydantic.alias_generators import to_camel
-from pydantic_settings import BaseSettings
+from pydantic_settings import BaseSettings, SettingsConfigDict


 class Base(BaseModel):
@@ -198,23 +198,6 @@ class QQConfig(Base):
     )  # Allowed user openids (empty = public access)


-class MatrixConfig(Base):
-    """Matrix (Element) channel configuration."""
-
-    enabled: bool = False
-    homeserver: str = "https://matrix.org"
-    access_token: str = ""
-    user_id: str = ""  # e.g. 
@bot:matrix.org - device_id: str = "" - e2ee_enabled: bool = True # end-to-end encryption support - sync_stop_grace_seconds: int = 2 # graceful sync_forever shutdown timeout - max_media_bytes: int = 20 * 1024 * 1024 # inbound + outbound attachment limit - allow_from: list[str] = Field(default_factory=list) - group_policy: Literal["open", "mention", "allowlist"] = "open" - group_allow_from: list[str] = Field(default_factory=list) - allow_room_mentions: bool = False - - class ChannelsConfig(Base): """Configuration for chat channels.""" @@ -339,6 +322,20 @@ class MCPServerConfig(Base): tool_timeout: int = 30 # Seconds before a tool call is cancelled +class TTSConfig(Base): + """Text-to-Speech configuration.""" + + provider: str = "edge_tts" # Default TTS provider + voice: str = "en-US-ChristopherNeural" # Default voice + speed: float = 1.0 # Voice speed multiplier + + +class AudioConfig(Base): + """Audio configuration.""" + + tts: TTSConfig = Field(default_factory=TTSConfig) + + class ToolsConfig(Base): """Tools configuration.""" @@ -356,6 +353,7 @@ class Config(BaseSettings): providers: ProvidersConfig = Field(default_factory=ProvidersConfig) gateway: GatewayConfig = Field(default_factory=GatewayConfig) tools: ToolsConfig = Field(default_factory=ToolsConfig) + audio: AudioConfig = Field(default_factory=AudioConfig) @property def workspace_path(self) -> Path: diff --git a/nanobot/providers/registry.py b/nanobot/providers/registry.py index da04cd7..3b6659e 100644 --- a/nanobot/providers/registry.py +++ b/nanobot/providers/registry.py @@ -359,15 +359,15 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( keywords=("dashscope-coding-plan", "coding-plan", "aliyun-coding", "bailian-coding"), env_key="DASHSCOPE_CODING_PLAN_API_KEY", display_name="Alibaba Cloud Coding Plan", - litellm_prefix="dashscope", # → dashscope/{model} - skip_prefixes=("dashscope/", "openrouter/"), + litellm_prefix="openai", # → openai/{model} (uses OpenAI-compatible endpoint) + skip_prefixes=("openai/", "dashscope/", "openrouter/"), env_extras=(), is_gateway=True, is_local=False, detect_by_key_prefix="sk-sp-", # coding plan API keys start with "sk-sp-" detect_by_base_keyword="coding.dashscope", default_api_base="https://coding.dashscope.aliyuncs.com/v1", - strip_model_prefix=False, + strip_model_prefix=True, # Strip "dashscope_coding_plan/" prefix model_overrides=(), ), # === Auxiliary (not a primary LLM provider) ============================ From 46192fbd2abe922390be1961819a86dc75c74321 Mon Sep 17 00:00:00 2001 From: coldxiangyu Date: Thu, 5 Mar 2026 20:18:13 +0800 Subject: [PATCH 06/15] fix(context): detect image MIME type from magic bytes instead of file extension Feishu downloads images with incorrect extensions (e.g. .jpg for PNG files). mimetypes.guess_type() relies on the file extension, causing a MIME mismatch that Anthropic rejects with 'image was specified using image/jpeg but appears to be image/png'. Fix: read the first bytes of the image data and detect the real MIME type via magic bytes (PNG: 0x89PNG, JPEG: 0xFFD8FF, GIF: GIF87a/GIF89a, WEBP: RIFF+WEBP). Fall back to mimetypes.guess_type() only when magic bytes are inconclusive. 
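
A minimal sketch of the mismatch (file name and bytes are illustrative):

    import mimetypes

    data = b"\x89PNG\r\n\x1a\n..."               # actual content is PNG
    print(mimetypes.guess_type("photo.jpg")[0])  # "image/jpeg" (wrong)
    print(data[:8] == b"\x89PNG\r\n\x1a\n")      # True -> "image/png"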
--- nanobot/agent/context.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py index df4825f..7ead317 100644 --- a/nanobot/agent/context.py +++ b/nanobot/agent/context.py @@ -12,6 +12,19 @@ from nanobot.agent.memory import MemoryStore from nanobot.agent.skills import SkillsLoader +def _detect_image_mime(data: bytes) -> str | None: + """Detect image MIME type from magic bytes, ignoring file extension.""" + if data[:8] == b"\x89PNG\r\n\x1a\n": + return "image/png" + if data[:3] == b"\xff\xd8\xff": + return "image/jpeg" + if data[:6] in (b"GIF87a", b"GIF89a"): + return "image/gif" + if data[:4] == b"RIFF" and data[8:12] == b"WEBP": + return "image/webp" + return None + + class ContextBuilder: """Builds the context (system prompt + messages) for the agent.""" @@ -136,10 +149,14 @@ Reply directly with text for conversations. Only use the 'message' tool to send images = [] for path in media: p = Path(path) - mime, _ = mimetypes.guess_type(path) - if not p.is_file() or not mime or not mime.startswith("image/"): + if not p.is_file(): continue - b64 = base64.b64encode(p.read_bytes()).decode() + raw = p.read_bytes() + # Detect real MIME type from magic bytes; fallback to filename guess + mime = _detect_image_mime(raw) or mimetypes.guess_type(path)[0] + if not mime or not mime.startswith("image/"): + continue + b64 = base64.b64encode(raw).decode() images.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}}) if not images: From 5ced08b1f23f5ef275465fbe3140f64d42c95ced Mon Sep 17 00:00:00 2001 From: pikaqqqqqq Date: Fri, 6 Mar 2026 01:54:00 +0800 Subject: [PATCH 07/15] fix(feishu): use msg_type "media" for mp4 video files Previously, mp4 video files were sent with msg_type "file", which meant users had to download them to play. Feishu requires msg_type "media" for audio and video files to enable inline playback in the chat. Changes: - Add _VIDEO_EXTS constant for video file extensions (.mp4, .mov, .avi) - Use msg_type "media" for both audio (_AUDIO_EXTS) and video (_VIDEO_EXTS) - Keep msg_type "file" for documents and other file types The upload_file API already uses file_type="mp4" for video files via the existing _FILE_TYPE_MAP, so only the send msg_type needed fixing. --- nanobot/channels/feishu.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/nanobot/channels/feishu.py b/nanobot/channels/feishu.py index e6f0049..3847ac1 100644 --- a/nanobot/channels/feishu.py +++ b/nanobot/channels/feishu.py @@ -474,6 +474,7 @@ class FeishuChannel(BaseChannel): _IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp", ".ico", ".tiff", ".tif"} _AUDIO_EXTS = {".opus"} + _VIDEO_EXTS = {".mp4", ".mov", ".avi"} _FILE_TYPE_MAP = { ".opus": "opus", ".mp4": "mp4", ".pdf": "pdf", ".doc": "doc", ".docx": "doc", ".xls": "xls", ".xlsx": "xls", ".ppt": "ppt", ".pptx": "ppt", @@ -682,7 +683,12 @@ class FeishuChannel(BaseChannel): else: key = await loop.run_in_executor(None, self._upload_file_sync, file_path) if key: - media_type = "audio" if ext in self._AUDIO_EXTS else "file" + # Use msg_type "media" for audio/video so users can play inline; + # "file" for everything else (documents, archives, etc.) 
+ if ext in self._AUDIO_EXTS or ext in self._VIDEO_EXTS: + media_type = "media" + else: + media_type = "file" await loop.run_in_executor( None, self._send_message_sync, receive_id_type, msg.chat_id, media_type, json.dumps({"file_key": key}, ensure_ascii=False), From 9ab4155991627e45dd2c88b028d35c55b82ecce9 Mon Sep 17 00:00:00 2001 From: nanobot-contributor Date: Fri, 6 Mar 2026 09:57:03 +0800 Subject: [PATCH 08/15] fix(cli): add Windows compatibility for signal handlers (PR #1400) SIGHUP and SIGPIPE are not available on Windows. Add hasattr() checks before registering these signal handlers to prevent AttributeError on Windows systems. Fixes compatibility issue introduced in PR #1400. --- nanobot/cli/commands.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index aca0778..eb3d833 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -7,6 +7,18 @@ import signal import sys from pathlib import Path +# Force UTF-8 encoding for Windows console +if sys.platform == "win32": + import locale + if sys.stdout.encoding != "utf-8": + os.environ["PYTHONIOENCODING"] = "utf-8" + # Re-open stdout/stderr with UTF-8 encoding + try: + sys.stdout.reconfigure(encoding="utf-8", errors="replace") + sys.stderr.reconfigure(encoding="utf-8", errors="replace") + except Exception: + pass + import typer from prompt_toolkit import PromptSession from prompt_toolkit.formatted_text import HTML @@ -525,9 +537,13 @@ def agent( signal.signal(signal.SIGINT, _handle_signal) signal.signal(signal.SIGTERM, _handle_signal) - signal.signal(signal.SIGHUP, _handle_signal) + # SIGHUP is not available on Windows + if hasattr(signal, 'SIGHUP'): + signal.signal(signal.SIGHUP, _handle_signal) # Ignore SIGPIPE to prevent silent process termination when writing to closed pipes - signal.signal(signal.SIGPIPE, signal.SIG_IGN) + # SIGPIPE is not available on Windows + if hasattr(signal, 'SIGPIPE'): + signal.signal(signal.SIGPIPE, signal.SIG_IGN) async def run_interactive(): bus_task = asyncio.create_task(agent_loop.run()) From c3526a7fdb2418d68c03d34db5ee43b624edbce9 Mon Sep 17 00:00:00 2001 From: PiKaqqqqqq <281705236@qq.com> Date: Fri, 6 Mar 2026 10:11:53 +0800 Subject: [PATCH 09/15] fix(feishu): smart message format selection (fixes #1548) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of always sending interactive cards, detect the optimal message format based on content: - text: short plain text (≤200 chars, no markdown) - post: medium text with links (≤2000 chars) - interactive: complex content (code, tables, headings, bold, lists) --- nanobot/channels/feishu.py | 143 +++++++++++++++++++++++++++++++++++-- pr-description.md | 47 ++++++++++++ 2 files changed, 186 insertions(+), 4 deletions(-) create mode 100644 pr-description.md diff --git a/nanobot/channels/feishu.py b/nanobot/channels/feishu.py index e6f0049..c405493 100644 --- a/nanobot/channels/feishu.py +++ b/nanobot/channels/feishu.py @@ -472,6 +472,121 @@ class FeishuChannel(BaseChannel): return elements or [{"tag": "markdown", "content": content}] + # ── Smart format detection ────────────────────────────────────────── + # Patterns that indicate "complex" markdown needing card rendering + _COMPLEX_MD_RE = re.compile( + r"```" # fenced code block + r"|^\|.+\|.*\n\s*\|[-:\s|]+\|" # markdown table (header + separator) + r"|^#{1,6}\s+" # headings + , re.MULTILINE, + ) + + # Simple markdown patterns (bold, italic, strikethrough) + 
_SIMPLE_MD_RE = re.compile(
+        r"\*\*.+?\*\*"                 # **bold**
+        r"|__.+?__"                    # __bold__
+        r"|(?<!\*)\*[^*\n]+?\*(?!\*)"  # *italic*
+        r"|~~.+?~~"                    # ~~strikethrough~~
+    )
+
+    # Markdown link: [text](url)
+    _MD_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
+
+    # List items (unordered / ordered) at line start
+    _LIST_RE = re.compile(r"^\s*[-*+]\s+", re.MULTILINE)
+    _OLIST_RE = re.compile(r"^\s*\d+\.\s+", re.MULTILINE)
+
+    # Length thresholds (chars) for text / post formats
+    _TEXT_MAX_LEN = 200
+    _POST_MAX_LEN = 2000
+
+    @classmethod
+    def _detect_msg_format(cls, content: str) -> str:
+        """Determine the optimal Feishu message format for *content*.
+
+        Returns one of:
+        - ``"text"`` – plain text, short and no markdown
+        - ``"post"`` – rich text (links only, moderate length)
+        - ``"interactive"`` – card with full markdown rendering
+        """
+        stripped = content.strip()
+
+        # Complex markdown (code blocks, tables, headings) → always card
+        if cls._COMPLEX_MD_RE.search(stripped):
+            return "interactive"
+
+        # Long content → card (better readability with card layout)
+        if len(stripped) > cls._POST_MAX_LEN:
+            return "interactive"
+
+        # Has bold/italic/strikethrough → card (post format can't render these)
+        if cls._SIMPLE_MD_RE.search(stripped):
+            return "interactive"
+
+        # Has list items → card (post format can't render list bullets well)
+        if cls._LIST_RE.search(stripped) or cls._OLIST_RE.search(stripped):
+            return "interactive"
+
+        # Has links → post format (supports <a> tags)
+        if cls._MD_LINK_RE.search(stripped):
+            return "post"
+
+        # Short plain text → text format
+        if len(stripped) <= cls._TEXT_MAX_LEN:
+            return "text"
+
+        # Medium plain text without any formatting → post format
+        return "post"
+
+    @classmethod
+    def _markdown_to_post(cls, content: str) -> str:
+        """Convert markdown content to Feishu post message JSON.
+
+        Handles links ``[text](url)`` as ``a`` tags; everything else as ``text`` tags.
+        Each line becomes a paragraph (row) in the post body.
+        """
+        lines = content.strip().split("\n")
+        paragraphs: list[list[dict]] = []
+
+        for line in lines:
+            elements: list[dict] = []
+            last_end = 0
+
+            for m in cls._MD_LINK_RE.finditer(line):
+                # Text before this link
+                before = line[last_end:m.start()]
+                if before:
+                    elements.append({"tag": "text", "text": before})
+                elements.append({
+                    "tag": "a",
+                    "text": m.group(1),
+                    "href": m.group(2),
+                })
+                last_end = m.end()
+
+            # Remaining text after last link
+            remaining = line[last_end:]
+            if remaining:
+                elements.append({"tag": "text", "text": remaining})
+
+            # Empty line → empty paragraph for spacing
+            if not elements:
+                elements.append({"tag": "text", "text": ""})
+
+            paragraphs.append(elements)
+
+        post_body = {
+            "zh_cn": {
+                "content": paragraphs,
+            }
+        }
+        return json.dumps(post_body, ensure_ascii=False)
+
     _IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp", ".ico", ".tiff", ".tif"}
     _AUDIO_EXTS = {".opus"}
     _FILE_TYPE_MAP = {
@@ -689,14 +804,34 @@
         )

         if msg.content and msg.content.strip():
-            elements = self._build_card_elements(msg.content)
-            for chunk in self._split_elements_by_table_limit(elements):
-                card = {"config": {"wide_screen_mode": True}, "elements": chunk}
+            fmt = self._detect_msg_format(msg.content)
+
+            if fmt == "text":
+                # Short plain text – send as simple text message
+                text_body = json.dumps({"text": msg.content.strip()}, ensure_ascii=False)
                 await loop.run_in_executor(
                     None, self._send_message_sync,
-                    receive_id_type, msg.chat_id, "interactive", json.dumps(card, ensure_ascii=False),
+                    receive_id_type, msg.chat_id, "text", text_body,
                 )
+
+            elif fmt == "post":
+                # Medium content with links – send as rich-text post
+                post_body = self._markdown_to_post(msg.content)
+                await loop.run_in_executor(
+                    None, self._send_message_sync,
+                    receive_id_type, msg.chat_id, "post", post_body,
+                )
+
+            else:
+                # Complex / long content – send as interactive card
+                elements = self._build_card_elements(msg.content)
+                for chunk in self._split_elements_by_table_limit(elements):
+                    card = {"config": 
{"wide_screen_mode": True}, "elements": chunk} + await loop.run_in_executor( + None, self._send_message_sync, + receive_id_type, msg.chat_id, "interactive", json.dumps(card, ensure_ascii=False), + ) + except Exception as e: logger.error("Error sending Feishu message: {}", e) diff --git a/pr-description.md b/pr-description.md new file mode 100644 index 0000000..dacab5c --- /dev/null +++ b/pr-description.md @@ -0,0 +1,47 @@ +## fix(feishu): smart message format selection (fixes #1548) + +### Problem + +Currently, the Feishu channel sends **all** messages as interactive cards (`msg_type: "interactive"`). This is overkill for short, simple replies like "OK" or "收到" — they look heavy and unnatural compared to normal chat messages. + +### Solution + +Implement smart message format selection that picks the most appropriate Feishu message type based on content analysis: + +| Content Type | Format | `msg_type` | +|---|---|---| +| Short plain text (≤ 200 chars, no markdown) | Text | `text` | +| Medium text with links (≤ 2000 chars, no complex formatting) | Rich Text Post | `post` | +| Long text, code blocks, tables, headings, bold/italic, lists | Interactive Card | `interactive` | + +### How it works + +1. **`_detect_msg_format(content)`** — Analyzes the message content and returns the optimal format: + - Checks for complex markdown (code blocks, tables, headings) → `interactive` + - Checks for simple markdown (bold, italic, lists) → `interactive` + - Checks for links → `post` (Feishu post format supports `` tags natively) + - Short plain text → `text` + - Medium plain text → `post` + +2. **`_markdown_to_post(content)`** — Converts markdown links `[text](url)` to Feishu post format with proper `a` tags. Each line becomes a paragraph in the post body. + +3. **Modified `send()` method** — Uses `_detect_msg_format()` to choose the right format, then dispatches to the appropriate sending logic. + +### Design decisions + +- **Post format for links only**: Feishu's post format (`[[{"tag":"text",...}]]`) doesn't support bold/italic rendering, so we only use it for messages containing links (where the `a` tag adds real value). Messages with bold/italic/lists still use cards which render markdown properly. +- **Conservative thresholds**: 200 chars for text, 2000 chars for post — these keep the UX natural without being too aggressive. +- **Backward compatible**: The card rendering path is completely unchanged. Only the routing logic is new. + +### Testing + +Format detection tested against 13 cases covering all content types: +- ✅ Plain text → `text` +- ✅ Links → `post` +- ✅ Bold/italic/code/tables/headings/lists → `interactive` +- ✅ Long content → `interactive` +- ✅ Post format generates valid Feishu post JSON with proper `a` tags + +### Changes + +- `nanobot/channels/feishu.py`: Added `_detect_msg_format()`, `_markdown_to_post()`, and updated `send()` method From 6fb4204ac6a5109a4ff068a17975615498c40c05 Mon Sep 17 00:00:00 2001 From: nanobot-contributor Date: Fri, 6 Mar 2026 11:47:00 +0800 Subject: [PATCH 10/15] fix(memory): handle list type tool call arguments Some LLM providers return tool_calls[0].arguments as a list instead of dict or str. Add handling to extract the first dict element from the list. 
Fixes /new command warning: 'unexpected arguments type list' --- nanobot/agent/memory.py | 7 +++ tests/test_memory_consolidation_types.py | 75 ++++++++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py index 93c1825..80fba5e 100644 --- a/nanobot/agent/memory.py +++ b/nanobot/agent/memory.py @@ -128,6 +128,13 @@ class MemoryStore: # Some providers return arguments as a JSON string instead of dict if isinstance(args, str): args = json.loads(args) + # Some providers return arguments as a list (handle edge case) + if isinstance(args, list): + if args and isinstance(args[0], dict): + args = args[0] + else: + logger.warning("Memory consolidation: unexpected arguments type list with non-dict content") + return False if not isinstance(args, dict): logger.warning("Memory consolidation: unexpected arguments type {}", type(args).__name__) return False diff --git a/tests/test_memory_consolidation_types.py b/tests/test_memory_consolidation_types.py index 375c802..ff15584 100644 --- a/tests/test_memory_consolidation_types.py +++ b/tests/test_memory_consolidation_types.py @@ -145,3 +145,78 @@ class TestMemoryConsolidationTypeHandling: assert result is True provider.chat.assert_not_called() + + @pytest.mark.asyncio + async def test_list_arguments_extracts_first_dict(self, tmp_path: Path) -> None: + """Some providers return arguments as a list - extract first element if it's a dict.""" + store = MemoryStore(tmp_path) + provider = AsyncMock() + + # Simulate arguments being a list containing a dict + response = LLMResponse( + content=None, + tool_calls=[ + ToolCallRequest( + id="call_1", + name="save_memory", + arguments=[{ + "history_entry": "[2026-01-01] User discussed testing.", + "memory_update": "# Memory\nUser likes testing.", + }], + ) + ], + ) + provider.chat = AsyncMock(return_value=response) + session = _make_session(message_count=60) + + result = await store.consolidate(session, provider, "test-model", memory_window=50) + + assert result is True + assert "User discussed testing." in store.history_file.read_text() + assert "User likes testing." 
in store.memory_file.read_text() + + @pytest.mark.asyncio + async def test_list_arguments_empty_list_returns_false(self, tmp_path: Path) -> None: + """Empty list arguments should return False.""" + store = MemoryStore(tmp_path) + provider = AsyncMock() + + response = LLMResponse( + content=None, + tool_calls=[ + ToolCallRequest( + id="call_1", + name="save_memory", + arguments=[], + ) + ], + ) + provider.chat = AsyncMock(return_value=response) + session = _make_session(message_count=60) + + result = await store.consolidate(session, provider, "test-model", memory_window=50) + + assert result is False + + @pytest.mark.asyncio + async def test_list_arguments_non_dict_content_returns_false(self, tmp_path: Path) -> None: + """List with non-dict content should return False.""" + store = MemoryStore(tmp_path) + provider = AsyncMock() + + response = LLMResponse( + content=None, + tool_calls=[ + ToolCallRequest( + id="call_1", + name="save_memory", + arguments=["string", "content"], + ) + ], + ) + provider.chat = AsyncMock(return_value=response) + session = _make_session(message_count=60) + + result = await store.consolidate(session, provider, "test-model", memory_window=50) + + assert result is False From fc0b38c3047c20241c94b38f1be6138191da41f6 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Fri, 6 Mar 2026 05:27:39 +0000 Subject: [PATCH 11/15] fix(memory): improve warning message for empty/non-dict list arguments --- nanobot/agent/memory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py index 80fba5e..21fe77d 100644 --- a/nanobot/agent/memory.py +++ b/nanobot/agent/memory.py @@ -133,7 +133,7 @@ class MemoryStore: if args and isinstance(args[0], dict): args = args[0] else: - logger.warning("Memory consolidation: unexpected arguments type list with non-dict content") + logger.warning("Memory consolidation: unexpected arguments as empty or non-dict list") return False if not isinstance(args, dict): logger.warning("Memory consolidation: unexpected arguments type {}", type(args).__name__) From ba63f6f62d9b2181b56863d8efe32215fe8f6321 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Fri, 6 Mar 2026 06:09:46 +0000 Subject: [PATCH 12/15] chore: remove pr-description.md from repo --- pr-description.md | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) delete mode 100644 pr-description.md diff --git a/pr-description.md b/pr-description.md deleted file mode 100644 index dacab5c..0000000 --- a/pr-description.md +++ /dev/null @@ -1,47 +0,0 @@ -## fix(feishu): smart message format selection (fixes #1548) - -### Problem - -Currently, the Feishu channel sends **all** messages as interactive cards (`msg_type: "interactive"`). This is overkill for short, simple replies like "OK" or "收到" — they look heavy and unnatural compared to normal chat messages. - -### Solution - -Implement smart message format selection that picks the most appropriate Feishu message type based on content analysis: - -| Content Type | Format | `msg_type` | -|---|---|---| -| Short plain text (≤ 200 chars, no markdown) | Text | `text` | -| Medium text with links (≤ 2000 chars, no complex formatting) | Rich Text Post | `post` | -| Long text, code blocks, tables, headings, bold/italic, lists | Interactive Card | `interactive` | - -### How it works - -1. 
**`_detect_msg_format(content)`** — Analyzes the message content and returns the optimal format:
-   - Checks for complex markdown (code blocks, tables, headings) → `interactive`
-   - Checks for simple markdown (bold, italic, lists) → `interactive`
-   - Checks for links → `post` (Feishu post format supports `<a>` tags natively)
-   - Short plain text → `text`
-   - Medium plain text → `post`
-
-2. **`_markdown_to_post(content)`** — Converts markdown links `[text](url)` to Feishu post format with proper `a` tags. Each line becomes a paragraph in the post body.
-
-3. **Modified `send()` method** — Uses `_detect_msg_format()` to choose the right format, then dispatches to the appropriate sending logic.
-
-### Design decisions
-
-- **Post format for links only**: Feishu's post format (`[[{"tag":"text",...}]]`) doesn't support bold/italic rendering, so we only use it for messages containing links (where the `a` tag adds real value). Messages with bold/italic/lists still use cards which render markdown properly.
-- **Conservative thresholds**: 200 chars for text, 2000 chars for post — these keep the UX natural without being too aggressive.
-- **Backward compatible**: The card rendering path is completely unchanged. Only the routing logic is new.
-
-### Testing
-
-Format detection tested against 13 cases covering all content types:
-- ✅ Plain text → `text`
-- ✅ Links → `post`
-- ✅ Bold/italic/code/tables/headings/lists → `interactive`
-- ✅ Long content → `interactive`
-- ✅ Post format generates valid Feishu post JSON with proper `a` tags
-
-### Changes
-
-- `nanobot/channels/feishu.py`: Added `_detect_msg_format()`, `_markdown_to_post()`, and updated `send()` method

From 3a01fe536a37c8424fc196b1b0aad3535a50af93 Mon Sep 17 00:00:00 2001
From: Re-bin
Date: Fri, 6 Mar 2026 06:49:09 +0000
Subject: [PATCH 13/15] refactor: move detect_image_mime to utils/helpers for reuse

---
 nanobot/agent/context.py | 16 ++--------------
 nanobot/utils/helpers.py | 13 +++++++++++++
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py
index 7ead317..27511fa 100644
--- a/nanobot/agent/context.py
+++ b/nanobot/agent/context.py
@@ -10,19 +10,7 @@ from typing import Any

 from nanobot.agent.memory import MemoryStore
 from nanobot.agent.skills import SkillsLoader
-
-
-def _detect_image_mime(data: bytes) -> str | None:
-    """Detect image MIME type from magic bytes, ignoring file extension."""
-    if data[:8] == b"\x89PNG\r\n\x1a\n":
-        return "image/png"
-    if data[:3] == b"\xff\xd8\xff":
-        return "image/jpeg"
-    if data[:6] in (b"GIF87a", b"GIF89a"):
-        return "image/gif"
-    if data[:4] == b"RIFF" and data[8:12] == b"WEBP":
-        return "image/webp"
-    return None
+from nanobot.utils.helpers import detect_image_mime


 class ContextBuilder:
@@ -153,7 +141,7 @@ Reply directly with text for conversations. 
Only use the 'message' tool to send
                 continue
             raw = p.read_bytes()
             # Detect real MIME type from magic bytes; fallback to filename guess
-            mime = _detect_image_mime(raw) or mimetypes.guess_type(path)[0]
+            mime = detect_image_mime(raw) or mimetypes.guess_type(path)[0]
             if not mime or not mime.startswith("image/"):
                 continue
             b64 = base64.b64encode(raw).decode()
diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py
index 3a8c802..b543174 100644
--- a/nanobot/utils/helpers.py
+++ b/nanobot/utils/helpers.py
@@ -5,6 +5,19 @@ from datetime import datetime
 from pathlib import Path
 
 
+def detect_image_mime(data: bytes) -> str | None:
+    """Detect image MIME type from magic bytes, ignoring file extension."""
+    if data[:8] == b"\x89PNG\r\n\x1a\n":
+        return "image/png"
+    if data[:3] == b"\xff\xd8\xff":
+        return "image/jpeg"
+    if data[:6] in (b"GIF87a", b"GIF89a"):
+        return "image/gif"
+    if data[:4] == b"RIFF" and data[8:12] == b"WEBP":
+        return "image/webp"
+    return None
+
+
 def ensure_dir(path: Path) -> Path:
     """Ensure directory exists, return it."""
     path.mkdir(parents=True, exist_ok=True)

From b817463939c9529ab119fec1c7be89dd2da68606 Mon Sep 17 00:00:00 2001
From: Re-bin
Date: Fri, 6 Mar 2026 07:13:04 +0000
Subject: [PATCH 14/15] chore: simplify Alibaba Coding Plan to apiBase hint, remove dedicated provider

---
 README.md                     |  2 +-
 nanobot/config/schema.py      | 28 +++-------------------------
 nanobot/providers/registry.py | 20 --------------------
 3 files changed, 4 insertions(+), 46 deletions(-)

diff --git a/README.md b/README.md
index 627bb37..0c49608 100644
--- a/README.md
+++ b/README.md
@@ -662,9 +662,9 @@ Config file: `~/.nanobot/config.json`
 
 > [!TIP]
 > - **Groq** provides free voice transcription via Whisper. If configured, Telegram voice messages will be automatically transcribed.
 > - **Zhipu Coding Plan**: If you're on Zhipu's coding plan, set `"apiBase": "https://open.bigmodel.cn/api/coding/paas/v4"` in your zhipu provider config.
-> - **Alibaba Cloud Coding Plan**: If you're on the Alibaba Cloud Coding Plan (BaiLian coding assistance), add configuration for `dashscope_coding_plan` provider with an API key starting with `sk-sp-` in your config. This provider uses OpenAI-compatible endpoint `https://coding.dashscope.aliyuncs.com/v1`.
 > - **MiniMax (Mainland China)**: If your API key is from MiniMax's mainland China platform (minimaxi.com), set `"apiBase": "https://api.minimaxi.com/v1"` in your minimax provider config.
 > - **VolcEngine Coding Plan**: If you're on VolcEngine's coding plan, set `"apiBase": "https://ark.cn-beijing.volces.com/api/coding/v3"` in your volcengine provider config.
+> - **Alibaba Cloud Coding Plan**: If you're on the Alibaba Cloud Coding Plan (BaiLian), set `"apiBase": "https://coding.dashscope.aliyuncs.com/v1"` in your dashscope provider config.
| Provider | Purpose | Get API Key |
 |----------|---------|-------------|
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index e80c8d0..2073eeb 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -5,7 +5,7 @@ from typing import Literal
 
 from pydantic import BaseModel, ConfigDict, Field
 from pydantic.alias_generators import to_camel
-from pydantic_settings import BaseSettings, SettingsConfigDict
+from pydantic_settings import BaseSettings
 
 
 class Base(BaseModel):
@@ -258,20 +258,13 @@ class ProvidersConfig(Base):
     groq: ProviderConfig = Field(default_factory=ProviderConfig)
     zhipu: ProviderConfig = Field(default_factory=ProviderConfig)
     dashscope: ProviderConfig = Field(default_factory=ProviderConfig)  # 阿里云通义千问
-    dashscope_coding_plan: ProviderConfig = Field(
-        default_factory=ProviderConfig
-    )  # 阿里云百炼Coding Plan
     vllm: ProviderConfig = Field(default_factory=ProviderConfig)
     gemini: ProviderConfig = Field(default_factory=ProviderConfig)
     moonshot: ProviderConfig = Field(default_factory=ProviderConfig)
     minimax: ProviderConfig = Field(default_factory=ProviderConfig)
     aihubmix: ProviderConfig = Field(default_factory=ProviderConfig)  # AiHubMix API gateway
-    siliconflow: ProviderConfig = Field(
-        default_factory=ProviderConfig
-    )  # SiliconFlow (硅基流动) API gateway
-    volcengine: ProviderConfig = Field(
-        default_factory=ProviderConfig
-    )  # VolcEngine (火山引擎) API gateway
+    siliconflow: ProviderConfig = Field(default_factory=ProviderConfig)  # SiliconFlow (硅基流动)
+    volcengine: ProviderConfig = Field(default_factory=ProviderConfig)  # VolcEngine (火山引擎)
     openai_codex: ProviderConfig = Field(default_factory=ProviderConfig)  # OpenAI Codex (OAuth)
     github_copilot: ProviderConfig = Field(default_factory=ProviderConfig)  # Github Copilot (OAuth)
 
@@ -326,20 +319,6 @@ class MCPServerConfig(Base):
     tool_timeout: int = 30  # seconds before a tool call is cancelled
 
 
-class TTSConfig(Base):
-    """Text-to-Speech configuration."""
-
-    provider: str = "edge_tts"  # Default TTS provider
-    voice: str = "en-US-ChristopherNeural"  # Default voice
-    speed: float = 1.0  # Voice speed multiplier
-
-
-class AudioConfig(Base):
-    """Audio configuration."""
-
-    tts: TTSConfig = Field(default_factory=TTSConfig)
-
-
 class ToolsConfig(Base):
     """Tools configuration."""
 
@@ -357,7 +336,6 @@ class Config(BaseSettings):
     providers: ProvidersConfig = Field(default_factory=ProvidersConfig)
     gateway: GatewayConfig = Field(default_factory=GatewayConfig)
     tools: ToolsConfig = Field(default_factory=ToolsConfig)
-    audio: AudioConfig = Field(default_factory=AudioConfig)
 
     @property
     def workspace_path(self) -> Path:
diff --git a/nanobot/providers/registry.py b/nanobot/providers/registry.py
index 3b6659e..59ba31a 100644
--- a/nanobot/providers/registry.py
+++ b/nanobot/providers/registry.py
@@ -350,26 +350,6 @@ PROVIDERS: tuple[ProviderSpec, ...] = (
         strip_model_prefix=False,
         model_overrides=(),
     ),
-    # === Coding Plan Gateway Providers =====================================
-    # Alibaba Cloud Coding Plan: OpenAI-compatible gateway for coding assistance.
-    # Uses special API key format starting with "sk-sp-" to distinguish it
-    # from regular dashscope keys. Uses the OpenAI-compatible endpoint.
- ProviderSpec( - name="dashscope_coding_plan", - keywords=("dashscope-coding-plan", "coding-plan", "aliyun-coding", "bailian-coding"), - env_key="DASHSCOPE_CODING_PLAN_API_KEY", - display_name="Alibaba Cloud Coding Plan", - litellm_prefix="openai", # → openai/{model} (uses OpenAI-compatible endpoint) - skip_prefixes=("openai/", "dashscope/", "openrouter/"), - env_extras=(), - is_gateway=True, - is_local=False, - detect_by_key_prefix="sk-sp-", # coding plan API keys start with "sk-sp-" - detect_by_base_keyword="coding.dashscope", - default_api_base="https://coding.dashscope.aliyuncs.com/v1", - strip_model_prefix=True, # Strip "dashscope_coding_plan/" prefix - model_overrides=(), - ), # === Auxiliary (not a primary LLM provider) ============================ # Groq: mainly used for Whisper voice transcription, also usable for LLM. # Needs "groq/" prefix for LiteLLM routing. Placed last — it rarely wins fallback. From dcebb94b014cd80dd93e80acad0664cf708ae767 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Fri, 6 Mar 2026 07:16:20 +0000 Subject: [PATCH 15/15] style: remove trailing whitespace --- nanobot/providers/litellm_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py index a1819a2..620424e 100644 --- a/nanobot/providers/litellm_provider.py +++ b/nanobot/providers/litellm_provider.py @@ -298,7 +298,7 @@ class LiteLLMProvider(LLMProvider): reasoning_content = getattr(message, "reasoning_content", None) or None thinking_blocks = getattr(message, "thinking_blocks", None) or None - + return LLMResponse( content=content, tool_calls=tool_calls,
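A quick sanity check for the `detect_image_mime` helper relocated in PATCH 13: because it keys off magic bytes, hand-built headers are enough to exercise every branch. A minimal sketch, assuming the `nanobot` package is importable; the byte strings below are illustrative headers, not real image files:

```python
from nanobot.utils.helpers import detect_image_mime

# Magic-byte detection ignores the (possibly wrong) file extension entirely.
assert detect_image_mime(b"\x89PNG\r\n\x1a\n" + b"\x00" * 8) == "image/png"
assert detect_image_mime(b"\xff\xd8\xff\xe0" + b"JFIF") == "image/jpeg"
assert detect_image_mime(b"GIF89a" + b"\x00" * 6) == "image/gif"

# WEBP needs both the RIFF tag at offset 0 and the WEBP tag at offset 8.
assert detect_image_mime(b"RIFF" + b"\x00\x00\x00\x00" + b"WEBP") == "image/webp"

# Unrecognized bytes fall through to None; the call site in context.py then
# falls back to mimetypes.guess_type(path)[0].
assert detect_image_mime(b"not an image") is None
```

Trying magic bytes first and only then falling back to the filename guess (as the context.py call site does) means a mislabeled extension cannot smuggle a non-image past the `image/` check.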