Merge branch 'main' into feat-volcengine-tuning

2026-03-06 14:47:08 +08:00
parent 0d60acf2d5 3bbeb147b6
commit b3710165c0
14 changed files with 624 additions and 69 deletions
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -202,9 +202,18 @@ class AgentLoop:

            if response.has_tool_calls:
                if on_progress:
-                    clean = self._strip_think(response.content)
-                    if clean:
-                        await on_progress(clean)
+                    thoughts = [
+                        self._strip_think(response.content),
+                        response.reasoning_content,
+                        *(
+                            f"Thinking [{b.get('signature', '...')}]:\n{b.get('thought', '...')}"
+                            for b in (response.thinking_blocks or [])
+                            if isinstance(b, dict) and "signature" in b
+                        ),
+                    ]
+                    combined_thoughts = "\n\n".join(filter(None, thoughts))
+                    if combined_thoughts:
+                        await on_progress(combined_thoughts)
                    await on_progress(self._tool_hint(response.tool_calls), tool_hint=True)

                tool_call_dicts = [
--- a/nanobot/agent/memory.py
+++ b/nanobot/agent/memory.py
@@ -128,6 +128,13 @@ class MemoryStore:
            # Some providers return arguments as a JSON string instead of dict
            if isinstance(args, str):
                args = json.loads(args)
+            # Some providers return arguments as a list (handle edge case)
+            if isinstance(args, list):
+                if args and isinstance(args[0], dict):
+                    args = args[0]
+                else:
+                    logger.warning("Memory consolidation: unexpected arguments as empty or non-dict list")
+                    return False
            if not isinstance(args, dict):
                logger.warning("Memory consolidation: unexpected arguments type {}", type(args).__name__)
                return False
--- a/nanobot/agent/tools/mcp.py
+++ b/nanobot/agent/tools/mcp.py
@@ -58,17 +58,48 @@ async def connect_mcp_servers(
 ) -> None:
    """Connect to configured MCP servers and register their tools."""
    from mcp import ClientSession, StdioServerParameters
+    from mcp.client.sse import sse_client
    from mcp.client.stdio import stdio_client
+    from mcp.client.streamable_http import streamable_http_client

    for name, cfg in mcp_servers.items():
        try:
-            if cfg.command:
+            transport_type = cfg.type
+            if not transport_type:
+                if cfg.command:
+                    transport_type = "stdio"
+                elif cfg.url:
+                    # Convention: URLs ending with /sse use SSE transport; others use streamableHttp
+                    transport_type = (
+                        "sse" if cfg.url.rstrip("/").endswith("/sse") else "streamableHttp"
+                    )
+                else:
+                    logger.warning("MCP server '{}': no command or url configured, skipping", name)
+                    continue
+
+            if transport_type == "stdio":
                params = StdioServerParameters(
                    command=cfg.command, args=cfg.args, env=cfg.env or None
                )
                read, write = await stack.enter_async_context(stdio_client(params))
-            elif cfg.url:
-                from mcp.client.streamable_http import streamable_http_client
+            elif transport_type == "sse":
+                def httpx_client_factory(
+                    headers: dict[str, str] | None = None,
+                    timeout: httpx.Timeout | None = None,
+                    auth: httpx.Auth | None = None,
+                ) -> httpx.AsyncClient:
+                    merged_headers = {**(cfg.headers or {}), **(headers or {})}
+                    return httpx.AsyncClient(
+                        headers=merged_headers or None,
+                        follow_redirects=True,
+                        timeout=timeout,
+                        auth=auth,
+                    )
+
+                read, write = await stack.enter_async_context(
+                    sse_client(cfg.url, httpx_client_factory=httpx_client_factory)
+                )
+            elif transport_type == "streamableHttp":
                # Always provide an explicit httpx client so MCP HTTP transport does not
                # inherit httpx's default 5s timeout and preempt the higher-level tool timeout.
                http_client = await stack.enter_async_context(
@@ -82,7 +113,7 @@ async def connect_mcp_servers(
                    streamable_http_client(cfg.url, http_client=http_client)
                )
            else:
-                logger.warning("MCP server '{}': no command or url configured, skipping", name)
+                logger.warning("MCP server '{}': unknown transport type '{}'", name, transport_type)
                continue

            session = await stack.enter_async_context(ClientSession(read, write))
--- a/nanobot/channels/discord.py
+++ b/nanobot/channels/discord.py
@@ -54,6 +54,7 @@ class DiscordChannel(BaseChannel):
        self._heartbeat_task: asyncio.Task | None = None
        self._typing_tasks: dict[str, asyncio.Task] = {}
        self._http: httpx.AsyncClient | None = None
+        self._bot_user_id: str | None = None

    async def start(self) -> None:
        """Start the Discord gateway connection."""
@@ -170,6 +171,10 @@ class DiscordChannel(BaseChannel):
                await self._identify()
            elif op == 0 and event_type == "READY":
                logger.info("Discord gateway READY")
+                # Capture bot user ID for mention detection
+                user_data = payload.get("user") or {}
+                self._bot_user_id = user_data.get("id")
+                logger.info("Discord bot connected as user {}", self._bot_user_id)
            elif op == 0 and event_type == "MESSAGE_CREATE":
                await self._handle_message_create(payload)
            elif op == 7:
@@ -226,6 +231,7 @@ class DiscordChannel(BaseChannel):
        sender_id = str(author.get("id", ""))
        channel_id = str(payload.get("channel_id", ""))
        content = payload.get("content") or ""
+        guild_id = payload.get("guild_id")

        if not sender_id or not channel_id:
            return
@@ -233,6 +239,11 @@ class DiscordChannel(BaseChannel):
        if not self.is_allowed(sender_id):
            return

+        # Check group channel policy (DMs always respond if is_allowed passes)
+        if guild_id is not None:
+            if not self._should_respond_in_group(payload, content):
+                return
+
        content_parts = [content] if content else []
        media_paths: list[str] = []
        media_dir = Path.home() / ".nanobot" / "media"
@@ -269,11 +280,32 @@ class DiscordChannel(BaseChannel):
            media=media_paths,
            metadata={
                "message_id": str(payload.get("id", "")),
-                "guild_id": payload.get("guild_id"),
+                "guild_id": guild_id,
                "reply_to": reply_to,
            },
        )

+    def _should_respond_in_group(self, payload: dict[str, Any], content: str) -> bool:
+        """Check if bot should respond in a group channel based on policy."""
+        if self.config.group_policy == "open":
+            return True
+
+        if self.config.group_policy == "mention":
+            # Check if bot was mentioned in the message
+            if self._bot_user_id:
+                # Check mentions array
+                mentions = payload.get("mentions") or []
+                for mention in mentions:
+                    if str(mention.get("id")) == self._bot_user_id:
+                        return True
+                # Also check content for mention format <@USER_ID>
+                if f"<@{self._bot_user_id}>" in content or f"<@!{self._bot_user_id}>" in content:
+                    return True
+            logger.debug("Discord message in {} ignored (bot not mentioned)", payload.get("channel_id"))
+            return False
+
+        return True
+
    async def _start_typing(self, channel_id: str) -> None:
        """Start periodic typing indicator for a channel."""
        await self._stop_typing(channel_id)
--- a/nanobot/channels/feishu.py
+++ b/nanobot/channels/feishu.py
@@ -290,16 +290,28 @@ class FeishuChannel(BaseChannel):
            log_level=lark.LogLevel.INFO
        )

-        # Start WebSocket client in a separate thread with reconnect loop
+        # Start WebSocket client in a separate thread with reconnect loop.
+        # A dedicated event loop is created for this thread so that lark_oapi's
+        # module-level `loop = asyncio.get_event_loop()` picks up an idle loop
+        # instead of the already-running main asyncio loop, which would cause
+        # "This event loop is already running" errors.
        def run_ws():
-            while self._running:
-                try:
-                    self._ws_client.start()
-                except Exception as e:
-                    logger.warning("Feishu WebSocket error: {}", e)
-                if self._running:
-                    import time
-                    time.sleep(5)
+            import time
+            import lark_oapi.ws.client as _lark_ws_client
+            ws_loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(ws_loop)
+            # Patch the module-level loop used by lark's ws Client.start()
+            _lark_ws_client.loop = ws_loop
+            try:
+                while self._running:
+                    try:
+                        self._ws_client.start()
+                    except Exception as e:
+                        logger.warning("Feishu WebSocket error: {}", e)
+                    if self._running:
+                        time.sleep(5)
+            finally:
+                ws_loop.close()

        self._ws_thread = threading.Thread(target=run_ws, daemon=True)
        self._ws_thread.start()
@@ -398,6 +410,34 @@ class FeishuChannel(BaseChannel):
            elements.extend(self._split_headings(remaining))
        return elements or [{"tag": "markdown", "content": content}]

+    @staticmethod
+    def _split_elements_by_table_limit(elements: list[dict], max_tables: int = 1) -> list[list[dict]]:
+        """Split card elements into groups with at most *max_tables* table elements each.
+
+        Feishu cards have a hard limit of one table per card (API error 11310).
+        When the rendered content contains multiple markdown tables each table is
+        placed in a separate card message so every table reaches the user.
+        """
+        if not elements:
+            return [[]]
+        groups: list[list[dict]] = []
+        current: list[dict] = []
+        table_count = 0
+        for el in elements:
+            if el.get("tag") == "table":
+                if table_count >= max_tables:
+                    if current:
+                        groups.append(current)
+                    current = []
+                    table_count = 0
+                current.append(el)
+                table_count += 1
+            else:
+                current.append(el)
+        if current:
+            groups.append(current)
+        return groups or [[]]
+
    def _split_headings(self, content: str) -> list[dict]:
        """Split content by headings, converting headings to div elements."""
        protected = content
@@ -432,8 +472,124 @@ class FeishuChannel(BaseChannel):

        return elements or [{"tag": "markdown", "content": content}]

+    # ── Smart format detection ──────────────────────────────────────────
+    # Patterns that indicate "complex" markdown needing card rendering
+    _COMPLEX_MD_RE = re.compile(
+        r"```"                        # fenced code block
+        r"|^\|.+\|.*\n\s*\|[-:\s|]+\|"  # markdown table (header + separator)
+        r"|^#{1,6}\s+"                # headings
+        , re.MULTILINE,
+    )
+
+    # Simple markdown patterns (bold, italic, strikethrough)
+    _SIMPLE_MD_RE = re.compile(
+        r"\*\*.+?\*\*"               # **bold**
+        r"|__.+?__"                   # __bold__
+        r"|(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)"  # *italic* (single *)
+        r"|~~.+?~~"                   # ~~strikethrough~~
+        , re.DOTALL,
+    )
+
+    # Markdown link: [text](url)
+    _MD_LINK_RE = re.compile(r"\[([^\]]+)\]\((https?://[^\)]+)\)")
+
+    # Unordered list items
+    _LIST_RE = re.compile(r"^[\s]*[-*+]\s+", re.MULTILINE)
+
+    # Ordered list items
+    _OLIST_RE = re.compile(r"^[\s]*\d+\.\s+", re.MULTILINE)
+
+    # Max length for plain text format
+    _TEXT_MAX_LEN = 200
+
+    # Max length for post (rich text) format; beyond this, use card
+    _POST_MAX_LEN = 2000
+
+    @classmethod
+    def _detect_msg_format(cls, content: str) -> str:
+        """Determine the optimal Feishu message format for *content*.
+
+        Returns one of:
+        - ``"text"``        – plain text, short and no markdown
+        - ``"post"``        – rich text (links only, moderate length)
+        - ``"interactive"`` – card with full markdown rendering
+        """
+        stripped = content.strip()
+
+        # Complex markdown (code blocks, tables, headings) → always card
+        if cls._COMPLEX_MD_RE.search(stripped):
+            return "interactive"
+
+        # Long content → card (better readability with card layout)
+        if len(stripped) > cls._POST_MAX_LEN:
+            return "interactive"
+
+        # Has bold/italic/strikethrough → card (post format can't render these)
+        if cls._SIMPLE_MD_RE.search(stripped):
+            return "interactive"
+
+        # Has list items → card (post format can't render list bullets well)
+        if cls._LIST_RE.search(stripped) or cls._OLIST_RE.search(stripped):
+            return "interactive"
+
+        # Has links → post format (supports <a> tags)
+        if cls._MD_LINK_RE.search(stripped):
+            return "post"
+
+        # Short plain text → text format
+        if len(stripped) <= cls._TEXT_MAX_LEN:
+            return "text"
+
+        # Medium plain text without any formatting → post format
+        return "post"
+
+    @classmethod
+    def _markdown_to_post(cls, content: str) -> str:
+        """Convert markdown content to Feishu post message JSON.
+
+        Handles links ``[text](url)`` as ``a`` tags; everything else as ``text`` tags.
+        Each line becomes a paragraph (row) in the post body.
+        """
+        lines = content.strip().split("\n")
+        paragraphs: list[list[dict]] = []
+
+        for line in lines:
+            elements: list[dict] = []
+            last_end = 0
+
+            for m in cls._MD_LINK_RE.finditer(line):
+                # Text before this link
+                before = line[last_end:m.start()]
+                if before:
+                    elements.append({"tag": "text", "text": before})
+                elements.append({
+                    "tag": "a",
+                    "text": m.group(1),
+                    "href": m.group(2),
+                })
+                last_end = m.end()
+
+            # Remaining text after last link
+            remaining = line[last_end:]
+            if remaining:
+                elements.append({"tag": "text", "text": remaining})
+
+            # Empty line → empty paragraph for spacing
+            if not elements:
+                elements.append({"tag": "text", "text": ""})
+
+            paragraphs.append(elements)
+
+        post_body = {
+            "zh_cn": {
+                "content": paragraphs,
+            }
+        }
+        return json.dumps(post_body, ensure_ascii=False)
+
    _IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp", ".ico", ".tiff", ".tif"}
    _AUDIO_EXTS = {".opus"}
+    _VIDEO_EXTS = {".mp4", ".mov", ".avi"}
    _FILE_TYPE_MAP = {
        ".opus": "opus", ".mp4": "mp4", ".pdf": "pdf", ".doc": "doc", ".docx": "doc",
        ".xls": "xls", ".xlsx": "xls", ".ppt": "ppt", ".pptx": "ppt",
@@ -642,18 +798,45 @@ class FeishuChannel(BaseChannel):
                else:
                    key = await loop.run_in_executor(None, self._upload_file_sync, file_path)
                    if key:
-                        media_type = "audio" if ext in self._AUDIO_EXTS else "file"
+                        # Use msg_type "media" for audio/video so users can play inline;
+                        # "file" for everything else (documents, archives, etc.)
+                        if ext in self._AUDIO_EXTS or ext in self._VIDEO_EXTS:
+                            media_type = "media"
+                        else:
+                            media_type = "file"
                        await loop.run_in_executor(
                            None, self._send_message_sync,
                            receive_id_type, msg.chat_id, media_type, json.dumps({"file_key": key}, ensure_ascii=False),
                        )

            if msg.content and msg.content.strip():
-                card = {"config": {"wide_screen_mode": True}, "elements": self._build_card_elements(msg.content)}
-                await loop.run_in_executor(
-                    None, self._send_message_sync,
-                    receive_id_type, msg.chat_id, "interactive", json.dumps(card, ensure_ascii=False),
-                )
+                fmt = self._detect_msg_format(msg.content)
+
+                if fmt == "text":
+                    # Short plain text – send as simple text message
+                    text_body = json.dumps({"text": msg.content.strip()}, ensure_ascii=False)
+                    await loop.run_in_executor(
+                        None, self._send_message_sync,
+                        receive_id_type, msg.chat_id, "text", text_body,
+                    )
+
+                elif fmt == "post":
+                    # Medium content with links – send as rich-text post
+                    post_body = self._markdown_to_post(msg.content)
+                    await loop.run_in_executor(
+                        None, self._send_message_sync,
+                        receive_id_type, msg.chat_id, "post", post_body,
+                    )
+
+                else:
+                    # Complex / long content – send as interactive card
+                    elements = self._build_card_elements(msg.content)
+                    for chunk in self._split_elements_by_table_limit(elements):
+                        card = {"config": {"wide_screen_mode": True}, "elements": chunk}
+                        await loop.run_in_executor(
+                            None, self._send_message_sync,
+                            receive_id_type, msg.chat_id, "interactive", json.dumps(card, ensure_ascii=False),
+                        )

        except Exception as e:
            logger.error("Error sending Feishu message: {}", e)
--- a/nanobot/channels/telegram.py
+++ b/nanobot/channels/telegram.py
@@ -225,7 +225,9 @@ class TelegramChannel(BaseChannel):
            logger.warning("Telegram bot not running")
            return

-        self._stop_typing(msg.chat_id)
+        # Only stop typing indicator for final responses
+        if not msg.metadata.get("_progress", False):
+            self._stop_typing(msg.chat_id)

        try:
            chat_id = int(msg.chat_id)
@@ -269,23 +271,41 @@ class TelegramChannel(BaseChannel):

        # Send text content
        if msg.content and msg.content != "[empty message]":
+            is_progress = msg.metadata.get("_progress", False)
+            draft_id = msg.metadata.get("message_id")
+            
            for chunk in _split_message(msg.content):
                try:
                    html = _markdown_to_telegram_html(chunk)
-                    await self._app.bot.send_message(
-                        chat_id=chat_id,
-                        text=html,
-                        parse_mode="HTML",
-                        reply_parameters=reply_params
-                    )
+                    if is_progress and draft_id:
+                        await self._app.bot.send_message_draft(
+                            chat_id=chat_id,
+                            draft_id=draft_id,
+                            text=html,
+                            parse_mode="HTML"
+                        )
+                    else:
+                        await self._app.bot.send_message(
+                            chat_id=chat_id,
+                            text=html,
+                            parse_mode="HTML",
+                            reply_parameters=reply_params
+                        )
                except Exception as e:
                    logger.warning("HTML parse failed, falling back to plain text: {}", e)
                    try:
-                        await self._app.bot.send_message(
-                            chat_id=chat_id,
-                            text=chunk,
-                            reply_parameters=reply_params
-                        )
+                        if is_progress and draft_id:
+                            await self._app.bot.send_message_draft(
+                                chat_id=chat_id,
+                                draft_id=draft_id,
+                                text=chunk
+                            )
+                        else:
+                            await self._app.bot.send_message(
+                                chat_id=chat_id,
+                                text=chunk,
+                                reply_parameters=reply_params
+                            )
                    except Exception as e2:
                        logger.error("Error sending Telegram message: {}", e2)

--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -7,6 +7,18 @@ import signal
 import sys
 from pathlib import Path

+# Force UTF-8 encoding for Windows console
+if sys.platform == "win32":
+    import locale
+    if sys.stdout.encoding != "utf-8":
+        os.environ["PYTHONIOENCODING"] = "utf-8"
+        # Re-open stdout/stderr with UTF-8 encoding
+        try:
+            sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+            sys.stderr.reconfigure(encoding="utf-8", errors="replace")
+        except Exception:
+            pass
+
 import typer
 from prompt_toolkit import PromptSession
 from prompt_toolkit.formatted_text import HTML
@@ -244,13 +256,15 @@ def _make_provider(config: Config):
@app.command()
 def gateway(
    port: int = typer.Option(18790, "--port", "-p", help="Gateway port"),
+    workspace: str | None = typer.Option(None, "--workspace", "-w", help="Workspace directory"),
+    config: str | None = typer.Option(None, "--config", "-c", help="Config file path"),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Verbose output"),
 ):
    """Start the nanobot gateway."""
    from nanobot.agent.loop import AgentLoop
    from nanobot.bus.queue import MessageBus
    from nanobot.channels.manager import ChannelManager
-    from nanobot.config.loader import get_data_dir, load_config
+    from nanobot.config.loader import load_config
    from nanobot.cron.service import CronService
    from nanobot.cron.types import CronJob
    from nanobot.heartbeat.service import HeartbeatService
@@ -260,16 +274,20 @@ def gateway(
        import logging
        logging.basicConfig(level=logging.DEBUG)

-    console.print(f"{__logo__} Starting nanobot gateway on port {port}...")
+    config_path = Path(config) if config else None
+    config = load_config(config_path)
+    if workspace:
+        config.agents.defaults.workspace = workspace

-    config = load_config()
+    console.print(f"{__logo__} Starting nanobot gateway on port {port}...")
    sync_workspace_templates(config.workspace_path)
    bus = MessageBus()
    provider = _make_provider(config)
    session_manager = SessionManager(config.workspace_path)

    # Create cron service first (callback set after agent creation)
-    cron_store_path = get_data_dir() / "cron" / "jobs.json"
+    # Use workspace path for per-instance cron store
+    cron_store_path = config.workspace_path / "cron" / "jobs.json"
    cron = CronService(cron_store_path)

    # Create agent with cron service
@@ -511,12 +529,21 @@ def agent(
        else:
            cli_channel, cli_chat_id = "cli", session_id

-        def _exit_on_sigint(signum, frame):
+        def _handle_signal(signum, frame):
+            sig_name = signal.Signals(signum).name
            _restore_terminal()
-            console.print("\nGoodbye!")
-            os._exit(0)
+            console.print(f"\nReceived {sig_name}, goodbye!")
+            sys.exit(0)

-        signal.signal(signal.SIGINT, _exit_on_sigint)
+        signal.signal(signal.SIGINT, _handle_signal)
+        signal.signal(signal.SIGTERM, _handle_signal)
+        # SIGHUP is not available on Windows
+        if hasattr(signal, 'SIGHUP'):
+            signal.signal(signal.SIGHUP, _handle_signal)
+        # Ignore SIGPIPE to prevent silent process termination when writing to closed pipes
+        # SIGPIPE is not available on Windows
+        if hasattr(signal, 'SIGPIPE'):
+            signal.signal(signal.SIGPIPE, signal.SIG_IGN)

        async def run_interactive():
            bus_task = asyncio.create_task(agent_loop.run())
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -29,7 +29,9 @@ class TelegramConfig(Base):
    enabled: bool = False
    token: str = ""  # Bot token from @BotFather
    allow_from: list[str] = Field(default_factory=list)  # Allowed user IDs or usernames
-    proxy: str | None = None  # HTTP/SOCKS5 proxy URL, e.g. "http://127.0.0.1:7890" or "socks5://127.0.0.1:1080"
+    proxy: str | None = (
+        None  # HTTP/SOCKS5 proxy URL, e.g. "http://127.0.0.1:7890" or "socks5://127.0.0.1:1080"
+    )
    reply_to_message: bool = False  # If true, bot replies quote the original message


@@ -42,7 +44,9 @@ class FeishuConfig(Base):
    encrypt_key: str = ""  # Encrypt Key for event subscription (optional)
    verification_token: str = ""  # Verification Token for event subscription (optional)
    allow_from: list[str] = Field(default_factory=list)  # Allowed user open_ids
-    react_emoji: str = "THUMBSUP"  # Emoji type for message reactions (e.g. THUMBSUP, OK, DONE, SMILE)
+    react_emoji: str = (
+        "THUMBSUP"  # Emoji type for message reactions (e.g. THUMBSUP, OK, DONE, SMILE)
+    )


 class DingTalkConfig(Base):
@@ -62,6 +66,7 @@ class DiscordConfig(Base):
    allow_from: list[str] = Field(default_factory=list)  # Allowed user IDs
    gateway_url: str = "wss://gateway.discord.gg/?v=10&encoding=json"
    intents: int = 37377  # GUILDS + GUILD_MESSAGES + DIRECT_MESSAGES + MESSAGE_CONTENT
+    group_policy: Literal["mention", "open"] = "mention"


 class MatrixConfig(Base):
@@ -72,9 +77,13 @@ class MatrixConfig(Base):
    access_token: str = ""
    user_id: str = ""  # @bot:matrix.org
    device_id: str = ""
-    e2ee_enabled: bool = True # Enable Matrix E2EE support (encryption + encrypted room handling).
-    sync_stop_grace_seconds: int = 2 # Max seconds to wait for sync_forever to stop gracefully before cancellation fallback.
-    max_media_bytes: int = 20 * 1024 * 1024 # Max attachment size accepted for Matrix media handling (inbound + outbound).
+    e2ee_enabled: bool = True  # Enable Matrix E2EE support (encryption + encrypted room handling).
+    sync_stop_grace_seconds: int = (
+        2  # Max seconds to wait for sync_forever to stop gracefully before cancellation fallback.
+    )
+    max_media_bytes: int = (
+        20 * 1024 * 1024
+    )  # Max attachment size accepted for Matrix media handling (inbound + outbound).
    allow_from: list[str] = Field(default_factory=list)
    group_policy: Literal["open", "mention", "allowlist"] = "open"
    group_allow_from: list[str] = Field(default_factory=list)
@@ -105,7 +114,9 @@ class EmailConfig(Base):
    from_address: str = ""

    # Behavior
-    auto_reply_enabled: bool = True  # If false, inbound email is read but no automatic reply is sent
+    auto_reply_enabled: bool = (
+        True  # If false, inbound email is read but no automatic reply is sent
+    )
    poll_interval_seconds: int = 30
    mark_seen: bool = True
    max_body_chars: int = 12000
@@ -183,27 +194,32 @@ class QQConfig(Base):
    enabled: bool = False
    app_id: str = ""  # 机器人 ID (AppID) from q.qq.com
    secret: str = ""  # 机器人密钥 (AppSecret) from q.qq.com
-    allow_from: list[str] = Field(default_factory=list)  # Allowed user openids (empty = public access)
+    allow_from: list[str] = Field(
+        default_factory=list
+    )  # Allowed user openids (empty = public access)
+

 class MatrixConfig(Base):
    """Matrix (Element) channel configuration."""
+
    enabled: bool = False
    homeserver: str = "https://matrix.org"
    access_token: str = ""
-    user_id: str = ""                       # e.g. @bot:matrix.org
+    user_id: str = ""  # e.g. @bot:matrix.org
    device_id: str = ""
-    e2ee_enabled: bool = True               # end-to-end encryption support
-    sync_stop_grace_seconds: int = 2        # graceful sync_forever shutdown timeout
-    max_media_bytes: int = 20 * 1024 * 1024 # inbound + outbound attachment limit
+    e2ee_enabled: bool = True  # end-to-end encryption support
+    sync_stop_grace_seconds: int = 2  # graceful sync_forever shutdown timeout
+    max_media_bytes: int = 20 * 1024 * 1024  # inbound + outbound attachment limit
    allow_from: list[str] = Field(default_factory=list)
    group_policy: Literal["open", "mention", "allowlist"] = "open"
    group_allow_from: list[str] = Field(default_factory=list)
    allow_room_mentions: bool = False

+
 class ChannelsConfig(Base):
    """Configuration for chat channels."""

-    send_progress: bool = True    # stream agent's text progress to the channel
+    send_progress: bool = True  # stream agent's text progress to the channel
    send_tool_hints: bool = False  # stream tool-call hints (e.g. read_file("…"))
    whatsapp: WhatsAppConfig = Field(default_factory=WhatsAppConfig)
    telegram: TelegramConfig = Field(default_factory=TelegramConfig)
@@ -222,7 +238,9 @@ class AgentDefaults(Base):

    workspace: str = "~/.nanobot/workspace"
    model: str = "anthropic/claude-opus-4-5"
-    provider: str = "auto"  # Provider name (e.g. "anthropic", "openrouter") or "auto" for auto-detection
+    provider: str = (
+        "auto"  # Provider name (e.g. "anthropic", "openrouter") or "auto" for auto-detection
+    )
    max_tokens: int = 8192
    temperature: float = 0.1
    max_tool_iterations: int = 40
@@ -260,8 +278,12 @@ class ProvidersConfig(Base):
    moonshot: ProviderConfig = Field(default_factory=ProviderConfig)
    minimax: ProviderConfig = Field(default_factory=ProviderConfig)
    aihubmix: ProviderConfig = Field(default_factory=ProviderConfig)  # AiHubMix API gateway
-    siliconflow: ProviderConfig = Field(default_factory=ProviderConfig)  # SiliconFlow (硅基流动) API gateway
-    volcengine: ProviderConfig = Field(default_factory=ProviderConfig)  # VolcEngine (火山引擎) pay-per-use
+    siliconflow: ProviderConfig = Field(
+        default_factory=ProviderConfig
+    )  # SiliconFlow (硅基流动) API gateway
+    volcengine: ProviderConfig = Field(
+        default_factory=ProviderConfig
+    )  # VolcEngine (火山引擎) pay-per-use
    volcengine_coding_plan: ProviderConfig = Field(default_factory=ProviderConfig)  # VolcEngine Coding Plan
    byteplus: ProviderConfig = Field(default_factory=ProviderConfig)  # BytePlus (火山引擎海外版) pay-per-use
    byteplus_coding_plan: ProviderConfig = Field(default_factory=ProviderConfig)  # BytePlus Coding Plan
@@ -294,7 +316,9 @@ class WebSearchConfig(Base):
 class WebToolsConfig(Base):
    """Web tools configuration."""

-    proxy: str | None = None  # HTTP/SOCKS5 proxy URL, e.g. "http://127.0.0.1:7890" or "socks5://127.0.0.1:1080"
+    proxy: str | None = (
+        None  # HTTP/SOCKS5 proxy URL, e.g. "http://127.0.0.1:7890" or "socks5://127.0.0.1:1080"
+    )
    search: WebSearchConfig = Field(default_factory=WebSearchConfig)


@@ -308,12 +332,13 @@ class ExecToolConfig(Base):
 class MCPServerConfig(Base):
    """MCP server connection configuration (stdio or HTTP)."""

+    type: Literal["stdio", "sse", "streamableHttp"] | None = None  # auto-detected if omitted
    command: str = ""  # Stdio: command to run (e.g. "npx")
    args: list[str] = Field(default_factory=list)  # Stdio: command arguments
    env: dict[str, str] = Field(default_factory=dict)  # Stdio: extra env vars
-    url: str = ""  # HTTP: streamable HTTP endpoint URL
-    headers: dict[str, str] = Field(default_factory=dict)  # HTTP: Custom HTTP Headers
-    tool_timeout: int = 30  # Seconds before a tool call is cancelled
+    url: str = ""  # HTTP/SSE: endpoint URL
+    headers: dict[str, str] = Field(default_factory=dict)  # HTTP/SSE: custom headers
+    tool_timeout: int = 30  # seconds before a tool call is cancelled


 class ToolsConfig(Base):
@@ -339,7 +364,9 @@ class Config(BaseSettings):
        """Get expanded workspace path."""
        return Path(self.agents.defaults.workspace).expanduser()

-    def _match_provider(self, model: str | None = None) -> tuple["ProviderConfig | None", str | None]:
+    def _match_provider(
+        self, model: str | None = None
+    ) -> tuple["ProviderConfig | None", str | None]:
        """Match provider config and its registry name. Returns (config, spec_name)."""
        from nanobot.providers.registry import PROVIDERS

--- a/nanobot/providers/custom_provider.py
+++ b/nanobot/providers/custom_provider.py
@@ -2,6 +2,7 @@

 from __future__ import annotations

+import uuid
 from typing import Any

 import json_repair
@@ -15,7 +16,12 @@ class CustomProvider(LLMProvider):
    def __init__(self, api_key: str = "no-key", api_base: str = "http://localhost:8000/v1", default_model: str = "default"):
        super().__init__(api_key, api_base)
        self.default_model = default_model
-        self._client = AsyncOpenAI(api_key=api_key, base_url=api_base)
+        # Keep affinity stable for this provider instance to improve backend cache locality.
+        self._client = AsyncOpenAI(
+            api_key=api_key,
+            base_url=api_base,
+            default_headers={"x-session-affinity": uuid.uuid4().hex},
+        )

    async def chat(self, messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None,
                   model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7,