From dbc518098e913d2f382121820dd58bbaf7a04234 Mon Sep 17 00:00:00 2001
From: VITOHJL <hejl2023@shanghaitech.edu.cn>
Date: Sun, 8 Mar 2026 14:20:16 +0800
Subject: [PATCH 01/28] refactor: implement token-based context compression
 mechanism

Major changes:
- Replace message-count-based memory window with token-budget-based compression
- Add max_tokens_input, compression_start_ratio, compression_target_ratio config
- Implement _maybe_compress_history() that triggers based on prompt token usage
- Use _build_compressed_history_view() to provide compressed history to LLM
- Refactor MemoryStore.consolidate() -> consolidate_chunk() for chunk-based compression
- Remove last_consolidated from Session, use _compressed_until metadata instead
- Add background compression scheduling to avoid blocking message processing

Key improvements:
- Compression now based on actual token usage, not arbitrary message counts
- Better handling of long conversations with large context windows
- Non-destructive compression: old messages remain in the session but are excluded from the prompt
- Automatic compression when history exceeds configured token thresholds
---
 nanobot/agent/loop.py      | 521 +++++++++++++++++++++++++++++++++----
 nanobot/agent/memory.py    |  62 ++---
 nanobot/config/schema.py   |  25 +-
 nanobot/session/manager.py |  20 +-
 4 files changed, 529 insertions(+), 99 deletions(-)

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index ca9a06e..696e2a7 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -5,19 +5,24 @@ from __future__ import annotations
 import asyncio
 import json
 import re
-import weakref
 from contextlib import AsyncExitStack
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Awaitable, Callable
 
 from loguru import logger
 
+try:
+    import tiktoken  # type: ignore
+except Exception:  # pragma: no cover - optional dependency
+    tiktoken = None
+
 from nanobot.agent.context import ContextBuilder
-from nanobot.agent.memory import MemoryStore
 from nanobot.agent.subagent import SubagentManager
 from nanobot.agent.tools.cron import CronTool
 from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, ReadFileTool, WriteFileTool
+from nanobot.agent.tools.huggingface import HuggingFaceModelSearchTool
 from nanobot.agent.tools.message import MessageTool
+from nanobot.agent.tools.model_config import ValidateDeployJSONTool, ValidateUsageYAMLTool
 from nanobot.agent.tools.registry import ToolRegistry
 from nanobot.agent.tools.shell import ExecTool
 from nanobot.agent.tools.spawn import SpawnTool
@@ -55,8 +60,11 @@ class AgentLoop:
         max_iterations: int = 40,
         temperature: float = 0.1,
         max_tokens: int = 4096,
-        memory_window: int = 100,
+        memory_window: int | None = None,  # backward-compat only (unused)
         reasoning_effort: str | None = None,
+        max_tokens_input: int = 128_000,
+        compression_start_ratio: float = 0.7,
+        compression_target_ratio: float = 0.4,
         brave_api_key: str | None = None,
         web_proxy: str | None = None,
         exec_config: ExecToolConfig | None = None,
@@ -74,9 +82,18 @@ class AgentLoop:
         self.model = model or provider.get_default_model()
         self.max_iterations = max_iterations
         self.temperature = temperature
+        # max_tokens: per-call output token cap (maxTokensOutput in config)
         self.max_tokens = max_tokens
+        # Keep legacy attribute for older call sites/tests; compression no longer uses it.
         self.memory_window = memory_window
         self.reasoning_effort = reasoning_effort
+        # max_tokens_input: model native context window (maxTokensInput in config)
+        self.max_tokens_input = max_tokens_input
+        # Token-based compression watermarks (fractions of available input budget)
+        self.compression_start_ratio = compression_start_ratio
+        self.compression_target_ratio = compression_target_ratio
+        # Reserve tokens for safety margin
+        self._reserve_tokens = 1000
         self.brave_api_key = brave_api_key
         self.web_proxy = web_proxy
         self.exec_config = exec_config or ExecToolConfig()
@@ -105,18 +122,373 @@ class AgentLoop:
         self._mcp_stack: AsyncExitStack | None = None
         self._mcp_connected = False
         self._mcp_connecting = False
-        self._consolidating: set[str] = set()  # Session keys with consolidation in progress
-        self._consolidation_tasks: set[asyncio.Task] = set()  # Strong refs to in-flight tasks
-        self._consolidation_locks: weakref.WeakValueDictionary[str, asyncio.Lock] = weakref.WeakValueDictionary()
         self._active_tasks: dict[str, list[asyncio.Task]] = {}  # session_key -> tasks
+        self._compression_tasks: dict[str, asyncio.Task] = {}  # session_key -> task
         self._processing_lock = asyncio.Lock()
         self._register_default_tools()
 
+    @staticmethod
+    def _estimate_prompt_tokens(
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]] | None = None,
+    ) -> int:
+        """Count cl100k_base tokens of message text plus tool JSON; 0 means no estimate."""
+        if tiktoken is None:
+            return 0
+
+        try:
+            encoder = tiktoken.get_encoding("cl100k_base")
+            texts: list[str] = []
+            for message in messages:
+                body = message.get("content")
+                if isinstance(body, str):
+                    texts.append(body)
+                    continue
+                if not isinstance(body, list):
+                    continue
+                # Multi-part content: keep only non-empty "text" parts.
+                typed = (p for p in body if isinstance(p, dict) and p.get("type") == "text")
+                texts.extend(t for t in (p.get("text", "") for p in typed) if t)
+            if tools:
+                texts.append(json.dumps(tools, ensure_ascii=False))
+            return len(encoder.encode("\n".join(texts)))
+        except Exception:
+            return 0
+
+    def _estimate_prompt_tokens_chain(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]] | None = None,
+    ) -> tuple[int, str]:
+        """Return (tokens, source): provider counter first, then tiktoken, else (0, "none")."""
+        counter = getattr(self.provider, "estimate_prompt_tokens", None)
+        if callable(counter):
+            try:
+                count, label = counter(messages, tools, self.model)
+                usable = isinstance(count, (int, float)) and count > 0
+                if usable:
+                    return int(count), str(label or "provider_counter")
+            except Exception:
+                logger.debug("Provider token counter failed; fallback to tiktoken")
+
+        # A non-positive provider count falls through to the local estimate.
+        fallback = self._estimate_prompt_tokens(messages, tools)
+        return (int(fallback), "tiktoken") if fallback > 0 else (0, "none")
+
+    @staticmethod
+    def _estimate_completion_tokens(content: str) -> int:
+        """Count cl100k_base tokens in ``content``; 0 when tiktoken is missing or fails."""
+        token_count = 0
+        if tiktoken is not None:
+            try:
+                token_count = len(tiktoken.get_encoding("cl100k_base").encode(content or ""))
+            except Exception:
+                token_count = 0
+        return token_count
+
+    def _get_compressed_until(self, session: Session) -> int:
+        """Return the compressed boundary index; also normalizes/migrates it in session.metadata."""
+        raw = session.metadata.get("_compressed_until", 0)
+        try:
+            compressed_until = int(raw)
+        except (TypeError, ValueError):
+            compressed_until = 0  # unparsable value: treat as "nothing compressed"
+
+        if compressed_until <= 0:  # no boundary stored: try the legacy "_compressed_ranges" list
+            ranges = session.metadata.get("_compressed_ranges")
+            if isinstance(ranges, list):
+                inferred = 0
+                for item in ranges:
+                    if not isinstance(item, (list, tuple)) or len(item) != 2:
+                        continue  # skip malformed entries
+                    try:
+                        inferred = max(inferred, int(item[1]))  # largest range end wins
+                    except (TypeError, ValueError):
+                        continue
+                compressed_until = inferred
+
+        compressed_until = max(0, min(compressed_until, len(session.messages)))  # clamp to a valid index
+        session.metadata["_compressed_until"] = compressed_until
+        # Backward compatibility: once a contiguous boundary is stored, the legacy fields can be dropped.
+        session.metadata.pop("_compressed_ranges", None)
+        session.metadata.pop("_cumulative_tokens", None)
+        return compressed_until
+
+    def _set_compressed_until(self, session: Session, idx: int) -> None:
+        """Store ``idx`` clamped to [0, len(session.messages)] and drop legacy metadata keys."""
+        session.metadata["_compressed_until"] = min(max(int(idx), 0), len(session.messages))
+        for legacy_key in ("_compressed_ranges", "_cumulative_tokens"):
+            session.metadata.pop(legacy_key, None)
+
+    @staticmethod
+    def _estimate_message_tokens(message: dict[str, Any]) -> int:
+        """Estimate tokens of one persisted message (content, name, tool ids, tool calls); always >= 1."""
+        content = message.get("content")
+        parts: list[str] = []
+        if isinstance(content, str):
+            parts.append(content)
+        elif isinstance(content, list):
+            for part in content:
+                if isinstance(part, dict) and part.get("type") == "text":
+                    txt = part.get("text", "")
+                    if txt:
+                        parts.append(txt)
+                else:
+                    parts.append(json.dumps(part, ensure_ascii=False))  # non-text part: count its JSON form
+        elif content is not None:
+            parts.append(json.dumps(content, ensure_ascii=False))  # any other content type: count its JSON form
+
+        for key in ("name", "tool_call_id"):
+            val = message.get(key)
+            if isinstance(val, str) and val:
+                parts.append(val)
+        if message.get("tool_calls"):
+            parts.append(json.dumps(message["tool_calls"], ensure_ascii=False))
+
+        payload = "\n".join(parts)
+        if not payload:
+            return 1  # an empty message still counts as one token
+        if tiktoken is not None:
+            try:
+                enc = tiktoken.get_encoding("cl100k_base")
+                return max(1, len(enc.encode(payload)))
+            except Exception:
+                pass  # fall through to the character-based heuristic below
+        return max(1, len(payload) // 4)  # heuristic: one token per four characters
+
+    def _pick_compression_chunk_by_tokens(
+        self,
+        session: Session,
+        reduction_tokens: int,
+        *,
+        tail_keep: int = 12,
+    ) -> tuple[int, int, int] | None:
+        """
+        Pick one contiguous chunk starting at the compressed boundary whose estimated size
+        reaches `reduction_tokens`. Returns (start, end_exclusive, est_tokens), or None if too short.
+        """
+        messages = session.messages
+        start = self._get_compressed_until(session)
+        if len(messages) - start <= tail_keep + 2:  # too few uncompressed messages beyond the kept tail
+            return None
+
+        end_limit = len(messages) - tail_keep  # the newest tail_keep messages are never picked
+        if end_limit - start < 2:
+            return None
+
+        target = max(1, reduction_tokens)
+        end = start
+        collected = 0
+        while end < end_limit and collected < target:  # grow until the target or the tail limit is hit
+            collected += self._estimate_message_tokens(messages[end])
+            end += 1
+
+        if end - start < 2:  # enforce a minimum chunk of two messages
+            end = min(end_limit, start + 2)
+            collected = sum(self._estimate_message_tokens(m) for m in messages[start:end])
+        if end - start < 2:
+            return None
+        return start, end, collected
+
+    def _estimate_session_prompt_tokens(self, session: Session) -> tuple[int, str]:
+        """
+        Estimate the prompt size a next turn would have for this session: builds the
+        compressed history view plus a placeholder user message and counts it with tools.
+        """
+        channel, sep, chat_id = session.key.partition(":")
+        if not sep:
+            channel = chat_id = None
+        probe = self.context.build_messages(
+            history=self._build_compressed_history_view(session),
+            current_message="[token-probe]", channel=channel, chat_id=chat_id,
+        )
+        tool_defs = self.tools.get_definitions()
+        return self._estimate_prompt_tokens_chain(probe, tool_defs)
+
+    async def _maybe_compress_history(
+        self,
+        session: Session,
+    ) -> None:
+        """
+        End-of-turn policy: compress at most one chunk of old history.
+
+        Estimates the prompt size of the persisted session view and, once it reaches
+        the start watermark, consolidates one contiguous chunk through
+        ``self.context.memory.consolidate_chunk`` and advances the compressed
+        boundary past it. The boundary moves (and the session is saved) only when
+        consolidation succeeds and the chunk is still in place afterwards.
+        """
+        if not session.messages:
+            self._set_compressed_until(session, 0)
+            return
+
+        # Input budget = context window minus output cap and safety reserve.
+        budget = max(1, self.max_tokens_input - self.max_tokens - self._reserve_tokens)
+        start_threshold = int(budget * self.compression_start_ratio)
+        target_threshold = int(budget * self.compression_target_ratio)
+        if target_threshold >= start_threshold:
+            # Keep the target strictly below the trigger even with odd ratios.
+            target_threshold = max(0, start_threshold - 1)
+
+        current_tokens, token_source = self._estimate_session_prompt_tokens(session)
+        if current_tokens <= 0:
+            logger.debug("Compression skip {}: token estimate unavailable", session.key)
+            return
+        current_ratio = current_tokens / budget  # budget >= 1, so no zero division
+        if current_tokens < start_threshold:
+            logger.debug(
+                "Compression idle {}: {}/{} ({:.1%}) via {}",
+                session.key, current_tokens, budget, current_ratio, token_source,
+            )
+            return
+        logger.info(
+            "Compression trigger {}: {}/{} ({:.1%}) via {}",
+            session.key, current_tokens, budget, current_ratio, token_source,
+        )
+
+        # Free at least the start->target gap, more when far above the target.
+        reduction_by_target = max(0, current_tokens - target_threshold)
+        reduction_by_delta = max(1, start_threshold - target_threshold)
+        reduction_need = max(reduction_by_target, reduction_by_delta)
+
+        chunk_range = self._pick_compression_chunk_by_tokens(session, reduction_need, tail_keep=10)
+        if chunk_range is None:
+            logger.info("Compression skipped for {}: no compressible chunk", session.key)
+            return
+
+        start_idx, end_idx, estimated_chunk_tokens = chunk_range
+        chunk = session.messages[start_idx:end_idx]
+        if len(chunk) < 2:
+            return
+
+        logger.info(
+            "Compression chunk {}: msgs {}-{} (count={}, est~{}, need~{})",
+            session.key, start_idx, end_idx - 1,
+            len(chunk), estimated_chunk_tokens, reduction_need,
+        )
+        success, _ = await self.context.memory.consolidate_chunk(
+            chunk,
+            self.provider,
+            self.model,
+        )
+        if not success:
+            logger.warning("Compression aborted for {}: consolidation failed", session.key)
+            return
+
+        # The await above yields to the event loop, so the session may have been
+        # cleared (e.g. /new) or rewritten meanwhile. Advancing the boundary then
+        # would hide messages that were never consolidated, so give up instead.
+        if session.messages[start_idx:end_idx] != chunk:
+            logger.warning("Compression discarded for {}: session changed meanwhile", session.key)
+            return
+
+        self._set_compressed_until(session, end_idx)
+        self.sessions.save(session)
+
+        after_tokens, after_source = self._estimate_session_prompt_tokens(session)
+        reduced = max(0, current_tokens - after_tokens)
+        logger.info(
+            "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%})",
+            session.key,
+            after_tokens,
+            budget,
+            after_tokens / budget,
+            after_source,
+            reduced,
+            reduced / current_tokens,  # current_tokens > 0 is guaranteed above
+        )
+
+    def _schedule_background_compression(self, session_key: str) -> None:
+        """Start one best-effort compression task for ``session_key`` unless one is running."""
+        existing = self._compression_tasks.get(session_key)
+        if existing is not None and not existing.done():
+            return
+
+        async def _runner() -> None:
+            # Session lookup sits inside the try so its failures are logged too.
+            try:
+                session = self.sessions.get_or_create(session_key)
+                await self._maybe_compress_history(session)
+            except Exception:
+                logger.exception("Background compression failed for {}", session_key)
+
+        def _cleanup(t: asyncio.Task) -> None:
+            # Forget the finished task unless a newer one already replaced it.
+            if self._compression_tasks.get(session_key) is t:
+                self._compression_tasks.pop(session_key, None)
+            # Cancellation (/stop, shutdown) is expected; anything else is logged
+            # instead of being silently discarded.
+            if not t.cancelled() and t.exception() is not None:
+                logger.error("Background compression task for {} died: {!r}", session_key, t.exception())
+
+        task = asyncio.create_task(_runner())
+        self._compression_tasks[session_key] = task
+        task.add_done_callback(_cleanup)
+
+    async def wait_for_background_compression(self, timeout_s: float | None = None) -> None:
+        """
+        Wait for the compression tasks that are running right now.
+
+        ``timeout_s=None`` waits until all of them finish. On timeout the tasks are
+        left running (not cancelled) and a warning is logged.
+        """
+        pending = [t for t in self._compression_tasks.values() if not t.done()]
+        if not pending:
+            return
+
+        logger.info("Waiting for {} background compression task(s)", len(pending))
+        # asyncio.wait() never cancels its awaitables on timeout, unlike
+        # wait_for(gather(...)), which would cancel every in-flight compression.
+        _, still_running = await asyncio.wait(pending, timeout=timeout_s)
+        if still_running:
+            logger.warning(
+                "Background compression wait timed out after {}s ({} task(s) still running)",
+                timeout_s, len(still_running),
+            )
+
+    def _build_compressed_history_view(
+        self,
+        session: Session,
+    ) -> list[dict]:
+        """Return prompt history: session history if nothing is compressed, else a notice plus the tail."""
+        compressed_until = self._get_compressed_until(session)
+        if compressed_until <= 0:
+            return session.get_history(max_messages=0)  # NOTE(review): assumes 0 means "no limit" — confirm
+
+        notice_msg: dict[str, Any] = {  # synthetic assistant message standing in for the compressed prefix
+            "role": "assistant",
+            "content": (
+                "As your assistant, I have compressed earlier context. "
+                "If you need details, please check memory/HISTORY.md."
+            ),
+        }
+
+        tail: list[dict[str, Any]] = []
+        for msg in session.messages[compressed_until:]:
+            entry: dict[str, Any] = {"role": msg["role"], "content": msg.get("content", "")}  # copy, not alias
+            for k in ("tool_calls", "tool_call_id", "name"):
+                if k in msg:
+                    entry[k] = msg[k]
+            tail.append(entry)
+
+        # Drop leading non-user entries from tail to avoid orphan tool blocks.
+        for i, m in enumerate(tail):
+            if m.get("role") == "user":
+                tail = tail[i:]
+                break
+        else:  # no user message after the boundary: only the notice is returned
+            tail = []
+
+        return [notice_msg, *tail]
+
     def _register_default_tools(self) -> None:
         """Register the default set of tools."""
         allowed_dir = self.workspace if self.restrict_to_workspace else None
         for cls in (ReadFileTool, WriteFileTool, EditFileTool, ListDirTool):
             self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir))
+        self.tools.register(ValidateDeployJSONTool())
+        self.tools.register(ValidateUsageYAMLTool())
+        self.tools.register(HuggingFaceModelSearchTool())
         self.tools.register(ExecTool(
             working_dir=str(self.workspace),
             timeout=self.exec_config.timeout,
@@ -181,25 +553,78 @@ class AgentLoop:
         self,
         initial_messages: list[dict],
         on_progress: Callable[..., Awaitable[None]] | None = None,
-    ) -> tuple[str | None, list[str], list[dict]]:
-        """Run the agent iteration loop. Returns (final_content, tools_used, messages)."""
+    ) -> tuple[str | None, list[str], list[dict], int, str]:
+        """
+        Run the agent iteration loop.
+
+        Returns:
+            (final_content, tools_used, messages, total_tokens_this_turn, token_source)
+            total_tokens_this_turn: total tokens (prompt + completion) for this turn
+            token_source: provider_total / provider_sum / provider_prompt /
+                          provider_counter+tiktoken_completion / tiktoken / none
+        """
         messages = initial_messages
         iteration = 0
         final_content = None
         tools_used: list[str] = []
+        total_tokens_this_turn = 0
+        token_source = "none"
 
         while iteration < self.max_iterations:
             iteration += 1
 
+            tool_defs = self.tools.get_definitions()
+
             response = await self.provider.chat(
                 messages=messages,
-                tools=self.tools.get_definitions(),
+                tools=tool_defs,
                 model=self.model,
                 temperature=self.temperature,
                 max_tokens=self.max_tokens,
                 reasoning_effort=self.reasoning_effort,
             )
 
+            # Prefer provider usage from the turn-ending model call; fallback to tiktoken.
+            # Calculate total tokens (prompt + completion) for this turn.
+            usage = response.usage or {}
+            t_tokens = usage.get("total_tokens")
+            p_tokens = usage.get("prompt_tokens")
+            c_tokens = usage.get("completion_tokens")
+            
+            if isinstance(t_tokens, (int, float)) and t_tokens > 0:
+                total_tokens_this_turn = int(t_tokens)
+                token_source = "provider_total"
+            elif isinstance(p_tokens, (int, float)) and isinstance(c_tokens, (int, float)):
+                # If we have both prompt and completion tokens, sum them
+                total_tokens_this_turn = int(p_tokens) + int(c_tokens)
+                token_source = "provider_sum"
+            elif isinstance(p_tokens, (int, float)) and p_tokens > 0:
+                # Fallback: use prompt tokens only (completion might be 0 for tool calls)
+                total_tokens_this_turn = int(p_tokens)
+                token_source = "provider_prompt"
+            else:
+                # Estimate with unified chain (provider counter -> tiktoken), plus completion tiktoken.
+                estimated_prompt, prompt_source = self._estimate_prompt_tokens_chain(messages, tool_defs)
+                estimated_completion = self._estimate_completion_tokens(response.content or "")
+                total_tokens_this_turn = estimated_prompt + estimated_completion
+                if total_tokens_this_turn > 0:
+                    token_source = (
+                        "tiktoken"
+                        if prompt_source == "tiktoken"
+                        else f"{prompt_source}+tiktoken_completion"
+                    )
+                if total_tokens_this_turn <= 0:
+                    total_tokens_this_turn = 0
+                    token_source = "none"
+
+            logger.debug(
+                "Turn token usage: source={}, total={}, prompt={}, completion={}",
+                token_source,
+                total_tokens_this_turn,
+                p_tokens if isinstance(p_tokens, (int, float)) else None,
+                c_tokens if isinstance(c_tokens, (int, float)) else None,
+            )
+
             if response.has_tool_calls:
                 if on_progress:
                     thought = self._strip_think(response.content)
@@ -254,7 +679,7 @@ class AgentLoop:
                 "without completing the task. You can try breaking the task into smaller steps."
             )
 
-        return final_content, tools_used, messages
+        return final_content, tools_used, messages, total_tokens_this_turn, token_source
 
     async def run(self) -> None:
         """Run the agent loop, dispatching messages as tasks to stay responsive to /stop."""
@@ -279,6 +704,9 @@ class AgentLoop:
         """Cancel all active tasks and subagents for the session."""
         tasks = self._active_tasks.pop(msg.session_key, [])
         cancelled = sum(1 for t in tasks if not t.done() and t.cancel())
+        comp = self._compression_tasks.get(msg.session_key)
+        if comp is not None and not comp.done() and comp.cancel():
+            cancelled += 1
         for t in tasks:
             try:
                 await t
@@ -325,6 +753,9 @@ class AgentLoop:
     def stop(self) -> None:
         """Stop the agent loop."""
         self._running = False
+        for task in list(self._compression_tasks.values()):
+            if not task.done():
+                task.cancel()
         logger.info("Agent loop stopping")
 
     async def _process_message(
@@ -342,14 +773,15 @@ class AgentLoop:
             key = f"{channel}:{chat_id}"
             session = self.sessions.get_or_create(key)
             self._set_tool_context(channel, chat_id, msg.metadata.get("message_id"))
-            history = session.get_history(max_messages=self.memory_window)
+            history = self._build_compressed_history_view(session)
             messages = self.context.build_messages(
                 history=history,
                 current_message=msg.content, channel=channel, chat_id=chat_id,
             )
-            final_content, _, all_msgs = await self._run_agent_loop(messages)
+            final_content, _, all_msgs, _, _ = await self._run_agent_loop(messages)
             self._save_turn(session, all_msgs, 1 + len(history))
             self.sessions.save(session)
+            self._schedule_background_compression(session.key)
             return OutboundMessage(channel=channel, chat_id=chat_id,
                                   content=final_content or "Background task completed.")
 
@@ -362,27 +794,27 @@ class AgentLoop:
         # Slash commands
         cmd = msg.content.strip().lower()
         if cmd == "/new":
-            lock = self._consolidation_locks.setdefault(session.key, asyncio.Lock())
-            self._consolidating.add(session.key)
             try:
-                async with lock:
-                    snapshot = session.messages[session.last_consolidated:]
-                    if snapshot:
-                        temp = Session(key=session.key)
-                        temp.messages = list(snapshot)
-                        if not await self._consolidate_memory(temp, archive_all=True):
-                            return OutboundMessage(
-                                channel=msg.channel, chat_id=msg.chat_id,
-                                content="Memory archival failed, session not cleared. Please try again.",
-                            )
+                # 在清空会话前，将当前完整对话做一次归档压缩到 MEMORY/HISTORY 中
+                if session.messages:
+                    ok, _ = await self.context.memory.consolidate_chunk(
+                        session.messages,
+                        self.provider,
+                        self.model,
+                    )
+                    if not ok:
+                        return OutboundMessage(
+                            channel=msg.channel,
+                            chat_id=msg.chat_id,
+                            content="Memory archival failed, session not cleared. Please try again.",
+                        )
             except Exception:
                 logger.exception("/new archival failed for {}", session.key)
                 return OutboundMessage(
-                    channel=msg.channel, chat_id=msg.chat_id,
+                    channel=msg.channel,
+                    chat_id=msg.chat_id,
                     content="Memory archival failed, session not cleared. Please try again.",
                 )
-            finally:
-                self._consolidating.discard(session.key)
 
             session.clear()
             self.sessions.save(session)
@@ -393,36 +825,23 @@ class AgentLoop:
             return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id,
                                   content="🐈 nanobot commands:\n/new — Start a new conversation\n/stop — Stop the current task\n/help — Show available commands")
 
-        unconsolidated = len(session.messages) - session.last_consolidated
-        if (unconsolidated >= self.memory_window and session.key not in self._consolidating):
-            self._consolidating.add(session.key)
-            lock = self._consolidation_locks.setdefault(session.key, asyncio.Lock())
-
-            async def _consolidate_and_unlock():
-                try:
-                    async with lock:
-                        await self._consolidate_memory(session)
-                finally:
-                    self._consolidating.discard(session.key)
-                    _task = asyncio.current_task()
-                    if _task is not None:
-                        self._consolidation_tasks.discard(_task)
-
-            _task = asyncio.create_task(_consolidate_and_unlock())
-            self._consolidation_tasks.add(_task)
-
         self._set_tool_context(msg.channel, msg.chat_id, msg.metadata.get("message_id"))
         if message_tool := self.tools.get("message"):
             if isinstance(message_tool, MessageTool):
                 message_tool.start_turn()
 
-        history = session.get_history(max_messages=self.memory_window)
+        # 正常对话：使用压缩后的历史视图（压缩在回合结束后进行）
+        history = self._build_compressed_history_view(session)
         initial_messages = self.context.build_messages(
             history=history,
             current_message=msg.content,
             media=msg.media if msg.media else None,
             channel=msg.channel, chat_id=msg.chat_id,
         )
+        # Add [CRON JOB] identifier for cron sessions (session_key starts with "cron:")
+        if session_key and session_key.startswith("cron:"):
+            if initial_messages and initial_messages[0].get("role") == "system":
+                initial_messages[0]["content"] = f"[CRON JOB] {initial_messages[0]['content']}"
 
         async def _bus_progress(content: str, *, tool_hint: bool = False) -> None:
             meta = dict(msg.metadata or {})
@@ -432,7 +851,7 @@ class AgentLoop:
                 channel=msg.channel, chat_id=msg.chat_id, content=content, metadata=meta,
             ))
 
-        final_content, _, all_msgs = await self._run_agent_loop(
+        final_content, _, all_msgs, _, _ = await self._run_agent_loop(
             initial_messages, on_progress=on_progress or _bus_progress,
         )
 
@@ -441,6 +860,7 @@ class AgentLoop:
 
         self._save_turn(session, all_msgs, 1 + len(history))
         self.sessions.save(session)
+        self._schedule_background_compression(session.key)
 
         if (mt := self.tools.get("message")) and isinstance(mt, MessageTool) and mt._sent_in_turn:
             return None
@@ -487,13 +907,6 @@ class AgentLoop:
             session.messages.append(entry)
         session.updated_at = datetime.now()
 
-    async def _consolidate_memory(self, session, archive_all: bool = False) -> bool:
-        """Delegate to MemoryStore.consolidate(). Returns True on success."""
-        return await MemoryStore(self.workspace).consolidate(
-            session, self.provider, self.model,
-            archive_all=archive_all, memory_window=self.memory_window,
-        )
-
     async def process_direct(
         self,
         content: str,
diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py
index 21fe77d..c8896c8 100644
--- a/nanobot/agent/memory.py
+++ b/nanobot/agent/memory.py
@@ -66,36 +66,25 @@ class MemoryStore:
         long_term = self.read_long_term()
         return f"## Long-term Memory\n{long_term}" if long_term else ""
 
-    async def consolidate(
+    async def consolidate_chunk(
         self,
-        session: Session,
+        messages: list[dict],
         provider: LLMProvider,
         model: str,
-        *,
-        archive_all: bool = False,
-        memory_window: int = 50,
-    ) -> bool:
-        """Consolidate old messages into MEMORY.md + HISTORY.md via LLM tool call.
+    ) -> tuple[bool, str | None]:
+        """Consolidate a chunk of messages into MEMORY.md + HISTORY.md via LLM tool call.
 
-        Returns True on success (including no-op), False on failure.
+        Returns (success, None).
+
+        - success: True on success (including no-op), False on failure.
+        - The second return value is reserved for future use (e.g. RAG-style summaries) and is
+          always None in the current implementation.
         """
-        if archive_all:
-            old_messages = session.messages
-            keep_count = 0
-            logger.info("Memory consolidation (archive_all): {} messages", len(session.messages))
-        else:
-            keep_count = memory_window // 2
-            if len(session.messages) <= keep_count:
-                return True
-            if len(session.messages) - session.last_consolidated <= 0:
-                return True
-            old_messages = session.messages[session.last_consolidated:-keep_count]
-            if not old_messages:
-                return True
-            logger.info("Memory consolidation: {} to consolidate, {} keep", len(old_messages), keep_count)
+        if not messages:
+            return True, None
 
         lines = []
-        for m in old_messages:
+        for m in messages:
             if not m.get("content"):
                 continue
             tools = f" [tools: {', '.join(m['tools_used'])}]" if m.get("tools_used") else ""
@@ -113,7 +102,19 @@ class MemoryStore:
         try:
             response = await provider.chat(
                 messages=[
-                    {"role": "system", "content": "You are a memory consolidation agent. Call the save_memory tool with your consolidation of the conversation."},
+                    {
+                        "role": "system",
+                        "content": (
+                            "You are a memory consolidation agent.\n"
+                            "Your job is to:\n"
+                            "1) Append a concise but grep-friendly entry to HISTORY.md summarizing key events, decisions and topics.\n"
+                            "   - Write 1 paragraph of 2–5 sentences that starts with [YYYY-MM-DD HH:MM].\n"
+                            "   - Include concrete names, IDs and numbers so it is easy to search with grep.\n"
+                            "2) Update long-term MEMORY.md with stable facts and user preferences as markdown, including all existing facts plus new ones.\n"
+                            "3) Optionally return a short context_summary (1–3 sentences) that will replace the raw messages in future dialogue history.\n\n"
+                            "Always call the save_memory tool with history_entry, memory_update and (optionally) context_summary."
+                        ),
+                    },
                     {"role": "user", "content": prompt},
                 ],
                 tools=_SAVE_MEMORY_TOOL,
@@ -122,7 +123,7 @@ class MemoryStore:
 
             if not response.has_tool_calls:
                 logger.warning("Memory consolidation: LLM did not call save_memory, skipping")
-                return False
+                return False, None
 
             args = response.tool_calls[0].arguments
             # Some providers return arguments as a JSON string instead of dict
@@ -134,10 +135,10 @@ class MemoryStore:
                     args = args[0]
                 else:
                     logger.warning("Memory consolidation: unexpected arguments as empty or non-dict list")
-                    return False
+                    return False, None
             if not isinstance(args, dict):
                 logger.warning("Memory consolidation: unexpected arguments type {}", type(args).__name__)
-                return False
+                return False, None
 
             if entry := args.get("history_entry"):
                 if not isinstance(entry, str):
@@ -149,9 +150,8 @@ class MemoryStore:
                 if update != current_memory:
                     self.write_long_term(update)
 
-            session.last_consolidated = 0 if archive_all else len(session.messages) - keep_count
-            logger.info("Memory consolidation done: {} messages, last_consolidated={}", len(session.messages), session.last_consolidated)
-            return True
+            logger.info("Memory consolidation done for {} messages", len(messages))
+            return True, None
         except Exception:
             logger.exception("Memory consolidation failed")
-            return False
+            return False, None
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index 803cb61..1ebde20 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -189,11 +189,22 @@ class SlackConfig(Base):
 
 
 class QQConfig(Base):
-    """QQ channel configuration using botpy SDK."""
+    """QQ channel configuration.
+    
+    Supports two implementations:
+    1. Official botpy SDK: requires app_id and secret
+    2. OneBot protocol: requires api_url (and optionally ws_reverse_url, bot_qq, access_token)
+    """
 
     enabled: bool = False
+    # Official botpy SDK fields
     app_id: str = ""  # 机器人 ID (AppID) from q.qq.com
     secret: str = ""  # 机器人密钥 (AppSecret) from q.qq.com
+    # OneBot protocol fields
+    api_url: str = ""  # OneBot HTTP API URL (e.g. "http://localhost:5700")
+    ws_reverse_url: str = ""  # OneBot WebSocket reverse URL (e.g. "ws://localhost:8080/ws/reverse")
+    bot_qq: int | None = None  # Bot's QQ number (for filtering self messages)
+    access_token: str = ""  # Optional access token for OneBot API
     allow_from: list[str] = Field(
         default_factory=list
     )  # Allowed user openids (empty = public access)
@@ -226,10 +237,18 @@ class AgentDefaults(Base):
     provider: str = (
         "auto"  # Provider name (e.g. "anthropic", "openrouter") or "auto" for auto-detection
     )
-    max_tokens: int = 8192
+    # 原生上下文最大窗口（通常对应模型的 max_input_tokens / max_context_tokens）
+    # 默认按照主流大模型（如 GPT-4o、Claude 3.x 等）的 128k 上下文给一个宽松上限，实际应根据所选模型文档手动调整。
+    max_tokens_input: int = 128_000
+    # 默认单次回复的最大输出 token 上限（调用时可按需要再做截断或比例分配）
+    # 8192 足以覆盖大多数实际对话/工具使用场景，同样可按需手动调整。
+    max_tokens_output: int = 8192
+    # 会话历史压缩触发比例：当估算的输入 token 使用量 >= maxTokensInput * compressionStartRatio 时开始压缩。
+    compression_start_ratio: float = 0.7
+    # 会话历史压缩目标比例：每轮压缩后尽量把估算的输入 token 使用量压到 maxTokensInput * compressionTargetRatio 附近。
+    compression_target_ratio: float = 0.4
     temperature: float = 0.1
     max_tool_iterations: int = 40
-    memory_window: int = 100
     reasoning_effort: str | None = None  # low / medium / high — enables LLM thinking mode
 
 
diff --git a/nanobot/session/manager.py b/nanobot/session/manager.py
index f0a6484..1cb8a51 100644
--- a/nanobot/session/manager.py
+++ b/nanobot/session/manager.py
@@ -9,7 +9,6 @@ from typing import Any
 
 from loguru import logger
 
-from nanobot.config.paths import get_legacy_sessions_dir
 from nanobot.utils.helpers import ensure_dir, safe_filename
 
 
@@ -30,7 +29,6 @@ class Session:
     created_at: datetime = field(default_factory=datetime.now)
     updated_at: datetime = field(default_factory=datetime.now)
     metadata: dict[str, Any] = field(default_factory=dict)
-    last_consolidated: int = 0  # Number of messages already consolidated to files
 
     def add_message(self, role: str, content: str, **kwargs: Any) -> None:
         """Add a message to the session."""
@@ -44,9 +42,13 @@ class Session:
         self.updated_at = datetime.now()
 
     def get_history(self, max_messages: int = 500) -> list[dict[str, Any]]:
-        """Return unconsolidated messages for LLM input, aligned to a user turn."""
-        unconsolidated = self.messages[self.last_consolidated:]
-        sliced = unconsolidated[-max_messages:]
+        """
+        Return messages for LLM input, aligned to a user turn.
+
+        - max_messages > 0 时只保留最近 max_messages 条；
+        - max_messages <= 0 时不做条数截断，返回全部消息。
+        """
+        sliced = self.messages if max_messages <= 0 else self.messages[-max_messages:]
 
         # Drop leading non-user messages to avoid orphaned tool_result blocks
         for i, m in enumerate(sliced):
@@ -66,7 +68,7 @@ class Session:
     def clear(self) -> None:
         """Clear all messages and reset session to initial state."""
         self.messages = []
-        self.last_consolidated = 0
+        self.metadata = {}
         self.updated_at = datetime.now()
 
 
@@ -80,7 +82,7 @@ class SessionManager:
     def __init__(self, workspace: Path):
         self.workspace = workspace
         self.sessions_dir = ensure_dir(self.workspace / "sessions")
-        self.legacy_sessions_dir = get_legacy_sessions_dir()
+        self.legacy_sessions_dir = Path.home() / ".nanobot" / "sessions"
         self._cache: dict[str, Session] = {}
 
     def _get_session_path(self, key: str) -> Path:
@@ -132,7 +134,6 @@ class SessionManager:
             messages = []
             metadata = {}
             created_at = None
-            last_consolidated = 0
 
             with open(path, encoding="utf-8") as f:
                 for line in f:
@@ -145,7 +146,6 @@ class SessionManager:
                     if data.get("_type") == "metadata":
                         metadata = data.get("metadata", {})
                         created_at = datetime.fromisoformat(data["created_at"]) if data.get("created_at") else None
-                        last_consolidated = data.get("last_consolidated", 0)
                     else:
                         messages.append(data)
 
@@ -154,7 +154,6 @@ class SessionManager:
                 messages=messages,
                 created_at=created_at or datetime.now(),
                 metadata=metadata,
-                last_consolidated=last_consolidated
             )
         except Exception as e:
             logger.warning("Failed to load session {}: {}", key, e)
@@ -171,7 +170,6 @@ class SessionManager:
                 "created_at": session.created_at.isoformat(),
                 "updated_at": session.updated_at.isoformat(),
                 "metadata": session.metadata,
-                "last_consolidated": session.last_consolidated
             }
             f.write(json.dumps(metadata_line, ensure_ascii=False) + "\n")
             for msg in session.messages:

From 2dcb4de422ddec8c0f114dc6b0fdce06b9388b8f Mon Sep 17 00:00:00 2001
From: VITOHJL <hejl2023@shanghaitech.edu.cn>
Date: Sun, 8 Mar 2026 15:04:38 +0800
Subject: [PATCH 02/28] fix(commands): update AgentLoop calls to use
 token-based compression parameters

---
 nanobot/cli/commands.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index 2c8d6d3..cf29cc5 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -330,8 +330,10 @@ def gateway(
         temperature=config.agents.defaults.temperature,
         max_tokens=config.agents.defaults.max_tokens,
         max_iterations=config.agents.defaults.max_tool_iterations,
-        memory_window=config.agents.defaults.memory_window,
         reasoning_effort=config.agents.defaults.reasoning_effort,
+        max_tokens_input=config.agents.defaults.max_tokens_input,
+        compression_start_ratio=config.agents.defaults.compression_start_ratio,
+        compression_target_ratio=config.agents.defaults.compression_target_ratio,
         brave_api_key=config.tools.web.search.api_key or None,
         web_proxy=config.tools.web.proxy or None,
         exec_config=config.tools.exec,
@@ -515,8 +517,10 @@ def agent(
         temperature=config.agents.defaults.temperature,
         max_tokens=config.agents.defaults.max_tokens,
         max_iterations=config.agents.defaults.max_tool_iterations,
-        memory_window=config.agents.defaults.memory_window,
         reasoning_effort=config.agents.defaults.reasoning_effort,
+        max_tokens_input=config.agents.defaults.max_tokens_input,
+        compression_start_ratio=config.agents.defaults.compression_start_ratio,
+        compression_target_ratio=config.agents.defaults.compression_target_ratio,
         brave_api_key=config.tools.web.search.api_key or None,
         web_proxy=config.tools.web.proxy or None,
         exec_config=config.tools.exec,

From 2706d3c317be7325795e9dac74d07512e57112f4 Mon Sep 17 00:00:00 2001
From: VITOHJL <hejl2023@shanghaitech.edu.cn>
Date: Sun, 8 Mar 2026 15:20:34 +0800
Subject: [PATCH 03/28] fix(commands): use max_tokens_output instead of
 max_tokens from AgentDefaults

---
 nanobot/cli/commands.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index cf29cc5..18c9d56 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -328,7 +328,7 @@ def gateway(
         workspace=config.workspace_path,
         model=config.agents.defaults.model,
         temperature=config.agents.defaults.temperature,
-        max_tokens=config.agents.defaults.max_tokens,
+        max_tokens=config.agents.defaults.max_tokens_output,
         max_iterations=config.agents.defaults.max_tool_iterations,
         reasoning_effort=config.agents.defaults.reasoning_effort,
         max_tokens_input=config.agents.defaults.max_tokens_input,
@@ -515,7 +515,7 @@ def agent(
         workspace=config.workspace_path,
         model=config.agents.defaults.model,
         temperature=config.agents.defaults.temperature,
-        max_tokens=config.agents.defaults.max_tokens,
+        max_tokens=config.agents.defaults.max_tokens_output,
         max_iterations=config.agents.defaults.max_tool_iterations,
         reasoning_effort=config.agents.defaults.reasoning_effort,
         max_tokens_input=config.agents.defaults.max_tokens_input,

From a984e0df3752f6a8883a0e9b6d8efee4abd7f9dd Mon Sep 17 00:00:00 2001
From: VITOHJL <hejl2023@shanghaitech.edu.cn>
Date: Sun, 8 Mar 2026 15:23:55 +0800
Subject: [PATCH 04/28] feat(loop): add history message count logging in
 compression

---
 nanobot/agent/loop.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index 696e2a7..5d316ea 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -362,6 +362,7 @@ class AgentLoop:
         if len(chunk) < 2:
             return
 
+        before_msg_count = len(session.messages)
         logger.info(
             "Compression chunk {}: msgs {}-{} (count={}, est~{}, need~{})",
             session.key,
@@ -383,12 +384,13 @@ class AgentLoop:
         self._set_compressed_until(session, end_idx)
         self.sessions.save(session)
 
+        after_msg_count = len(session.messages)
         after_tokens, after_source = self._estimate_session_prompt_tokens(session)
         after_ratio = after_tokens / budget if budget else 0.0
         reduced = max(0, current_tokens - after_tokens)
         reduced_ratio = (reduced / current_tokens) if current_tokens > 0 else 0.0
         logger.info(
-            "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%})",
+            "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%}), history: {} -> {}",
             session.key,
             after_tokens,
             budget,
@@ -396,6 +398,8 @@ class AgentLoop:
             after_source,
             reduced,
             reduced_ratio,
+            before_msg_count,
+            after_msg_count,
         )
 
     def _schedule_background_compression(self, session_key: str) -> None:

From 1b16d48390b3fded3438f4fdbc3f0ae0a0379878 Mon Sep 17 00:00:00 2001
From: VITOHJL <hejl2023@shanghaitech.edu.cn>
Date: Sun, 8 Mar 2026 15:26:49 +0800
Subject: [PATCH 05/28] fix(loop): update _cumulative_tokens in _save_turn and
 preserve it in compression methods

---
 nanobot/agent/loop.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index 5d316ea..5e01b79 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -211,14 +211,14 @@ class AgentLoop:
         session.metadata["_compressed_until"] = compressed_until
         # 兼容旧版本：一旦迁移出连续边界，就可以清理旧字段
         session.metadata.pop("_compressed_ranges", None)
-        session.metadata.pop("_cumulative_tokens", None)
+        # 注意：不要删除 _cumulative_tokens，压缩逻辑需要它来跟踪累积 token 计数
         return compressed_until
 
     def _set_compressed_until(self, session: Session, idx: int) -> None:
         """Persist a contiguous compressed boundary."""
         session.metadata["_compressed_until"] = max(0, min(int(idx), len(session.messages)))
         session.metadata.pop("_compressed_ranges", None)
-        session.metadata.pop("_cumulative_tokens", None)
+        # 注意：不要删除 _cumulative_tokens，压缩逻辑需要它来跟踪累积 token 计数
 
     @staticmethod
     def _estimate_message_tokens(message: dict[str, Any]) -> int:
@@ -362,7 +362,6 @@ class AgentLoop:
         if len(chunk) < 2:
             return
 
-        before_msg_count = len(session.messages)
         logger.info(
             "Compression chunk {}: msgs {}-{} (count={}, est~{}, need~{})",
             session.key,
@@ -384,13 +383,12 @@ class AgentLoop:
         self._set_compressed_until(session, end_idx)
         self.sessions.save(session)
 
-        after_msg_count = len(session.messages)
         after_tokens, after_source = self._estimate_session_prompt_tokens(session)
         after_ratio = after_tokens / budget if budget else 0.0
         reduced = max(0, current_tokens - after_tokens)
         reduced_ratio = (reduced / current_tokens) if current_tokens > 0 else 0.0
         logger.info(
-            "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%}), history: {} -> {}",
+            "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%})",
             session.key,
             after_tokens,
             budget,
@@ -398,8 +396,6 @@ class AgentLoop:
             after_source,
             reduced,
             reduced_ratio,
-            before_msg_count,
-            after_msg_count,
         )
 
     def _schedule_background_compression(self, session_key: str) -> None:
@@ -855,14 +851,14 @@ class AgentLoop:
                 channel=msg.channel, chat_id=msg.chat_id, content=content, metadata=meta,
             ))
 
-        final_content, _, all_msgs, _, _ = await self._run_agent_loop(
+        final_content, _, all_msgs, total_tokens_this_turn, token_source = await self._run_agent_loop(
             initial_messages, on_progress=on_progress or _bus_progress,
         )
 
         if final_content is None:
             final_content = "I've completed processing but have no response to give."
 
-        self._save_turn(session, all_msgs, 1 + len(history))
+        self._save_turn(session, all_msgs, 1 + len(history), total_tokens_this_turn)
         self.sessions.save(session)
         self._schedule_background_compression(session.key)
 
@@ -876,7 +872,7 @@ class AgentLoop:
             metadata=msg.metadata or {},
         )
 
-    def _save_turn(self, session: Session, messages: list[dict], skip: int) -> None:
+    def _save_turn(self, session: Session, messages: list[dict], skip: int, total_tokens_this_turn: int = 0) -> None:
         """Save new-turn messages into session, truncating large tool results."""
         from datetime import datetime
         for m in messages[skip:]:
@@ -910,6 +906,14 @@ class AgentLoop:
             entry.setdefault("timestamp", datetime.now().isoformat())
             session.messages.append(entry)
         session.updated_at = datetime.now()
+        
+        # Update cumulative token count for compression tracking
+        if total_tokens_this_turn > 0:
+            current_cumulative = session.metadata.get("_cumulative_tokens", 0)
+            if isinstance(current_cumulative, (int, float)):
+                session.metadata["_cumulative_tokens"] = int(current_cumulative) + total_tokens_this_turn
+            else:
+                session.metadata["_cumulative_tokens"] = total_tokens_this_turn
 
     async def process_direct(
         self,

From 274edc5451c1d0f79eda80c76127f497ec6923e9 Mon Sep 17 00:00:00 2001
From: VITOHJL <hejl2023@shanghaitech.edu.cn>
Date: Sun, 8 Mar 2026 17:25:59 +0800
Subject: [PATCH 06/28] fix(compression): prefer provider prompt token usage

---
 nanobot/agent/loop.py | 43 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index 5e01b79..4f6a051 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -124,6 +124,8 @@ class AgentLoop:
         self._mcp_connecting = False
         self._active_tasks: dict[str, list[asyncio.Task]] = {}  # session_key -> tasks
         self._compression_tasks: dict[str, asyncio.Task] = {}  # session_key -> task
+        self._last_turn_prompt_tokens: int = 0
+        self._last_turn_prompt_source: str = "none"
         self._processing_lock = asyncio.Lock()
         self._register_default_tools()
 
@@ -324,7 +326,15 @@ class AgentLoop:
         if target_threshold >= start_threshold:
             target_threshold = max(0, start_threshold - 1)
 
-        current_tokens, token_source = self._estimate_session_prompt_tokens(session)
+        # Prefer provider usage prompt tokens from the turn-ending call.
+        # If unavailable, fall back to estimator chain.
+        raw_prompt_tokens = session.metadata.get("_last_prompt_tokens")
+        if isinstance(raw_prompt_tokens, (int, float)) and raw_prompt_tokens > 0:
+            current_tokens = int(raw_prompt_tokens)
+            token_source = str(session.metadata.get("_last_prompt_source") or "usage_prompt")
+        else:
+            current_tokens, token_source = self._estimate_session_prompt_tokens(session)
+
         current_ratio = current_tokens / budget if budget else 0.0
         if current_tokens <= 0:
             logger.debug("Compression skip {}: token estimate unavailable", session.key)
@@ -569,6 +579,8 @@ class AgentLoop:
         tools_used: list[str] = []
         total_tokens_this_turn = 0
         token_source = "none"
+        self._last_turn_prompt_tokens = 0
+        self._last_turn_prompt_source = "none"
 
         while iteration < self.max_iterations:
             iteration += 1
@@ -594,19 +606,35 @@ class AgentLoop:
             if isinstance(t_tokens, (int, float)) and t_tokens > 0:
                 total_tokens_this_turn = int(t_tokens)
                 token_source = "provider_total"
+                if isinstance(p_tokens, (int, float)) and p_tokens > 0:
+                    self._last_turn_prompt_tokens = int(p_tokens)
+                    self._last_turn_prompt_source = "usage_prompt"
+                elif isinstance(c_tokens, (int, float)):
+                    prompt_derived = int(t_tokens) - int(c_tokens)
+                    if prompt_derived > 0:
+                        self._last_turn_prompt_tokens = prompt_derived
+                        self._last_turn_prompt_source = "usage_total_minus_completion"
             elif isinstance(p_tokens, (int, float)) and isinstance(c_tokens, (int, float)):
                 # If we have both prompt and completion tokens, sum them
                 total_tokens_this_turn = int(p_tokens) + int(c_tokens)
                 token_source = "provider_sum"
+                if p_tokens > 0:
+                    self._last_turn_prompt_tokens = int(p_tokens)
+                    self._last_turn_prompt_source = "usage_prompt"
             elif isinstance(p_tokens, (int, float)) and p_tokens > 0:
                 # Fallback: use prompt tokens only (completion might be 0 for tool calls)
                 total_tokens_this_turn = int(p_tokens)
                 token_source = "provider_prompt"
+                self._last_turn_prompt_tokens = int(p_tokens)
+                self._last_turn_prompt_source = "usage_prompt"
             else:
                 # Estimate with unified chain (provider counter -> tiktoken), plus completion tiktoken.
                 estimated_prompt, prompt_source = self._estimate_prompt_tokens_chain(messages, tool_defs)
                 estimated_completion = self._estimate_completion_tokens(response.content or "")
                 total_tokens_this_turn = estimated_prompt + estimated_completion
+                if estimated_prompt > 0:
+                    self._last_turn_prompt_tokens = int(estimated_prompt)
+                    self._last_turn_prompt_source = str(prompt_source or "tiktoken")
                 if total_tokens_this_turn > 0:
                     token_source = (
                         "tiktoken"
@@ -779,6 +807,12 @@ class AgentLoop:
                 current_message=msg.content, channel=channel, chat_id=chat_id,
             )
             final_content, _, all_msgs, _, _ = await self._run_agent_loop(messages)
+            if self._last_turn_prompt_tokens > 0:
+                session.metadata["_last_prompt_tokens"] = self._last_turn_prompt_tokens
+                session.metadata["_last_prompt_source"] = self._last_turn_prompt_source
+            else:
+                session.metadata.pop("_last_prompt_tokens", None)
+                session.metadata.pop("_last_prompt_source", None)
             self._save_turn(session, all_msgs, 1 + len(history))
             self.sessions.save(session)
             self._schedule_background_compression(session.key)
@@ -858,6 +892,13 @@ class AgentLoop:
         if final_content is None:
             final_content = "I've completed processing but have no response to give."
 
+        if self._last_turn_prompt_tokens > 0:
+            session.metadata["_last_prompt_tokens"] = self._last_turn_prompt_tokens
+            session.metadata["_last_prompt_source"] = self._last_turn_prompt_source
+        else:
+            session.metadata.pop("_last_prompt_tokens", None)
+            session.metadata.pop("_last_prompt_source", None)
+
         self._save_turn(session, all_msgs, 1 + len(history), total_tokens_this_turn)
         self.sessions.save(session)
         self._schedule_background_compression(session.key)

From a660a25504b48170579a57496378e2fd843a556f Mon Sep 17 00:00:00 2001
From: chengyongru <2755839590@qq.com>
Date: Mon, 9 Mar 2026 22:00:45 +0800
Subject: [PATCH 07/28] feat(wecom): add wecom channel [websocket]

Support text and audio messages (WeCom supports audio messages by default).
---
 nanobot/channels/manager.py |  14 +-
 nanobot/channels/wecom.py   | 352 ++++++++++++++++++++++++++++++++++++
 nanobot/config/schema.py    |   9 +
 pyproject.toml              |   1 +
 4 files changed, 375 insertions(+), 1 deletion(-)
 create mode 100644 nanobot/channels/wecom.py

diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py
index 51539dd..369795a 100644
--- a/nanobot/channels/manager.py
+++ b/nanobot/channels/manager.py
@@ -7,7 +7,6 @@ from typing import Any
 
 from loguru import logger
 
-from nanobot.bus.events import OutboundMessage
 from nanobot.bus.queue import MessageBus
 from nanobot.channels.base import BaseChannel
 from nanobot.config.schema import Config
@@ -150,6 +149,19 @@ class ChannelManager:
             except ImportError as e:
                 logger.warning("Matrix channel not available: {}", e)
 
+        # WeCom channel
+        if self.config.channels.wecom.enabled:
+            try:
+                from nanobot.channels.wecom import WecomChannel
+                self.channels["wecom"] = WecomChannel(
+                    self.config.channels.wecom,
+                    self.bus,
+                    groq_api_key=self.config.providers.groq.api_key,
+                )
+                logger.info("WeCom channel enabled")
+            except ImportError as e:
+                logger.warning("WeCom channel not available: {}", e)
+
         self._validate_allow_from()
 
     def _validate_allow_from(self) -> None:
diff --git a/nanobot/channels/wecom.py b/nanobot/channels/wecom.py
new file mode 100644
index 0000000..dc97311
--- /dev/null
+++ b/nanobot/channels/wecom.py
@@ -0,0 +1,352 @@
+"""WeCom (Enterprise WeChat) channel implementation using wecom_aibot_sdk."""
+
+import asyncio
+import importlib.util
+from collections import OrderedDict
+from typing import Any
+
+from loguru import logger
+
+from nanobot.bus.events import OutboundMessage
+from nanobot.bus.queue import MessageBus
+from nanobot.channels.base import BaseChannel
+from nanobot.config.paths import get_media_dir
+from nanobot.config.schema import WecomConfig
+
+WECOM_AVAILABLE = importlib.util.find_spec("wecom_aibot_sdk") is not None
+
+# Message type display mapping
+MSG_TYPE_MAP = {
+    "image": "[image]",
+    "voice": "[voice]",
+    "file": "[file]",
+    "mixed": "[mixed content]",
+}
+
+
+class WecomChannel(BaseChannel):
+    """
+    WeCom (Enterprise WeChat) channel using WebSocket long connection.
+
+    Uses WebSocket to receive events - no public IP or webhook required.
+
+    Requires:
+    - Bot ID and Secret from WeCom AI Bot platform
+    """
+
+    name = "wecom"
+
+    def __init__(self, config: WecomConfig, bus: MessageBus, groq_api_key: str = ""):
+        super().__init__(config, bus)
+        self.config: WecomConfig = config
+        self.groq_api_key = groq_api_key
+        self._client: Any = None
+        self._processed_message_ids: OrderedDict[str, None] = OrderedDict()
+        self._loop: asyncio.AbstractEventLoop | None = None
+        self._generate_req_id = None
+        # Store frame headers for each chat to enable replies
+        self._chat_frames: dict[str, Any] = {}
+
+    async def start(self) -> None:
+        """Start the WeCom bot with WebSocket long connection."""
+        if not WECOM_AVAILABLE:
+            logger.error("WeCom SDK not installed. Run: pip install wecom-aibot-sdk-python")
+            return
+
+        if not self.config.bot_id or not self.config.secret:
+            logger.error("WeCom bot_id and secret not configured")
+            return
+
+        from wecom_aibot_sdk import WSClient, generate_req_id
+
+        self._running = True
+        self._loop = asyncio.get_running_loop()
+        self._generate_req_id = generate_req_id
+
+        # Create WebSocket client
+        self._client = WSClient({
+            "bot_id": self.config.bot_id,
+            "secret": self.config.secret,
+            "reconnect_interval": 1000,
+            "max_reconnect_attempts": -1,  # Infinite reconnect
+            "heartbeat_interval": 30000,
+        })
+
+        # Register event handlers
+        self._client.on("connected", self._on_connected)
+        self._client.on("authenticated", self._on_authenticated)
+        self._client.on("disconnected", self._on_disconnected)
+        self._client.on("error", self._on_error)
+        self._client.on("message.text", self._on_text_message)
+        self._client.on("message.image", self._on_image_message)
+        self._client.on("message.voice", self._on_voice_message)
+        self._client.on("message.file", self._on_file_message)
+        self._client.on("message.mixed", self._on_mixed_message)
+        self._client.on("event.enter_chat", self._on_enter_chat)
+
+        logger.info("WeCom bot starting with WebSocket long connection")
+        logger.info("No public IP required - using WebSocket to receive events")
+
+        # Connect
+        await self._client.connect_async()
+
+        # Keep running until stopped
+        while self._running:
+            await asyncio.sleep(1)
+
+    async def stop(self) -> None:
+        """Stop the WeCom bot."""
+        self._running = False
+        if self._client:
+            self._client.disconnect()
+        logger.info("WeCom bot stopped")
+
+    async def _on_connected(self, frame: Any) -> None:
+        """Handle WebSocket connected event."""
+        logger.info("WeCom WebSocket connected")
+
+    async def _on_authenticated(self, frame: Any) -> None:
+        """Handle authentication success event."""
+        logger.info("WeCom authenticated successfully")
+
+    async def _on_disconnected(self, frame: Any) -> None:
+        """Handle WebSocket disconnected event."""
+        reason = frame.body if hasattr(frame, 'body') else str(frame)
+        logger.warning("WeCom WebSocket disconnected: {}", reason)
+
+    async def _on_error(self, frame: Any) -> None:
+        """Handle error event."""
+        logger.error("WeCom error: {}", frame)
+
+    async def _on_text_message(self, frame: Any) -> None:
+        """Handle text message."""
+        await self._process_message(frame, "text")
+
+    async def _on_image_message(self, frame: Any) -> None:
+        """Handle image message."""
+        await self._process_message(frame, "image")
+
+    async def _on_voice_message(self, frame: Any) -> None:
+        """Handle voice message."""
+        await self._process_message(frame, "voice")
+
+    async def _on_file_message(self, frame: Any) -> None:
+        """Handle file message."""
+        await self._process_message(frame, "file")
+
+    async def _on_mixed_message(self, frame: Any) -> None:
+        """Handle mixed content message."""
+        await self._process_message(frame, "mixed")
+
+    async def _on_enter_chat(self, frame: Any) -> None:
+        """Handle enter_chat event (user opens chat with bot)."""
+        try:
+            # Extract body from WsFrame dataclass or dict
+            if hasattr(frame, 'body'):
+                body = frame.body or {}
+            elif isinstance(frame, dict):
+                body = frame.get("body", frame)
+            else:
+                body = {}
+
+            chat_id = body.get("chatid", "") if isinstance(body, dict) else ""
+
+            if chat_id and self.config.welcome_message:
+                await self._client.reply_welcome(frame, {
+                    "msgtype": "text",
+                    "text": {"content": self.config.welcome_message},
+                })
+        except Exception as e:
+            logger.error("Error handling enter_chat: {}", e)
+
+    async def _process_message(self, frame: Any, msg_type: str) -> None:
+        """Process incoming message and forward to bus."""
+        try:
+            # Extract body from WsFrame dataclass or dict
+            if hasattr(frame, 'body'):
+                body = frame.body or {}
+            elif isinstance(frame, dict):
+                body = frame.get("body", frame)
+            else:
+                body = {}
+
+            # Ensure body is a dict
+            if not isinstance(body, dict):
+                logger.warning("Invalid body type: {}", type(body))
+                return
+
+            # Extract message info
+            msg_id = body.get("msgid", "")
+            if not msg_id:
+                msg_id = f"{body.get('chatid', '')}_{body.get('sendertime', '')}"
+
+            # Deduplication check
+            if msg_id in self._processed_message_ids:
+                return
+            self._processed_message_ids[msg_id] = None
+
+            # Trim cache
+            while len(self._processed_message_ids) > 1000:
+                self._processed_message_ids.popitem(last=False)
+
+            # Extract sender info from "from" field (SDK format)
+            from_info = body.get("from", {})
+            sender_id = from_info.get("userid", "unknown") if isinstance(from_info, dict) else "unknown"
+
+            # For single chat, chatid is the sender's userid
+            # For group chat, chatid is provided in body
+            chat_type = body.get("chattype", "single")
+            chat_id = body.get("chatid", sender_id)
+
+            content_parts = []
+
+            if msg_type == "text":
+                text = body.get("text", {}).get("content", "")
+                if text:
+                    content_parts.append(text)
+
+            elif msg_type == "image":
+                image_info = body.get("image", {})
+                file_url = image_info.get("url", "")
+                aes_key = image_info.get("aeskey", "")
+
+                if file_url and aes_key:
+                    file_path = await self._download_and_save_media(file_url, aes_key, "image")
+                    if file_path:
+                        import os
+                        filename = os.path.basename(file_path)
+                        content_parts.append(f"[image: {filename}]\n[Image: source: {file_path}]")
+                    else:
+                        content_parts.append("[image: download failed]")
+                else:
+                    content_parts.append("[image: download failed]")
+
+            elif msg_type == "voice":
+                voice_info = body.get("voice", {})
+                # Voice message already contains transcribed content from WeCom
+                voice_content = voice_info.get("content", "")
+                if voice_content:
+                    content_parts.append(f"[voice] {voice_content}")
+                else:
+                    content_parts.append("[voice]")
+
+            elif msg_type == "file":
+                file_info = body.get("file", {})
+                file_url = file_info.get("url", "")
+                aes_key = file_info.get("aeskey", "")
+                file_name = file_info.get("name", "unknown")
+
+                if file_url and aes_key:
+                    file_path = await self._download_and_save_media(file_url, aes_key, "file", file_name)
+                    if file_path:
+                        content_parts.append(f"[file: {file_name}]\n[File: source: {file_path}]")
+                    else:
+                        content_parts.append(f"[file: {file_name}: download failed]")
+                else:
+                    content_parts.append(f"[file: {file_name}: download failed]")
+
+            elif msg_type == "mixed":
+                # Mixed content contains multiple message items
+                msg_items = body.get("mixed", {}).get("item", [])
+                for item in msg_items:
+                    item_type = item.get("type", "")
+                    if item_type == "text":
+                        text = item.get("text", {}).get("content", "")
+                        if text:
+                            content_parts.append(text)
+                    else:
+                        content_parts.append(MSG_TYPE_MAP.get(item_type, f"[{item_type}]"))
+
+            else:
+                content_parts.append(MSG_TYPE_MAP.get(msg_type, f"[{msg_type}]"))
+
+            content = "\n".join(content_parts) if content_parts else ""
+
+            if not content:
+                return
+
+            # Store frame for this chat to enable replies
+            self._chat_frames[chat_id] = frame
+
+            # Forward to message bus
+            # Note: media paths are included in content for broader model compatibility
+            await self._handle_message(
+                sender_id=sender_id,
+                chat_id=chat_id,
+                content=content,
+                media=None,
+                metadata={
+                    "message_id": msg_id,
+                    "msg_type": msg_type,
+                    "chat_type": chat_type,
+                }
+            )
+
+        except Exception as e:
+            logger.error("Error processing WeCom message: {}", e)
+
+    async def _download_and_save_media(
+        self,
+        file_url: str,
+        aes_key: str,
+        media_type: str,
+        filename: str | None = None,
+    ) -> str | None:
+        """
+        Download and decrypt media from WeCom and save it in the wecom media dir.
+
+        Only the final path component of the (untrusted) file name is used.
+        Returns the saved file path, or None if the download or write failed.
+        """
+        try:
+            data, fname = await self._client.download_file(file_url, aes_key)
+
+            if not data:
+                logger.warning("Failed to download media from WeCom")
+                return None
+
+            media_dir = get_media_dir("wecom")
+            if not filename:
+                filename = fname or f"{media_type}_{hash(file_url) % 100000}"
+            # Name comes from the sender/server: keep only its last component (no "../").
+            file_path = media_dir / (media_dir / filename).name
+            file_path.write_bytes(data)
+            logger.debug("Downloaded {} to {}", media_type, file_path)
+            return str(file_path)
+
+        except Exception as e:
+            logger.error("Error downloading media: {}", e)
+            return None
+
+    async def send(self, msg: OutboundMessage) -> None:
+        """Reply with msg.content via the frame last stored for msg.chat_id; no-op without one."""
+        if not self._client:
+            logger.warning("WeCom client not initialized")
+            return
+
+        try:
+            content = msg.content.strip()
+            if not content:
+                return
+
+            # Replies need the inbound frame that _process_message saved for this chat
+            frame = self._chat_frames.get(msg.chat_id)
+            if not frame:
+                logger.warning("No frame found for chat {}, cannot reply", msg.chat_id)
+                return
+
+            # Use streaming reply for better UX
+            stream_id = self._generate_req_id("stream")
+
+            # Send as streaming message with finish=True
+            await self._client.reply_stream(
+                frame,
+                stream_id,
+                content,
+                finish=True,
+            )
+
+            logger.debug("WeCom message sent to {}", msg.chat_id)
+
+        except Exception as e:
+            logger.error("Error sending WeCom message: {}", e)
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index 803cb61..63eae48 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -199,7 +199,15 @@ class QQConfig(Base):
     )  # Allowed user openids (empty = public access)
 
 
+class WecomConfig(Base):
+    """WeCom (Enterprise WeChat) AI Bot channel configuration."""
 
+    enabled: bool = False
+    bot_id: str = ""  # Bot ID from WeCom AI Bot platform
+    secret: str = ""  # Bot Secret from WeCom AI Bot platform
+    allow_from: list[str] = Field(default_factory=list)  # Allowed user IDs
+    welcome_message: str = ""  # Welcome message for enter_chat event
+    react_emoji: str = "eyes"  # Emoji for message reactions
 
 class ChannelsConfig(Base):
     """Configuration for chat channels."""
@@ -216,6 +224,7 @@ class ChannelsConfig(Base):
     slack: SlackConfig = Field(default_factory=SlackConfig)
     qq: QQConfig = Field(default_factory=QQConfig)
     matrix: MatrixConfig = Field(default_factory=MatrixConfig)
+    wecom: WecomConfig = Field(default_factory=WecomConfig)
 
 
 class AgentDefaults(Base):
diff --git a/pyproject.toml b/pyproject.toml
index 62cf616..fac53ce 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,6 +44,7 @@ dependencies = [
     "json-repair>=0.57.0,<1.0.0",
     "chardet>=3.0.2,<6.0.0",
     "openai>=2.8.0",
+    "wecom-aibot-sdk-python>=0.1.2",
 ]
 
 [project.optional-dependencies]

From 45c0eebae5a700cfa5da28c2ff31208f34180509 Mon Sep 17 00:00:00 2001
From: chengyongru <2755839590@qq.com>
Date: Tue, 10 Mar 2026 00:53:23 +0800
Subject: [PATCH 08/28] docs(wecom): add wecom configuration guide in readme

---
 README.md | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/README.md b/README.md
index d3401ea..3d5fb63 100644
--- a/README.md
+++ b/README.md
@@ -207,6 +207,7 @@ Connect nanobot to your favorite chat platform.
 | **Slack** | Bot token + App-Level token |
 | **Email** | IMAP/SMTP credentials |
 | **QQ** | App ID + App Secret |
+| **WeCom** | Bot ID + Secret |
 
 <details>
 <summary><b>Telegram</b> (Recommended)</summary>
@@ -676,6 +677,44 @@ nanobot gateway
 
 </details>
 
+<details>
+<summary><b>Wecom (企业微信)</b></summary>
+
+Uses **WebSocket** long connection — no public IP required.
+
+**1. Create a WeCom bot**
+
+In the WeCom client's workspace, click "Intelligent Robot", create a robot, and choose API mode.
+Select the "long connection" mode, then copy the Bot ID and Secret.
+
+**2. Configure**
+
+```json
+{
+  "channels": {
+    "wecom": {
+      "enabled": true,
+      "botId": "your_bot_id",
+      "secret": "your_secret",
+      "allowFrom": [
+        "your_id"
+      ]
+    }
+  }
+}
+```
+
+**3. Run**
+
+```bash
+nanobot gateway
+```
+
+> [!TIP]
+> wecom uses WebSocket to receive messages — no webhook or public IP needed!
+
+</details>
+
 ## 🌐 Agent Social Network
 
 🐈 nanobot is capable of linking to the agent social network (agent community). **Just send one message and your nanobot joins automatically!**

From 2ffeb9295bdb4a5ef308498f60f45b2448ab48d2 Mon Sep 17 00:00:00 2001
From: lailoo <ll1042668699@gmail.com>
Date: Wed, 11 Mar 2026 00:47:09 +0800
Subject: [PATCH 09/28] fix(subagent): preserve reasoning_content in assistant
 messages

Subagent's _run_subagent() was dropping reasoning_content and
thinking_blocks when building assistant messages for the conversation
history. Providers like Deepseek Reasoner require reasoning_content on
every assistant message when thinking mode is active, causing a 400
BadRequestError on the second LLM round-trip.

Align with the main AgentLoop which already preserves these fields via
ContextBuilder.add_assistant_message().

Closes #1834
---
 nanobot/agent/subagent.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py
index f9eda1f..308e67d 100644
--- a/nanobot/agent/subagent.py
+++ b/nanobot/agent/subagent.py
@@ -145,11 +145,19 @@ class SubagentManager:
                         }
                         for tc in response.tool_calls
                     ]
-                    messages.append({
+                    assistant_msg: dict[str, Any] = {
                         "role": "assistant",
                         "content": response.content or "",
                         "tool_calls": tool_call_dicts,
-                    })
+                    }
+                    # Preserve reasoning_content for providers that require it
+                    # (e.g. Deepseek Reasoner mandates this field on every
+                    # assistant message when thinking mode is active).
+                    if response.reasoning_content is not None:
+                        assistant_msg["reasoning_content"] = response.reasoning_content
+                    if response.thinking_blocks:
+                        assistant_msg["thinking_blocks"] = response.thinking_blocks
+                    messages.append(assistant_msg)
 
                     # Execute tools
                     for tool_call in response.tool_calls:

From 62ccda43b980d53c5ac7a79adf8edf43294f1fdb Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Tue, 10 Mar 2026 19:55:06 +0000
Subject: [PATCH 10/28] refactor(memory): switch consolidation to token-based
 context windows

Move consolidation policy into MemoryConsolidator, keep backward compatibility for legacy config, and compress history by token budget instead of message count.
---
 nanobot/agent/loop.py                    | 544 ++---------------------
 nanobot/agent/memory.py                  | 243 +++++++---
 nanobot/cli/commands.py                  |  26 +-
 nanobot/config/schema.py                 |  32 +-
 nanobot/session/manager.py               |  20 +-
 nanobot/utils/helpers.py                 |  85 ++++
 pyproject.toml                           |   1 +
 tests/test_commands.py                   |  33 ++
 tests/test_config_migration.py           |  88 ++++
 tests/test_consolidate_offset.py         | 297 ++-----------
 tests/test_loop_consolidation_tokens.py  | 190 ++++++++
 tests/test_memory_consolidation_types.py |  51 +--
 tests/test_message_tool_suppress.py      |  10 +-
 13 files changed, 709 insertions(+), 911 deletions(-)
 create mode 100644 tests/test_config_migration.py
 create mode 100644 tests/test_loop_consolidation_tokens.py

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index ba35a23..8605a09 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -11,18 +11,12 @@ from typing import TYPE_CHECKING, Any, Awaitable, Callable
 
 from loguru import logger
 
-try:
-    import tiktoken  # type: ignore
-except Exception:  # pragma: no cover - optional dependency
-    tiktoken = None
-
 from nanobot.agent.context import ContextBuilder
+from nanobot.agent.memory import MemoryConsolidator
 from nanobot.agent.subagent import SubagentManager
 from nanobot.agent.tools.cron import CronTool
 from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, ReadFileTool, WriteFileTool
-from nanobot.agent.tools.huggingface import HuggingFaceModelSearchTool
 from nanobot.agent.tools.message import MessageTool
-from nanobot.agent.tools.model_config import ValidateDeployJSONTool, ValidateUsageYAMLTool
 from nanobot.agent.tools.registry import ToolRegistry
 from nanobot.agent.tools.shell import ExecTool
 from nanobot.agent.tools.spawn import SpawnTool
@@ -60,11 +54,8 @@ class AgentLoop:
         max_iterations: int = 40,
         temperature: float = 0.1,
         max_tokens: int = 4096,
-        memory_window: int | None = None,  # backward-compat only (unused)
         reasoning_effort: str | None = None,
-        max_tokens_input: int = 128_000,
-        compression_start_ratio: float = 0.7,
-        compression_target_ratio: float = 0.4,
+        context_window_tokens: int = 65_536,
         brave_api_key: str | None = None,
         web_proxy: str | None = None,
         exec_config: ExecToolConfig | None = None,
@@ -82,18 +73,9 @@ class AgentLoop:
         self.model = model or provider.get_default_model()
         self.max_iterations = max_iterations
         self.temperature = temperature
-        # max_tokens: per-call output token cap (maxTokensOutput in config)
         self.max_tokens = max_tokens
-        # Keep legacy attribute for older call sites/tests; compression no longer uses it.
-        self.memory_window = memory_window
         self.reasoning_effort = reasoning_effort
-        # max_tokens_input: model native context window (maxTokensInput in config)
-        self.max_tokens_input = max_tokens_input
-        # Token-based compression watermarks (fractions of available input budget)
-        self.compression_start_ratio = compression_start_ratio
-        self.compression_target_ratio = compression_target_ratio
-        # Reserve tokens for safety margin
-        self._reserve_tokens = 1000
+        self.context_window_tokens = context_window_tokens
         self.brave_api_key = brave_api_key
         self.web_proxy = web_proxy
         self.exec_config = exec_config or ExecToolConfig()
@@ -123,382 +105,23 @@ class AgentLoop:
         self._mcp_connected = False
         self._mcp_connecting = False
         self._active_tasks: dict[str, list[asyncio.Task]] = {}  # session_key -> tasks
-        self._compression_tasks: dict[str, asyncio.Task] = {}  # session_key -> task
-        self._last_turn_prompt_tokens: int = 0
-        self._last_turn_prompt_source: str = "none"
         self._processing_lock = asyncio.Lock()
+        self.memory_consolidator = MemoryConsolidator(
+            workspace=workspace,
+            provider=provider,
+            model=self.model,
+            sessions=self.sessions,
+            context_window_tokens=context_window_tokens,
+            build_messages=self.context.build_messages,
+            get_tool_definitions=self.tools.get_definitions,
+        )
         self._register_default_tools()
 
-    @staticmethod
-    def _estimate_prompt_tokens(
-        messages: list[dict[str, Any]],
-        tools: list[dict[str, Any]] | None = None,
-    ) -> int:
-        """Estimate prompt tokens with tiktoken (fallback only)."""
-        if tiktoken is None:
-            return 0
-
-        try:
-            enc = tiktoken.get_encoding("cl100k_base")
-            parts: list[str] = []
-            for msg in messages:
-                content = msg.get("content")
-                if isinstance(content, str):
-                    parts.append(content)
-                elif isinstance(content, list):
-                    for part in content:
-                        if isinstance(part, dict) and part.get("type") == "text":
-                            txt = part.get("text", "")
-                            if txt:
-                                parts.append(txt)
-            if tools:
-                parts.append(json.dumps(tools, ensure_ascii=False))
-            return len(enc.encode("\n".join(parts)))
-        except Exception:
-            return 0
-
-    def _estimate_prompt_tokens_chain(
-        self,
-        messages: list[dict[str, Any]],
-        tools: list[dict[str, Any]] | None = None,
-    ) -> tuple[int, str]:
-        """Unified prompt-token estimation: provider counter -> tiktoken."""
-        provider_counter = getattr(self.provider, "estimate_prompt_tokens", None)
-        if callable(provider_counter):
-            try:
-                tokens, source = provider_counter(messages, tools, self.model)
-                if isinstance(tokens, (int, float)) and tokens > 0:
-                    return int(tokens), str(source or "provider_counter")
-            except Exception:
-                logger.debug("Provider token counter failed; fallback to tiktoken")
-
-        estimated = self._estimate_prompt_tokens(messages, tools)
-        if estimated > 0:
-            return int(estimated), "tiktoken"
-        return 0, "none"
-
-    @staticmethod
-    def _estimate_completion_tokens(content: str) -> int:
-        """Estimate completion tokens with tiktoken (fallback only)."""
-        if tiktoken is None:
-            return 0
-        try:
-            enc = tiktoken.get_encoding("cl100k_base")
-            return len(enc.encode(content or ""))
-        except Exception:
-            return 0
-
-    def _get_compressed_until(self, session: Session) -> int:
-        """Read/normalize compressed boundary and migrate old metadata format."""
-        raw = session.metadata.get("_compressed_until", 0)
-        try:
-            compressed_until = int(raw)
-        except (TypeError, ValueError):
-            compressed_until = 0
-
-        if compressed_until <= 0:
-            ranges = session.metadata.get("_compressed_ranges")
-            if isinstance(ranges, list):
-                inferred = 0
-                for item in ranges:
-                    if not isinstance(item, (list, tuple)) or len(item) != 2:
-                        continue
-                    try:
-                        inferred = max(inferred, int(item[1]))
-                    except (TypeError, ValueError):
-                        continue
-                compressed_until = inferred
-
-        compressed_until = max(0, min(compressed_until, len(session.messages)))
-        session.metadata["_compressed_until"] = compressed_until
-        # 兼容旧版本：一旦迁移出连续边界，就可以清理旧字段
-        session.metadata.pop("_compressed_ranges", None)
-        # 注意：不要删除 _cumulative_tokens，压缩逻辑需要它来跟踪累积 token 计数
-        return compressed_until
-
-    def _set_compressed_until(self, session: Session, idx: int) -> None:
-        """Persist a contiguous compressed boundary."""
-        session.metadata["_compressed_until"] = max(0, min(int(idx), len(session.messages)))
-        session.metadata.pop("_compressed_ranges", None)
-        # 注意：不要删除 _cumulative_tokens，压缩逻辑需要它来跟踪累积 token 计数
-
-    @staticmethod
-    def _estimate_message_tokens(message: dict[str, Any]) -> int:
-        """Rough token estimate for a single persisted message."""
-        content = message.get("content")
-        parts: list[str] = []
-        if isinstance(content, str):
-            parts.append(content)
-        elif isinstance(content, list):
-            for part in content:
-                if isinstance(part, dict) and part.get("type") == "text":
-                    txt = part.get("text", "")
-                    if txt:
-                        parts.append(txt)
-                else:
-                    parts.append(json.dumps(part, ensure_ascii=False))
-        elif content is not None:
-            parts.append(json.dumps(content, ensure_ascii=False))
-
-        for key in ("name", "tool_call_id"):
-            val = message.get(key)
-            if isinstance(val, str) and val:
-                parts.append(val)
-        if message.get("tool_calls"):
-            parts.append(json.dumps(message["tool_calls"], ensure_ascii=False))
-
-        payload = "\n".join(parts)
-        if not payload:
-            return 1
-        if tiktoken is not None:
-            try:
-                enc = tiktoken.get_encoding("cl100k_base")
-                return max(1, len(enc.encode(payload)))
-            except Exception:
-                pass
-        return max(1, len(payload) // 4)
-
-    def _pick_compression_chunk_by_tokens(
-        self,
-        session: Session,
-        reduction_tokens: int,
-        *,
-        tail_keep: int = 12,
-    ) -> tuple[int, int, int] | None:
-        """
-        Pick one contiguous old chunk so its estimated size is roughly enough
-        to reduce `reduction_tokens`.
-        """
-        messages = session.messages
-        start = self._get_compressed_until(session)
-        if len(messages) - start <= tail_keep + 2:
-            return None
-
-        end_limit = len(messages) - tail_keep
-        if end_limit - start < 2:
-            return None
-
-        target = max(1, reduction_tokens)
-        end = start
-        collected = 0
-        while end < end_limit and collected < target:
-            collected += self._estimate_message_tokens(messages[end])
-            end += 1
-
-        if end - start < 2:
-            end = min(end_limit, start + 2)
-            collected = sum(self._estimate_message_tokens(m) for m in messages[start:end])
-        if end - start < 2:
-            return None
-        return start, end, collected
-
-    def _estimate_session_prompt_tokens(self, session: Session) -> tuple[int, str]:
-        """
-        Estimate current full prompt tokens for this session view
-        (system + compressed history view + runtime/user placeholder + tools).
-        """
-        history = self._build_compressed_history_view(session)
-        channel, chat_id = (session.key.split(":", 1) if ":" in session.key else (None, None))
-        probe_messages = self.context.build_messages(
-            history=history,
-            current_message="[token-probe]",
-            channel=channel,
-            chat_id=chat_id,
-        )
-        return self._estimate_prompt_tokens_chain(probe_messages, self.tools.get_definitions())
-
-    async def _maybe_compress_history(
-        self,
-        session: Session,
-    ) -> None:
-        """
-        End-of-turn policy:
-        - Estimate current prompt usage from persisted session view.
-        - If above start ratio, perform one best-effort compression chunk.
-        """
-        if not session.messages:
-            self._set_compressed_until(session, 0)
-            return
-
-        budget = max(1, self.max_tokens_input - self.max_tokens - self._reserve_tokens)
-        start_threshold = int(budget * self.compression_start_ratio)
-        target_threshold = int(budget * self.compression_target_ratio)
-        if target_threshold >= start_threshold:
-            target_threshold = max(0, start_threshold - 1)
-
-        # Prefer provider usage prompt tokens from the turn-ending call.
-        # If unavailable, fall back to estimator chain.
-        raw_prompt_tokens = session.metadata.get("_last_prompt_tokens")
-        if isinstance(raw_prompt_tokens, (int, float)) and raw_prompt_tokens > 0:
-            current_tokens = int(raw_prompt_tokens)
-            token_source = str(session.metadata.get("_last_prompt_source") or "usage_prompt")
-        else:
-            current_tokens, token_source = self._estimate_session_prompt_tokens(session)
-
-        current_ratio = current_tokens / budget if budget else 0.0
-        if current_tokens <= 0:
-            logger.debug("Compression skip {}: token estimate unavailable", session.key)
-            return
-        if current_tokens < start_threshold:
-            logger.debug(
-                "Compression idle {}: {}/{} ({:.1%}) via {}",
-                session.key,
-                current_tokens,
-                budget,
-                current_ratio,
-                token_source,
-            )
-            return
-        logger.info(
-            "Compression trigger {}: {}/{} ({:.1%}) via {}",
-            session.key,
-            current_tokens,
-            budget,
-            current_ratio,
-            token_source,
-        )
-
-        reduction_by_target = max(0, current_tokens - target_threshold)
-        reduction_by_delta = max(1, start_threshold - target_threshold)
-        reduction_need = max(reduction_by_target, reduction_by_delta)
-
-        chunk_range = self._pick_compression_chunk_by_tokens(session, reduction_need, tail_keep=10)
-        if chunk_range is None:
-            logger.info("Compression skipped for {}: no compressible chunk", session.key)
-            return
-
-        start_idx, end_idx, estimated_chunk_tokens = chunk_range
-        chunk = session.messages[start_idx:end_idx]
-        if len(chunk) < 2:
-            return
-
-        logger.info(
-            "Compression chunk {}: msgs {}-{} (count={}, est~{}, need~{})",
-            session.key,
-            start_idx,
-            end_idx - 1,
-            len(chunk),
-            estimated_chunk_tokens,
-            reduction_need,
-        )
-        success, _ = await self.context.memory.consolidate_chunk(
-            chunk,
-            self.provider,
-            self.model,
-        )
-        if not success:
-            logger.warning("Compression aborted for {}: consolidation failed", session.key)
-            return
-
-        self._set_compressed_until(session, end_idx)
-        self.sessions.save(session)
-
-        after_tokens, after_source = self._estimate_session_prompt_tokens(session)
-        after_ratio = after_tokens / budget if budget else 0.0
-        reduced = max(0, current_tokens - after_tokens)
-        reduced_ratio = (reduced / current_tokens) if current_tokens > 0 else 0.0
-        logger.info(
-            "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%})",
-            session.key,
-            after_tokens,
-            budget,
-            after_ratio,
-            after_source,
-            reduced,
-            reduced_ratio,
-        )
-
-    def _schedule_background_compression(self, session_key: str) -> None:
-        """Schedule best-effort background compression for a session."""
-        existing = self._compression_tasks.get(session_key)
-        if existing is not None and not existing.done():
-            return
-
-        async def _runner() -> None:
-            session = self.sessions.get_or_create(session_key)
-            try:
-                await self._maybe_compress_history(session)
-            except Exception:
-                logger.exception("Background compression failed for {}", session_key)
-
-        task = asyncio.create_task(_runner())
-        self._compression_tasks[session_key] = task
-
-        def _cleanup(t: asyncio.Task) -> None:
-            cur = self._compression_tasks.get(session_key)
-            if cur is t:
-                self._compression_tasks.pop(session_key, None)
-            try:
-                t.result()
-            except BaseException:
-                pass
-
-        task.add_done_callback(_cleanup)
-
-    async def wait_for_background_compression(self, timeout_s: float | None = None) -> None:
-        """Wait for currently scheduled compression tasks."""
-        pending = [t for t in self._compression_tasks.values() if not t.done()]
-        if not pending:
-            return
-
-        logger.info("Waiting for {} background compression task(s)", len(pending))
-        waiter = asyncio.gather(*pending, return_exceptions=True)
-        if timeout_s is None:
-            await waiter
-            return
-
-        try:
-            await asyncio.wait_for(waiter, timeout=timeout_s)
-        except asyncio.TimeoutError:
-            logger.warning(
-                "Background compression wait timed out after {}s ({} task(s) still running)",
-                timeout_s,
-                len([t for t in self._compression_tasks.values() if not t.done()]),
-            )
-
-    def _build_compressed_history_view(
-        self,
-        session: Session,
-    ) -> list[dict]:
-        """Build non-destructive history view using the compressed boundary."""
-        compressed_until = self._get_compressed_until(session)
-        if compressed_until <= 0:
-            return session.get_history(max_messages=0)
-
-        notice_msg: dict[str, Any] = {
-            "role": "assistant",
-            "content": (
-                "As your assistant, I have compressed earlier context. "
-                "If you need details, please check memory/HISTORY.md."
-            ),
-        }
-
-        tail: list[dict[str, Any]] = []
-        for msg in session.messages[compressed_until:]:
-            entry: dict[str, Any] = {"role": msg["role"], "content": msg.get("content", "")}
-            for k in ("tool_calls", "tool_call_id", "name"):
-                if k in msg:
-                    entry[k] = msg[k]
-            tail.append(entry)
-
-        # Drop leading non-user entries from tail to avoid orphan tool blocks.
-        for i, m in enumerate(tail):
-            if m.get("role") == "user":
-                tail = tail[i:]
-                break
-        else:
-            tail = []
-
-        return [notice_msg, *tail]
-
     def _register_default_tools(self) -> None:
         """Register the default set of tools."""
         allowed_dir = self.workspace if self.restrict_to_workspace else None
         for cls in (ReadFileTool, WriteFileTool, EditFileTool, ListDirTool):
             self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir))
-        self.tools.register(ValidateDeployJSONTool())
-        self.tools.register(ValidateUsageYAMLTool())
-        self.tools.register(HuggingFaceModelSearchTool())
         self.tools.register(ExecTool(
             working_dir=str(self.workspace),
             timeout=self.exec_config.timeout,
@@ -563,24 +186,12 @@ class AgentLoop:
         self,
         initial_messages: list[dict],
         on_progress: Callable[..., Awaitable[None]] | None = None,
-    ) -> tuple[str | None, list[str], list[dict], int, str]:
-        """
-        Run the agent iteration loop.
-
-        Returns:
-            (final_content, tools_used, messages, total_tokens_this_turn, token_source)
-            total_tokens_this_turn: total tokens (prompt + completion) for this turn
-            token_source: provider_total / provider_sum / provider_prompt /
-                          provider_counter+tiktoken_completion / tiktoken / none
-        """
+    ) -> tuple[str | None, list[str], list[dict]]:
+        """Run the agent iteration loop."""
         messages = initial_messages
         iteration = 0
         final_content = None
         tools_used: list[str] = []
-        total_tokens_this_turn = 0
-        token_source = "none"
-        self._last_turn_prompt_tokens = 0
-        self._last_turn_prompt_source = "none"
 
         while iteration < self.max_iterations:
             iteration += 1
@@ -596,63 +207,6 @@ class AgentLoop:
                 reasoning_effort=self.reasoning_effort,
             )
 
-            # Prefer provider usage from the turn-ending model call; fallback to tiktoken.
-            # Calculate total tokens (prompt + completion) for this turn.
-            usage = response.usage or {}
-            t_tokens = usage.get("total_tokens")
-            p_tokens = usage.get("prompt_tokens")
-            c_tokens = usage.get("completion_tokens")
-            
-            if isinstance(t_tokens, (int, float)) and t_tokens > 0:
-                total_tokens_this_turn = int(t_tokens)
-                token_source = "provider_total"
-                if isinstance(p_tokens, (int, float)) and p_tokens > 0:
-                    self._last_turn_prompt_tokens = int(p_tokens)
-                    self._last_turn_prompt_source = "usage_prompt"
-                elif isinstance(c_tokens, (int, float)):
-                    prompt_derived = int(t_tokens) - int(c_tokens)
-                    if prompt_derived > 0:
-                        self._last_turn_prompt_tokens = prompt_derived
-                        self._last_turn_prompt_source = "usage_total_minus_completion"
-            elif isinstance(p_tokens, (int, float)) and isinstance(c_tokens, (int, float)):
-                # If we have both prompt and completion tokens, sum them
-                total_tokens_this_turn = int(p_tokens) + int(c_tokens)
-                token_source = "provider_sum"
-                if p_tokens > 0:
-                    self._last_turn_prompt_tokens = int(p_tokens)
-                    self._last_turn_prompt_source = "usage_prompt"
-            elif isinstance(p_tokens, (int, float)) and p_tokens > 0:
-                # Fallback: use prompt tokens only (completion might be 0 for tool calls)
-                total_tokens_this_turn = int(p_tokens)
-                token_source = "provider_prompt"
-                self._last_turn_prompt_tokens = int(p_tokens)
-                self._last_turn_prompt_source = "usage_prompt"
-            else:
-                # Estimate with unified chain (provider counter -> tiktoken), plus completion tiktoken.
-                estimated_prompt, prompt_source = self._estimate_prompt_tokens_chain(messages, tool_defs)
-                estimated_completion = self._estimate_completion_tokens(response.content or "")
-                total_tokens_this_turn = estimated_prompt + estimated_completion
-                if estimated_prompt > 0:
-                    self._last_turn_prompt_tokens = int(estimated_prompt)
-                    self._last_turn_prompt_source = str(prompt_source or "tiktoken")
-                if total_tokens_this_turn > 0:
-                    token_source = (
-                        "tiktoken"
-                        if prompt_source == "tiktoken"
-                        else f"{prompt_source}+tiktoken_completion"
-                    )
-                if total_tokens_this_turn <= 0:
-                    total_tokens_this_turn = 0
-                    token_source = "none"
-
-            logger.debug(
-                "Turn token usage: source={}, total={}, prompt={}, completion={}",
-                token_source,
-                total_tokens_this_turn,
-                p_tokens if isinstance(p_tokens, (int, float)) else None,
-                c_tokens if isinstance(c_tokens, (int, float)) else None,
-            )
-
             if response.has_tool_calls:
                 if on_progress:
                     thought = self._strip_think(response.content)
@@ -707,7 +261,7 @@ class AgentLoop:
                 "without completing the task. You can try breaking the task into smaller steps."
             )
 
-        return final_content, tools_used, messages, total_tokens_this_turn, token_source
+        return final_content, tools_used, messages
 
     async def run(self) -> None:
         """Run the agent loop, dispatching messages as tasks to stay responsive to /stop."""
@@ -732,9 +286,6 @@ class AgentLoop:
         """Cancel all active tasks and subagents for the session."""
         tasks = self._active_tasks.pop(msg.session_key, [])
         cancelled = sum(1 for t in tasks if not t.done() and t.cancel())
-        comp = self._compression_tasks.get(msg.session_key)
-        if comp is not None and not comp.done() and comp.cancel():
-            cancelled += 1
         for t in tasks:
             try:
                 await t
@@ -781,9 +332,6 @@ class AgentLoop:
     def stop(self) -> None:
         """Stop the agent loop."""
         self._running = False
-        for task in list(self._compression_tasks.values()):
-            if not task.done():
-                task.cancel()
         logger.info("Agent loop stopping")
 
     async def _process_message(
@@ -800,22 +348,17 @@ class AgentLoop:
             logger.info("Processing system message from {}", msg.sender_id)
             key = f"{channel}:{chat_id}"
             session = self.sessions.get_or_create(key)
+            await self.memory_consolidator.maybe_consolidate_by_tokens(session)
             self._set_tool_context(channel, chat_id, msg.metadata.get("message_id"))
-            history = self._build_compressed_history_view(session)
+            history = session.get_history(max_messages=0)
             messages = self.context.build_messages(
                 history=history,
                 current_message=msg.content, channel=channel, chat_id=chat_id,
             )
-            final_content, _, all_msgs, _, _ = await self._run_agent_loop(messages)
-            if self._last_turn_prompt_tokens > 0:
-                session.metadata["_last_prompt_tokens"] = self._last_turn_prompt_tokens
-                session.metadata["_last_prompt_source"] = self._last_turn_prompt_source
-            else:
-                session.metadata.pop("_last_prompt_tokens", None)
-                session.metadata.pop("_last_prompt_source", None)
+            final_content, _, all_msgs = await self._run_agent_loop(messages)
             self._save_turn(session, all_msgs, 1 + len(history))
             self.sessions.save(session)
-            self._schedule_background_compression(session.key)
+            await self.memory_consolidator.maybe_consolidate_by_tokens(session)
             return OutboundMessage(channel=channel, chat_id=chat_id,
                                   content=final_content or "Background task completed.")
 
@@ -829,19 +372,12 @@ class AgentLoop:
         cmd = msg.content.strip().lower()
         if cmd == "/new":
             try:
-                # 在清空会话前，将当前完整对话做一次归档压缩到 MEMORY/HISTORY 中
-                if session.messages:
-                    ok, _ = await self.context.memory.consolidate_chunk(
-                        session.messages,
-                        self.provider,
-                        self.model,
+                if not await self.memory_consolidator.archive_unconsolidated(session):
+                    return OutboundMessage(
+                        channel=msg.channel,
+                        chat_id=msg.chat_id,
+                        content="Memory archival failed, session not cleared. Please try again.",
                     )
-                    if not ok:
-                        return OutboundMessage(
-                            channel=msg.channel,
-                            chat_id=msg.chat_id,
-                            content="Memory archival failed, session not cleared. Please try again.",
-                        )
             except Exception:
                 logger.exception("/new archival failed for {}", session.key)
                 return OutboundMessage(
@@ -859,23 +395,20 @@ class AgentLoop:
             return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id,
                                   content="🐈 nanobot commands:\n/new — Start a new conversation\n/stop — Stop the current task\n/help — Show available commands")
 
+        await self.memory_consolidator.maybe_consolidate_by_tokens(session)
+
         self._set_tool_context(msg.channel, msg.chat_id, msg.metadata.get("message_id"))
         if message_tool := self.tools.get("message"):
             if isinstance(message_tool, MessageTool):
                 message_tool.start_turn()
 
-        # 正常对话：使用压缩后的历史视图（压缩在回合结束后进行）
-        history = self._build_compressed_history_view(session)
+        history = session.get_history(max_messages=0)
         initial_messages = self.context.build_messages(
             history=history,
             current_message=msg.content,
             media=msg.media if msg.media else None,
             channel=msg.channel, chat_id=msg.chat_id,
         )
-        # Add [CRON JOB] identifier for cron sessions (session_key starts with "cron:")
-        if session_key and session_key.startswith("cron:"):
-            if initial_messages and initial_messages[0].get("role") == "system":
-                initial_messages[0]["content"] = f"[CRON JOB] {initial_messages[0]['content']}"
 
         async def _bus_progress(content: str, *, tool_hint: bool = False) -> None:
             meta = dict(msg.metadata or {})
@@ -885,23 +418,16 @@ class AgentLoop:
                 channel=msg.channel, chat_id=msg.chat_id, content=content, metadata=meta,
             ))
 
-        final_content, _, all_msgs, total_tokens_this_turn, token_source = await self._run_agent_loop(
+        final_content, _, all_msgs = await self._run_agent_loop(
             initial_messages, on_progress=on_progress or _bus_progress,
         )
 
         if final_content is None:
             final_content = "I've completed processing but have no response to give."
 
-        if self._last_turn_prompt_tokens > 0:
-            session.metadata["_last_prompt_tokens"] = self._last_turn_prompt_tokens
-            session.metadata["_last_prompt_source"] = self._last_turn_prompt_source
-        else:
-            session.metadata.pop("_last_prompt_tokens", None)
-            session.metadata.pop("_last_prompt_source", None)
-
-        self._save_turn(session, all_msgs, 1 + len(history), total_tokens_this_turn)
+        self._save_turn(session, all_msgs, 1 + len(history))
         self.sessions.save(session)
-        self._schedule_background_compression(session.key)
+        await self.memory_consolidator.maybe_consolidate_by_tokens(session)
 
         if (mt := self.tools.get("message")) and isinstance(mt, MessageTool) and mt._sent_in_turn:
             return None
@@ -913,7 +439,7 @@ class AgentLoop:
             metadata=msg.metadata or {},
         )
 
-    def _save_turn(self, session: Session, messages: list[dict], skip: int, total_tokens_this_turn: int = 0) -> None:
+    def _save_turn(self, session: Session, messages: list[dict], skip: int) -> None:
         """Save new-turn messages into session, truncating large tool results."""
         from datetime import datetime
         for m in messages[skip:]:
@@ -947,14 +473,6 @@ class AgentLoop:
             entry.setdefault("timestamp", datetime.now().isoformat())
             session.messages.append(entry)
         session.updated_at = datetime.now()
-        
-        # Update cumulative token count for compression tracking
-        if total_tokens_this_turn > 0:
-            current_cumulative = session.metadata.get("_cumulative_tokens", 0)
-            if isinstance(current_cumulative, (int, float)):
-                session.metadata["_cumulative_tokens"] = int(current_cumulative) + total_tokens_this_turn
-            else:
-                session.metadata["_cumulative_tokens"] = total_tokens_this_turn
 
     async def process_direct(
         self,
diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py
index e29788a..cd5f54f 100644
--- a/nanobot/agent/memory.py
+++ b/nanobot/agent/memory.py
@@ -2,17 +2,19 @@
 
 from __future__ import annotations
 
+import asyncio
 import json
+import weakref
 from pathlib import Path
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any, Callable
 
 from loguru import logger
 
-from nanobot.utils.helpers import ensure_dir
+from nanobot.utils.helpers import ensure_dir, estimate_message_tokens, estimate_prompt_tokens_chain
 
 if TYPE_CHECKING:
     from nanobot.providers.base import LLMProvider
-    from nanobot.session.manager import Session
+    from nanobot.session.manager import Session, SessionManager
 
 
 _SAVE_MEMORY_TOOL = [
@@ -26,7 +28,7 @@ _SAVE_MEMORY_TOOL = [
                 "properties": {
                     "history_entry": {
                         "type": "string",
-                        "description": "A paragraph (2-5 sentences) summarizing key events/decisions/topics. "
+                        "description": "A paragraph summarizing key events/decisions/topics. "
                         "Start with [YYYY-MM-DD HH:MM]. Include detail useful for grep search.",
                     },
                     "memory_update": {
@@ -42,6 +44,20 @@ _SAVE_MEMORY_TOOL = [
 ]
 
 
+def _ensure_text(value: Any) -> str:
+    """Normalize tool-call payload values to text for file storage."""
+    return value if isinstance(value, str) else json.dumps(value, ensure_ascii=False)
+
+
+def _normalize_save_memory_args(args: Any) -> dict[str, Any] | None:
+    """Normalize provider tool-call arguments to the expected dict shape."""
+    if isinstance(args, str):
+        args = json.loads(args)
+    if isinstance(args, list):
+        return args[0] if args and isinstance(args[0], dict) else None
+    return args if isinstance(args, dict) else None
+
+
 class MemoryStore:
     """Two-layer memory: MEMORY.md (long-term facts) + HISTORY.md (grep-searchable log)."""
 
@@ -66,29 +82,27 @@ class MemoryStore:
         long_term = self.read_long_term()
         return f"## Long-term Memory\n{long_term}" if long_term else ""
 
-    async def consolidate_chunk(
+    @staticmethod
+    def _format_messages(messages: list[dict]) -> str:
+        lines = []
+        for message in messages:
+            if not message.get("content"):
+                continue
+            tools = f" [tools: {', '.join(message['tools_used'])}]" if message.get("tools_used") else ""
+            lines.append(
+                f"[{message.get('timestamp', '?')[:16]}] {message['role'].upper()}{tools}: {message['content']}"
+            )
+        return "\n".join(lines)
+
+    async def consolidate(
         self,
         messages: list[dict],
         provider: LLMProvider,
         model: str,
-    ) -> tuple[bool, str | None]:
-        """Consolidate a chunk of messages into MEMORY.md + HISTORY.md via LLM tool call.
-
-        Returns (success, None).
-
-        - success: True on success (including no-op), False on failure.
-        - The second return value is reserved for future use (e.g. RAG-style summaries) and is
-          always None in the current implementation.
-        """
+    ) -> bool:
+        """Consolidate the provided message chunk into MEMORY.md + HISTORY.md."""
         if not messages:
-            return True, None
-
-        lines = []
-        for m in messages:
-            if not m.get("content"):
-                continue
-            tools = f" [tools: {', '.join(m['tools_used'])}]" if m.get("tools_used") else ""
-            lines.append(f"[{m.get('timestamp', '?')[:16]}] {m['role'].upper()}{tools}: {m['content']}")
+            return True
 
         current_memory = self.read_long_term()
         prompt = f"""Process this conversation and call the save_memory tool with your consolidation.
@@ -97,24 +111,12 @@ class MemoryStore:
 {current_memory or "(empty)"}
 
 ## Conversation to Process
-{chr(10).join(lines)}"""
+{self._format_messages(messages)}"""
 
         try:
             response = await provider.chat_with_retry(
                 messages=[
-                    {
-                        "role": "system",
-                        "content": (
-                            "You are a memory consolidation agent.\n"
-                            "Your job is to:\n"
-                            "1) Append a concise but grep-friendly entry to HISTORY.md summarizing key events, decisions and topics.\n"
-                            "   - Write 1 paragraph of 2–5 sentences that starts with [YYYY-MM-DD HH:MM].\n"
-                            "   - Include concrete names, IDs and numbers so it is easy to search with grep.\n"
-                            "2) Update long-term MEMORY.md with stable facts and user preferences as markdown, including all existing facts plus new ones.\n"
-                            "3) Optionally return a short context_summary (1–3 sentences) that will replace the raw messages in future dialogue history.\n\n"
-                            "Always call the save_memory tool with history_entry, memory_update and (optionally) context_summary."
-                        ),
-                    },
+                    {"role": "system", "content": "You are a memory consolidation agent. Call the save_memory tool with your consolidation of the conversation."},
                     {"role": "user", "content": prompt},
                 ],
                 tools=_SAVE_MEMORY_TOOL,
@@ -123,35 +125,160 @@ class MemoryStore:
 
             if not response.has_tool_calls:
                 logger.warning("Memory consolidation: LLM did not call save_memory, skipping")
-                return False, None
+                return False
 
-            args = response.tool_calls[0].arguments
-            # Some providers return arguments as a JSON string instead of dict
-            if isinstance(args, str):
-                args = json.loads(args)
-            # Some providers return arguments as a list (handle edge case)
-            if isinstance(args, list):
-                if args and isinstance(args[0], dict):
-                    args = args[0]
-                else:
-                    logger.warning("Memory consolidation: unexpected arguments as empty or non-dict list")
-                    return False, None
-            if not isinstance(args, dict):
-                logger.warning("Memory consolidation: unexpected arguments type {}", type(args).__name__)
-                return False, None
+            args = _normalize_save_memory_args(response.tool_calls[0].arguments)
+            if args is None:
+                logger.warning("Memory consolidation: unexpected save_memory arguments")
+                return False
 
             if entry := args.get("history_entry"):
-                if not isinstance(entry, str):
-                    entry = json.dumps(entry, ensure_ascii=False)
-                self.append_history(entry)
+                self.append_history(_ensure_text(entry))
             if update := args.get("memory_update"):
-                if not isinstance(update, str):
-                    update = json.dumps(update, ensure_ascii=False)
+                update = _ensure_text(update)
                 if update != current_memory:
                     self.write_long_term(update)
 
             logger.info("Memory consolidation done for {} messages", len(messages))
-            return True, None
+            return True
         except Exception:
             logger.exception("Memory consolidation failed")
-            return False, None
+            return False
+
+
+class MemoryConsolidator:
+    """Owns consolidation policy, locking, and session offset updates."""
+
+    _MAX_CONSOLIDATION_ROUNDS = 5
+
+    def __init__(
+        self,
+        workspace: Path,
+        provider: LLMProvider,
+        model: str,
+        sessions: SessionManager,
+        context_window_tokens: int,
+        build_messages: Callable[..., list[dict[str, Any]]],
+        get_tool_definitions: Callable[[], list[dict[str, Any]]],
+    ):
+        self.store = MemoryStore(workspace)
+        self.provider = provider
+        self.model = model
+        self.sessions = sessions
+        self.context_window_tokens = context_window_tokens
+        self._build_messages = build_messages
+        self._get_tool_definitions = get_tool_definitions
+        self._locks: weakref.WeakValueDictionary[str, asyncio.Lock] = weakref.WeakValueDictionary()
+
+    def get_lock(self, session_key: str) -> asyncio.Lock:
+        """Return the shared consolidation lock for one session."""
+        return self._locks.setdefault(session_key, asyncio.Lock())
+
+    async def consolidate_messages(self, messages: list[dict[str, object]]) -> bool:
+        """Archive a selected message chunk into persistent memory."""
+        return await self.store.consolidate(messages, self.provider, self.model)
+
+    def pick_consolidation_boundary(
+        self,
+        session: Session,
+        tokens_to_remove: int,
+    ) -> tuple[int, int] | None:
+        """Pick a user-turn boundary that removes enough old prompt tokens."""
+        start = session.last_consolidated
+        if start >= len(session.messages) or tokens_to_remove <= 0:
+            return None
+
+        removed_tokens = 0
+        last_boundary: tuple[int, int] | None = None
+        for idx in range(start, len(session.messages)):
+            message = session.messages[idx]
+            if idx > start and message.get("role") == "user":
+                last_boundary = (idx, removed_tokens)
+                if removed_tokens >= tokens_to_remove:
+                    return last_boundary
+            removed_tokens += estimate_message_tokens(message)
+
+        return last_boundary
+
+    def estimate_session_prompt_tokens(self, session: Session) -> tuple[int, str]:
+        """Estimate current prompt size for the normal session history view."""
+        history = session.get_history(max_messages=0)
+        channel, chat_id = (session.key.split(":", 1) if ":" in session.key else (None, None))
+        probe_messages = self._build_messages(
+            history=history,
+            current_message="[token-probe]",
+            channel=channel,
+            chat_id=chat_id,
+        )
+        return estimate_prompt_tokens_chain(
+            self.provider,
+            self.model,
+            probe_messages,
+            self._get_tool_definitions(),
+        )
+
+    async def archive_unconsolidated(self, session: Session) -> bool:
+        """Archive the full unconsolidated tail for /new-style session rollover."""
+        lock = self.get_lock(session.key)
+        async with lock:
+            snapshot = session.messages[session.last_consolidated:]
+            if not snapshot:
+                return True
+            return await self.consolidate_messages(snapshot)
+
+    async def maybe_consolidate_by_tokens(self, session: Session) -> None:
+        """Loop: archive old messages until prompt fits within half the context window."""
+        if not session.messages or self.context_window_tokens <= 0:
+            return
+
+        lock = self.get_lock(session.key)
+        async with lock:
+            target = self.context_window_tokens // 2
+            estimated, source = self.estimate_session_prompt_tokens(session)
+            if estimated <= 0:
+                return
+            if estimated < self.context_window_tokens:
+                logger.debug(
+                    "Token consolidation idle {}: {}/{} via {}",
+                    session.key,
+                    estimated,
+                    self.context_window_tokens,
+                    source,
+                )
+                return
+
+            for round_num in range(self._MAX_CONSOLIDATION_ROUNDS):
+                if estimated <= target:
+                    return
+
+                boundary = self.pick_consolidation_boundary(session, max(1, estimated - target))
+                if boundary is None:
+                    logger.debug(
+                        "Token consolidation: no safe boundary for {} (round {})",
+                        session.key,
+                        round_num,
+                    )
+                    return
+
+                end_idx = boundary[0]
+                chunk = session.messages[session.last_consolidated:end_idx]
+                if not chunk:
+                    return
+
+                logger.info(
+                    "Token consolidation round {} for {}: {}/{} via {}, chunk={} msgs",
+                    round_num,
+                    session.key,
+                    estimated,
+                    self.context_window_tokens,
+                    source,
+                    len(chunk),
+                )
+                if not await self.consolidate_messages(chunk):
+                    return
+                session.last_consolidated = end_idx
+                self.sessions.save(session)
+
+                estimated, source = self.estimate_session_prompt_tokens(session)
+                if estimated <= 0:
+                    return
diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index 36e2a53..cf69450 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -191,6 +191,8 @@ def onboard():
         save_config(Config())
         console.print(f"[green]✓[/green] Created config at {config_path}")
 
+    console.print("[dim]Config template now uses `maxTokens` + `contextWindowTokens`; `memoryWindow` is no longer a runtime setting.[/dim]")
+
     # Create workspace
     workspace = get_workspace_path()
 
@@ -283,6 +285,16 @@ def _load_runtime_config(config: str | None = None, workspace: str | None = None
     return loaded
 
 
+def _print_deprecated_memory_window_notice(config: Config) -> None:
+    """Print a console hint if the agent defaults flag a deprecated `memoryWindow`-only config."""
+    if not config.agents.defaults.should_warn_deprecated_memory_window:
+        return
+    console.print(
+        "[yellow]Hint:[/yellow] Detected deprecated `memoryWindow` without `contextWindowTokens`. "
+        "`memoryWindow` is ignored; run [cyan]nanobot onboard[/cyan] to refresh your config template."
+    )
+
+
 # ============================================================================
 # Gateway / Server
 # ============================================================================
@@ -310,6 +322,7 @@ def gateway(
         logging.basicConfig(level=logging.DEBUG)
 
     config = _load_runtime_config(config, workspace)
+    _print_deprecated_memory_window_notice(config)
     port = port if port is not None else config.gateway.port
 
     console.print(f"{__logo__} Starting nanobot gateway on port {port}...")
@@ -329,12 +342,10 @@ def gateway(
         workspace=config.workspace_path,
         model=config.agents.defaults.model,
         temperature=config.agents.defaults.temperature,
-        max_tokens=config.agents.defaults.max_tokens_output,
+        max_tokens=config.agents.defaults.max_tokens,
         max_iterations=config.agents.defaults.max_tool_iterations,
         reasoning_effort=config.agents.defaults.reasoning_effort,
-        max_tokens_input=config.agents.defaults.max_tokens_input,
-        compression_start_ratio=config.agents.defaults.compression_start_ratio,
-        compression_target_ratio=config.agents.defaults.compression_target_ratio,
+        context_window_tokens=config.agents.defaults.context_window_tokens,
         brave_api_key=config.tools.web.search.api_key or None,
         web_proxy=config.tools.web.proxy or None,
         exec_config=config.tools.exec,
@@ -496,6 +507,7 @@ def agent(
     from nanobot.cron.service import CronService
 
     config = _load_runtime_config(config, workspace)
+    _print_deprecated_memory_window_notice(config)
     sync_workspace_templates(config.workspace_path)
 
     bus = MessageBus()
@@ -516,12 +528,10 @@ def agent(
         workspace=config.workspace_path,
         model=config.agents.defaults.model,
         temperature=config.agents.defaults.temperature,
-        max_tokens=config.agents.defaults.max_tokens_output,
+        max_tokens=config.agents.defaults.max_tokens,
         max_iterations=config.agents.defaults.max_tool_iterations,
         reasoning_effort=config.agents.defaults.reasoning_effort,
-        max_tokens_input=config.agents.defaults.max_tokens_input,
-        compression_start_ratio=config.agents.defaults.compression_start_ratio,
-        compression_target_ratio=config.agents.defaults.compression_target_ratio,
+        context_window_tokens=config.agents.defaults.context_window_tokens,
         brave_api_key=config.tools.web.search.api_key or None,
         web_proxy=config.tools.web.proxy or None,
         exec_config=config.tools.exec,
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index 0e41d12..a2de239 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -190,22 +190,11 @@ class SlackConfig(Base):
 
 
 class QQConfig(Base):
-    """QQ channel configuration.
-    
-    Supports two implementations:
-    1. Official botpy SDK: requires app_id and secret
-    2. OneBot protocol: requires api_url (and optionally ws_reverse_url, bot_qq, access_token)
-    """
+    """QQ channel configuration using botpy SDK."""
 
     enabled: bool = False
-    # Official botpy SDK fields
     app_id: str = ""  # 机器人 ID (AppID) from q.qq.com
     secret: str = ""  # 机器人密钥 (AppSecret) from q.qq.com
-    # OneBot protocol fields
-    api_url: str = ""  # OneBot HTTP API URL (e.g. "http://localhost:5700")
-    ws_reverse_url: str = ""  # OneBot WebSocket reverse URL (e.g. "ws://localhost:8080/ws/reverse")
-    bot_qq: int | None = None  # Bot's QQ number (for filtering self messages)
-    access_token: str = ""  # Optional access token for OneBot API
     allow_from: list[str] = Field(
         default_factory=list
     )  # Allowed user openids (empty = public access)
@@ -238,20 +227,19 @@ class AgentDefaults(Base):
     provider: str = (
         "auto"  # Provider name (e.g. "anthropic", "openrouter") or "auto" for auto-detection
     )
-    # 原生上下文最大窗口（通常对应模型的 max_input_tokens / max_context_tokens）
-    # 默认按照主流大模型（如 GPT-4o、Claude 3.x 等）的 128k 上下文给一个宽松上限，实际应根据所选模型文档手动调整。
-    max_tokens_input: int = 128_000
-    # 默认单次回复的最大输出 token 上限（调用时可按需要再做截断或比例分配）
-    # 8192 足以覆盖大多数实际对话/工具使用场景，同样可按需手动调整。
-    max_tokens_output: int = 8192
-    # 会话历史压缩触发比例：当估算的输入 token 使用量 >= maxTokensInput * compressionStartRatio 时开始压缩。
-    compression_start_ratio: float = 0.7
-    # 会话历史压缩目标比例：每轮压缩后尽量把估算的输入 token 使用量压到 maxTokensInput * compressionTargetRatio 附近。
-    compression_target_ratio: float = 0.4
+    max_tokens: int = 8192
+    context_window_tokens: int = 65_536
     temperature: float = 0.1
     max_tool_iterations: int = 40
+    # Deprecated compatibility field: accepted from old configs but ignored at runtime.
+    memory_window: int | None = Field(default=None, exclude=True)
     reasoning_effort: str | None = None  # low / medium / high — enables LLM thinking mode
 
+    @property
+    def should_warn_deprecated_memory_window(self) -> bool:
+        """Tell whether `memory_window` is set while `context_window_tokens` is absent from `model_fields_set`."""
+        return not (self.memory_window is None or "context_window_tokens" in self.model_fields_set)
+
 
 class AgentsConfig(Base):
     """Agent configuration."""
diff --git a/nanobot/session/manager.py b/nanobot/session/manager.py
index 1cb8a51..f0a6484 100644
--- a/nanobot/session/manager.py
+++ b/nanobot/session/manager.py
@@ -9,6 +9,7 @@ from typing import Any
 
 from loguru import logger
 
+from nanobot.config.paths import get_legacy_sessions_dir
 from nanobot.utils.helpers import ensure_dir, safe_filename
 
 
@@ -29,6 +30,7 @@ class Session:
     created_at: datetime = field(default_factory=datetime.now)
     updated_at: datetime = field(default_factory=datetime.now)
     metadata: dict[str, Any] = field(default_factory=dict)
+    last_consolidated: int = 0  # Number of messages already consolidated to files
 
     def add_message(self, role: str, content: str, **kwargs: Any) -> None:
         """Add a message to the session."""
@@ -42,13 +44,9 @@ class Session:
         self.updated_at = datetime.now()
 
     def get_history(self, max_messages: int = 500) -> list[dict[str, Any]]:
-        """
-        Return messages for LLM input, aligned to a user turn.
-
-        - max_messages > 0 时只保留最近 max_messages 条；
-        - max_messages <= 0 时不做条数截断，返回全部消息。
-        """
-        sliced = self.messages if max_messages <= 0 else self.messages[-max_messages:]
+        """Return unconsolidated messages for LLM input, aligned to a user turn."""
+        unconsolidated = self.messages[self.last_consolidated:]
+        sliced = unconsolidated[-max_messages:]
 
         # Drop leading non-user messages to avoid orphaned tool_result blocks
         for i, m in enumerate(sliced):
@@ -68,7 +66,7 @@ class Session:
     def clear(self) -> None:
         """Clear all messages and reset session to initial state."""
         self.messages = []
-        self.metadata = {}
+        self.last_consolidated = 0
         self.updated_at = datetime.now()
 
 
@@ -82,7 +80,7 @@ class SessionManager:
     def __init__(self, workspace: Path):
         self.workspace = workspace
         self.sessions_dir = ensure_dir(self.workspace / "sessions")
-        self.legacy_sessions_dir = Path.home() / ".nanobot" / "sessions"
+        self.legacy_sessions_dir = get_legacy_sessions_dir()
         self._cache: dict[str, Session] = {}
 
     def _get_session_path(self, key: str) -> Path:
@@ -134,6 +132,7 @@ class SessionManager:
             messages = []
             metadata = {}
             created_at = None
+            last_consolidated = 0
 
             with open(path, encoding="utf-8") as f:
                 for line in f:
@@ -146,6 +145,7 @@ class SessionManager:
                     if data.get("_type") == "metadata":
                         metadata = data.get("metadata", {})
                         created_at = datetime.fromisoformat(data["created_at"]) if data.get("created_at") else None
+                        last_consolidated = data.get("last_consolidated", 0)
                     else:
                         messages.append(data)
 
@@ -154,6 +154,7 @@ class SessionManager:
                 messages=messages,
                 created_at=created_at or datetime.now(),
                 metadata=metadata,
+                last_consolidated=last_consolidated
             )
         except Exception as e:
             logger.warning("Failed to load session {}: {}", key, e)
@@ -170,6 +171,7 @@ class SessionManager:
                 "created_at": session.created_at.isoformat(),
                 "updated_at": session.updated_at.isoformat(),
                 "metadata": session.metadata,
+                "last_consolidated": session.last_consolidated
             }
             f.write(json.dumps(metadata_line, ensure_ascii=False) + "\n")
             for msg in session.messages:
diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py
index 57c60dc..9242ba6 100644
--- a/nanobot/utils/helpers.py
+++ b/nanobot/utils/helpers.py
@@ -1,8 +1,12 @@
 """Utility functions for nanobot."""
 
+import json
 import re
 from datetime import datetime
 from pathlib import Path
+from typing import Any
+
+import tiktoken
 
 
 def detect_image_mime(data: bytes) -> str | None:
@@ -68,6 +72,87 @@ def split_message(content: str, max_len: int = 2000) -> list[str]:
     return chunks
 
 
+def estimate_prompt_tokens(
+    messages: list[dict[str, Any]],
+    tools: list[dict[str, Any]] | None = None,
+) -> int:
+    """Estimate prompt tokens (message text + JSON tool schemas) with tiktoken; 0 on any failure."""
+    try:
+        enc = tiktoken.get_encoding("cl100k_base")
+        parts: list[str] = []
+        for msg in messages:
+            content = msg.get("content")
+            if isinstance(content, str):
+                parts.append(content)
+            elif isinstance(content, list):
+                for part in content:
+                    # Only non-empty text parts count; every other part type is skipped.
+                    if isinstance(part, dict) and part.get("type") == "text" and part.get("text"):
+                        parts.append(part["text"])
+        if tools:
+            parts.append(json.dumps(tools, ensure_ascii=False))
+        # disallowed_special=(): literal "<|endoftext|>"-style text must not raise and zero the estimate.
+        return len(enc.encode("\n".join(parts), disallowed_special=()))
+    except Exception:
+        return 0
+
+
+def estimate_message_tokens(message: dict[str, Any]) -> int:
+    """Estimate tokens for one persisted message (content, name, tool ids/calls); never below 1."""
+    content = message.get("content")
+    parts: list[str] = []
+    if isinstance(content, str):
+        parts.append(content)
+    elif isinstance(content, list):
+        for part in content:
+            if isinstance(part, dict) and part.get("type") == "text":
+                if text := part.get("text", ""):
+                    parts.append(text)
+            else:
+                parts.append(json.dumps(part, ensure_ascii=False))  # non-text parts count as JSON
+    elif content is not None:
+        parts.append(json.dumps(content, ensure_ascii=False))
+
+    for key in ("name", "tool_call_id"):
+        value = message.get(key)
+        if isinstance(value, str) and value:
+            parts.append(value)
+    if message.get("tool_calls"):
+        parts.append(json.dumps(message["tool_calls"], ensure_ascii=False))
+
+    payload = "\n".join(parts)
+    if not payload:
+        return 1
+    try:
+        enc = tiktoken.get_encoding("cl100k_base")
+        # disallowed_special=(): special-token look-alikes in text are encoded, not rejected.
+        return max(1, len(enc.encode(payload, disallowed_special=())))
+    except Exception:
+        return max(1, len(payload) // 4)  # rough fallback: about 4 characters per token
+
+
+def estimate_prompt_tokens_chain(
+    provider: Any,
+    model: str | None,
+    messages: list[dict[str, Any]],
+    tools: list[dict[str, Any]] | None = None,
+) -> tuple[int, str]:
+    """Return `(tokens, source)`: the provider's counter if usable, else tiktoken, else `(0, "none")`."""
+    counter = getattr(provider, "estimate_prompt_tokens", None)
+    if callable(counter):
+        try:
+            count, origin = counter(messages, tools, model)
+            if isinstance(count, (int, float)) and count > 0:
+                return int(count), str(origin or "provider_counter")
+        except Exception:  # best effort: a failing provider counter falls through to tiktoken
+            pass
+
+    # Reached when the provider has no counter, it raised, or it returned no positive numeric count.
+    fallback = estimate_prompt_tokens(messages, tools)
+    outcome = (int(fallback), "tiktoken") if fallback > 0 else (0, "none")
+    return outcome
+
+
 def sync_workspace_templates(workspace: Path, silent: bool = False) -> list[str]:
     """Sync bundled templates to workspace. Only creates missing files."""
     from importlib.resources import files as pkg_files
diff --git a/pyproject.toml b/pyproject.toml
index 62cf616..0344348 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,6 +44,7 @@ dependencies = [
     "json-repair>=0.57.0,<1.0.0",
     "chardet>=3.0.2,<6.0.0",
     "openai>=2.8.0",
+    "tiktoken>=0.12.0,<1.0.0",
 ]
 
 [project.optional-dependencies]
diff --git a/tests/test_commands.py b/tests/test_commands.py
index 5e3760a..1375a3a 100644
--- a/tests/test_commands.py
+++ b/tests/test_commands.py
@@ -267,6 +267,16 @@ def test_agent_workspace_override_wins_over_config_workspace(mock_agent_runtime,
     assert mock_agent_runtime["agent_loop_cls"].call_args.kwargs["workspace"] == workspace_path
 
 
+def test_agent_warns_about_deprecated_memory_window(mock_agent_runtime):
+    """`agent` output names `memoryWindow` and `contextWindowTokens` once `memory_window` is set."""
+    mock_agent_runtime["config"].agents.defaults.memory_window = 100
+    outcome = runner.invoke(app, ["agent", "-m", "hello"])
+
+    assert outcome.exit_code == 0
+    for option_name in ("memoryWindow", "contextWindowTokens"):
+        assert option_name in outcome.stdout
+
+
 def test_gateway_uses_workspace_from_config_by_default(monkeypatch, tmp_path: Path) -> None:
     config_file = tmp_path / "instance" / "config.json"
     config_file.parent.mkdir(parents=True)
@@ -327,6 +337,29 @@ def test_gateway_workspace_option_overrides_config(monkeypatch, tmp_path: Path)
     assert seen["workspace"] == override
     assert config.workspace_path == override
 
+
+def test_gateway_warns_about_deprecated_memory_window(monkeypatch, tmp_path: Path) -> None:
+    """`gateway` output names `memoryWindow` and `contextWindowTokens` when `memory_window` is set."""
+    config_file = tmp_path / "instance" / "config.json"
+    config_file.parent.mkdir(parents=True)
+    config_file.write_text("{}")
+
+    config = Config()
+    config.agents.defaults.memory_window = 100
+    # Loader calls are stubbed to return `config`; `_make_provider` is made to raise `_StopGateway`.
+    monkeypatch.setattr("nanobot.config.loader.set_config_path", lambda _path: None)
+    monkeypatch.setattr("nanobot.config.loader.load_config", lambda _path=None: config)
+    monkeypatch.setattr("nanobot.cli.commands.sync_workspace_templates", lambda _path: None)
+    monkeypatch.setattr(
+        "nanobot.cli.commands._make_provider",
+        lambda _config: (_ for _ in ()).throw(_StopGateway("stop")),
+    )
+
+    result = runner.invoke(app, ["gateway", "--config", str(config_file)])
+    assert isinstance(result.exception, _StopGateway)
+    assert "memoryWindow" in result.stdout
+    assert "contextWindowTokens" in result.stdout
+
 def test_gateway_uses_config_directory_for_cron_store(monkeypatch, tmp_path: Path) -> None:
     config_file = tmp_path / "instance" / "config.json"
     config_file.parent.mkdir(parents=True)
diff --git a/tests/test_config_migration.py b/tests/test_config_migration.py
new file mode 100644
index 0000000..62e601e
--- /dev/null
+++ b/tests/test_config_migration.py
@@ -0,0 +1,88 @@
+import json
+
+from typer.testing import CliRunner
+
+from nanobot.cli.commands import app
+from nanobot.config.loader import load_config, save_config
+
+runner = CliRunner()
+
+
+def test_load_config_keeps_max_tokens_and_warns_on_legacy_memory_window(tmp_path) -> None:
+    """A config with `maxTokens` + legacy `memoryWindow` loads with the warning flag raised."""
+    legacy_payload = {
+        "agents": {
+            "defaults": {
+                "maxTokens": 1234,
+                "memoryWindow": 42,
+            }
+        }
+    }
+    config_path = tmp_path / "config.json"
+    config_path.write_text(json.dumps(legacy_payload), encoding="utf-8")
+
+    defaults = load_config(config_path).agents.defaults
+
+    # The explicit output limit is kept as written.
+    assert defaults.max_tokens == 1234
+    # contextWindowTokens was not in the file, so the expected value is 65_536.
+    assert defaults.context_window_tokens == 65_536
+    # memoryWindow without contextWindowTokens must raise the deprecation flag.
+    assert defaults.should_warn_deprecated_memory_window is True
+
+
+def test_save_config_writes_context_window_tokens_but_not_memory_window(tmp_path) -> None:
+    """Saving a loaded legacy config writes `contextWindowTokens` and omits `memoryWindow`."""
+    legacy_payload = {
+        "agents": {
+            "defaults": {
+                "maxTokens": 2222,
+                "memoryWindow": 30,
+            }
+        }
+    }
+    config_path = tmp_path / "config.json"
+    config_path.write_text(json.dumps(legacy_payload), encoding="utf-8")
+
+    # Round-trip: load the legacy file, then save it back over the same path.
+    save_config(load_config(config_path), config_path)
+
+    rewritten = json.loads(config_path.read_text(encoding="utf-8"))
+    saved_defaults = rewritten["agents"]["defaults"]
+
+    # The value from the file survives and the new key appears in the output.
+    assert saved_defaults["maxTokens"] == 2222
+    assert saved_defaults["contextWindowTokens"] == 65_536
+    # The legacy key must not be written back.
+    assert "memoryWindow" not in saved_defaults
+
+
+def test_onboard_refresh_rewrites_legacy_config_template(tmp_path, monkeypatch) -> None:
+    """`onboard` on a legacy config rewrites it with `contextWindowTokens` and without `memoryWindow`."""
+    legacy_payload = {
+        "agents": {
+            "defaults": {
+                "maxTokens": 3333,
+                "memoryWindow": 50,
+            }
+        }
+    }
+    config_path = tmp_path / "config.json"
+    config_path.write_text(json.dumps(legacy_payload), encoding="utf-8")
+    workspace = tmp_path / "workspace"
+
+    # Redirect the config and workspace lookups to the temporary paths.
+    monkeypatch.setattr("nanobot.config.loader.get_config_path", lambda: config_path)
+    monkeypatch.setattr("nanobot.cli.commands.get_workspace_path", lambda: workspace)
+
+    # "n\n" is fed to stdin; presumably it declines an onboard prompt (confirm against `onboard`).
+    result = runner.invoke(app, ["onboard"], input="n\n")
+
+    assert result.exit_code == 0
+    assert "contextWindowTokens" in result.stdout
+
+    saved = json.loads(config_path.read_text(encoding="utf-8"))
+    saved_defaults = saved["agents"]["defaults"]
+    assert saved_defaults["maxTokens"] == 3333
+    assert saved_defaults["contextWindowTokens"] == 65_536
+    assert "memoryWindow" not in saved_defaults
diff --git a/tests/test_consolidate_offset.py b/tests/test_consolidate_offset.py
index a3213dd..7d12338 100644
--- a/tests/test_consolidate_offset.py
+++ b/tests/test_consolidate_offset.py
@@ -480,226 +480,35 @@ class TestEmptyAndBoundarySessions:
         assert_messages_content(old_messages, 10, 34)
 
 
-class TestConsolidationDeduplicationGuard:
-    """Test that consolidation tasks are deduplicated and serialized."""
+class TestNewCommandArchival:
+    """Test /new archival behavior with the simplified consolidation flow."""
 
-    @pytest.mark.asyncio
-    async def test_consolidation_guard_prevents_duplicate_tasks(self, tmp_path: Path) -> None:
-        """Concurrent messages above memory_window spawn only one consolidation task."""
+    @staticmethod
+    def _make_loop(tmp_path: Path):
         from nanobot.agent.loop import AgentLoop
-        from nanobot.bus.events import InboundMessage
         from nanobot.bus.queue import MessageBus
         from nanobot.providers.base import LLMResponse
 
         bus = MessageBus()
         provider = MagicMock()
         provider.get_default_model.return_value = "test-model"
+        provider.estimate_prompt_tokens.return_value = (10_000, "test")
         loop = AgentLoop(
-            bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10
+            bus=bus,
+            provider=provider,
+            workspace=tmp_path,
+            model="test-model",
+            context_window_tokens=1,
         )
-
-        loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
+        loop.provider.chat_with_retry = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
         loop.tools.get_definitions = MagicMock(return_value=[])
-
-        session = loop.sessions.get_or_create("cli:test")
-        for i in range(15):
-            session.add_message("user", f"msg{i}")
-            session.add_message("assistant", f"resp{i}")
-        loop.sessions.save(session)
-
-        consolidation_calls = 0
-
-        async def _fake_consolidate(_session, archive_all: bool = False) -> None:
-            nonlocal consolidation_calls
-            consolidation_calls += 1
-            await asyncio.sleep(0.05)
-
-        loop._consolidate_memory = _fake_consolidate  # type: ignore[method-assign]
-
-        msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello")
-        await loop._process_message(msg)
-        await loop._process_message(msg)
-        await asyncio.sleep(0.1)
-
-        assert consolidation_calls == 1, (
-            f"Expected exactly 1 consolidation, got {consolidation_calls}"
-        )
-
-    @pytest.mark.asyncio
-    async def test_new_command_guard_prevents_concurrent_consolidation(
-        self, tmp_path: Path
-    ) -> None:
-        """/new command does not run consolidation concurrently with in-flight consolidation."""
-        from nanobot.agent.loop import AgentLoop
-        from nanobot.bus.events import InboundMessage
-        from nanobot.bus.queue import MessageBus
-        from nanobot.providers.base import LLMResponse
-
-        bus = MessageBus()
-        provider = MagicMock()
-        provider.get_default_model.return_value = "test-model"
-        loop = AgentLoop(
-            bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10
-        )
-
-        loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
-        loop.tools.get_definitions = MagicMock(return_value=[])
-
-        session = loop.sessions.get_or_create("cli:test")
-        for i in range(15):
-            session.add_message("user", f"msg{i}")
-            session.add_message("assistant", f"resp{i}")
-        loop.sessions.save(session)
-
-        consolidation_calls = 0
-        active = 0
-        max_active = 0
-
-        async def _fake_consolidate(_session, archive_all: bool = False) -> None:
-            nonlocal consolidation_calls, active, max_active
-            consolidation_calls += 1
-            active += 1
-            max_active = max(max_active, active)
-            await asyncio.sleep(0.05)
-            active -= 1
-
-        loop._consolidate_memory = _fake_consolidate  # type: ignore[method-assign]
-
-        msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello")
-        await loop._process_message(msg)
-
-        new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
-        await loop._process_message(new_msg)
-        await asyncio.sleep(0.1)
-
-        assert consolidation_calls == 2, (
-            f"Expected normal + /new consolidations, got {consolidation_calls}"
-        )
-        assert max_active == 1, (
-            f"Expected serialized consolidation, observed concurrency={max_active}"
-        )
-
-    @pytest.mark.asyncio
-    async def test_consolidation_tasks_are_referenced(self, tmp_path: Path) -> None:
-        """create_task results are tracked in _consolidation_tasks while in flight."""
-        from nanobot.agent.loop import AgentLoop
-        from nanobot.bus.events import InboundMessage
-        from nanobot.bus.queue import MessageBus
-        from nanobot.providers.base import LLMResponse
-
-        bus = MessageBus()
-        provider = MagicMock()
-        provider.get_default_model.return_value = "test-model"
-        loop = AgentLoop(
-            bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10
-        )
-
-        loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
-        loop.tools.get_definitions = MagicMock(return_value=[])
-
-        session = loop.sessions.get_or_create("cli:test")
-        for i in range(15):
-            session.add_message("user", f"msg{i}")
-            session.add_message("assistant", f"resp{i}")
-        loop.sessions.save(session)
-
-        started = asyncio.Event()
-
-        async def _slow_consolidate(_session, archive_all: bool = False) -> None:
-            started.set()
-            await asyncio.sleep(0.1)
-
-        loop._consolidate_memory = _slow_consolidate  # type: ignore[method-assign]
-
-        msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello")
-        await loop._process_message(msg)
-
-        await started.wait()
-        assert len(loop._consolidation_tasks) == 1, "Task must be referenced while in-flight"
-
-        await asyncio.sleep(0.15)
-        assert len(loop._consolidation_tasks) == 0, (
-            "Task reference must be removed after completion"
-        )
-
-    @pytest.mark.asyncio
-    async def test_new_waits_for_inflight_consolidation_and_preserves_messages(
-        self, tmp_path: Path
-    ) -> None:
-        """/new waits for in-flight consolidation and archives before clear."""
-        from nanobot.agent.loop import AgentLoop
-        from nanobot.bus.events import InboundMessage
-        from nanobot.bus.queue import MessageBus
-        from nanobot.providers.base import LLMResponse
-
-        bus = MessageBus()
-        provider = MagicMock()
-        provider.get_default_model.return_value = "test-model"
-        loop = AgentLoop(
-            bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10
-        )
-
-        loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
-        loop.tools.get_definitions = MagicMock(return_value=[])
-
-        session = loop.sessions.get_or_create("cli:test")
-        for i in range(15):
-            session.add_message("user", f"msg{i}")
-            session.add_message("assistant", f"resp{i}")
-        loop.sessions.save(session)
-
-        started = asyncio.Event()
-        release = asyncio.Event()
-        archived_count = 0
-
-        async def _fake_consolidate(sess, archive_all: bool = False) -> bool:
-            nonlocal archived_count
-            if archive_all:
-                archived_count = len(sess.messages)
-                return True
-            started.set()
-            await release.wait()
-            return True
-
-        loop._consolidate_memory = _fake_consolidate  # type: ignore[method-assign]
-
-        msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello")
-        await loop._process_message(msg)
-        await started.wait()
-
-        new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
-        pending_new = asyncio.create_task(loop._process_message(new_msg))
-
-        await asyncio.sleep(0.02)
-        assert not pending_new.done(), "/new should wait while consolidation is in-flight"
-
-        release.set()
-        response = await pending_new
-        assert response is not None
-        assert "new session started" in response.content.lower()
-        assert archived_count > 0, "Expected /new archival to process a non-empty snapshot"
-
-        session_after = loop.sessions.get_or_create("cli:test")
-        assert session_after.messages == [], "Session should be cleared after successful archival"
+        return loop
 
     @pytest.mark.asyncio
     async def test_new_does_not_clear_session_when_archive_fails(self, tmp_path: Path) -> None:
-        """/new must keep session data if archive step reports failure."""
-        from nanobot.agent.loop import AgentLoop
         from nanobot.bus.events import InboundMessage
-        from nanobot.bus.queue import MessageBus
-        from nanobot.providers.base import LLMResponse
-
-        bus = MessageBus()
-        provider = MagicMock()
-        provider.get_default_model.return_value = "test-model"
-        loop = AgentLoop(
-            bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10
-        )
-
-        loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
-        loop.tools.get_definitions = MagicMock(return_value=[])
 
+        loop = self._make_loop(tmp_path)
         session = loop.sessions.get_or_create("cli:test")
         for i in range(5):
             session.add_message("user", f"msg{i}")
@@ -707,111 +516,61 @@ class TestConsolidationDeduplicationGuard:
         loop.sessions.save(session)
         before_count = len(session.messages)
 
-        async def _failing_consolidate(sess, archive_all: bool = False) -> bool:
-            if archive_all:
-                return False
-            return True
+        async def _failing_consolidate(_messages) -> bool:
+            return False
 
-        loop._consolidate_memory = _failing_consolidate  # type: ignore[method-assign]
+        loop.memory_consolidator.consolidate_messages = _failing_consolidate  # type: ignore[method-assign]
 
         new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
         response = await loop._process_message(new_msg)
 
         assert response is not None
         assert "failed" in response.content.lower()
-        session_after = loop.sessions.get_or_create("cli:test")
-        assert len(session_after.messages) == before_count, (
-            "Session must remain intact when /new archival fails"
-        )
+        assert len(loop.sessions.get_or_create("cli:test").messages) == before_count
 
     @pytest.mark.asyncio
-    async def test_new_archives_only_unconsolidated_messages_after_inflight_task(
-        self, tmp_path: Path
-    ) -> None:
-        """/new should archive only messages not yet consolidated by prior task."""
-        from nanobot.agent.loop import AgentLoop
+    async def test_new_archives_only_unconsolidated_messages(self, tmp_path: Path) -> None:
         from nanobot.bus.events import InboundMessage
-        from nanobot.bus.queue import MessageBus
-        from nanobot.providers.base import LLMResponse
-
-        bus = MessageBus()
-        provider = MagicMock()
-        provider.get_default_model.return_value = "test-model"
-        loop = AgentLoop(
-            bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10
-        )
-
-        loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
-        loop.tools.get_definitions = MagicMock(return_value=[])
 
+        loop = self._make_loop(tmp_path)
         session = loop.sessions.get_or_create("cli:test")
         for i in range(15):
             session.add_message("user", f"msg{i}")
             session.add_message("assistant", f"resp{i}")
+        session.last_consolidated = len(session.messages) - 3
         loop.sessions.save(session)
 
-        started = asyncio.Event()
-        release = asyncio.Event()
         archived_count = -1
 
-        async def _fake_consolidate(sess, archive_all: bool = False) -> bool:
+        async def _fake_consolidate(messages) -> bool:
             nonlocal archived_count
-            if archive_all:
-                archived_count = len(sess.messages)
-                return True
-
-            started.set()
-            await release.wait()
-            sess.last_consolidated = len(sess.messages) - 3
+            archived_count = len(messages)
             return True
 
-        loop._consolidate_memory = _fake_consolidate  # type: ignore[method-assign]
-
-        msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello")
-        await loop._process_message(msg)
-        await started.wait()
+        loop.memory_consolidator.consolidate_messages = _fake_consolidate  # type: ignore[method-assign]
 
         new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
-        pending_new = asyncio.create_task(loop._process_message(new_msg))
-        await asyncio.sleep(0.02)
-        assert not pending_new.done()
-
-        release.set()
-        response = await pending_new
+        response = await loop._process_message(new_msg)
 
         assert response is not None
         assert "new session started" in response.content.lower()
-        assert archived_count == 3, (
-            f"Expected only unconsolidated tail to archive, got {archived_count}"
-        )
+        assert archived_count == 3
 
     @pytest.mark.asyncio
     async def test_new_clears_session_and_responds(self, tmp_path: Path) -> None:
-        """/new clears session and returns confirmation."""
-        from nanobot.agent.loop import AgentLoop
         from nanobot.bus.events import InboundMessage
-        from nanobot.bus.queue import MessageBus
-        from nanobot.providers.base import LLMResponse
-
-        bus = MessageBus()
-        provider = MagicMock()
-        provider.get_default_model.return_value = "test-model"
-        loop = AgentLoop(
-            bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10
-        )
-        loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
-        loop.tools.get_definitions = MagicMock(return_value=[])
 
+        loop = self._make_loop(tmp_path)
         session = loop.sessions.get_or_create("cli:test")
         for i in range(3):
             session.add_message("user", f"msg{i}")
             session.add_message("assistant", f"resp{i}")
         loop.sessions.save(session)
 
-        async def _ok_consolidate(sess, archive_all: bool = False) -> bool:
+        async def _ok_consolidate(_messages) -> bool:
             return True
 
-        loop._consolidate_memory = _ok_consolidate  # type: ignore[method-assign]
+        loop.memory_consolidator.consolidate_messages = _ok_consolidate  # type: ignore[method-assign]
 
         new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
         response = await loop._process_message(new_msg)
diff --git a/tests/test_loop_consolidation_tokens.py b/tests/test_loop_consolidation_tokens.py
new file mode 100644
index 0000000..b0f3dda
--- /dev/null
+++ b/tests/test_loop_consolidation_tokens.py
@@ -0,0 +1,190 @@
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from nanobot.agent.loop import AgentLoop
+import nanobot.agent.memory as memory_module
+from nanobot.bus.queue import MessageBus
+from nanobot.providers.base import LLMResponse
+
+
+def _make_loop(tmp_path, *, estimated_tokens: int, context_window_tokens: int) -> AgentLoop:
+    provider = MagicMock()
+    provider.get_default_model.return_value = "test-model"
+    provider.estimate_prompt_tokens.return_value = (estimated_tokens, "test-counter")
+    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
+
+    loop = AgentLoop(
+        bus=MessageBus(),
+        provider=provider,
+        workspace=tmp_path,
+        model="test-model",
+        context_window_tokens=context_window_tokens,
+    )
+    loop.tools.get_definitions = MagicMock(return_value=[])
+    return loop
+
+
+@pytest.mark.asyncio
+async def test_prompt_below_threshold_does_not_consolidate(tmp_path) -> None:
+    loop = _make_loop(tmp_path, estimated_tokens=100, context_window_tokens=200)
+    loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True)  # type: ignore[method-assign]
+
+    await loop.process_direct("hello", session_key="cli:test")
+
+    loop.memory_consolidator.consolidate_messages.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_prompt_above_threshold_triggers_consolidation(tmp_path, monkeypatch) -> None:
+    loop = _make_loop(tmp_path, estimated_tokens=1000, context_window_tokens=200)
+    loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True)  # type: ignore[method-assign]
+    session = loop.sessions.get_or_create("cli:test")
+    session.messages = [
+        {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"},
+        {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"},
+        {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"},
+    ]
+    loop.sessions.save(session)
+    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _message: 500)
+
+    await loop.process_direct("hello", session_key="cli:test")
+
+    assert loop.memory_consolidator.consolidate_messages.await_count >= 1
+
+
+@pytest.mark.asyncio
+async def test_prompt_above_threshold_archives_until_next_user_boundary(tmp_path, monkeypatch) -> None:
+    loop = _make_loop(tmp_path, estimated_tokens=1000, context_window_tokens=200)
+    loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True)  # type: ignore[method-assign]
+
+    session = loop.sessions.get_or_create("cli:test")
+    session.messages = [
+        {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"},
+        {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"},
+        {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"},
+        {"role": "assistant", "content": "a2", "timestamp": "2026-01-01T00:00:03"},
+        {"role": "user", "content": "u3", "timestamp": "2026-01-01T00:00:04"},
+    ]
+    loop.sessions.save(session)
+
+    token_map = {"u1": 120, "a1": 120, "u2": 120, "a2": 120, "u3": 120}
+    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda message: token_map[message["content"]])
+
+    await loop.memory_consolidator.maybe_consolidate_by_tokens(session)
+
+    archived_chunk = loop.memory_consolidator.consolidate_messages.await_args.args[0]
+    assert [message["content"] for message in archived_chunk] == ["u1", "a1", "u2", "a2"]
+    assert session.last_consolidated == 4
+
+
+@pytest.mark.asyncio
+async def test_consolidation_loops_until_target_met(tmp_path, monkeypatch) -> None:
+    """Verify maybe_consolidate_by_tokens keeps looping until under threshold."""
+    loop = _make_loop(tmp_path, estimated_tokens=0, context_window_tokens=200)
+    loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True)  # type: ignore[method-assign]
+
+    session = loop.sessions.get_or_create("cli:test")
+    session.messages = [
+        {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"},
+        {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"},
+        {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"},
+        {"role": "assistant", "content": "a2", "timestamp": "2026-01-01T00:00:03"},
+        {"role": "user", "content": "u3", "timestamp": "2026-01-01T00:00:04"},
+        {"role": "assistant", "content": "a3", "timestamp": "2026-01-01T00:00:05"},
+        {"role": "user", "content": "u4", "timestamp": "2026-01-01T00:00:06"},
+    ]
+    loop.sessions.save(session)
+
+    call_count = [0]
+    def mock_estimate(_session):
+        call_count[0] += 1
+        if call_count[0] == 1:
+            return (500, "test")
+        if call_count[0] == 2:
+            return (300, "test")
+        return (80, "test")
+
+    loop.memory_consolidator.estimate_session_prompt_tokens = mock_estimate  # type: ignore[method-assign]
+    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _m: 100)
+
+    await loop.memory_consolidator.maybe_consolidate_by_tokens(session)
+
+    assert loop.memory_consolidator.consolidate_messages.await_count == 2
+    assert session.last_consolidated == 6
+
+
+@pytest.mark.asyncio
+async def test_consolidation_continues_below_trigger_until_half_target(tmp_path, monkeypatch) -> None:
+    """Once triggered, consolidation should continue until it drops below half threshold."""
+    loop = _make_loop(tmp_path, estimated_tokens=0, context_window_tokens=200)
+    loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True)  # type: ignore[method-assign]
+
+    session = loop.sessions.get_or_create("cli:test")
+    session.messages = [
+        {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"},
+        {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"},
+        {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"},
+        {"role": "assistant", "content": "a2", "timestamp": "2026-01-01T00:00:03"},
+        {"role": "user", "content": "u3", "timestamp": "2026-01-01T00:00:04"},
+        {"role": "assistant", "content": "a3", "timestamp": "2026-01-01T00:00:05"},
+        {"role": "user", "content": "u4", "timestamp": "2026-01-01T00:00:06"},
+    ]
+    loop.sessions.save(session)
+
+    call_count = [0]
+
+    def mock_estimate(_session):
+        call_count[0] += 1
+        if call_count[0] == 1:
+            return (500, "test")
+        if call_count[0] == 2:
+            return (150, "test")
+        return (80, "test")
+
+    loop.memory_consolidator.estimate_session_prompt_tokens = mock_estimate  # type: ignore[method-assign]
+    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _m: 100)
+
+    await loop.memory_consolidator.maybe_consolidate_by_tokens(session)
+
+    assert loop.memory_consolidator.consolidate_messages.await_count == 2
+    assert session.last_consolidated == 6
+
+
+@pytest.mark.asyncio
+async def test_preflight_consolidation_before_llm_call(tmp_path, monkeypatch) -> None:
+    """Verify preflight consolidation runs before the LLM call in process_direct."""
+    order: list[str] = []
+
+    loop = _make_loop(tmp_path, estimated_tokens=0, context_window_tokens=200)
+
+    async def track_consolidate(messages):
+        order.append("consolidate")
+        return True
+    loop.memory_consolidator.consolidate_messages = track_consolidate  # type: ignore[method-assign]
+
+    async def track_llm(*args, **kwargs):
+        order.append("llm")
+        return LLMResponse(content="ok", tool_calls=[])
+    loop.provider.chat_with_retry = track_llm
+
+    session = loop.sessions.get_or_create("cli:test")
+    session.messages = [
+        {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"},
+        {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"},
+        {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"},
+    ]
+    loop.sessions.save(session)
+    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _m: 500)
+
+    call_count = [0]
+    def mock_estimate(_session):
+        call_count[0] += 1
+        return (1000 if call_count[0] <= 1 else 80, "test")
+    loop.memory_consolidator.estimate_session_prompt_tokens = mock_estimate  # type: ignore[method-assign]
+
+    await loop.process_direct("hello", session_key="cli:test")
+
+    assert "consolidate" in order
+    assert "llm" in order
+    assert order.index("consolidate") < order.index("llm")
diff --git a/tests/test_memory_consolidation_types.py b/tests/test_memory_consolidation_types.py
index 2605bf7..0263f01 100644
--- a/tests/test_memory_consolidation_types.py
+++ b/tests/test_memory_consolidation_types.py
@@ -7,7 +7,7 @@ tool call response, it should serialize them to JSON instead of raising TypeErro
 
 import json
 from pathlib import Path
-from unittest.mock import AsyncMock, MagicMock
+from unittest.mock import AsyncMock
 
 import pytest
 
@@ -15,15 +15,12 @@ from nanobot.agent.memory import MemoryStore
 from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
 
 
-def _make_session(message_count: int = 30, memory_window: int = 50):
-    """Create a mock session with messages."""
-    session = MagicMock()
-    session.messages = [
+def _make_messages(message_count: int = 30):
+    """Create a list of mock messages."""
+    return [
         {"role": "user", "content": f"msg{i}", "timestamp": "2026-01-01 00:00"}
         for i in range(message_count)
     ]
-    session.last_consolidated = 0
-    return session
 
 
 def _make_tool_response(history_entry, memory_update):
@@ -74,9 +71,9 @@ class TestMemoryConsolidationTypeHandling:
             )
         )
         provider.chat_with_retry = provider.chat
-        session = _make_session(message_count=60)
+        messages = _make_messages(message_count=60)
 
-        result = await store.consolidate(session, provider, "test-model", memory_window=50)
+        result = await store.consolidate(messages, provider, "test-model")
 
         assert result is True
         assert store.history_file.exists()
@@ -95,9 +92,9 @@ class TestMemoryConsolidationTypeHandling:
             )
         )
         provider.chat_with_retry = provider.chat
-        session = _make_session(message_count=60)
+        messages = _make_messages(message_count=60)
 
-        result = await store.consolidate(session, provider, "test-model", memory_window=50)
+        result = await store.consolidate(messages, provider, "test-model")
 
         assert result is True
         assert store.history_file.exists()
@@ -131,9 +128,9 @@ class TestMemoryConsolidationTypeHandling:
         )
         provider.chat = AsyncMock(return_value=response)
         provider.chat_with_retry = provider.chat
-        session = _make_session(message_count=60)
+        messages = _make_messages(message_count=60)
 
-        result = await store.consolidate(session, provider, "test-model", memory_window=50)
+        result = await store.consolidate(messages, provider, "test-model")
 
         assert result is True
         assert "User discussed testing." in store.history_file.read_text()
@@ -147,22 +144,22 @@ class TestMemoryConsolidationTypeHandling:
             return_value=LLMResponse(content="I summarized the conversation.", tool_calls=[])
         )
         provider.chat_with_retry = provider.chat
-        session = _make_session(message_count=60)
+        messages = _make_messages(message_count=60)
 
-        result = await store.consolidate(session, provider, "test-model", memory_window=50)
+        result = await store.consolidate(messages, provider, "test-model")
 
         assert result is False
         assert not store.history_file.exists()
 
     @pytest.mark.asyncio
-    async def test_skips_when_few_messages(self, tmp_path: Path) -> None:
-        """Consolidation should be a no-op when messages < keep_count."""
+    async def test_skips_when_message_chunk_is_empty(self, tmp_path: Path) -> None:
+        """Consolidation should be a no-op when the selected chunk is empty."""
         store = MemoryStore(tmp_path)
         provider = AsyncMock()
         provider.chat_with_retry = provider.chat
-        session = _make_session(message_count=10)
+        messages: list[dict] = []
 
-        result = await store.consolidate(session, provider, "test-model", memory_window=50)
+        result = await store.consolidate(messages, provider, "test-model")
 
         assert result is True
         provider.chat.assert_not_called()
@@ -189,9 +186,9 @@ class TestMemoryConsolidationTypeHandling:
         )
         provider.chat = AsyncMock(return_value=response)
         provider.chat_with_retry = provider.chat
-        session = _make_session(message_count=60)
+        messages = _make_messages(message_count=60)
 
-        result = await store.consolidate(session, provider, "test-model", memory_window=50)
+        result = await store.consolidate(messages, provider, "test-model")
 
         assert result is True
         assert "User discussed testing." in store.history_file.read_text()
@@ -215,9 +212,9 @@ class TestMemoryConsolidationTypeHandling:
         )
         provider.chat = AsyncMock(return_value=response)
         provider.chat_with_retry = provider.chat
-        session = _make_session(message_count=60)
+        messages = _make_messages(message_count=60)
 
-        result = await store.consolidate(session, provider, "test-model", memory_window=50)
+        result = await store.consolidate(messages, provider, "test-model")
 
         assert result is False
 
@@ -239,9 +236,9 @@ class TestMemoryConsolidationTypeHandling:
         )
         provider.chat = AsyncMock(return_value=response)
         provider.chat_with_retry = provider.chat
-        session = _make_session(message_count=60)
+        messages = _make_messages(message_count=60)
 
-        result = await store.consolidate(session, provider, "test-model", memory_window=50)
+        result = await store.consolidate(messages, provider, "test-model")
 
         assert result is False
 
@@ -255,7 +252,7 @@ class TestMemoryConsolidationTypeHandling:
                 memory_update="# Memory\nUser likes testing.",
             ),
         ])
-        session = _make_session(message_count=60)
+        messages = _make_messages(message_count=60)
         delays: list[int] = []
 
         async def _fake_sleep(delay: int) -> None:
@@ -263,7 +260,7 @@ class TestMemoryConsolidationTypeHandling:
 
         monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep)
 
-        result = await store.consolidate(session, provider, "test-model", memory_window=50)
+        result = await store.consolidate(messages, provider, "test-model")
 
         assert result is True
         assert provider.calls == 2
diff --git a/tests/test_message_tool_suppress.py b/tests/test_message_tool_suppress.py
index 63b0fd1..1091de4 100644
--- a/tests/test_message_tool_suppress.py
+++ b/tests/test_message_tool_suppress.py
@@ -16,7 +16,7 @@ def _make_loop(tmp_path: Path) -> AgentLoop:
     bus = MessageBus()
     provider = MagicMock()
     provider.get_default_model.return_value = "test-model"
-    return AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10)
+    return AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model")
 
 
 class TestMessageToolSuppressLogic:
@@ -33,7 +33,7 @@ class TestMessageToolSuppressLogic:
             LLMResponse(content="", tool_calls=[tool_call]),
             LLMResponse(content="Done", tool_calls=[]),
         ])
-        loop.provider.chat = AsyncMock(side_effect=lambda *a, **kw: next(calls))
+        loop.provider.chat_with_retry = AsyncMock(side_effect=lambda *a, **kw: next(calls))
         loop.tools.get_definitions = MagicMock(return_value=[])
 
         sent: list[OutboundMessage] = []
@@ -58,7 +58,7 @@ class TestMessageToolSuppressLogic:
             LLMResponse(content="", tool_calls=[tool_call]),
             LLMResponse(content="I've sent the email.", tool_calls=[]),
         ])
-        loop.provider.chat = AsyncMock(side_effect=lambda *a, **kw: next(calls))
+        loop.provider.chat_with_retry = AsyncMock(side_effect=lambda *a, **kw: next(calls))
         loop.tools.get_definitions = MagicMock(return_value=[])
 
         sent: list[OutboundMessage] = []
@@ -77,7 +77,7 @@ class TestMessageToolSuppressLogic:
     @pytest.mark.asyncio
     async def test_not_suppress_when_no_message_tool_used(self, tmp_path: Path) -> None:
         loop = _make_loop(tmp_path)
-        loop.provider.chat = AsyncMock(return_value=LLMResponse(content="Hello!", tool_calls=[]))
+        loop.provider.chat_with_retry = AsyncMock(return_value=LLMResponse(content="Hello!", tool_calls=[]))
         loop.tools.get_definitions = MagicMock(return_value=[])
 
         msg = InboundMessage(channel="feishu", sender_id="user1", chat_id="chat123", content="Hi")
@@ -98,7 +98,7 @@ class TestMessageToolSuppressLogic:
             ),
             LLMResponse(content="Done", tool_calls=[]),
         ])
-        loop.provider.chat = AsyncMock(side_effect=lambda *a, **kw: next(calls))
+        loop.provider.chat_with_retry = AsyncMock(side_effect=lambda *a, **kw: next(calls))
         loop.tools.get_definitions = MagicMock(return_value=[])
         loop.tools.execute = AsyncMock(return_value="ok")
 

From a44ee115d1188a62012d3d7cc38077ff5013f4ee Mon Sep 17 00:00:00 2001
From: greyishsong <greyishsong@qq.com>
Date: Wed, 11 Mar 2026 09:02:28 +0800
Subject: [PATCH 11/28] fix: bump litellm version to 1.82.1 for Moonshot
 provider support

see issue #1628
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 62cf616..7127354 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,7 @@ classifiers = [
 
 dependencies = [
     "typer>=0.20.0,<1.0.0",
-    "litellm>=1.81.5,<2.0.0",
+    "litellm>=1.82.1,<2.0.0",
     "pydantic>=2.12.0,<3.0.0",
     "pydantic-settings>=2.12.0,<3.0.0",
     "websockets>=16.0,<17.0",

From d1df53aaf783d44394d3d335948b5eaf31af803f Mon Sep 17 00:00:00 2001
From: YinAnPing <yapex@192.168.5.4>
Date: Wed, 11 Mar 2026 09:30:33 +0800
Subject: [PATCH 12/28] fix: exclude hidden files when syncing workspace
 templates

Skip files starting with '.' (e.g., macOS AppleDouble sidecar files such as
._AGENTS.md, which hold binary extended-attribute data rather than Markdown)
to prevent UnicodeDecodeError during template synchronization.
---
 nanobot/utils/helpers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 mode change 100644 => 100755 nanobot/utils/helpers.py

diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py
old mode 100644
new mode 100755
index 57c60dc..a387b79
--- a/nanobot/utils/helpers.py
+++ b/nanobot/utils/helpers.py
@@ -88,7 +88,7 @@ def sync_workspace_templates(workspace: Path, silent: bool = False) -> list[str]
         added.append(str(dest.relative_to(workspace)))
 
     for item in tpl.iterdir():
-        if item.name.endswith(".md"):
+        if item.name.endswith(".md") and not item.name.startswith("."):
             _write(item, workspace / item.name)
     _write(tpl / "memory" / "MEMORY.md", workspace / "memory" / "MEMORY.md")
     _write(None, workspace / "memory" / "HISTORY.md")

From 35d811c99790b71ef34c5908b23168eeb526ca6b Mon Sep 17 00:00:00 2001
From: dingyanyi2019 <dingyanyi2019@outlook.com>
Date: Wed, 11 Mar 2026 10:19:43 +0800
Subject: [PATCH 13/28] feat: support retrieving DingTalk voice recognition
 text

---
 nanobot/channels/dingtalk.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/nanobot/channels/dingtalk.py b/nanobot/channels/dingtalk.py
index 3c301a9..cdcba57 100644
--- a/nanobot/channels/dingtalk.py
+++ b/nanobot/channels/dingtalk.py
@@ -57,6 +57,8 @@ class NanobotDingTalkHandler(CallbackHandler):
             content = ""
             if chatbot_msg.text:
                 content = chatbot_msg.text.content.strip()
+            elif chatbot_msg.extensions.get("content", {}).get("recognition"):
+                content = chatbot_msg.extensions["content"]["recognition"].strip()
             if not content:
                 content = message.data.get("text", {}).get("content", "").strip()
 

From 91f17cad00b14b7a550f154791be3fc8eb12b746 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 03:40:33 +0000
Subject: [PATCH 14/28] feat(dingtalk): support voice recognition text fallback

Read DingTalk recognition text when text.content is empty, and add a handler-level regression test for voice transcript delivery.
---
 tests/test_dingtalk_channel.py | 47 +++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/tests/test_dingtalk_channel.py b/tests/test_dingtalk_channel.py
index 7595a33..6051014 100644
--- a/tests/test_dingtalk_channel.py
+++ b/tests/test_dingtalk_channel.py
@@ -1,9 +1,11 @@
+import asyncio
 from types import SimpleNamespace
 
 import pytest
 
 from nanobot.bus.queue import MessageBus
-from nanobot.channels.dingtalk import DingTalkChannel
+import nanobot.channels.dingtalk as dingtalk_module
+from nanobot.channels.dingtalk import DingTalkChannel, NanobotDingTalkHandler
 from nanobot.config.schema import DingTalkConfig
 
 
@@ -64,3 +66,46 @@ async def test_group_send_uses_group_messages_api() -> None:
     assert call["url"] == "https://api.dingtalk.com/v1.0/robot/groupMessages/send"
     assert call["json"]["openConversationId"] == "conv123"
     assert call["json"]["msgKey"] == "sampleMarkdown"
+
+
+@pytest.mark.asyncio
+async def test_handler_uses_voice_recognition_text_when_text_is_empty(monkeypatch) -> None:
+    bus = MessageBus()
+    channel = DingTalkChannel(
+        DingTalkConfig(client_id="app", client_secret="secret", allow_from=["user1"]),
+        bus,
+    )
+    handler = NanobotDingTalkHandler(channel)
+
+    class _FakeChatbotMessage:
+        text = None
+        extensions = {"content": {"recognition": "voice transcript"}}
+        sender_staff_id = "user1"
+        sender_id = "fallback-user"
+        sender_nick = "Alice"
+        message_type = "audio"
+
+        @staticmethod
+        def from_dict(_data):
+            return _FakeChatbotMessage()
+
+    monkeypatch.setattr(dingtalk_module, "ChatbotMessage", _FakeChatbotMessage)
+    monkeypatch.setattr(dingtalk_module, "AckMessage", SimpleNamespace(STATUS_OK="OK"))
+
+    status, body = await handler.process(
+        SimpleNamespace(
+            data={
+                "conversationType": "2",
+                "conversationId": "conv123",
+                "text": {"content": ""},
+            }
+        )
+    )
+
+    await asyncio.gather(*list(channel._background_tasks))
+    msg = await bus.consume_inbound()
+
+    assert (status, body) == ("OK", "OK")
+    assert msg.content == "voice transcript"
+    assert msg.sender_id == "user1"
+    assert msg.chat_id == "group:conv123"

From ddccf25bb1be8529d453d2344eea21bd593021c2 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 03:47:24 +0000
Subject: [PATCH 15/28] fix(subagent): preserve reasoning fields across tool
 turns

Share assistant message construction between the main agent and subagents, and add a regression test to keep reasoning_content and thinking_blocks in follow-up tool rounds.
---
 nanobot/agent/context.py  | 16 +++++++--------
 nanobot/agent/subagent.py | 21 +++++++------------
 nanobot/utils/helpers.py  | 17 ++++++++++++++++
 tests/test_task_cancel.py | 43 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 74 insertions(+), 23 deletions(-)

diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py
index 2c648eb..e47fcb8 100644
--- a/nanobot/agent/context.py
+++ b/nanobot/agent/context.py
@@ -10,7 +10,7 @@ from typing import Any
 
 from nanobot.agent.memory import MemoryStore
 from nanobot.agent.skills import SkillsLoader
-from nanobot.utils.helpers import detect_image_mime
+from nanobot.utils.helpers import build_assistant_message, detect_image_mime
 
 
 class ContextBuilder:
@@ -182,12 +182,10 @@ Reply directly with text for conversations. Only use the 'message' tool to send
         thinking_blocks: list[dict] | None = None,
     ) -> list[dict[str, Any]]:
         """Add an assistant message to the message list."""
-        msg: dict[str, Any] = {"role": "assistant", "content": content}
-        if tool_calls:
-            msg["tool_calls"] = tool_calls
-        if reasoning_content is not None:
-            msg["reasoning_content"] = reasoning_content
-        if thinking_blocks:
-            msg["thinking_blocks"] = thinking_blocks
-        messages.append(msg)
+        messages.append(build_assistant_message(
+            content,
+            tool_calls=tool_calls,
+            reasoning_content=reasoning_content,
+            thinking_blocks=thinking_blocks,
+        ))
         return messages
diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py
index 308e67d..eff0b4f 100644
--- a/nanobot/agent/subagent.py
+++ b/nanobot/agent/subagent.py
@@ -16,6 +16,7 @@ from nanobot.bus.events import InboundMessage
 from nanobot.bus.queue import MessageBus
 from nanobot.config.schema import ExecToolConfig
 from nanobot.providers.base import LLMProvider
+from nanobot.utils.helpers import build_assistant_message
 
 
 class SubagentManager:
@@ -133,7 +134,6 @@ class SubagentManager:
                 )
 
                 if response.has_tool_calls:
-                    # Add assistant message with tool calls
                     tool_call_dicts = [
                         {
                             "id": tc.id,
@@ -145,19 +145,12 @@ class SubagentManager:
                         }
                         for tc in response.tool_calls
                     ]
-                    assistant_msg: dict[str, Any] = {
-                        "role": "assistant",
-                        "content": response.content or "",
-                        "tool_calls": tool_call_dicts,
-                    }
-                    # Preserve reasoning_content for providers that require it
-                    # (e.g. Deepseek Reasoner mandates this field on every
-                    # assistant message when thinking mode is active).
-                    if response.reasoning_content is not None:
-                        assistant_msg["reasoning_content"] = response.reasoning_content
-                    if response.thinking_blocks:
-                        assistant_msg["thinking_blocks"] = response.thinking_blocks
-                    messages.append(assistant_msg)
+                    messages.append(build_assistant_message(
+                        response.content or "",
+                        tool_calls=tool_call_dicts,
+                        reasoning_content=response.reasoning_content,
+                        thinking_blocks=response.thinking_blocks,
+                    ))
 
                     # Execute tools
                     for tool_call in response.tool_calls:
diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py
index 9242ba6..6d2c670 100644
--- a/nanobot/utils/helpers.py
+++ b/nanobot/utils/helpers.py
@@ -72,6 +72,23 @@ def split_message(content: str, max_len: int = 2000) -> list[str]:
     return chunks
 
 
+def build_assistant_message(
+    content: str | None,
+    tool_calls: list[dict[str, Any]] | None = None,
+    reasoning_content: str | None = None,
+    thinking_blocks: list[dict] | None = None,
+) -> dict[str, Any]:
+    """Build a provider-safe assistant message with optional reasoning fields."""
+    msg: dict[str, Any] = {"role": "assistant", "content": content}
+    if tool_calls:
+        msg["tool_calls"] = tool_calls
+    if reasoning_content is not None:
+        msg["reasoning_content"] = reasoning_content
+    if thinking_blocks:
+        msg["thinking_blocks"] = thinking_blocks
+    return msg
+
+
 def estimate_prompt_tokens(
     messages: list[dict[str, Any]],
     tools: list[dict[str, Any]] | None = None,
diff --git a/tests/test_task_cancel.py b/tests/test_task_cancel.py
index 27a2d73..62ab2cc 100644
--- a/tests/test_task_cancel.py
+++ b/tests/test_task_cancel.py
@@ -165,3 +165,46 @@ class TestSubagentCancellation:
         provider.get_default_model.return_value = "test-model"
         mgr = SubagentManager(provider=provider, workspace=MagicMock(), bus=bus)
         assert await mgr.cancel_by_session("nonexistent") == 0
+
+    @pytest.mark.asyncio
+    async def test_subagent_preserves_reasoning_fields_in_tool_turn(self, monkeypatch, tmp_path):
+        from nanobot.agent.subagent import SubagentManager
+        from nanobot.bus.queue import MessageBus
+        from nanobot.providers.base import LLMResponse, ToolCallRequest
+
+        bus = MessageBus()
+        provider = MagicMock()
+        provider.get_default_model.return_value = "test-model"
+
+        captured_second_call: list[dict] = []
+
+        call_count = {"n": 0}
+
+        async def scripted_chat_with_retry(*, messages, **kwargs):
+            call_count["n"] += 1
+            if call_count["n"] == 1:
+                return LLMResponse(
+                    content="thinking",
+                    tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={})],
+                    reasoning_content="hidden reasoning",
+                    thinking_blocks=[{"type": "thinking", "thinking": "step"}],
+                )
+            captured_second_call[:] = messages
+            return LLMResponse(content="done", tool_calls=[])
+        provider.chat_with_retry = scripted_chat_with_retry
+        mgr = SubagentManager(provider=provider, workspace=tmp_path, bus=bus)
+
+        async def fake_execute(self, name, arguments):
+            return "tool result"
+
+        monkeypatch.setattr("nanobot.agent.tools.registry.ToolRegistry.execute", fake_execute)
+
+        await mgr._run_subagent("sub-1", "do task", "label", {"channel": "test", "chat_id": "c1"})
+
+        assistant_messages = [
+            msg for msg in captured_second_call
+            if msg.get("role") == "assistant" and msg.get("tool_calls")
+        ]
+        assert len(assistant_messages) == 1
+        assert assistant_messages[0]["reasoning_content"] == "hidden reasoning"
+        assert assistant_messages[0]["thinking_blocks"] == [{"type": "thinking", "thinking": "step"}]

From 76c6063141f84d8bde3f3a95896c36e4e673c5c7 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 03:50:54 +0000
Subject: [PATCH 16/28] chore: normalize helpers.py file mode

---
 nanobot/utils/helpers.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100755 => 100644 nanobot/utils/helpers.py

diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py
old mode 100755
new mode 100644

From dee4f27dce4a8837eea4b97b882314c50a2b74e3 Mon Sep 17 00:00:00 2001
From: "Jerome Sonnet (letzdoo)" <jerome.sonnet@letzdoo.com>
Date: Wed, 11 Mar 2026 07:43:28 +0400
Subject: [PATCH 17/28] feat: add Ollama as a local LLM provider

Add native Ollama support so local models (e.g. nemotron-3-nano) can be
used without an API key. Add a ProviderSpec with the ollama_chat LiteLLM
prefix and a ProvidersConfig field, and skip API key validation for local
providers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 nanobot/cli/commands.py       |  2 +-
 nanobot/config/schema.py      |  5 +++--
 nanobot/providers/registry.py | 17 +++++++++++++++++
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index cf69450..8387b28 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -252,7 +252,7 @@ def _make_provider(config: Config):
     from nanobot.providers.litellm_provider import LiteLLMProvider
     from nanobot.providers.registry import find_by_name
     spec = find_by_name(provider_name)
-    if not model.startswith("bedrock/") and not (p and p.api_key) and not (spec and spec.is_oauth):
+    if not model.startswith("bedrock/") and not (p and p.api_key) and not (spec and (spec.is_oauth or spec.is_local)):
         console.print("[red]Error: No API key configured.[/red]")
         console.print("Set one in ~/.nanobot/config.json under providers section")
         raise typer.Exit(1)
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index a2de239..9b5821b 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -272,6 +272,7 @@ class ProvidersConfig(Base):
     moonshot: ProviderConfig = Field(default_factory=ProviderConfig)
     minimax: ProviderConfig = Field(default_factory=ProviderConfig)
     aihubmix: ProviderConfig = Field(default_factory=ProviderConfig)  # AiHubMix API gateway
+    ollama: ProviderConfig = Field(default_factory=ProviderConfig)  # Ollama local models
     siliconflow: ProviderConfig = Field(default_factory=ProviderConfig)  # SiliconFlow (硅基流动)
     volcengine: ProviderConfig = Field(default_factory=ProviderConfig)  # VolcEngine (火山引擎)
     openai_codex: ProviderConfig = Field(default_factory=ProviderConfig)  # OpenAI Codex (OAuth)
@@ -375,14 +376,14 @@ class Config(BaseSettings):
         for spec in PROVIDERS:
             p = getattr(self.providers, spec.name, None)
             if p and model_prefix and normalized_prefix == spec.name:
-                if spec.is_oauth or p.api_key:
+                if spec.is_oauth or spec.is_local or p.api_key:
                     return p, spec.name
 
         # Match by keyword (order follows PROVIDERS registry)
         for spec in PROVIDERS:
             p = getattr(self.providers, spec.name, None)
             if p and any(_kw_matches(kw) for kw in spec.keywords):
-                if spec.is_oauth or p.api_key:
+                if spec.is_oauth or spec.is_local or p.api_key:
                     return p, spec.name
 
         # Fallback: gateways first, then others (follows registry order)
diff --git a/nanobot/providers/registry.py b/nanobot/providers/registry.py
index 3ba1a0e..c4bcfe2 100644
--- a/nanobot/providers/registry.py
+++ b/nanobot/providers/registry.py
@@ -360,6 +360,23 @@ PROVIDERS: tuple[ProviderSpec, ...] = (
         strip_model_prefix=False,
         model_overrides=(),
     ),
+    # === Ollama (local, OpenAI-compatible) ===================================
+    ProviderSpec(
+        name="ollama",
+        keywords=("ollama", "nemotron"),
+        env_key="OLLAMA_API_KEY",
+        display_name="Ollama",
+        litellm_prefix="ollama_chat",  # model → ollama_chat/model
+        skip_prefixes=("ollama/", "ollama_chat/"),
+        env_extras=(),
+        is_gateway=False,
+        is_local=True,
+        detect_by_key_prefix="",
+        detect_by_base_keyword="11434",
+        default_api_base="http://localhost:11434",
+        strip_model_prefix=False,
+        model_overrides=(),
+    ),
     # === Auxiliary (not a primary LLM provider) ============================
     # Groq: mainly used for Whisper voice transcription, also usable for LLM.
     # Needs "groq/" prefix for LiteLLM routing. Placed last — it rarely wins fallback.

From c7e2622ee1cb313ca3f7a4a31779813cc3ebc27b Mon Sep 17 00:00:00 2001
From: ethanclaw <ethanbot@163.com>
Date: Wed, 11 Mar 2026 12:25:28 +0800
Subject: [PATCH 18/28] fix(subagent): pass reasoning_content and
 thinking_blocks in subagent messages

Fix issue #1834: Spawn/subagent tool fails with DeepSeek Reasoner
due to a missing reasoning_content field when using thinking mode.

The subagent was not including reasoning_content and thinking_blocks
in assistant messages with tool calls, causing the DeepSeek API to
reject subsequent requests.

- Add reasoning_content to assistant message when subagent makes tool calls
- Add thinking_blocks to assistant message for Anthropic extended thinking
- Add tests to verify both fields are properly passed

Fixes #1834
---
 nanobot/agent/subagent.py        |   2 +
 tests/test_subagent_reasoning.py | 144 +++++++++++++++++++++++++++++++
 2 files changed, 146 insertions(+)
 create mode 100644 tests/test_subagent_reasoning.py

diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py
index f9eda1f..6163a52 100644
--- a/nanobot/agent/subagent.py
+++ b/nanobot/agent/subagent.py
@@ -149,6 +149,8 @@ class SubagentManager:
                         "role": "assistant",
                         "content": response.content or "",
                         "tool_calls": tool_call_dicts,
+                        "reasoning_content": response.reasoning_content,
+                        "thinking_blocks": response.thinking_blocks,
                     })
 
                     # Execute tools
diff --git a/tests/test_subagent_reasoning.py b/tests/test_subagent_reasoning.py
new file mode 100644
index 0000000..5e70506
--- /dev/null
+++ b/tests/test_subagent_reasoning.py
@@ -0,0 +1,144 @@
+"""Tests for subagent reasoning_content and thinking_blocks handling."""
+
+from __future__ import annotations
+
+import asyncio
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+
+class TestSubagentReasoningContent:
+    """Test that subagent properly handles reasoning_content and thinking_blocks."""
+
+    @pytest.mark.asyncio
+    async def test_subagent_message_includes_reasoning_content(self):
+        """Verify reasoning_content is included in assistant messages with tool calls.
+
+        This is the fix for issue #1834: Spawn/subagent tool fails with
+        Deepseek Reasoner due to missing reasoning_content field.
+        """
+        from nanobot.agent.subagent import SubagentManager
+        from nanobot.bus.queue import MessageBus
+        from nanobot.providers.base import LLMResponse, ToolCallRequest
+
+        bus = MessageBus()
+        provider = MagicMock()
+        provider.get_default_model.return_value = "deepseek-reasoner"
+
+        # Create a real Path object for workspace
+        workspace = Path("/tmp/test_workspace")
+        workspace.mkdir(parents=True, exist_ok=True)
+
+        # Capture messages that are sent to the provider
+        captured_messages = []
+
+        async def mock_chat(*args, **kwargs):
+            captured_messages.append(kwargs.get("messages", []))
+            # Return response with tool calls and reasoning_content
+            tool_call = ToolCallRequest(
+                id="test-1",
+                name="read_file",
+                arguments={"path": "/test.txt"},
+            )
+            return LLMResponse(
+                content="",
+                tool_calls=[tool_call],
+                reasoning_content="I need to read this file first",
+            )
+
+        provider.chat_with_retry = AsyncMock(side_effect=mock_chat)
+
+        mgr = SubagentManager(provider=provider, workspace=workspace, bus=bus)
+
+        # Mock the tools registry
+        with patch("nanobot.agent.subagent.ToolRegistry") as MockToolRegistry:
+            mock_registry = MagicMock()
+            mock_registry.get_definitions.return_value = []
+            mock_registry.execute = AsyncMock(return_value="file content")
+            MockToolRegistry.return_value = mock_registry
+
+            result = await mgr.spawn(
+                task="Read a file",
+                label="test",
+                origin_channel="cli",
+                origin_chat_id="direct",
+                session_key="cli:direct",
+            )
+
+            # Wait for the task to complete
+            await asyncio.sleep(0.5)
+
+        # Check the captured messages
+        assert len(captured_messages) >= 1
+        # Find the assistant message with tool_calls
+        found = False
+        for msg_list in captured_messages:
+            for msg in msg_list:
+                if msg.get("role") == "assistant" and msg.get("tool_calls"):
+                    assert "reasoning_content" in msg, "reasoning_content should be in assistant message with tool_calls"
+                    assert msg["reasoning_content"] == "I need to read this file first"
+                    found = True
+        assert found, "Should have found an assistant message with tool_calls"
+
+    @pytest.mark.asyncio
+    async def test_subagent_message_includes_thinking_blocks(self):
+        """Verify thinking_blocks is included in assistant messages with tool calls."""
+        from nanobot.agent.subagent import SubagentManager
+        from nanobot.bus.queue import MessageBus
+        from nanobot.providers.base import LLMResponse, ToolCallRequest
+
+        bus = MessageBus()
+        provider = MagicMock()
+        provider.get_default_model.return_value = "claude-sonnet"
+
+        workspace = Path("/tmp/test_workspace2")
+        workspace.mkdir(parents=True, exist_ok=True)
+
+        captured_messages = []
+
+        async def mock_chat(*args, **kwargs):
+            captured_messages.append(kwargs.get("messages", []))
+            tool_call = ToolCallRequest(
+                id="test-2",
+                name="read_file",
+                arguments={"path": "/test.txt"},
+            )
+            return LLMResponse(
+                content="",
+                tool_calls=[tool_call],
+                thinking_blocks=[
+                    {"signature": "sig1", "thought": "thinking step 1"},
+                    {"signature": "sig2", "thought": "thinking step 2"},
+                ],
+            )
+
+        provider.chat_with_retry = AsyncMock(side_effect=mock_chat)
+
+        mgr = SubagentManager(provider=provider, workspace=workspace, bus=bus)
+
+        with patch("nanobot.agent.subagent.ToolRegistry") as MockToolRegistry:
+            mock_registry = MagicMock()
+            mock_registry.get_definitions.return_value = []
+            mock_registry.execute = AsyncMock(return_value="file content")
+            MockToolRegistry.return_value = mock_registry
+
+            result = await mgr.spawn(
+                task="Read a file",
+                label="test",
+                origin_channel="cli",
+                origin_chat_id="direct",
+            )
+
+            await asyncio.sleep(0.5)
+
+        # Check the captured messages
+        found = False
+        for msg_list in captured_messages:
+            for msg in msg_list:
+                if msg.get("role") == "assistant" and msg.get("tool_calls"):
+                    assert "thinking_blocks" in msg, "thinking_blocks should be in assistant message with tool_calls"
+                    assert len(msg["thinking_blocks"]) == 2
+                    found = True
+        assert found, "Should have found an assistant message with tool_calls"

From 12104c8d46c0b688e0db21617b23d54f012970ba Mon Sep 17 00:00:00 2001
From: ethanclaw <ethanbot@163.com>
Date: Wed, 11 Mar 2026 14:22:33 +0800
Subject: [PATCH 19/28] fix(memory): pass temperature, max_tokens and
 reasoning_effort to memory consolidation

Fix issue #1823: Memory consolidation does not inherit agent temperature
and maxTokens configuration.

The agent's configured generation parameters were not being passed through
to the memory consolidation call, causing it to fall back to default values.
This resulted in the consolidation response being truncated before the
save_memory tool call was emitted.

- Pass temperature, max_tokens, reasoning_effort from AgentLoop to
  MemoryConsolidator and then to MemoryStore.consolidate()
- Forward these parameters to the provider.chat_with_retry() call

Fixes #1823
---
 nanobot/agent/loop.py   |  3 +++
 nanobot/agent/memory.py | 21 ++++++++++++++++++++-
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index 8605a09..edf1e8e 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -114,6 +114,9 @@ class AgentLoop:
             context_window_tokens=context_window_tokens,
             build_messages=self.context.build_messages,
             get_tool_definitions=self.tools.get_definitions,
+            temperature=self.temperature,
+            max_tokens=self.max_tokens,
+            reasoning_effort=self.reasoning_effort,
         )
         self._register_default_tools()
 
diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py
index cd5f54f..d79887b 100644
--- a/nanobot/agent/memory.py
+++ b/nanobot/agent/memory.py
@@ -99,6 +99,9 @@ class MemoryStore:
         messages: list[dict],
         provider: LLMProvider,
         model: str,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        reasoning_effort: str | None = None,
     ) -> bool:
         """Consolidate the provided message chunk into MEMORY.md + HISTORY.md."""
         if not messages:
@@ -121,6 +124,9 @@ class MemoryStore:
                 ],
                 tools=_SAVE_MEMORY_TOOL,
                 model=model,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                reasoning_effort=reasoning_effort,
             )
 
             if not response.has_tool_calls:
@@ -160,6 +166,9 @@ class MemoryConsolidator:
         context_window_tokens: int,
         build_messages: Callable[..., list[dict[str, Any]]],
         get_tool_definitions: Callable[[], list[dict[str, Any]]],
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        reasoning_effort: str | None = None,
     ):
         self.store = MemoryStore(workspace)
         self.provider = provider
@@ -168,6 +177,9 @@ class MemoryConsolidator:
         self.context_window_tokens = context_window_tokens
         self._build_messages = build_messages
         self._get_tool_definitions = get_tool_definitions
+        self._temperature = temperature
+        self._max_tokens = max_tokens
+        self._reasoning_effort = reasoning_effort
         self._locks: weakref.WeakValueDictionary[str, asyncio.Lock] = weakref.WeakValueDictionary()
 
     def get_lock(self, session_key: str) -> asyncio.Lock:
@@ -176,7 +188,14 @@ class MemoryConsolidator:
 
     async def consolidate_messages(self, messages: list[dict[str, object]]) -> bool:
         """Archive a selected message chunk into persistent memory."""
-        return await self.store.consolidate(messages, self.provider, self.model)
+        return await self.store.consolidate(
+            messages,
+            self.provider,
+            self.model,
+            temperature=self._temperature,
+            max_tokens=self._max_tokens,
+            reasoning_effort=self._reasoning_effort,
+        )
 
     def pick_consolidation_boundary(
         self,

From d0b4f0d70d025ba3ffa0a9127b280d8325bb698f Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 07:57:12 +0000
Subject: [PATCH 20/28] feat(wecom): add WeCom channel with SDK pinned to
 GitHub tag v0.1.2

---
 README.md                   | 25 ++++++++++++++-----------
 nanobot/channels/manager.py |  1 -
 nanobot/channels/wecom.py   |  8 ++++----
 nanobot/config/schema.py    |  2 +-
 pyproject.toml              |  4 +++-
 5 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index 5be0ce5..6e8211e 100644
--- a/README.md
+++ b/README.md
@@ -208,7 +208,7 @@ Connect nanobot to your favorite chat platform.
 | **Slack** | Bot token + App-Level token |
 | **Email** | IMAP/SMTP credentials |
 | **QQ** | App ID + App Secret |
-| **Wecom** | Bot ID + App Secret |
+| **Wecom** | Bot ID + Bot Secret |
 
 <details>
 <summary><b>Telegram</b> (Recommended)</summary>
@@ -683,12 +683,17 @@ nanobot gateway
 
 Uses **WebSocket** long connection — no public IP required.
 
-**1. Create a wecom bot**
+**1. Install the optional dependency**
 
-In the client's workspace, click on "Intelligent Robot" to create a robot and choose API mode for creation.
-Select to create in "long connection" mode, and obtain Bot ID and Secret.
+```bash
+pip install nanobot-ai[wecom]
+```
 
-**2. Configure**
+**2. Create a WeCom AI Bot**
+
+Go to the WeCom admin console → Intelligent Robot → Create Robot → select **API mode** with **long connection**. Copy the Bot ID and Secret.
+
+**3. Configure**
 
 ```json
 {
@@ -696,23 +701,21 @@ Select to create in "long connection" mode, and obtain Bot ID and Secret.
     "wecom": {
       "enabled": true,
       "botId": "your_bot_id",
-      "secret": "your_secret",
-      "allowFrom": [
-        "your_id"
-      ]
+      "secret": "your_bot_secret",
+      "allowFrom": ["your_id"]
     }
   }
 }
 ```
 
-**3. Run**
+**4. Run**
 
 ```bash
 nanobot gateway
 ```
 
 > [!TIP]
-> wecom uses WebSocket to receive messages — no webhook or public IP needed!
+> WeCom uses WebSocket to receive messages — no webhook or public IP needed!
 
 </details>
 
diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py
index 369795a..2c5cd3f 100644
--- a/nanobot/channels/manager.py
+++ b/nanobot/channels/manager.py
@@ -156,7 +156,6 @@ class ChannelManager:
                 self.channels["wecom"] = WecomChannel(
                     self.config.channels.wecom,
                     self.bus,
-                    groq_api_key=self.config.providers.groq.api_key,
                 )
                 logger.info("WeCom channel enabled")
             except ImportError as e:
diff --git a/nanobot/channels/wecom.py b/nanobot/channels/wecom.py
index dc97311..1c44451 100644
--- a/nanobot/channels/wecom.py
+++ b/nanobot/channels/wecom.py
@@ -2,6 +2,7 @@
 
 import asyncio
 import importlib.util
+import os
 from collections import OrderedDict
 from typing import Any
 
@@ -36,10 +37,9 @@ class WecomChannel(BaseChannel):
 
     name = "wecom"
 
-    def __init__(self, config: WecomConfig, bus: MessageBus, groq_api_key: str = ""):
+    def __init__(self, config: WecomConfig, bus: MessageBus):
         super().__init__(config, bus)
         self.config: WecomConfig = config
-        self.groq_api_key = groq_api_key
         self._client: Any = None
         self._processed_message_ids: OrderedDict[str, None] = OrderedDict()
         self._loop: asyncio.AbstractEventLoop | None = None
@@ -50,7 +50,7 @@ class WecomChannel(BaseChannel):
     async def start(self) -> None:
         """Start the WeCom bot with WebSocket long connection."""
         if not WECOM_AVAILABLE:
-            logger.error("WeCom SDK not installed. Run: pip install wecom-aibot-sdk-python")
+            logger.error("WeCom SDK not installed. Run: pip install nanobot-ai[wecom]")
             return
 
         if not self.config.bot_id or not self.config.secret:
@@ -213,7 +213,6 @@ class WecomChannel(BaseChannel):
                 if file_url and aes_key:
                     file_path = await self._download_and_save_media(file_url, aes_key, "image")
                     if file_path:
-                        import os
                         filename = os.path.basename(file_path)
                         content_parts.append(f"[image: {filename}]\n[Image: source: {file_path}]")
                     else:
@@ -308,6 +307,7 @@ class WecomChannel(BaseChannel):
             media_dir = get_media_dir("wecom")
             if not filename:
                 filename = fname or f"{media_type}_{hash(file_url) % 100000}"
+            filename = os.path.basename(filename)
 
             file_path = media_dir / filename
             file_path.write_bytes(data)
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index b772d18..bb0d286 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -208,7 +208,7 @@ class WecomConfig(Base):
     secret: str = ""  # Bot Secret from WeCom AI Bot platform
     allow_from: list[str] = Field(default_factory=list)  # Allowed user IDs
     welcome_message: str = ""  # Welcome message for enter_chat event
-    react_emoji: str = "eyes"  # Emoji for message reactions
+
 
 class ChannelsConfig(Base):
     """Configuration for chat channels."""
diff --git a/pyproject.toml b/pyproject.toml
index 0582be6..9868513 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,11 +44,13 @@ dependencies = [
     "json-repair>=0.57.0,<1.0.0",
     "chardet>=3.0.2,<6.0.0",
     "openai>=2.8.0",
-    "wecom-aibot-sdk-python>=0.1.2",
     "tiktoken>=0.12.0,<1.0.0",
 ]
 
 [project.optional-dependencies]
+wecom = [
+    "wecom-aibot-sdk-python @ git+https://github.com/chengyongru/wecom_aibot_sdk.git@v0.1.2",
+]
 matrix = [
     "matrix-nio[e2e]>=0.25.2",
     "mistune>=3.0.0,<4.0.0",

From 7ceddcded643432f0f4b78aa22de7ad107b61f3a Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 08:04:14 +0000
Subject: [PATCH 21/28] fix(wecom): await async disconnect, add SDK attribution
 in README

---
 README.md                 | 7 +++----
 nanobot/channels/wecom.py | 2 +-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 6e8211e..2a49214 100644
--- a/README.md
+++ b/README.md
@@ -681,7 +681,9 @@ nanobot gateway
 <details>
 <summary><b>Wecom (企业微信)</b></summary>
 
-Uses **WebSocket** long connection — no public IP required.
+> Here we use [wecom-aibot-sdk-python](https://github.com/chengyongru/wecom_aibot_sdk) (community Python version of the official [@wecom/aibot-node-sdk](https://www.npmjs.com/package/@wecom/aibot-node-sdk)).
+>
+> Uses **WebSocket** long connection — no public IP required.
 
 **1. Install the optional dependency**
 
@@ -714,9 +716,6 @@ Go to the WeCom admin console → Intelligent Robot → Create Robot → select
 nanobot gateway
 ```
 
-> [!TIP]
-> WeCom uses WebSocket to receive messages — no webhook or public IP needed!
-
 </details>
 
 ## 🌐 Agent Social Network
diff --git a/nanobot/channels/wecom.py b/nanobot/channels/wecom.py
index 1c44451..72be9e2 100644
--- a/nanobot/channels/wecom.py
+++ b/nanobot/channels/wecom.py
@@ -98,7 +98,7 @@ class WecomChannel(BaseChannel):
         """Stop the WeCom bot."""
         self._running = False
         if self._client:
-            self._client.disconnect()
+            await self._client.disconnect()
         logger.info("WeCom bot stopped")
 
     async def _on_connected(self, frame: Any) -> None:

From 486df1ddbd8db4fb248115851254b8fbb03c09f0 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 08:10:38 +0000
Subject: [PATCH 22/28] docs: update table of contents in README

---
 README.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/README.md b/README.md
index 2a49214..ed4e8e7 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,25 @@
 
 📏 Real-time line count: run `bash core_agent_lines.sh` to verify anytime.
 
+## Table of Contents
+
+- [News](#-news)
+- [Key Features](#key-features-of-nanobot)
+- [Architecture](#️-architecture)
+- [Features](#-features)
+- [Install](#-install)
+- [Quick Start](#-quick-start)
+- [Chat Apps](#-chat-apps)
+- [Agent Social Network](#-agent-social-network)
+- [Configuration](#️-configuration)
+- [Multiple Instances](#-multiple-instances)
+- [CLI Reference](#-cli-reference)
+- [Docker](#-docker)
+- [Linux Service](#-linux-service)
+- [Project Structure](#-project-structure)
+- [Contribute & Roadmap](#-contribute--roadmap)
+- [Star History](#-star-history)
+
 ## 📢 News
 
 - **2026-03-08** 🚀 Released **v0.1.4.post4** — a reliability-packed release with safer defaults, better multi-instance support, sturdier MCP, and major channel and provider improvements. Please see [release notes](https://github.com/HKUDS/nanobot/releases/tag/v0.1.4.post4) for details.

From ec87946c04ccf4d453ffea02febcb747139c415c Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 08:11:28 +0000
Subject: [PATCH 23/28] docs: update table of contents position

---
 README.md | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index ed4e8e7..f0e1a6b 100644
--- a/README.md
+++ b/README.md
@@ -18,25 +18,6 @@
 
 📏 Real-time line count: run `bash core_agent_lines.sh` to verify anytime.
 
-## Table of Contents
-
-- [News](#-news)
-- [Key Features](#key-features-of-nanobot)
-- [Architecture](#️-architecture)
-- [Features](#-features)
-- [Install](#-install)
-- [Quick Start](#-quick-start)
-- [Chat Apps](#-chat-apps)
-- [Agent Social Network](#-agent-social-network)
-- [Configuration](#️-configuration)
-- [Multiple Instances](#-multiple-instances)
-- [CLI Reference](#-cli-reference)
-- [Docker](#-docker)
-- [Linux Service](#-linux-service)
-- [Project Structure](#-project-structure)
-- [Contribute & Roadmap](#-contribute--roadmap)
-- [Star History](#-star-history)
-
 ## 📢 News
 
 - **2026-03-08** 🚀 Released **v0.1.4.post4** — a reliability-packed release with safer defaults, better multi-instance support, sturdier MCP, and major channel and provider improvements. Please see [release notes](https://github.com/HKUDS/nanobot/releases/tag/v0.1.4.post4) for details.
@@ -97,6 +78,25 @@
   <img src="nanobot_arch.png" alt="nanobot architecture" width="800">
 </p>
 
+## Table of Contents
+
+- [News](#-news)
+- [Key Features](#key-features-of-nanobot)
+- [Architecture](#️-architecture)
+- [Features](#-features)
+- [Install](#-install)
+- [Quick Start](#-quick-start)
+- [Chat Apps](#-chat-apps)
+- [Agent Social Network](#-agent-social-network)
+- [Configuration](#️-configuration)
+- [Multiple Instances](#-multiple-instances)
+- [CLI Reference](#-cli-reference)
+- [Docker](#-docker)
+- [Linux Service](#-linux-service)
+- [Project Structure](#-project-structure)
+- [Contribute & Roadmap](#-contribute--roadmap)
+- [Star History](#-star-history)
+
 ## ✨ Features
 
 <table align="center">

From 4478838424496b6c233c5402d7fa205f33c683e6 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 08:42:12 +0000
Subject: [PATCH 24/28] fix(pr-1863): complete Ollama provider routing and
 README docs

---
 README.md                | 32 ++++++++++++++++++++++++++++++++
 nano.2091796.save        |  2 ++
 nano.2095802.save        |  2 ++
 nanobot/config/schema.py | 13 +++++++++++--
 tests/test_commands.py   | 29 +++++++++++++++++++++++++++++
 5 files changed, 76 insertions(+), 2 deletions(-)
 create mode 100644 nano.2091796.save
 create mode 100644 nano.2095802.save

diff --git a/README.md b/README.md
index f0e1a6b..8dba2d7 100644
--- a/README.md
+++ b/README.md
@@ -778,6 +778,7 @@ Config file: `~/.nanobot/config.json`
 | `dashscope` | LLM (Qwen) | [dashscope.console.aliyun.com](https://dashscope.console.aliyun.com) |
 | `moonshot` | LLM (Moonshot/Kimi) | [platform.moonshot.cn](https://platform.moonshot.cn) |
 | `zhipu` | LLM (Zhipu GLM) | [open.bigmodel.cn](https://open.bigmodel.cn) |
+| `ollama` | LLM (local, Ollama) | — |
 | `vllm` | LLM (local, any OpenAI-compatible server) | — |
 | `openai_codex` | LLM (Codex, OAuth) | `nanobot provider login openai-codex` |
 | `github_copilot` | LLM (GitHub Copilot, OAuth) | `nanobot provider login github-copilot` |
@@ -843,6 +844,37 @@ Connects directly to any OpenAI-compatible endpoint — LM Studio, llama.cpp, To
 
 </details>
 
+<details>
+<summary><b>Ollama (local)</b></summary>
+
+Run a local model with Ollama, then add to config:
+
+**1. Start Ollama** (example):
+```bash
+ollama run llama3.2
+```
+
+**2. Add to config** (partial — merge into `~/.nanobot/config.json`):
+```json
+{
+  "providers": {
+    "ollama": {
+      "apiBase": "http://localhost:11434"
+    }
+  },
+  "agents": {
+    "defaults": {
+      "provider": "ollama",
+      "model": "llama3.2"
+    }
+  }
+}
+```
+
+> `provider: "auto"` also works when `providers.ollama.apiBase` is configured, but setting `"provider": "ollama"` is the clearest option.
+
+</details>
+
 <details>
 <summary><b>vLLM (local / OpenAI-compatible)</b></summary>
 
diff --git a/nano.2091796.save b/nano.2091796.save
new file mode 100644
index 0000000..6953168
--- /dev/null
+++ b/nano.2091796.save
@@ -0,0 +1,2 @@
+da activate base
+
diff --git a/nano.2095802.save b/nano.2095802.save
new file mode 100644
index 0000000..6953168
--- /dev/null
+++ b/nano.2095802.save
@@ -0,0 +1,2 @@
+da activate base
+
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index d2ef713..1b26dd7 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -395,6 +395,15 @@ class Config(BaseSettings):
                 if spec.is_oauth or spec.is_local or p.api_key:
                     return p, spec.name
 
+        # Fallback: configured local providers can route models without
+        # provider-specific keywords (for example plain "llama3.2" on Ollama).
+        for spec in PROVIDERS:
+            if not spec.is_local:
+                continue
+            p = getattr(self.providers, spec.name, None)
+            if p and p.api_base:
+                return p, spec.name
+
         # Fallback: gateways first, then others (follows registry order)
         # OAuth providers are NOT valid fallbacks — they require explicit model selection
         for spec in PROVIDERS:
@@ -421,7 +430,7 @@ class Config(BaseSettings):
         return p.api_key if p else None
 
     def get_api_base(self, model: str | None = None) -> str | None:
-        """Get API base URL for the given model. Applies default URLs for known gateways."""
+        """Get API base URL for the given model. Applies default URLs for gateway/local providers."""
         from nanobot.providers.registry import find_by_name
 
         p, name = self._match_provider(model)
@@ -432,7 +441,7 @@ class Config(BaseSettings):
         # to avoid polluting the global litellm.api_base.
         if name:
             spec = find_by_name(name)
-            if spec and spec.is_gateway and spec.default_api_base:
+            if spec and (spec.is_gateway or spec.is_local) and spec.default_api_base:
                 return spec.default_api_base
         return None
 
diff --git a/tests/test_commands.py b/tests/test_commands.py
index 1375a3a..583ef6f 100644
--- a/tests/test_commands.py
+++ b/tests/test_commands.py
@@ -114,6 +114,35 @@ def test_config_matches_openai_codex_with_hyphen_prefix():
     assert config.get_provider_name() == "openai_codex"
 
 
+def test_config_matches_explicit_ollama_prefix_without_api_key():
+    config = Config()
+    config.agents.defaults.model = "ollama/llama3.2"
+
+    assert config.get_provider_name() == "ollama"
+    assert config.get_api_base() == "http://localhost:11434"
+
+
+def test_config_explicit_ollama_provider_uses_default_localhost_api_base():
+    config = Config()
+    config.agents.defaults.provider = "ollama"
+    config.agents.defaults.model = "llama3.2"
+
+    assert config.get_provider_name() == "ollama"
+    assert config.get_api_base() == "http://localhost:11434"
+
+
+def test_config_auto_detects_ollama_from_local_api_base():
+    config = Config.model_validate(
+        {
+            "agents": {"defaults": {"provider": "auto", "model": "llama3.2"}},
+            "providers": {"ollama": {"apiBase": "http://localhost:11434"}},
+        }
+    )
+
+    assert config.get_provider_name() == "ollama"
+    assert config.get_api_base() == "http://localhost:11434"
+
+
 def test_find_by_model_prefers_explicit_prefix_over_generic_codex_keyword():
     spec = find_by_model("github-copilot/gpt-5.3-codex")
 

From 89eff6f573d52af025ae9cb7e9db6ea8a0ad698f Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 08:44:38 +0000
Subject: [PATCH 25/28] chore: remove stray nano backup files

---
 .gitignore        | 1 +
 nano.2091796.save | 2 --
 nano.2095802.save | 2 --
 3 files changed, 1 insertion(+), 4 deletions(-)
 delete mode 100644 nano.2091796.save
 delete mode 100644 nano.2095802.save

diff --git a/.gitignore b/.gitignore
index 374875a..c50cab8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,4 +20,5 @@ __pycache__/
 poetry.lock
 .pytest_cache/
 botpy.log
+nano.*.save
 
diff --git a/nano.2091796.save b/nano.2091796.save
deleted file mode 100644
index 6953168..0000000
--- a/nano.2091796.save
+++ /dev/null
@@ -1,2 +0,0 @@
-da activate base
-
diff --git a/nano.2095802.save b/nano.2095802.save
deleted file mode 100644
index 6953168..0000000
--- a/nano.2095802.save
+++ /dev/null
@@ -1,2 +0,0 @@
-da activate base
-

From c72c2ce7e2b84fda1fd5933fc28d90137f936d03 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 09:47:04 +0000
Subject: [PATCH 26/28] refactor: move generation settings to provider level,
 eliminate parameter passthrough

---
 nanobot/agent/loop.py                    |  15 ---
 nanobot/agent/memory.py                  |  22 +---
 nanobot/agent/subagent.py                |   9 --
 nanobot/cli/commands.py                  |  57 +++++----
 nanobot/providers/base.py                |  38 +++++-
 tests/test_memory_consolidation_types.py |  23 ++++
 tests/test_provider_retry.py             |  35 +++++-
 tests/test_subagent_reasoning.py         | 144 -----------------------
 8 files changed, 120 insertions(+), 223 deletions(-)
 delete mode 100644 tests/test_subagent_reasoning.py

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index edf1e8e..b1bfd2f 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -52,9 +52,6 @@ class AgentLoop:
         workspace: Path,
         model: str | None = None,
         max_iterations: int = 40,
-        temperature: float = 0.1,
-        max_tokens: int = 4096,
-        reasoning_effort: str | None = None,
         context_window_tokens: int = 65_536,
         brave_api_key: str | None = None,
         web_proxy: str | None = None,
@@ -72,9 +69,6 @@ class AgentLoop:
         self.workspace = workspace
         self.model = model or provider.get_default_model()
         self.max_iterations = max_iterations
-        self.temperature = temperature
-        self.max_tokens = max_tokens
-        self.reasoning_effort = reasoning_effort
         self.context_window_tokens = context_window_tokens
         self.brave_api_key = brave_api_key
         self.web_proxy = web_proxy
@@ -90,9 +84,6 @@ class AgentLoop:
             workspace=workspace,
             bus=bus,
             model=self.model,
-            temperature=self.temperature,
-            max_tokens=self.max_tokens,
-            reasoning_effort=reasoning_effort,
             brave_api_key=brave_api_key,
             web_proxy=web_proxy,
             exec_config=self.exec_config,
@@ -114,9 +105,6 @@ class AgentLoop:
             context_window_tokens=context_window_tokens,
             build_messages=self.context.build_messages,
             get_tool_definitions=self.tools.get_definitions,
-            temperature=self.temperature,
-            max_tokens=self.max_tokens,
-            reasoning_effort=self.reasoning_effort,
         )
         self._register_default_tools()
 
@@ -205,9 +193,6 @@ class AgentLoop:
                 messages=messages,
                 tools=tool_defs,
                 model=self.model,
-                temperature=self.temperature,
-                max_tokens=self.max_tokens,
-                reasoning_effort=self.reasoning_effort,
             )
 
             if response.has_tool_calls:
diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py
index d79887b..59ba40e 100644
--- a/nanobot/agent/memory.py
+++ b/nanobot/agent/memory.py
@@ -57,7 +57,6 @@ def _normalize_save_memory_args(args: Any) -> dict[str, Any] | None:
         return args[0] if args and isinstance(args[0], dict) else None
     return args if isinstance(args, dict) else None
 
-
 class MemoryStore:
     """Two-layer memory: MEMORY.md (long-term facts) + HISTORY.md (grep-searchable log)."""
 
@@ -99,9 +98,6 @@ class MemoryStore:
         messages: list[dict],
         provider: LLMProvider,
         model: str,
-        temperature: float | None = None,
-        max_tokens: int | None = None,
-        reasoning_effort: str | None = None,
     ) -> bool:
         """Consolidate the provided message chunk into MEMORY.md + HISTORY.md."""
         if not messages:
@@ -124,9 +120,6 @@ class MemoryStore:
                 ],
                 tools=_SAVE_MEMORY_TOOL,
                 model=model,
-                temperature=temperature,
-                max_tokens=max_tokens,
-                reasoning_effort=reasoning_effort,
             )
 
             if not response.has_tool_calls:
@@ -166,9 +159,6 @@ class MemoryConsolidator:
         context_window_tokens: int,
         build_messages: Callable[..., list[dict[str, Any]]],
         get_tool_definitions: Callable[[], list[dict[str, Any]]],
-        temperature: float | None = None,
-        max_tokens: int | None = None,
-        reasoning_effort: str | None = None,
     ):
         self.store = MemoryStore(workspace)
         self.provider = provider
@@ -177,9 +167,6 @@ class MemoryConsolidator:
         self.context_window_tokens = context_window_tokens
         self._build_messages = build_messages
         self._get_tool_definitions = get_tool_definitions
-        self._temperature = temperature
-        self._max_tokens = max_tokens
-        self._reasoning_effort = reasoning_effort
         self._locks: weakref.WeakValueDictionary[str, asyncio.Lock] = weakref.WeakValueDictionary()
 
     def get_lock(self, session_key: str) -> asyncio.Lock:
@@ -188,14 +175,7 @@ class MemoryConsolidator:
 
     async def consolidate_messages(self, messages: list[dict[str, object]]) -> bool:
         """Archive a selected message chunk into persistent memory."""
-        return await self.store.consolidate(
-            messages,
-            self.provider,
-            self.model,
-            temperature=self._temperature,
-            max_tokens=self._max_tokens,
-            reasoning_effort=self._reasoning_effort,
-        )
+        return await self.store.consolidate(messages, self.provider, self.model)
 
     def pick_consolidation_boundary(
         self,
diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py
index eff0b4f..21b8b32 100644
--- a/nanobot/agent/subagent.py
+++ b/nanobot/agent/subagent.py
@@ -28,9 +28,6 @@ class SubagentManager:
         workspace: Path,
         bus: MessageBus,
         model: str | None = None,
-        temperature: float = 0.7,
-        max_tokens: int = 4096,
-        reasoning_effort: str | None = None,
         brave_api_key: str | None = None,
         web_proxy: str | None = None,
         exec_config: "ExecToolConfig | None" = None,
@@ -41,9 +38,6 @@ class SubagentManager:
         self.workspace = workspace
         self.bus = bus
         self.model = model or provider.get_default_model()
-        self.temperature = temperature
-        self.max_tokens = max_tokens
-        self.reasoning_effort = reasoning_effort
         self.brave_api_key = brave_api_key
         self.web_proxy = web_proxy
         self.exec_config = exec_config or ExecToolConfig()
@@ -128,9 +122,6 @@ class SubagentManager:
                     messages=messages,
                     tools=tools.get_definitions(),
                     model=self.model,
-                    temperature=self.temperature,
-                    max_tokens=self.max_tokens,
-                    reasoning_effort=self.reasoning_effort,
                 )
 
                 if response.has_tool_calls:
diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index 8387b28..f5ac859 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -215,6 +215,7 @@ def onboard():
 
 def _make_provider(config: Config):
     """Create the appropriate LLM provider from config."""
+    from nanobot.providers.base import GenerationSettings
     from nanobot.providers.openai_codex_provider import OpenAICodexProvider
     from nanobot.providers.azure_openai_provider import AzureOpenAIProvider
 
@@ -224,46 +225,50 @@ def _make_provider(config: Config):
 
     # OpenAI Codex (OAuth)
     if provider_name == "openai_codex" or model.startswith("openai-codex/"):
-        return OpenAICodexProvider(default_model=model)
-
+        provider = OpenAICodexProvider(default_model=model)
     # Custom: direct OpenAI-compatible endpoint, bypasses LiteLLM
-    from nanobot.providers.custom_provider import CustomProvider
-    if provider_name == "custom":
-        return CustomProvider(
+    elif provider_name == "custom":
+        from nanobot.providers.custom_provider import CustomProvider
+        provider = CustomProvider(
             api_key=p.api_key if p else "no-key",
             api_base=config.get_api_base(model) or "http://localhost:8000/v1",
             default_model=model,
         )
-
     # Azure OpenAI: direct Azure OpenAI endpoint with deployment name
-    if provider_name == "azure_openai":
+    elif provider_name == "azure_openai":
         if not p or not p.api_key or not p.api_base:
             console.print("[red]Error: Azure OpenAI requires api_key and api_base.[/red]")
             console.print("Set them in ~/.nanobot/config.json under providers.azure_openai section")
             console.print("Use the model field to specify the deployment name.")
             raise typer.Exit(1)
-        
-        return AzureOpenAIProvider(
+        provider = AzureOpenAIProvider(
             api_key=p.api_key,
             api_base=p.api_base,
             default_model=model,
         )
+    else:
+        from nanobot.providers.litellm_provider import LiteLLMProvider
+        from nanobot.providers.registry import find_by_name
+        spec = find_by_name(provider_name)
+        if not model.startswith("bedrock/") and not (p and p.api_key) and not (spec and (spec.is_oauth or spec.is_local)):
+            console.print("[red]Error: No API key configured.[/red]")
+            console.print("Set one in ~/.nanobot/config.json under providers section")
+            raise typer.Exit(1)
+        provider = LiteLLMProvider(
+            api_key=p.api_key if p else None,
+            api_base=config.get_api_base(model),
+            default_model=model,
+            extra_headers=p.extra_headers if p else None,
+            provider_name=provider_name,
+        )
 
-    from nanobot.providers.litellm_provider import LiteLLMProvider
-    from nanobot.providers.registry import find_by_name
-    spec = find_by_name(provider_name)
-    if not model.startswith("bedrock/") and not (p and p.api_key) and not (spec and (spec.is_oauth or spec.is_local)):
-        console.print("[red]Error: No API key configured.[/red]")
-        console.print("Set one in ~/.nanobot/config.json under providers section")
-        raise typer.Exit(1)
-
-    return LiteLLMProvider(
-        api_key=p.api_key if p else None,
-        api_base=config.get_api_base(model),
-        default_model=model,
-        extra_headers=p.extra_headers if p else None,
-        provider_name=provider_name,
+    defaults = config.agents.defaults
+    provider.generation = GenerationSettings(
+        temperature=defaults.temperature,
+        max_tokens=defaults.max_tokens,
+        reasoning_effort=defaults.reasoning_effort,
     )
+    return provider
 
 
 def _load_runtime_config(config: str | None = None, workspace: str | None = None) -> Config:
@@ -341,10 +346,7 @@ def gateway(
         provider=provider,
         workspace=config.workspace_path,
         model=config.agents.defaults.model,
-        temperature=config.agents.defaults.temperature,
-        max_tokens=config.agents.defaults.max_tokens,
         max_iterations=config.agents.defaults.max_tool_iterations,
-        reasoning_effort=config.agents.defaults.reasoning_effort,
         context_window_tokens=config.agents.defaults.context_window_tokens,
         brave_api_key=config.tools.web.search.api_key or None,
         web_proxy=config.tools.web.proxy or None,
@@ -527,10 +529,7 @@ def agent(
         provider=provider,
         workspace=config.workspace_path,
         model=config.agents.defaults.model,
-        temperature=config.agents.defaults.temperature,
-        max_tokens=config.agents.defaults.max_tokens,
         max_iterations=config.agents.defaults.max_tool_iterations,
-        reasoning_effort=config.agents.defaults.reasoning_effort,
         context_window_tokens=config.agents.defaults.context_window_tokens,
         brave_api_key=config.tools.web.search.api_key or None,
         web_proxy=config.tools.web.proxy or None,
diff --git a/nanobot/providers/base.py b/nanobot/providers/base.py
index a3b6c47..d4ea60d 100644
--- a/nanobot/providers/base.py
+++ b/nanobot/providers/base.py
@@ -32,6 +32,21 @@ class LLMResponse:
         return len(self.tool_calls) > 0
 
 
+@dataclass(frozen=True)
+class GenerationSettings:
+    """Default generation parameters for LLM calls.
+
+    Stored on the provider so every call site inherits the same defaults
+    without having to pass temperature / max_tokens / reasoning_effort
+    through every layer.  Individual call sites can still override by
+    passing explicit keyword arguments to chat() / chat_with_retry().
+    """
+
+    temperature: float = 0.7
+    max_tokens: int = 4096
+    reasoning_effort: str | None = None
+
+
 class LLMProvider(ABC):
     """
     Abstract base class for LLM providers.
@@ -56,9 +71,12 @@ class LLMProvider(ABC):
         "temporarily unavailable",
     )
 
+    _SENTINEL = object()
+
     def __init__(self, api_key: str | None = None, api_base: str | None = None):
         self.api_key = api_key
         self.api_base = api_base
+        self.generation: GenerationSettings = GenerationSettings()
 
     @staticmethod
     def _sanitize_empty_content(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
@@ -155,11 +173,23 @@ class LLMProvider(ABC):
         messages: list[dict[str, Any]],
         tools: list[dict[str, Any]] | None = None,
         model: str | None = None,
-        max_tokens: int = 4096,
-        temperature: float = 0.7,
-        reasoning_effort: str | None = None,
+        max_tokens: object = _SENTINEL,
+        temperature: object = _SENTINEL,
+        reasoning_effort: object = _SENTINEL,
     ) -> LLMResponse:
-        """Call chat() with retry on transient provider failures."""
+        """Call chat() with retry on transient provider failures.
+
+        Parameters default to ``self.generation`` when not explicitly passed,
+        so callers no longer need to thread temperature / max_tokens /
+        reasoning_effort through every layer.
+        """
+        if max_tokens is self._SENTINEL:
+            max_tokens = self.generation.max_tokens
+        if temperature is self._SENTINEL:
+            temperature = self.generation.temperature
+        if reasoning_effort is self._SENTINEL:
+            reasoning_effort = self.generation.reasoning_effort
+
         for attempt, delay in enumerate(self._CHAT_RETRY_DELAYS, start=1):
             try:
                 response = await self.chat(
diff --git a/tests/test_memory_consolidation_types.py b/tests/test_memory_consolidation_types.py
index 0263f01..69be858 100644
--- a/tests/test_memory_consolidation_types.py
+++ b/tests/test_memory_consolidation_types.py
@@ -265,3 +265,26 @@ class TestMemoryConsolidationTypeHandling:
         assert result is True
         assert provider.calls == 2
         assert delays == [1]
+
+    @pytest.mark.asyncio
+    async def test_consolidation_delegates_to_provider_defaults(self, tmp_path: Path) -> None:
+        """Consolidation no longer passes generation params — the provider owns them."""
+        store = MemoryStore(tmp_path)
+        provider = AsyncMock()
+        provider.chat_with_retry = AsyncMock(
+            return_value=_make_tool_response(
+                history_entry="[2026-01-01] User discussed testing.",
+                memory_update="# Memory\nUser likes testing.",
+            )
+        )
+        messages = _make_messages(message_count=60)
+
+        result = await store.consolidate(messages, provider, "test-model")
+
+        assert result is True
+        provider.chat_with_retry.assert_awaited_once()
+        _, kwargs = provider.chat_with_retry.await_args
+        assert kwargs["model"] == "test-model"
+        assert "temperature" not in kwargs
+        assert "max_tokens" not in kwargs
+        assert "reasoning_effort" not in kwargs
diff --git a/tests/test_provider_retry.py b/tests/test_provider_retry.py
index 751ecc3..2420399 100644
--- a/tests/test_provider_retry.py
+++ b/tests/test_provider_retry.py
@@ -2,7 +2,7 @@ import asyncio
 
 import pytest
 
-from nanobot.providers.base import LLMProvider, LLMResponse
+from nanobot.providers.base import GenerationSettings, LLMProvider, LLMResponse
 
 
 class ScriptedProvider(LLMProvider):
@@ -10,9 +10,11 @@ class ScriptedProvider(LLMProvider):
         super().__init__()
         self._responses = list(responses)
         self.calls = 0
+        self.last_kwargs: dict = {}
 
     async def chat(self, *args, **kwargs) -> LLMResponse:
         self.calls += 1
+        self.last_kwargs = kwargs
         response = self._responses.pop(0)
         if isinstance(response, BaseException):
             raise response
@@ -90,3 +92,34 @@ async def test_chat_with_retry_preserves_cancelled_error() -> None:
 
     with pytest.raises(asyncio.CancelledError):
         await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}])
+
+
+@pytest.mark.asyncio
+async def test_chat_with_retry_uses_provider_generation_defaults() -> None:
+    """When callers omit generation params, provider.generation defaults are used."""
+    provider = ScriptedProvider([LLMResponse(content="ok")])
+    provider.generation = GenerationSettings(temperature=0.2, max_tokens=321, reasoning_effort="high")
+
+    await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}])
+
+    assert provider.last_kwargs["temperature"] == 0.2
+    assert provider.last_kwargs["max_tokens"] == 321
+    assert provider.last_kwargs["reasoning_effort"] == "high"
+
+
+@pytest.mark.asyncio
+async def test_chat_with_retry_explicit_override_beats_defaults() -> None:
+    """Explicit kwargs should override provider.generation defaults."""
+    provider = ScriptedProvider([LLMResponse(content="ok")])
+    provider.generation = GenerationSettings(temperature=0.2, max_tokens=321, reasoning_effort="high")
+
+    await provider.chat_with_retry(
+        messages=[{"role": "user", "content": "hello"}],
+        temperature=0.9,
+        max_tokens=9999,
+        reasoning_effort="low",
+    )
+
+    assert provider.last_kwargs["temperature"] == 0.9
+    assert provider.last_kwargs["max_tokens"] == 9999
+    assert provider.last_kwargs["reasoning_effort"] == "low"
diff --git a/tests/test_subagent_reasoning.py b/tests/test_subagent_reasoning.py
deleted file mode 100644
index 5e70506..0000000
--- a/tests/test_subagent_reasoning.py
+++ /dev/null
@@ -1,144 +0,0 @@
-"""Tests for subagent reasoning_content and thinking_blocks handling."""
-
-from __future__ import annotations
-
-import asyncio
-from pathlib import Path
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-
-
-class TestSubagentReasoningContent:
-    """Test that subagent properly handles reasoning_content and thinking_blocks."""
-
-    @pytest.mark.asyncio
-    async def test_subagent_message_includes_reasoning_content(self):
-        """Verify reasoning_content is included in assistant messages with tool calls.
-
-        This is the fix for issue #1834: Spawn/subagent tool fails with
-        Deepseek Reasoner due to missing reasoning_content field.
-        """
-        from nanobot.agent.subagent import SubagentManager
-        from nanobot.bus.queue import MessageBus
-        from nanobot.providers.base import LLMResponse, ToolCallRequest
-
-        bus = MessageBus()
-        provider = MagicMock()
-        provider.get_default_model.return_value = "deepseek-reasoner"
-
-        # Create a real Path object for workspace
-        workspace = Path("/tmp/test_workspace")
-        workspace.mkdir(parents=True, exist_ok=True)
-
-        # Capture messages that are sent to the provider
-        captured_messages = []
-
-        async def mock_chat(*args, **kwargs):
-            captured_messages.append(kwargs.get("messages", []))
-            # Return response with tool calls and reasoning_content
-            tool_call = ToolCallRequest(
-                id="test-1",
-                name="read_file",
-                arguments={"path": "/test.txt"},
-            )
-            return LLMResponse(
-                content="",
-                tool_calls=[tool_call],
-                reasoning_content="I need to read this file first",
-            )
-
-        provider.chat_with_retry = AsyncMock(side_effect=mock_chat)
-
-        mgr = SubagentManager(provider=provider, workspace=workspace, bus=bus)
-
-        # Mock the tools registry
-        with patch("nanobot.agent.subagent.ToolRegistry") as MockToolRegistry:
-            mock_registry = MagicMock()
-            mock_registry.get_definitions.return_value = []
-            mock_registry.execute = AsyncMock(return_value="file content")
-            MockToolRegistry.return_value = mock_registry
-
-            result = await mgr.spawn(
-                task="Read a file",
-                label="test",
-                origin_channel="cli",
-                origin_chat_id="direct",
-                session_key="cli:direct",
-            )
-
-            # Wait for the task to complete
-            await asyncio.sleep(0.5)
-
-        # Check the captured messages
-        assert len(captured_messages) >= 1
-        # Find the assistant message with tool_calls
-        found = False
-        for msg_list in captured_messages:
-            for msg in msg_list:
-                if msg.get("role") == "assistant" and msg.get("tool_calls"):
-                    assert "reasoning_content" in msg, "reasoning_content should be in assistant message with tool_calls"
-                    assert msg["reasoning_content"] == "I need to read this file first"
-                    found = True
-        assert found, "Should have found an assistant message with tool_calls"
-
-    @pytest.mark.asyncio
-    async def test_subagent_message_includes_thinking_blocks(self):
-        """Verify thinking_blocks is included in assistant messages with tool calls."""
-        from nanobot.agent.subagent import SubagentManager
-        from nanobot.bus.queue import MessageBus
-        from nanobot.providers.base import LLMResponse, ToolCallRequest
-
-        bus = MessageBus()
-        provider = MagicMock()
-        provider.get_default_model.return_value = "claude-sonnet"
-
-        workspace = Path("/tmp/test_workspace2")
-        workspace.mkdir(parents=True, exist_ok=True)
-
-        captured_messages = []
-
-        async def mock_chat(*args, **kwargs):
-            captured_messages.append(kwargs.get("messages", []))
-            tool_call = ToolCallRequest(
-                id="test-2",
-                name="read_file",
-                arguments={"path": "/test.txt"},
-            )
-            return LLMResponse(
-                content="",
-                tool_calls=[tool_call],
-                thinking_blocks=[
-                    {"signature": "sig1", "thought": "thinking step 1"},
-                    {"signature": "sig2", "thought": "thinking step 2"},
-                ],
-            )
-
-        provider.chat_with_retry = AsyncMock(side_effect=mock_chat)
-
-        mgr = SubagentManager(provider=provider, workspace=workspace, bus=bus)
-
-        with patch("nanobot.agent.subagent.ToolRegistry") as MockToolRegistry:
-            mock_registry = MagicMock()
-            mock_registry.get_definitions.return_value = []
-            mock_registry.execute = AsyncMock(return_value="file content")
-            MockToolRegistry.return_value = mock_registry
-
-            result = await mgr.spawn(
-                task="Read a file",
-                label="test",
-                origin_channel="cli",
-                origin_chat_id="direct",
-            )
-
-            await asyncio.sleep(0.5)
-
-        # Check the captured messages
-        found = False
-        for msg_list in captured_messages:
-            for msg in msg_list:
-                if msg.get("role") == "assistant" and msg.get("tool_calls"):
-                    assert "thinking_blocks" in msg, "thinking_blocks should be in assistant message with tool_calls"
-                    assert len(msg["thinking_blocks"]) == 2
-                    found = True
-        assert found, "Should have found an assistant message with tool_calls"

From 2c5226550d0083ceb41cf4042925682753e2adb5 Mon Sep 17 00:00:00 2001
From: for13to1 <for13to1@outlook.com>
Date: Wed, 11 Mar 2026 20:35:04 +0800
Subject: [PATCH 27/28] feat: allow direct references in hatch metadata for
 wecom dep

---
 pyproject.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 9868513..a52c0c9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -72,6 +72,9 @@ nanobot = "nanobot.cli.commands:app"
 requires = ["hatchling"]
 build-backend = "hatchling.build"
 
+[tool.hatch.metadata]
+allow-direct-references = true
+
 [tool.hatch.build.targets.wheel]
 packages = ["nanobot"]
 

From 254cfd48babf74cca4bbe7baedda7b540b897cbb Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 14:23:19 +0000
Subject: [PATCH 28/28] refactor: auto-discover channels via pkgutil, eliminate
 hardcoded registry

---
 nanobot/channels/base.py     |  18 +++++
 nanobot/channels/dingtalk.py |   1 +
 nanobot/channels/discord.py  |   1 +
 nanobot/channels/email.py    |   1 +
 nanobot/channels/feishu.py   |  18 ++---
 nanobot/channels/manager.py  | 140 ++++-------------------------------
 nanobot/channels/matrix.py   |  18 +++--
 nanobot/channels/mochat.py   |   1 +
 nanobot/channels/qq.py       |   1 +
 nanobot/channels/registry.py |  35 +++++++++
 nanobot/channels/slack.py    |   1 +
 nanobot/channels/telegram.py |  16 +---
 nanobot/channels/wecom.py    |   1 +
 nanobot/channels/whatsapp.py |   1 +
 nanobot/cli/commands.py      |  91 ++++-------------------
 15 files changed, 111 insertions(+), 233 deletions(-)
 create mode 100644 nanobot/channels/registry.py

diff --git a/nanobot/channels/base.py b/nanobot/channels/base.py
index dc53ba4..74c540a 100644
--- a/nanobot/channels/base.py
+++ b/nanobot/channels/base.py
@@ -1,6 +1,9 @@
 """Base channel interface for chat platforms."""
 
+from __future__ import annotations
+
 from abc import ABC, abstractmethod
+from pathlib import Path
 from typing import Any
 
 from loguru import logger
@@ -18,6 +21,8 @@ class BaseChannel(ABC):
     """
 
     name: str = "base"
+    display_name: str = "Base"
+    transcription_api_key: str = ""
 
     def __init__(self, config: Any, bus: MessageBus):
         """
@@ -31,6 +36,19 @@ class BaseChannel(ABC):
         self.bus = bus
         self._running = False
 
+    async def transcribe_audio(self, file_path: str | Path) -> str:
+        """Transcribe an audio file via Groq Whisper. Returns empty string on failure or when no API key is set."""
+        if not self.transcription_api_key:
+            return ""
+        try:
+            from nanobot.providers.transcription import GroqTranscriptionProvider
+
+            provider = GroqTranscriptionProvider(api_key=self.transcription_api_key)
+            return await provider.transcribe(file_path)
+        except Exception as e:
+            logger.warning("{}: audio transcription failed: {}", self.name, e)
+            return ""
+
     @abstractmethod
     async def start(self) -> None:
         """
diff --git a/nanobot/channels/dingtalk.py b/nanobot/channels/dingtalk.py
index cdcba57..4626d95 100644
--- a/nanobot/channels/dingtalk.py
+++ b/nanobot/channels/dingtalk.py
@@ -114,6 +114,7 @@ class DingTalkChannel(BaseChannel):
     """
 
     name = "dingtalk"
+    display_name = "DingTalk"
     _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp"}
     _AUDIO_EXTS = {".amr", ".mp3", ".wav", ".ogg", ".m4a", ".aac"}
     _VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm"}
diff --git a/nanobot/channels/discord.py b/nanobot/channels/discord.py
index 2ee4f77..afa20c9 100644
--- a/nanobot/channels/discord.py
+++ b/nanobot/channels/discord.py
@@ -25,6 +25,7 @@ class DiscordChannel(BaseChannel):
     """Discord channel using Gateway websocket."""
 
     name = "discord"
+    display_name = "Discord"
 
     def __init__(self, config: DiscordConfig, bus: MessageBus):
         super().__init__(config, bus)
diff --git a/nanobot/channels/email.py b/nanobot/channels/email.py
index 16771fb..46c2103 100644
--- a/nanobot/channels/email.py
+++ b/nanobot/channels/email.py
@@ -35,6 +35,7 @@ class EmailChannel(BaseChannel):
     """
 
     name = "email"
+    display_name = "Email"
     _IMAP_MONTHS = (
         "Jan",
         "Feb",
diff --git a/nanobot/channels/feishu.py b/nanobot/channels/feishu.py
index 0409c32..160b9b4 100644
--- a/nanobot/channels/feishu.py
+++ b/nanobot/channels/feishu.py
@@ -244,11 +244,11 @@ class FeishuChannel(BaseChannel):
     """
 
     name = "feishu"
+    display_name = "Feishu"
 
-    def __init__(self, config: FeishuConfig, bus: MessageBus, groq_api_key: str = ""):
+    def __init__(self, config: FeishuConfig, bus: MessageBus):
         super().__init__(config, bus)
         self.config: FeishuConfig = config
-        self.groq_api_key = groq_api_key
         self._client: Any = None
         self._ws_client: Any = None
         self._ws_thread: threading.Thread | None = None
@@ -928,16 +928,10 @@ class FeishuChannel(BaseChannel):
                 if file_path:
                     media_paths.append(file_path)
 
-                # Transcribe audio using Groq Whisper
-                if msg_type == "audio" and file_path and self.groq_api_key:
-                    try:
-                        from nanobot.providers.transcription import GroqTranscriptionProvider
-                        transcriber = GroqTranscriptionProvider(api_key=self.groq_api_key)
-                        transcription = await transcriber.transcribe(file_path)
-                        if transcription:
-                            content_text = f"[transcription: {transcription}]"
-                    except Exception as e:
-                        logger.warning("Failed to transcribe audio: {}", e)
+                if msg_type == "audio" and file_path:
+                    transcription = await self.transcribe_audio(file_path)
+                    if transcription:
+                        content_text = f"[transcription: {transcription}]"
 
                 content_parts.append(content_text)
 
diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py
index 2c5cd3f..8288ad0 100644
--- a/nanobot/channels/manager.py
+++ b/nanobot/channels/manager.py
@@ -31,135 +31,23 @@ class ChannelManager:
         self._init_channels()
 
     def _init_channels(self) -> None:
-        """Initialize channels based on config."""
+        """Initialize channels discovered via pkgutil scan."""
+        from nanobot.channels.registry import discover_channel_names, load_channel_class
 
-        # Telegram channel
-        if self.config.channels.telegram.enabled:
+        groq_key = self.config.providers.groq.api_key
+
+        for modname in discover_channel_names():
+            section = getattr(self.config.channels, modname, None)
+            if not section or not getattr(section, "enabled", False):
+                continue
             try:
-                from nanobot.channels.telegram import TelegramChannel
-                self.channels["telegram"] = TelegramChannel(
-                    self.config.channels.telegram,
-                    self.bus,
-                    groq_api_key=self.config.providers.groq.api_key,
-                )
-                logger.info("Telegram channel enabled")
+                cls = load_channel_class(modname)
+                channel = cls(section, self.bus)
+                channel.transcription_api_key = groq_key
+                self.channels[modname] = channel
+                logger.info("{} channel enabled", cls.display_name)
             except ImportError as e:
-                logger.warning("Telegram channel not available: {}", e)
-
-        # WhatsApp channel
-        if self.config.channels.whatsapp.enabled:
-            try:
-                from nanobot.channels.whatsapp import WhatsAppChannel
-                self.channels["whatsapp"] = WhatsAppChannel(
-                    self.config.channels.whatsapp, self.bus
-                )
-                logger.info("WhatsApp channel enabled")
-            except ImportError as e:
-                logger.warning("WhatsApp channel not available: {}", e)
-
-        # Discord channel
-        if self.config.channels.discord.enabled:
-            try:
-                from nanobot.channels.discord import DiscordChannel
-                self.channels["discord"] = DiscordChannel(
-                    self.config.channels.discord, self.bus
-                )
-                logger.info("Discord channel enabled")
-            except ImportError as e:
-                logger.warning("Discord channel not available: {}", e)
-
-        # Feishu channel
-        if self.config.channels.feishu.enabled:
-            try:
-                from nanobot.channels.feishu import FeishuChannel
-                self.channels["feishu"] = FeishuChannel(
-                    self.config.channels.feishu, self.bus,
-                    groq_api_key=self.config.providers.groq.api_key,
-                )
-                logger.info("Feishu channel enabled")
-            except ImportError as e:
-                logger.warning("Feishu channel not available: {}", e)
-
-        # Mochat channel
-        if self.config.channels.mochat.enabled:
-            try:
-                from nanobot.channels.mochat import MochatChannel
-
-                self.channels["mochat"] = MochatChannel(
-                    self.config.channels.mochat, self.bus
-                )
-                logger.info("Mochat channel enabled")
-            except ImportError as e:
-                logger.warning("Mochat channel not available: {}", e)
-
-        # DingTalk channel
-        if self.config.channels.dingtalk.enabled:
-            try:
-                from nanobot.channels.dingtalk import DingTalkChannel
-                self.channels["dingtalk"] = DingTalkChannel(
-                    self.config.channels.dingtalk, self.bus
-                )
-                logger.info("DingTalk channel enabled")
-            except ImportError as e:
-                logger.warning("DingTalk channel not available: {}", e)
-
-        # Email channel
-        if self.config.channels.email.enabled:
-            try:
-                from nanobot.channels.email import EmailChannel
-                self.channels["email"] = EmailChannel(
-                    self.config.channels.email, self.bus
-                )
-                logger.info("Email channel enabled")
-            except ImportError as e:
-                logger.warning("Email channel not available: {}", e)
-
-        # Slack channel
-        if self.config.channels.slack.enabled:
-            try:
-                from nanobot.channels.slack import SlackChannel
-                self.channels["slack"] = SlackChannel(
-                    self.config.channels.slack, self.bus
-                )
-                logger.info("Slack channel enabled")
-            except ImportError as e:
-                logger.warning("Slack channel not available: {}", e)
-
-        # QQ channel
-        if self.config.channels.qq.enabled:
-            try:
-                from nanobot.channels.qq import QQChannel
-                self.channels["qq"] = QQChannel(
-                    self.config.channels.qq,
-                    self.bus,
-                )
-                logger.info("QQ channel enabled")
-            except ImportError as e:
-                logger.warning("QQ channel not available: {}", e)
-
-        # Matrix channel
-        if self.config.channels.matrix.enabled:
-            try:
-                from nanobot.channels.matrix import MatrixChannel
-                self.channels["matrix"] = MatrixChannel(
-                    self.config.channels.matrix,
-                    self.bus,
-                )
-                logger.info("Matrix channel enabled")
-            except ImportError as e:
-                logger.warning("Matrix channel not available: {}", e)
-
-        # WeCom channel
-        if self.config.channels.wecom.enabled:
-            try:
-                from nanobot.channels.wecom import WecomChannel
-                self.channels["wecom"] = WecomChannel(
-                    self.config.channels.wecom,
-                    self.bus,
-                )
-                logger.info("WeCom channel enabled")
-            except ImportError as e:
-                logger.warning("WeCom channel not available: {}", e)
+                logger.warning("{} channel not available: {}", modname, e)
 
         self._validate_allow_from()
 
diff --git a/nanobot/channels/matrix.py b/nanobot/channels/matrix.py
index 63cb0ca..0d7a908 100644
--- a/nanobot/channels/matrix.py
+++ b/nanobot/channels/matrix.py
@@ -37,6 +37,7 @@ except ImportError as e:
     ) from e
 
 from nanobot.bus.events import OutboundMessage
+from nanobot.bus.queue import MessageBus
 from nanobot.channels.base import BaseChannel
 from nanobot.config.paths import get_data_dir, get_media_dir
 from nanobot.utils.helpers import safe_filename
@@ -146,15 +147,15 @@ class MatrixChannel(BaseChannel):
     """Matrix (Element) channel using long-polling sync."""
 
     name = "matrix"
+    display_name = "Matrix"
 
-    def __init__(self, config: Any, bus, *, restrict_to_workspace: bool = False,
-                 workspace: Path | None = None):
+    def __init__(self, config: Any, bus: MessageBus):
         super().__init__(config, bus)
         self.client: AsyncClient | None = None
         self._sync_task: asyncio.Task | None = None
         self._typing_tasks: dict[str, asyncio.Task] = {}
-        self._restrict_to_workspace = restrict_to_workspace
-        self._workspace = workspace.expanduser().resolve() if workspace else None
+        self._restrict_to_workspace = False
+        self._workspace: Path | None = None
         self._server_upload_limit_bytes: int | None = None
         self._server_upload_limit_checked = False
 
@@ -677,7 +678,14 @@ class MatrixChannel(BaseChannel):
         parts: list[str] = []
         if isinstance(body := getattr(event, "body", None), str) and body.strip():
             parts.append(body.strip())
-        if marker:
+
+        if attachment and attachment.get("type") == "audio":
+            transcription = await self.transcribe_audio(attachment["path"])
+            if transcription:
+                parts.append(f"[transcription: {transcription}]")
+            else:
+                parts.append(marker)
+        elif marker:
             parts.append(marker)
 
         await self._start_typing_keepalive(room.room_id)
diff --git a/nanobot/channels/mochat.py b/nanobot/channels/mochat.py
index 09e31c3..52e246f 100644
--- a/nanobot/channels/mochat.py
+++ b/nanobot/channels/mochat.py
@@ -216,6 +216,7 @@ class MochatChannel(BaseChannel):
     """Mochat channel using socket.io with fallback polling workers."""
 
     name = "mochat"
+    display_name = "Mochat"
 
     def __init__(self, config: MochatConfig, bus: MessageBus):
         super().__init__(config, bus)
diff --git a/nanobot/channels/qq.py b/nanobot/channels/qq.py
index 5ac06e3..792cc12 100644
--- a/nanobot/channels/qq.py
+++ b/nanobot/channels/qq.py
@@ -54,6 +54,7 @@ class QQChannel(BaseChannel):
     """QQ channel using botpy SDK with WebSocket connection."""
 
     name = "qq"
+    display_name = "QQ"
 
     def __init__(self, config: QQConfig, bus: MessageBus):
         super().__init__(config, bus)
diff --git a/nanobot/channels/registry.py b/nanobot/channels/registry.py
new file mode 100644
index 0000000..eb30ff7
--- /dev/null
+++ b/nanobot/channels/registry.py
@@ -0,0 +1,35 @@
+"""Auto-discovery for channel modules — no hardcoded registry."""
+
+from __future__ import annotations
+
+import importlib
+import pkgutil
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from nanobot.channels.base import BaseChannel
+
+_INTERNAL = frozenset({"base", "manager", "registry"})
+
+
+def discover_channel_names() -> list[str]:
+    """List channel module names by scanning the package; channel modules are not imported."""
+    import nanobot.channels as pkg
+
+    return [
+        name
+        for _, name, ispkg in pkgutil.iter_modules(pkg.__path__)
+        if name not in _INTERNAL and not ispkg  # drop _INTERNAL names and subpackages
+    ]
+
+
+def load_channel_class(module_name: str) -> type[BaseChannel]:
+    """Import *module_name*; return its first BaseChannel subclass or raise ImportError."""
+    from nanobot.channels.base import BaseChannel as _Base
+
+    mod = importlib.import_module(f"nanobot.channels.{module_name}")
+    for attr in dir(mod):  # name-sorted scan; subclasses merely imported into mod match too
+        obj = getattr(mod, attr)
+        if isinstance(obj, type) and issubclass(obj, _Base) and obj is not _Base:
+            return obj
+    raise ImportError(f"No BaseChannel subclass in nanobot.channels.{module_name}")
diff --git a/nanobot/channels/slack.py b/nanobot/channels/slack.py
index 0384d8d..5819212 100644
--- a/nanobot/channels/slack.py
+++ b/nanobot/channels/slack.py
@@ -21,6 +21,7 @@ class SlackChannel(BaseChannel):
     """Slack channel using Socket Mode."""
 
     name = "slack"
+    display_name = "Slack"
 
     def __init__(self, config: SlackConfig, bus: MessageBus):
         super().__init__(config, bus)
diff --git a/nanobot/channels/telegram.py b/nanobot/channels/telegram.py
index 5b294cc..9f93843 100644
--- a/nanobot/channels/telegram.py
+++ b/nanobot/channels/telegram.py
@@ -155,6 +155,7 @@ class TelegramChannel(BaseChannel):
     """
 
     name = "telegram"
+    display_name = "Telegram"
 
     # Commands registered with Telegram's command menu
     BOT_COMMANDS = [
@@ -164,15 +165,9 @@ class TelegramChannel(BaseChannel):
         BotCommand("help", "Show available commands"),
     ]
 
-    def __init__(
-        self,
-        config: TelegramConfig,
-        bus: MessageBus,
-        groq_api_key: str = "",
-    ):
+    def __init__(self, config: TelegramConfig, bus: MessageBus):
         super().__init__(config, bus)
         self.config: TelegramConfig = config
-        self.groq_api_key = groq_api_key
         self._app: Application | None = None
         self._chat_ids: dict[str, int] = {}  # Map sender_id to chat_id for replies
         self._typing_tasks: dict[str, asyncio.Task] = {}  # chat_id -> typing loop task
@@ -615,11 +610,8 @@ class TelegramChannel(BaseChannel):
 
                 media_paths.append(str(file_path))
 
-                # Handle voice transcription
-                if media_type == "voice" or media_type == "audio":
-                    from nanobot.providers.transcription import GroqTranscriptionProvider
-                    transcriber = GroqTranscriptionProvider(api_key=self.groq_api_key)
-                    transcription = await transcriber.transcribe(file_path)
+                if media_type in ("voice", "audio"):
+                    transcription = await self.transcribe_audio(file_path)
                     if transcription:
                         logger.info("Transcribed {}: {}...", media_type, transcription[:50])
                         content_parts.append(f"[transcription: {transcription}]")
diff --git a/nanobot/channels/wecom.py b/nanobot/channels/wecom.py
index 72be9e2..e0f4ae0 100644
--- a/nanobot/channels/wecom.py
+++ b/nanobot/channels/wecom.py
@@ -36,6 +36,7 @@ class WecomChannel(BaseChannel):
     """
 
     name = "wecom"
+    display_name = "WeCom"
 
     def __init__(self, config: WecomConfig, bus: MessageBus):
         super().__init__(config, bus)
diff --git a/nanobot/channels/whatsapp.py b/nanobot/channels/whatsapp.py
index 1307716..7fffb80 100644
--- a/nanobot/channels/whatsapp.py
+++ b/nanobot/channels/whatsapp.py
@@ -22,6 +22,7 @@ class WhatsAppChannel(BaseChannel):
     """
 
     name = "whatsapp"
+    display_name = "WhatsApp"
 
     def __init__(self, config: WhatsAppConfig, bus: MessageBus):
         super().__init__(config, bus)
diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index f5ac859..dd5e60c 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -683,6 +683,7 @@ app.add_typer(channels_app, name="channels")
 @channels_app.command("status")
 def channels_status():
     """Show channel status."""
+    from nanobot.channels.registry import discover_channel_names, load_channel_class
     from nanobot.config.loader import load_config
 
     config = load_config()
@@ -690,85 +691,19 @@ def channels_status():
     table = Table(title="Channel Status")
     table.add_column("Channel", style="cyan")
     table.add_column("Enabled", style="green")
-    table.add_column("Configuration", style="yellow")
 
-    # WhatsApp
-    wa = config.channels.whatsapp
-    table.add_row(
-        "WhatsApp",
-        "✓" if wa.enabled else "✗",
-        wa.bridge_url
-    )
-
-    dc = config.channels.discord
-    table.add_row(
-        "Discord",
-        "✓" if dc.enabled else "✗",
-        dc.gateway_url
-    )
-
-    # Feishu
-    fs = config.channels.feishu
-    fs_config = f"app_id: {fs.app_id[:10]}..." if fs.app_id else "[dim]not configured[/dim]"
-    table.add_row(
-        "Feishu",
-        "✓" if fs.enabled else "✗",
-        fs_config
-    )
-
-    # Mochat
-    mc = config.channels.mochat
-    mc_base = mc.base_url or "[dim]not configured[/dim]"
-    table.add_row(
-        "Mochat",
-        "✓" if mc.enabled else "✗",
-        mc_base
-    )
-
-    # Telegram
-    tg = config.channels.telegram
-    tg_config = f"token: {tg.token[:10]}..." if tg.token else "[dim]not configured[/dim]"
-    table.add_row(
-        "Telegram",
-        "✓" if tg.enabled else "✗",
-        tg_config
-    )
-
-    # Slack
-    slack = config.channels.slack
-    slack_config = "socket" if slack.app_token and slack.bot_token else "[dim]not configured[/dim]"
-    table.add_row(
-        "Slack",
-        "✓" if slack.enabled else "✗",
-        slack_config
-    )
-
-    # DingTalk
-    dt = config.channels.dingtalk
-    dt_config = f"client_id: {dt.client_id[:10]}..." if dt.client_id else "[dim]not configured[/dim]"
-    table.add_row(
-        "DingTalk",
-        "✓" if dt.enabled else "✗",
-        dt_config
-    )
-
-    # QQ
-    qq = config.channels.qq
-    qq_config = f"app_id: {qq.app_id[:10]}..." if qq.app_id else "[dim]not configured[/dim]"
-    table.add_row(
-        "QQ",
-        "✓" if qq.enabled else "✗",
-        qq_config
-    )
-
-    # Email
-    em = config.channels.email
-    em_config = em.imap_host if em.imap_host else "[dim]not configured[/dim]"
-    table.add_row(
-        "Email",
-        "✓" if em.enabled else "✗",
-        em_config
-    )
+    for modname in sorted(discover_channel_names()):
+        section = getattr(config.channels, modname, None)
+        enabled = section and getattr(section, "enabled", False)
+        try:
+            cls = load_channel_class(modname)
+            display = cls.display_name
+        except ImportError:
+            display = modname.title()
+        table.add_row(
+            display,
+            "[green]\u2713[/green]" if enabled else "[dim]\u2717[/dim]",
+        )
 
     console.print(table)