From dbc518098e913d2f382121820dd58bbaf7a04234 Mon Sep 17 00:00:00 2001 From: VITOHJL Date: Sun, 8 Mar 2026 14:20:16 +0800 Subject: [PATCH 01/28] refactor: implement token-based context compression mechanism Major changes: - Replace message-count-based memory window with token-budget-based compression - Add max_tokens_input, compression_start_ratio, compression_target_ratio config - Implement _maybe_compress_history() that triggers based on prompt token usage - Use _build_compressed_history_view() to provide compressed history to LLM - Refactor MemoryStore.consolidate() -> consolidate_chunk() for chunk-based compression - Remove last_consolidated from Session, use _compressed_until metadata instead - Add background compression scheduling to avoid blocking message processing Key improvements: - Compression now based on actual token usage, not arbitrary message counts - Better handling of long conversations with large context windows - Non-destructive compression: old messages remain in session, but excluded from prompt - Automatic compression when history exceeds configured token thresholds --- nanobot/agent/loop.py | 521 +++++++++++++++++++++++++++++++++---- nanobot/agent/memory.py | 62 ++--- nanobot/config/schema.py | 25 +- nanobot/session/manager.py | 20 +- 4 files changed, 529 insertions(+), 99 deletions(-) diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index ca9a06e..696e2a7 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -5,19 +5,24 @@ from __future__ import annotations import asyncio import json import re -import weakref from contextlib import AsyncExitStack from pathlib import Path from typing import TYPE_CHECKING, Any, Awaitable, Callable from loguru import logger +try: + import tiktoken # type: ignore +except Exception: # pragma: no cover - optional dependency + tiktoken = None + from nanobot.agent.context import ContextBuilder -from nanobot.agent.memory import MemoryStore from nanobot.agent.subagent import SubagentManager from 
nanobot.agent.tools.cron import CronTool from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, ReadFileTool, WriteFileTool +from nanobot.agent.tools.huggingface import HuggingFaceModelSearchTool from nanobot.agent.tools.message import MessageTool +from nanobot.agent.tools.model_config import ValidateDeployJSONTool, ValidateUsageYAMLTool from nanobot.agent.tools.registry import ToolRegistry from nanobot.agent.tools.shell import ExecTool from nanobot.agent.tools.spawn import SpawnTool @@ -55,8 +60,11 @@ class AgentLoop: max_iterations: int = 40, temperature: float = 0.1, max_tokens: int = 4096, - memory_window: int = 100, + memory_window: int | None = None, # backward-compat only (unused) reasoning_effort: str | None = None, + max_tokens_input: int = 128_000, + compression_start_ratio: float = 0.7, + compression_target_ratio: float = 0.4, brave_api_key: str | None = None, web_proxy: str | None = None, exec_config: ExecToolConfig | None = None, @@ -74,9 +82,18 @@ class AgentLoop: self.model = model or provider.get_default_model() self.max_iterations = max_iterations self.temperature = temperature + # max_tokens: per-call output token cap (maxTokensOutput in config) self.max_tokens = max_tokens + # Keep legacy attribute for older call sites/tests; compression no longer uses it. 
self.memory_window = memory_window self.reasoning_effort = reasoning_effort + # max_tokens_input: model native context window (maxTokensInput in config) + self.max_tokens_input = max_tokens_input + # Token-based compression watermarks (fractions of available input budget) + self.compression_start_ratio = compression_start_ratio + self.compression_target_ratio = compression_target_ratio + # Reserve tokens for safety margin + self._reserve_tokens = 1000 self.brave_api_key = brave_api_key self.web_proxy = web_proxy self.exec_config = exec_config or ExecToolConfig() @@ -105,18 +122,373 @@ class AgentLoop: self._mcp_stack: AsyncExitStack | None = None self._mcp_connected = False self._mcp_connecting = False - self._consolidating: set[str] = set() # Session keys with consolidation in progress - self._consolidation_tasks: set[asyncio.Task] = set() # Strong refs to in-flight tasks - self._consolidation_locks: weakref.WeakValueDictionary[str, asyncio.Lock] = weakref.WeakValueDictionary() self._active_tasks: dict[str, list[asyncio.Task]] = {} # session_key -> tasks + self._compression_tasks: dict[str, asyncio.Task] = {} # session_key -> task self._processing_lock = asyncio.Lock() self._register_default_tools() + @staticmethod + def _estimate_prompt_tokens( + messages: list[dict[str, Any]], + tools: list[dict[str, Any]] | None = None, + ) -> int: + """Estimate prompt tokens with tiktoken (fallback only).""" + if tiktoken is None: + return 0 + + try: + enc = tiktoken.get_encoding("cl100k_base") + parts: list[str] = [] + for msg in messages: + content = msg.get("content") + if isinstance(content, str): + parts.append(content) + elif isinstance(content, list): + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + txt = part.get("text", "") + if txt: + parts.append(txt) + if tools: + parts.append(json.dumps(tools, ensure_ascii=False)) + return len(enc.encode("\n".join(parts))) + except Exception: + return 0 + + def _estimate_prompt_tokens_chain( + 
self, + messages: list[dict[str, Any]], + tools: list[dict[str, Any]] | None = None, + ) -> tuple[int, str]: + """Unified prompt-token estimation: provider counter -> tiktoken.""" + provider_counter = getattr(self.provider, "estimate_prompt_tokens", None) + if callable(provider_counter): + try: + tokens, source = provider_counter(messages, tools, self.model) + if isinstance(tokens, (int, float)) and tokens > 0: + return int(tokens), str(source or "provider_counter") + except Exception: + logger.debug("Provider token counter failed; fallback to tiktoken") + + estimated = self._estimate_prompt_tokens(messages, tools) + if estimated > 0: + return int(estimated), "tiktoken" + return 0, "none" + + @staticmethod + def _estimate_completion_tokens(content: str) -> int: + """Estimate completion tokens with tiktoken (fallback only).""" + if tiktoken is None: + return 0 + try: + enc = tiktoken.get_encoding("cl100k_base") + return len(enc.encode(content or "")) + except Exception: + return 0 + + def _get_compressed_until(self, session: Session) -> int: + """Read/normalize compressed boundary and migrate old metadata format.""" + raw = session.metadata.get("_compressed_until", 0) + try: + compressed_until = int(raw) + except (TypeError, ValueError): + compressed_until = 0 + + if compressed_until <= 0: + ranges = session.metadata.get("_compressed_ranges") + if isinstance(ranges, list): + inferred = 0 + for item in ranges: + if not isinstance(item, (list, tuple)) or len(item) != 2: + continue + try: + inferred = max(inferred, int(item[1])) + except (TypeError, ValueError): + continue + compressed_until = inferred + + compressed_until = max(0, min(compressed_until, len(session.messages))) + session.metadata["_compressed_until"] = compressed_until + # 兼容旧版本:一旦迁移出连续边界,就可以清理旧字段 + session.metadata.pop("_compressed_ranges", None) + session.metadata.pop("_cumulative_tokens", None) + return compressed_until + + def _set_compressed_until(self, session: Session, idx: int) -> None: + 
"""Persist a contiguous compressed boundary.""" + session.metadata["_compressed_until"] = max(0, min(int(idx), len(session.messages))) + session.metadata.pop("_compressed_ranges", None) + session.metadata.pop("_cumulative_tokens", None) + + @staticmethod + def _estimate_message_tokens(message: dict[str, Any]) -> int: + """Rough token estimate for a single persisted message.""" + content = message.get("content") + parts: list[str] = [] + if isinstance(content, str): + parts.append(content) + elif isinstance(content, list): + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + txt = part.get("text", "") + if txt: + parts.append(txt) + else: + parts.append(json.dumps(part, ensure_ascii=False)) + elif content is not None: + parts.append(json.dumps(content, ensure_ascii=False)) + + for key in ("name", "tool_call_id"): + val = message.get(key) + if isinstance(val, str) and val: + parts.append(val) + if message.get("tool_calls"): + parts.append(json.dumps(message["tool_calls"], ensure_ascii=False)) + + payload = "\n".join(parts) + if not payload: + return 1 + if tiktoken is not None: + try: + enc = tiktoken.get_encoding("cl100k_base") + return max(1, len(enc.encode(payload))) + except Exception: + pass + return max(1, len(payload) // 4) + + def _pick_compression_chunk_by_tokens( + self, + session: Session, + reduction_tokens: int, + *, + tail_keep: int = 12, + ) -> tuple[int, int, int] | None: + """ + Pick one contiguous old chunk so its estimated size is roughly enough + to reduce `reduction_tokens`. 
+ """ + messages = session.messages + start = self._get_compressed_until(session) + if len(messages) - start <= tail_keep + 2: + return None + + end_limit = len(messages) - tail_keep + if end_limit - start < 2: + return None + + target = max(1, reduction_tokens) + end = start + collected = 0 + while end < end_limit and collected < target: + collected += self._estimate_message_tokens(messages[end]) + end += 1 + + if end - start < 2: + end = min(end_limit, start + 2) + collected = sum(self._estimate_message_tokens(m) for m in messages[start:end]) + if end - start < 2: + return None + return start, end, collected + + def _estimate_session_prompt_tokens(self, session: Session) -> tuple[int, str]: + """ + Estimate current full prompt tokens for this session view + (system + compressed history view + runtime/user placeholder + tools). + """ + history = self._build_compressed_history_view(session) + channel, chat_id = (session.key.split(":", 1) if ":" in session.key else (None, None)) + probe_messages = self.context.build_messages( + history=history, + current_message="[token-probe]", + channel=channel, + chat_id=chat_id, + ) + return self._estimate_prompt_tokens_chain(probe_messages, self.tools.get_definitions()) + + async def _maybe_compress_history( + self, + session: Session, + ) -> None: + """ + End-of-turn policy: + - Estimate current prompt usage from persisted session view. + - If above start ratio, perform one best-effort compression chunk. 
+ """ + if not session.messages: + self._set_compressed_until(session, 0) + return + + budget = max(1, self.max_tokens_input - self.max_tokens - self._reserve_tokens) + start_threshold = int(budget * self.compression_start_ratio) + target_threshold = int(budget * self.compression_target_ratio) + if target_threshold >= start_threshold: + target_threshold = max(0, start_threshold - 1) + + current_tokens, token_source = self._estimate_session_prompt_tokens(session) + current_ratio = current_tokens / budget if budget else 0.0 + if current_tokens <= 0: + logger.debug("Compression skip {}: token estimate unavailable", session.key) + return + if current_tokens < start_threshold: + logger.debug( + "Compression idle {}: {}/{} ({:.1%}) via {}", + session.key, + current_tokens, + budget, + current_ratio, + token_source, + ) + return + logger.info( + "Compression trigger {}: {}/{} ({:.1%}) via {}", + session.key, + current_tokens, + budget, + current_ratio, + token_source, + ) + + reduction_by_target = max(0, current_tokens - target_threshold) + reduction_by_delta = max(1, start_threshold - target_threshold) + reduction_need = max(reduction_by_target, reduction_by_delta) + + chunk_range = self._pick_compression_chunk_by_tokens(session, reduction_need, tail_keep=10) + if chunk_range is None: + logger.info("Compression skipped for {}: no compressible chunk", session.key) + return + + start_idx, end_idx, estimated_chunk_tokens = chunk_range + chunk = session.messages[start_idx:end_idx] + if len(chunk) < 2: + return + + logger.info( + "Compression chunk {}: msgs {}-{} (count={}, est~{}, need~{})", + session.key, + start_idx, + end_idx - 1, + len(chunk), + estimated_chunk_tokens, + reduction_need, + ) + success, _ = await self.context.memory.consolidate_chunk( + chunk, + self.provider, + self.model, + ) + if not success: + logger.warning("Compression aborted for {}: consolidation failed", session.key) + return + + self._set_compressed_until(session, end_idx) + 
self.sessions.save(session) + + after_tokens, after_source = self._estimate_session_prompt_tokens(session) + after_ratio = after_tokens / budget if budget else 0.0 + reduced = max(0, current_tokens - after_tokens) + reduced_ratio = (reduced / current_tokens) if current_tokens > 0 else 0.0 + logger.info( + "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%})", + session.key, + after_tokens, + budget, + after_ratio, + after_source, + reduced, + reduced_ratio, + ) + + def _schedule_background_compression(self, session_key: str) -> None: + """Schedule best-effort background compression for a session.""" + existing = self._compression_tasks.get(session_key) + if existing is not None and not existing.done(): + return + + async def _runner() -> None: + session = self.sessions.get_or_create(session_key) + try: + await self._maybe_compress_history(session) + except Exception: + logger.exception("Background compression failed for {}", session_key) + + task = asyncio.create_task(_runner()) + self._compression_tasks[session_key] = task + + def _cleanup(t: asyncio.Task) -> None: + cur = self._compression_tasks.get(session_key) + if cur is t: + self._compression_tasks.pop(session_key, None) + try: + t.result() + except BaseException: + pass + + task.add_done_callback(_cleanup) + + async def wait_for_background_compression(self, timeout_s: float | None = None) -> None: + """Wait for currently scheduled compression tasks.""" + pending = [t for t in self._compression_tasks.values() if not t.done()] + if not pending: + return + + logger.info("Waiting for {} background compression task(s)", len(pending)) + waiter = asyncio.gather(*pending, return_exceptions=True) + if timeout_s is None: + await waiter + return + + try: + await asyncio.wait_for(waiter, timeout=timeout_s) + except asyncio.TimeoutError: + logger.warning( + "Background compression wait timed out after {}s ({} task(s) still running)", + timeout_s, + len([t for t in self._compression_tasks.values() if not 
t.done()]), + ) + + def _build_compressed_history_view( + self, + session: Session, + ) -> list[dict]: + """Build non-destructive history view using the compressed boundary.""" + compressed_until = self._get_compressed_until(session) + if compressed_until <= 0: + return session.get_history(max_messages=0) + + notice_msg: dict[str, Any] = { + "role": "assistant", + "content": ( + "As your assistant, I have compressed earlier context. " + "If you need details, please check memory/HISTORY.md." + ), + } + + tail: list[dict[str, Any]] = [] + for msg in session.messages[compressed_until:]: + entry: dict[str, Any] = {"role": msg["role"], "content": msg.get("content", "")} + for k in ("tool_calls", "tool_call_id", "name"): + if k in msg: + entry[k] = msg[k] + tail.append(entry) + + # Drop leading non-user entries from tail to avoid orphan tool blocks. + for i, m in enumerate(tail): + if m.get("role") == "user": + tail = tail[i:] + break + else: + tail = [] + + return [notice_msg, *tail] + def _register_default_tools(self) -> None: """Register the default set of tools.""" allowed_dir = self.workspace if self.restrict_to_workspace else None for cls in (ReadFileTool, WriteFileTool, EditFileTool, ListDirTool): self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir)) + self.tools.register(ValidateDeployJSONTool()) + self.tools.register(ValidateUsageYAMLTool()) + self.tools.register(HuggingFaceModelSearchTool()) self.tools.register(ExecTool( working_dir=str(self.workspace), timeout=self.exec_config.timeout, @@ -181,25 +553,78 @@ class AgentLoop: self, initial_messages: list[dict], on_progress: Callable[..., Awaitable[None]] | None = None, - ) -> tuple[str | None, list[str], list[dict]]: - """Run the agent iteration loop. Returns (final_content, tools_used, messages).""" + ) -> tuple[str | None, list[str], list[dict], int, str]: + """ + Run the agent iteration loop. 
+ + Returns: + (final_content, tools_used, messages, total_tokens_this_turn, token_source) + total_tokens_this_turn: total tokens (prompt + completion) for this turn + token_source: provider_total / provider_sum / provider_prompt / + provider_counter+tiktoken_completion / tiktoken / none + """ messages = initial_messages iteration = 0 final_content = None tools_used: list[str] = [] + total_tokens_this_turn = 0 + token_source = "none" while iteration < self.max_iterations: iteration += 1 + tool_defs = self.tools.get_definitions() + response = await self.provider.chat( messages=messages, - tools=self.tools.get_definitions(), + tools=tool_defs, model=self.model, temperature=self.temperature, max_tokens=self.max_tokens, reasoning_effort=self.reasoning_effort, ) + # Prefer provider usage from the turn-ending model call; fallback to tiktoken. + # Calculate total tokens (prompt + completion) for this turn. + usage = response.usage or {} + t_tokens = usage.get("total_tokens") + p_tokens = usage.get("prompt_tokens") + c_tokens = usage.get("completion_tokens") + + if isinstance(t_tokens, (int, float)) and t_tokens > 0: + total_tokens_this_turn = int(t_tokens) + token_source = "provider_total" + elif isinstance(p_tokens, (int, float)) and isinstance(c_tokens, (int, float)): + # If we have both prompt and completion tokens, sum them + total_tokens_this_turn = int(p_tokens) + int(c_tokens) + token_source = "provider_sum" + elif isinstance(p_tokens, (int, float)) and p_tokens > 0: + # Fallback: use prompt tokens only (completion might be 0 for tool calls) + total_tokens_this_turn = int(p_tokens) + token_source = "provider_prompt" + else: + # Estimate with unified chain (provider counter -> tiktoken), plus completion tiktoken. 
+ estimated_prompt, prompt_source = self._estimate_prompt_tokens_chain(messages, tool_defs) + estimated_completion = self._estimate_completion_tokens(response.content or "") + total_tokens_this_turn = estimated_prompt + estimated_completion + if total_tokens_this_turn > 0: + token_source = ( + "tiktoken" + if prompt_source == "tiktoken" + else f"{prompt_source}+tiktoken_completion" + ) + if total_tokens_this_turn <= 0: + total_tokens_this_turn = 0 + token_source = "none" + + logger.debug( + "Turn token usage: source={}, total={}, prompt={}, completion={}", + token_source, + total_tokens_this_turn, + p_tokens if isinstance(p_tokens, (int, float)) else None, + c_tokens if isinstance(c_tokens, (int, float)) else None, + ) + if response.has_tool_calls: if on_progress: thought = self._strip_think(response.content) @@ -254,7 +679,7 @@ class AgentLoop: "without completing the task. You can try breaking the task into smaller steps." ) - return final_content, tools_used, messages + return final_content, tools_used, messages, total_tokens_this_turn, token_source async def run(self) -> None: """Run the agent loop, dispatching messages as tasks to stay responsive to /stop.""" @@ -279,6 +704,9 @@ class AgentLoop: """Cancel all active tasks and subagents for the session.""" tasks = self._active_tasks.pop(msg.session_key, []) cancelled = sum(1 for t in tasks if not t.done() and t.cancel()) + comp = self._compression_tasks.get(msg.session_key) + if comp is not None and not comp.done() and comp.cancel(): + cancelled += 1 for t in tasks: try: await t @@ -325,6 +753,9 @@ class AgentLoop: def stop(self) -> None: """Stop the agent loop.""" self._running = False + for task in list(self._compression_tasks.values()): + if not task.done(): + task.cancel() logger.info("Agent loop stopping") async def _process_message( @@ -342,14 +773,15 @@ class AgentLoop: key = f"{channel}:{chat_id}" session = self.sessions.get_or_create(key) self._set_tool_context(channel, chat_id, 
msg.metadata.get("message_id")) - history = session.get_history(max_messages=self.memory_window) + history = self._build_compressed_history_view(session) messages = self.context.build_messages( history=history, current_message=msg.content, channel=channel, chat_id=chat_id, ) - final_content, _, all_msgs = await self._run_agent_loop(messages) + final_content, _, all_msgs, _, _ = await self._run_agent_loop(messages) self._save_turn(session, all_msgs, 1 + len(history)) self.sessions.save(session) + self._schedule_background_compression(session.key) return OutboundMessage(channel=channel, chat_id=chat_id, content=final_content or "Background task completed.") @@ -362,27 +794,27 @@ class AgentLoop: # Slash commands cmd = msg.content.strip().lower() if cmd == "/new": - lock = self._consolidation_locks.setdefault(session.key, asyncio.Lock()) - self._consolidating.add(session.key) try: - async with lock: - snapshot = session.messages[session.last_consolidated:] - if snapshot: - temp = Session(key=session.key) - temp.messages = list(snapshot) - if not await self._consolidate_memory(temp, archive_all=True): - return OutboundMessage( - channel=msg.channel, chat_id=msg.chat_id, - content="Memory archival failed, session not cleared. Please try again.", - ) + # 在清空会话前,将当前完整对话做一次归档压缩到 MEMORY/HISTORY 中 + if session.messages: + ok, _ = await self.context.memory.consolidate_chunk( + session.messages, + self.provider, + self.model, + ) + if not ok: + return OutboundMessage( + channel=msg.channel, + chat_id=msg.chat_id, + content="Memory archival failed, session not cleared. Please try again.", + ) except Exception: logger.exception("/new archival failed for {}", session.key) return OutboundMessage( - channel=msg.channel, chat_id=msg.chat_id, + channel=msg.channel, + chat_id=msg.chat_id, content="Memory archival failed, session not cleared. 
Please try again.", ) - finally: - self._consolidating.discard(session.key) session.clear() self.sessions.save(session) @@ -393,36 +825,23 @@ class AgentLoop: return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id, content="🐈 nanobot commands:\n/new — Start a new conversation\n/stop — Stop the current task\n/help — Show available commands") - unconsolidated = len(session.messages) - session.last_consolidated - if (unconsolidated >= self.memory_window and session.key not in self._consolidating): - self._consolidating.add(session.key) - lock = self._consolidation_locks.setdefault(session.key, asyncio.Lock()) - - async def _consolidate_and_unlock(): - try: - async with lock: - await self._consolidate_memory(session) - finally: - self._consolidating.discard(session.key) - _task = asyncio.current_task() - if _task is not None: - self._consolidation_tasks.discard(_task) - - _task = asyncio.create_task(_consolidate_and_unlock()) - self._consolidation_tasks.add(_task) - self._set_tool_context(msg.channel, msg.chat_id, msg.metadata.get("message_id")) if message_tool := self.tools.get("message"): if isinstance(message_tool, MessageTool): message_tool.start_turn() - history = session.get_history(max_messages=self.memory_window) + # 正常对话:使用压缩后的历史视图(压缩在回合结束后进行) + history = self._build_compressed_history_view(session) initial_messages = self.context.build_messages( history=history, current_message=msg.content, media=msg.media if msg.media else None, channel=msg.channel, chat_id=msg.chat_id, ) + # Add [CRON JOB] identifier for cron sessions (session_key starts with "cron:") + if session_key and session_key.startswith("cron:"): + if initial_messages and initial_messages[0].get("role") == "system": + initial_messages[0]["content"] = f"[CRON JOB] {initial_messages[0]['content']}" async def _bus_progress(content: str, *, tool_hint: bool = False) -> None: meta = dict(msg.metadata or {}) @@ -432,7 +851,7 @@ class AgentLoop: channel=msg.channel, chat_id=msg.chat_id, 
content=content, metadata=meta, )) - final_content, _, all_msgs = await self._run_agent_loop( + final_content, _, all_msgs, _, _ = await self._run_agent_loop( initial_messages, on_progress=on_progress or _bus_progress, ) @@ -441,6 +860,7 @@ class AgentLoop: self._save_turn(session, all_msgs, 1 + len(history)) self.sessions.save(session) + self._schedule_background_compression(session.key) if (mt := self.tools.get("message")) and isinstance(mt, MessageTool) and mt._sent_in_turn: return None @@ -487,13 +907,6 @@ class AgentLoop: session.messages.append(entry) session.updated_at = datetime.now() - async def _consolidate_memory(self, session, archive_all: bool = False) -> bool: - """Delegate to MemoryStore.consolidate(). Returns True on success.""" - return await MemoryStore(self.workspace).consolidate( - session, self.provider, self.model, - archive_all=archive_all, memory_window=self.memory_window, - ) - async def process_direct( self, content: str, diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py index 21fe77d..c8896c8 100644 --- a/nanobot/agent/memory.py +++ b/nanobot/agent/memory.py @@ -66,36 +66,25 @@ class MemoryStore: long_term = self.read_long_term() return f"## Long-term Memory\n{long_term}" if long_term else "" - async def consolidate( + async def consolidate_chunk( self, - session: Session, + messages: list[dict], provider: LLMProvider, model: str, - *, - archive_all: bool = False, - memory_window: int = 50, - ) -> bool: - """Consolidate old messages into MEMORY.md + HISTORY.md via LLM tool call. + ) -> tuple[bool, str | None]: + """Consolidate a chunk of messages into MEMORY.md + HISTORY.md via LLM tool call. - Returns True on success (including no-op), False on failure. + Returns (success, None). + + - success: True on success (including no-op), False on failure. + - The second return value is reserved for future use (e.g. RAG-style summaries) and is + always None in the current implementation. 
""" - if archive_all: - old_messages = session.messages - keep_count = 0 - logger.info("Memory consolidation (archive_all): {} messages", len(session.messages)) - else: - keep_count = memory_window // 2 - if len(session.messages) <= keep_count: - return True - if len(session.messages) - session.last_consolidated <= 0: - return True - old_messages = session.messages[session.last_consolidated:-keep_count] - if not old_messages: - return True - logger.info("Memory consolidation: {} to consolidate, {} keep", len(old_messages), keep_count) + if not messages: + return True, None lines = [] - for m in old_messages: + for m in messages: if not m.get("content"): continue tools = f" [tools: {', '.join(m['tools_used'])}]" if m.get("tools_used") else "" @@ -113,7 +102,19 @@ class MemoryStore: try: response = await provider.chat( messages=[ - {"role": "system", "content": "You are a memory consolidation agent. Call the save_memory tool with your consolidation of the conversation."}, + { + "role": "system", + "content": ( + "You are a memory consolidation agent.\n" + "Your job is to:\n" + "1) Append a concise but grep-friendly entry to HISTORY.md summarizing key events, decisions and topics.\n" + " - Write 1 paragraph of 2–5 sentences that starts with [YYYY-MM-DD HH:MM].\n" + " - Include concrete names, IDs and numbers so it is easy to search with grep.\n" + "2) Update long-term MEMORY.md with stable facts and user preferences as markdown, including all existing facts plus new ones.\n" + "3) Optionally return a short context_summary (1–3 sentences) that will replace the raw messages in future dialogue history.\n\n" + "Always call the save_memory tool with history_entry, memory_update and (optionally) context_summary." 
+ ), + }, {"role": "user", "content": prompt}, ], tools=_SAVE_MEMORY_TOOL, @@ -122,7 +123,7 @@ class MemoryStore: if not response.has_tool_calls: logger.warning("Memory consolidation: LLM did not call save_memory, skipping") - return False + return False, None args = response.tool_calls[0].arguments # Some providers return arguments as a JSON string instead of dict @@ -134,10 +135,10 @@ class MemoryStore: args = args[0] else: logger.warning("Memory consolidation: unexpected arguments as empty or non-dict list") - return False + return False, None if not isinstance(args, dict): logger.warning("Memory consolidation: unexpected arguments type {}", type(args).__name__) - return False + return False, None if entry := args.get("history_entry"): if not isinstance(entry, str): @@ -149,9 +150,8 @@ class MemoryStore: if update != current_memory: self.write_long_term(update) - session.last_consolidated = 0 if archive_all else len(session.messages) - keep_count - logger.info("Memory consolidation done: {} messages, last_consolidated={}", len(session.messages), session.last_consolidated) - return True + logger.info("Memory consolidation done for {} messages", len(messages)) + return True, None except Exception: logger.exception("Memory consolidation failed") - return False + return False, None diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index 803cb61..1ebde20 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -189,11 +189,22 @@ class SlackConfig(Base): class QQConfig(Base): - """QQ channel configuration using botpy SDK.""" + """QQ channel configuration. + + Supports two implementations: + 1. Official botpy SDK: requires app_id and secret + 2. 
OneBot protocol: requires api_url (and optionally ws_reverse_url, bot_qq, access_token) + """ enabled: bool = False + # Official botpy SDK fields app_id: str = "" # 机器人 ID (AppID) from q.qq.com secret: str = "" # 机器人密钥 (AppSecret) from q.qq.com + # OneBot protocol fields + api_url: str = "" # OneBot HTTP API URL (e.g. "http://localhost:5700") + ws_reverse_url: str = "" # OneBot WebSocket reverse URL (e.g. "ws://localhost:8080/ws/reverse") + bot_qq: int | None = None # Bot's QQ number (for filtering self messages) + access_token: str = "" # Optional access token for OneBot API allow_from: list[str] = Field( default_factory=list ) # Allowed user openids (empty = public access) @@ -226,10 +237,18 @@ class AgentDefaults(Base): provider: str = ( "auto" # Provider name (e.g. "anthropic", "openrouter") or "auto" for auto-detection ) - max_tokens: int = 8192 + # 原生上下文最大窗口(通常对应模型的 max_input_tokens / max_context_tokens) + # 默认按照主流大模型(如 GPT-4o、Claude 3.x 等)的 128k 上下文给一个宽松上限,实际应根据所选模型文档手动调整。 + max_tokens_input: int = 128_000 + # 默认单次回复的最大输出 token 上限(调用时可按需要再做截断或比例分配) + # 8192 足以覆盖大多数实际对话/工具使用场景,同样可按需手动调整。 + max_tokens_output: int = 8192 + # 会话历史压缩触发比例:当估算的输入 token 使用量 >= maxTokensInput * compressionStartRatio 时开始压缩。 + compression_start_ratio: float = 0.7 + # 会话历史压缩目标比例:每轮压缩后尽量把估算的输入 token 使用量压到 maxTokensInput * compressionTargetRatio 附近。 + compression_target_ratio: float = 0.4 temperature: float = 0.1 max_tool_iterations: int = 40 - memory_window: int = 100 reasoning_effort: str | None = None # low / medium / high — enables LLM thinking mode diff --git a/nanobot/session/manager.py b/nanobot/session/manager.py index f0a6484..1cb8a51 100644 --- a/nanobot/session/manager.py +++ b/nanobot/session/manager.py @@ -9,7 +9,6 @@ from typing import Any from loguru import logger -from nanobot.config.paths import get_legacy_sessions_dir from nanobot.utils.helpers import ensure_dir, safe_filename @@ -30,7 +29,6 @@ class Session: created_at: datetime = field(default_factory=datetime.now) 
updated_at: datetime = field(default_factory=datetime.now) metadata: dict[str, Any] = field(default_factory=dict) - last_consolidated: int = 0 # Number of messages already consolidated to files def add_message(self, role: str, content: str, **kwargs: Any) -> None: """Add a message to the session.""" @@ -44,9 +42,13 @@ class Session: self.updated_at = datetime.now() def get_history(self, max_messages: int = 500) -> list[dict[str, Any]]: - """Return unconsolidated messages for LLM input, aligned to a user turn.""" - unconsolidated = self.messages[self.last_consolidated:] - sliced = unconsolidated[-max_messages:] + """ + Return messages for LLM input, aligned to a user turn. + + - max_messages > 0 时只保留最近 max_messages 条; + - max_messages <= 0 时不做条数截断,返回全部消息。 + """ + sliced = self.messages if max_messages <= 0 else self.messages[-max_messages:] # Drop leading non-user messages to avoid orphaned tool_result blocks for i, m in enumerate(sliced): @@ -66,7 +68,7 @@ class Session: def clear(self) -> None: """Clear all messages and reset session to initial state.""" self.messages = [] - self.last_consolidated = 0 + self.metadata = {} self.updated_at = datetime.now() @@ -80,7 +82,7 @@ class SessionManager: def __init__(self, workspace: Path): self.workspace = workspace self.sessions_dir = ensure_dir(self.workspace / "sessions") - self.legacy_sessions_dir = get_legacy_sessions_dir() + self.legacy_sessions_dir = Path.home() / ".nanobot" / "sessions" self._cache: dict[str, Session] = {} def _get_session_path(self, key: str) -> Path: @@ -132,7 +134,6 @@ class SessionManager: messages = [] metadata = {} created_at = None - last_consolidated = 0 with open(path, encoding="utf-8") as f: for line in f: @@ -145,7 +146,6 @@ class SessionManager: if data.get("_type") == "metadata": metadata = data.get("metadata", {}) created_at = datetime.fromisoformat(data["created_at"]) if data.get("created_at") else None - last_consolidated = data.get("last_consolidated", 0) else: messages.append(data) 
@@ -154,7 +154,6 @@ class SessionManager: messages=messages, created_at=created_at or datetime.now(), metadata=metadata, - last_consolidated=last_consolidated ) except Exception as e: logger.warning("Failed to load session {}: {}", key, e) @@ -171,7 +170,6 @@ class SessionManager: "created_at": session.created_at.isoformat(), "updated_at": session.updated_at.isoformat(), "metadata": session.metadata, - "last_consolidated": session.last_consolidated } f.write(json.dumps(metadata_line, ensure_ascii=False) + "\n") for msg in session.messages: From 2dcb4de422ddec8c0f114dc6b0fdce06b9388b8f Mon Sep 17 00:00:00 2001 From: VITOHJL Date: Sun, 8 Mar 2026 15:04:38 +0800 Subject: [PATCH 02/28] fix(commands): update AgentLoop calls to use token-based compression parameters --- nanobot/cli/commands.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 2c8d6d3..cf29cc5 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -330,8 +330,10 @@ def gateway( temperature=config.agents.defaults.temperature, max_tokens=config.agents.defaults.max_tokens, max_iterations=config.agents.defaults.max_tool_iterations, - memory_window=config.agents.defaults.memory_window, reasoning_effort=config.agents.defaults.reasoning_effort, + max_tokens_input=config.agents.defaults.max_tokens_input, + compression_start_ratio=config.agents.defaults.compression_start_ratio, + compression_target_ratio=config.agents.defaults.compression_target_ratio, brave_api_key=config.tools.web.search.api_key or None, web_proxy=config.tools.web.proxy or None, exec_config=config.tools.exec, @@ -515,8 +517,10 @@ def agent( temperature=config.agents.defaults.temperature, max_tokens=config.agents.defaults.max_tokens, max_iterations=config.agents.defaults.max_tool_iterations, - memory_window=config.agents.defaults.memory_window, reasoning_effort=config.agents.defaults.reasoning_effort, + 
max_tokens_input=config.agents.defaults.max_tokens_input, + compression_start_ratio=config.agents.defaults.compression_start_ratio, + compression_target_ratio=config.agents.defaults.compression_target_ratio, brave_api_key=config.tools.web.search.api_key or None, web_proxy=config.tools.web.proxy or None, exec_config=config.tools.exec, From 2706d3c317be7325795e9dac74d07512e57112f4 Mon Sep 17 00:00:00 2001 From: VITOHJL Date: Sun, 8 Mar 2026 15:20:34 +0800 Subject: [PATCH 03/28] fix(commands): use max_tokens_output instead of max_tokens from AgentDefaults --- nanobot/cli/commands.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index cf29cc5..18c9d56 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -328,7 +328,7 @@ def gateway( workspace=config.workspace_path, model=config.agents.defaults.model, temperature=config.agents.defaults.temperature, - max_tokens=config.agents.defaults.max_tokens, + max_tokens=config.agents.defaults.max_tokens_output, max_iterations=config.agents.defaults.max_tool_iterations, reasoning_effort=config.agents.defaults.reasoning_effort, max_tokens_input=config.agents.defaults.max_tokens_input, @@ -515,7 +515,7 @@ def agent( workspace=config.workspace_path, model=config.agents.defaults.model, temperature=config.agents.defaults.temperature, - max_tokens=config.agents.defaults.max_tokens, + max_tokens=config.agents.defaults.max_tokens_output, max_iterations=config.agents.defaults.max_tool_iterations, reasoning_effort=config.agents.defaults.reasoning_effort, max_tokens_input=config.agents.defaults.max_tokens_input, From a984e0df3752f6a8883a0e9b6d8efee4abd7f9dd Mon Sep 17 00:00:00 2001 From: VITOHJL Date: Sun, 8 Mar 2026 15:23:55 +0800 Subject: [PATCH 04/28] feat(loop): add history message count logging in compression --- nanobot/agent/loop.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py 
index 696e2a7..5d316ea 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -362,6 +362,7 @@ class AgentLoop: if len(chunk) < 2: return + before_msg_count = len(session.messages) logger.info( "Compression chunk {}: msgs {}-{} (count={}, est~{}, need~{})", session.key, @@ -383,12 +384,13 @@ class AgentLoop: self._set_compressed_until(session, end_idx) self.sessions.save(session) + after_msg_count = len(session.messages) after_tokens, after_source = self._estimate_session_prompt_tokens(session) after_ratio = after_tokens / budget if budget else 0.0 reduced = max(0, current_tokens - after_tokens) reduced_ratio = (reduced / current_tokens) if current_tokens > 0 else 0.0 logger.info( - "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%})", + "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%}), history: {} -> {}", session.key, after_tokens, budget, @@ -396,6 +398,8 @@ class AgentLoop: after_source, reduced, reduced_ratio, + before_msg_count, + after_msg_count, ) def _schedule_background_compression(self, session_key: str) -> None: From 1b16d48390b3fded3438f4fdbc3f0ae0a0379878 Mon Sep 17 00:00:00 2001 From: VITOHJL Date: Sun, 8 Mar 2026 15:26:49 +0800 Subject: [PATCH 05/28] fix(loop): update _cumulative_tokens in _save_turn and preserve it in compression methods --- nanobot/agent/loop.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index 5d316ea..5e01b79 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -211,14 +211,14 @@ class AgentLoop: session.metadata["_compressed_until"] = compressed_until # 兼容旧版本:一旦迁移出连续边界,就可以清理旧字段 session.metadata.pop("_compressed_ranges", None) - session.metadata.pop("_cumulative_tokens", None) + # 注意:不要删除 _cumulative_tokens,压缩逻辑需要它来跟踪累积 token 计数 return compressed_until def _set_compressed_until(self, session: Session, idx: int) -> None: """Persist a contiguous compressed boundary.""" 
session.metadata["_compressed_until"] = max(0, min(int(idx), len(session.messages))) session.metadata.pop("_compressed_ranges", None) - session.metadata.pop("_cumulative_tokens", None) + # 注意:不要删除 _cumulative_tokens,压缩逻辑需要它来跟踪累积 token 计数 @staticmethod def _estimate_message_tokens(message: dict[str, Any]) -> int: @@ -362,7 +362,6 @@ class AgentLoop: if len(chunk) < 2: return - before_msg_count = len(session.messages) logger.info( "Compression chunk {}: msgs {}-{} (count={}, est~{}, need~{})", session.key, @@ -384,13 +383,12 @@ class AgentLoop: self._set_compressed_until(session, end_idx) self.sessions.save(session) - after_msg_count = len(session.messages) after_tokens, after_source = self._estimate_session_prompt_tokens(session) after_ratio = after_tokens / budget if budget else 0.0 reduced = max(0, current_tokens - after_tokens) reduced_ratio = (reduced / current_tokens) if current_tokens > 0 else 0.0 logger.info( - "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%}), history: {} -> {}", + "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%})", session.key, after_tokens, budget, @@ -398,8 +396,6 @@ class AgentLoop: after_source, reduced, reduced_ratio, - before_msg_count, - after_msg_count, ) def _schedule_background_compression(self, session_key: str) -> None: @@ -855,14 +851,14 @@ class AgentLoop: channel=msg.channel, chat_id=msg.chat_id, content=content, metadata=meta, )) - final_content, _, all_msgs, _, _ = await self._run_agent_loop( + final_content, _, all_msgs, total_tokens_this_turn, token_source = await self._run_agent_loop( initial_messages, on_progress=on_progress or _bus_progress, ) if final_content is None: final_content = "I've completed processing but have no response to give." 
- self._save_turn(session, all_msgs, 1 + len(history)) + self._save_turn(session, all_msgs, 1 + len(history), total_tokens_this_turn) self.sessions.save(session) self._schedule_background_compression(session.key) @@ -876,7 +872,7 @@ class AgentLoop: metadata=msg.metadata or {}, ) - def _save_turn(self, session: Session, messages: list[dict], skip: int) -> None: + def _save_turn(self, session: Session, messages: list[dict], skip: int, total_tokens_this_turn: int = 0) -> None: """Save new-turn messages into session, truncating large tool results.""" from datetime import datetime for m in messages[skip:]: @@ -910,6 +906,14 @@ class AgentLoop: entry.setdefault("timestamp", datetime.now().isoformat()) session.messages.append(entry) session.updated_at = datetime.now() + + # Update cumulative token count for compression tracking + if total_tokens_this_turn > 0: + current_cumulative = session.metadata.get("_cumulative_tokens", 0) + if isinstance(current_cumulative, (int, float)): + session.metadata["_cumulative_tokens"] = int(current_cumulative) + total_tokens_this_turn + else: + session.metadata["_cumulative_tokens"] = total_tokens_this_turn async def process_direct( self, From 274edc5451c1d0f79eda80c76127f497ec6923e9 Mon Sep 17 00:00:00 2001 From: VITOHJL Date: Sun, 8 Mar 2026 17:25:59 +0800 Subject: [PATCH 06/28] fix(compression): prefer provider prompt token usage --- nanobot/agent/loop.py | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index 5e01b79..4f6a051 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -124,6 +124,8 @@ class AgentLoop: self._mcp_connecting = False self._active_tasks: dict[str, list[asyncio.Task]] = {} # session_key -> tasks self._compression_tasks: dict[str, asyncio.Task] = {} # session_key -> task + self._last_turn_prompt_tokens: int = 0 + self._last_turn_prompt_source: str = "none" self._processing_lock = asyncio.Lock() 
self._register_default_tools() @@ -324,7 +326,15 @@ class AgentLoop: if target_threshold >= start_threshold: target_threshold = max(0, start_threshold - 1) - current_tokens, token_source = self._estimate_session_prompt_tokens(session) + # Prefer provider usage prompt tokens from the turn-ending call. + # If unavailable, fall back to estimator chain. + raw_prompt_tokens = session.metadata.get("_last_prompt_tokens") + if isinstance(raw_prompt_tokens, (int, float)) and raw_prompt_tokens > 0: + current_tokens = int(raw_prompt_tokens) + token_source = str(session.metadata.get("_last_prompt_source") or "usage_prompt") + else: + current_tokens, token_source = self._estimate_session_prompt_tokens(session) + current_ratio = current_tokens / budget if budget else 0.0 if current_tokens <= 0: logger.debug("Compression skip {}: token estimate unavailable", session.key) @@ -569,6 +579,8 @@ class AgentLoop: tools_used: list[str] = [] total_tokens_this_turn = 0 token_source = "none" + self._last_turn_prompt_tokens = 0 + self._last_turn_prompt_source = "none" while iteration < self.max_iterations: iteration += 1 @@ -594,19 +606,35 @@ class AgentLoop: if isinstance(t_tokens, (int, float)) and t_tokens > 0: total_tokens_this_turn = int(t_tokens) token_source = "provider_total" + if isinstance(p_tokens, (int, float)) and p_tokens > 0: + self._last_turn_prompt_tokens = int(p_tokens) + self._last_turn_prompt_source = "usage_prompt" + elif isinstance(c_tokens, (int, float)): + prompt_derived = int(t_tokens) - int(c_tokens) + if prompt_derived > 0: + self._last_turn_prompt_tokens = prompt_derived + self._last_turn_prompt_source = "usage_total_minus_completion" elif isinstance(p_tokens, (int, float)) and isinstance(c_tokens, (int, float)): # If we have both prompt and completion tokens, sum them total_tokens_this_turn = int(p_tokens) + int(c_tokens) token_source = "provider_sum" + if p_tokens > 0: + self._last_turn_prompt_tokens = int(p_tokens) + self._last_turn_prompt_source = 
"usage_prompt" elif isinstance(p_tokens, (int, float)) and p_tokens > 0: # Fallback: use prompt tokens only (completion might be 0 for tool calls) total_tokens_this_turn = int(p_tokens) token_source = "provider_prompt" + self._last_turn_prompt_tokens = int(p_tokens) + self._last_turn_prompt_source = "usage_prompt" else: # Estimate with unified chain (provider counter -> tiktoken), plus completion tiktoken. estimated_prompt, prompt_source = self._estimate_prompt_tokens_chain(messages, tool_defs) estimated_completion = self._estimate_completion_tokens(response.content or "") total_tokens_this_turn = estimated_prompt + estimated_completion + if estimated_prompt > 0: + self._last_turn_prompt_tokens = int(estimated_prompt) + self._last_turn_prompt_source = str(prompt_source or "tiktoken") if total_tokens_this_turn > 0: token_source = ( "tiktoken" @@ -779,6 +807,12 @@ class AgentLoop: current_message=msg.content, channel=channel, chat_id=chat_id, ) final_content, _, all_msgs, _, _ = await self._run_agent_loop(messages) + if self._last_turn_prompt_tokens > 0: + session.metadata["_last_prompt_tokens"] = self._last_turn_prompt_tokens + session.metadata["_last_prompt_source"] = self._last_turn_prompt_source + else: + session.metadata.pop("_last_prompt_tokens", None) + session.metadata.pop("_last_prompt_source", None) self._save_turn(session, all_msgs, 1 + len(history)) self.sessions.save(session) self._schedule_background_compression(session.key) @@ -858,6 +892,13 @@ class AgentLoop: if final_content is None: final_content = "I've completed processing but have no response to give." 
+ if self._last_turn_prompt_tokens > 0: + session.metadata["_last_prompt_tokens"] = self._last_turn_prompt_tokens + session.metadata["_last_prompt_source"] = self._last_turn_prompt_source + else: + session.metadata.pop("_last_prompt_tokens", None) + session.metadata.pop("_last_prompt_source", None) + self._save_turn(session, all_msgs, 1 + len(history), total_tokens_this_turn) self.sessions.save(session) self._schedule_background_compression(session.key) From a660a25504b48170579a57496378e2fd843a556f Mon Sep 17 00:00:00 2001 From: chengyongru <2755839590@qq.com> Date: Mon, 9 Mar 2026 22:00:45 +0800 Subject: [PATCH 07/28] feat(wecom): add wecom channel [wobsocket] support text/audio[wecom support audio message by default] --- nanobot/channels/manager.py | 14 +- nanobot/channels/wecom.py | 352 ++++++++++++++++++++++++++++++++++++ nanobot/config/schema.py | 9 + pyproject.toml | 1 + 4 files changed, 375 insertions(+), 1 deletion(-) create mode 100644 nanobot/channels/wecom.py diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py index 51539dd..369795a 100644 --- a/nanobot/channels/manager.py +++ b/nanobot/channels/manager.py @@ -7,7 +7,6 @@ from typing import Any from loguru import logger -from nanobot.bus.events import OutboundMessage from nanobot.bus.queue import MessageBus from nanobot.channels.base import BaseChannel from nanobot.config.schema import Config @@ -150,6 +149,19 @@ class ChannelManager: except ImportError as e: logger.warning("Matrix channel not available: {}", e) + # WeCom channel + if self.config.channels.wecom.enabled: + try: + from nanobot.channels.wecom import WecomChannel + self.channels["wecom"] = WecomChannel( + self.config.channels.wecom, + self.bus, + groq_api_key=self.config.providers.groq.api_key, + ) + logger.info("WeCom channel enabled") + except ImportError as e: + logger.warning("WeCom channel not available: {}", e) + self._validate_allow_from() def _validate_allow_from(self) -> None: diff --git 
a/nanobot/channels/wecom.py b/nanobot/channels/wecom.py new file mode 100644 index 0000000..dc97311 --- /dev/null +++ b/nanobot/channels/wecom.py @@ -0,0 +1,352 @@ +"""WeCom (Enterprise WeChat) channel implementation using wecom_aibot_sdk.""" + +import asyncio +import importlib.util +from collections import OrderedDict +from typing import Any + +from loguru import logger + +from nanobot.bus.events import OutboundMessage +from nanobot.bus.queue import MessageBus +from nanobot.channels.base import BaseChannel +from nanobot.config.paths import get_media_dir +from nanobot.config.schema import WecomConfig + +WECOM_AVAILABLE = importlib.util.find_spec("wecom_aibot_sdk") is not None + +# Message type display mapping +MSG_TYPE_MAP = { + "image": "[image]", + "voice": "[voice]", + "file": "[file]", + "mixed": "[mixed content]", +} + + +class WecomChannel(BaseChannel): + """ + WeCom (Enterprise WeChat) channel using WebSocket long connection. + + Uses WebSocket to receive events - no public IP or webhook required. + + Requires: + - Bot ID and Secret from WeCom AI Bot platform + """ + + name = "wecom" + + def __init__(self, config: WecomConfig, bus: MessageBus, groq_api_key: str = ""): + super().__init__(config, bus) + self.config: WecomConfig = config + self.groq_api_key = groq_api_key + self._client: Any = None + self._processed_message_ids: OrderedDict[str, None] = OrderedDict() + self._loop: asyncio.AbstractEventLoop | None = None + self._generate_req_id = None + # Store frame headers for each chat to enable replies + self._chat_frames: dict[str, Any] = {} + + async def start(self) -> None: + """Start the WeCom bot with WebSocket long connection.""" + if not WECOM_AVAILABLE: + logger.error("WeCom SDK not installed. 
Run: pip install wecom-aibot-sdk-python") + return + + if not self.config.bot_id or not self.config.secret: + logger.error("WeCom bot_id and secret not configured") + return + + from wecom_aibot_sdk import WSClient, generate_req_id + + self._running = True + self._loop = asyncio.get_running_loop() + self._generate_req_id = generate_req_id + + # Create WebSocket client + self._client = WSClient({ + "bot_id": self.config.bot_id, + "secret": self.config.secret, + "reconnect_interval": 1000, + "max_reconnect_attempts": -1, # Infinite reconnect + "heartbeat_interval": 30000, + }) + + # Register event handlers + self._client.on("connected", self._on_connected) + self._client.on("authenticated", self._on_authenticated) + self._client.on("disconnected", self._on_disconnected) + self._client.on("error", self._on_error) + self._client.on("message.text", self._on_text_message) + self._client.on("message.image", self._on_image_message) + self._client.on("message.voice", self._on_voice_message) + self._client.on("message.file", self._on_file_message) + self._client.on("message.mixed", self._on_mixed_message) + self._client.on("event.enter_chat", self._on_enter_chat) + + logger.info("WeCom bot starting with WebSocket long connection") + logger.info("No public IP required - using WebSocket to receive events") + + # Connect + await self._client.connect_async() + + # Keep running until stopped + while self._running: + await asyncio.sleep(1) + + async def stop(self) -> None: + """Stop the WeCom bot.""" + self._running = False + if self._client: + self._client.disconnect() + logger.info("WeCom bot stopped") + + async def _on_connected(self, frame: Any) -> None: + """Handle WebSocket connected event.""" + logger.info("WeCom WebSocket connected") + + async def _on_authenticated(self, frame: Any) -> None: + """Handle authentication success event.""" + logger.info("WeCom authenticated successfully") + + async def _on_disconnected(self, frame: Any) -> None: + """Handle WebSocket 
disconnected event.""" + reason = frame.body if hasattr(frame, 'body') else str(frame) + logger.warning("WeCom WebSocket disconnected: {}", reason) + + async def _on_error(self, frame: Any) -> None: + """Handle error event.""" + logger.error("WeCom error: {}", frame) + + async def _on_text_message(self, frame: Any) -> None: + """Handle text message.""" + await self._process_message(frame, "text") + + async def _on_image_message(self, frame: Any) -> None: + """Handle image message.""" + await self._process_message(frame, "image") + + async def _on_voice_message(self, frame: Any) -> None: + """Handle voice message.""" + await self._process_message(frame, "voice") + + async def _on_file_message(self, frame: Any) -> None: + """Handle file message.""" + await self._process_message(frame, "file") + + async def _on_mixed_message(self, frame: Any) -> None: + """Handle mixed content message.""" + await self._process_message(frame, "mixed") + + async def _on_enter_chat(self, frame: Any) -> None: + """Handle enter_chat event (user opens chat with bot).""" + try: + # Extract body from WsFrame dataclass or dict + if hasattr(frame, 'body'): + body = frame.body or {} + elif isinstance(frame, dict): + body = frame.get("body", frame) + else: + body = {} + + chat_id = body.get("chatid", "") if isinstance(body, dict) else "" + + if chat_id and self.config.welcome_message: + await self._client.reply_welcome(frame, { + "msgtype": "text", + "text": {"content": self.config.welcome_message}, + }) + except Exception as e: + logger.error("Error handling enter_chat: {}", e) + + async def _process_message(self, frame: Any, msg_type: str) -> None: + """Process incoming message and forward to bus.""" + try: + # Extract body from WsFrame dataclass or dict + if hasattr(frame, 'body'): + body = frame.body or {} + elif isinstance(frame, dict): + body = frame.get("body", frame) + else: + body = {} + + # Ensure body is a dict + if not isinstance(body, dict): + logger.warning("Invalid body type: {}", 
type(body)) + return + + # Extract message info + msg_id = body.get("msgid", "") + if not msg_id: + msg_id = f"{body.get('chatid', '')}_{body.get('sendertime', '')}" + + # Deduplication check + if msg_id in self._processed_message_ids: + return + self._processed_message_ids[msg_id] = None + + # Trim cache + while len(self._processed_message_ids) > 1000: + self._processed_message_ids.popitem(last=False) + + # Extract sender info from "from" field (SDK format) + from_info = body.get("from", {}) + sender_id = from_info.get("userid", "unknown") if isinstance(from_info, dict) else "unknown" + + # For single chat, chatid is the sender's userid + # For group chat, chatid is provided in body + chat_type = body.get("chattype", "single") + chat_id = body.get("chatid", sender_id) + + content_parts = [] + + if msg_type == "text": + text = body.get("text", {}).get("content", "") + if text: + content_parts.append(text) + + elif msg_type == "image": + image_info = body.get("image", {}) + file_url = image_info.get("url", "") + aes_key = image_info.get("aeskey", "") + + if file_url and aes_key: + file_path = await self._download_and_save_media(file_url, aes_key, "image") + if file_path: + import os + filename = os.path.basename(file_path) + content_parts.append(f"[image: {filename}]\n[Image: source: {file_path}]") + else: + content_parts.append("[image: download failed]") + else: + content_parts.append("[image: download failed]") + + elif msg_type == "voice": + voice_info = body.get("voice", {}) + # Voice message already contains transcribed content from WeCom + voice_content = voice_info.get("content", "") + if voice_content: + content_parts.append(f"[voice] {voice_content}") + else: + content_parts.append("[voice]") + + elif msg_type == "file": + file_info = body.get("file", {}) + file_url = file_info.get("url", "") + aes_key = file_info.get("aeskey", "") + file_name = file_info.get("name", "unknown") + + if file_url and aes_key: + file_path = await 
self._download_and_save_media(file_url, aes_key, "file", file_name) + if file_path: + content_parts.append(f"[file: {file_name}]\n[File: source: {file_path}]") + else: + content_parts.append(f"[file: {file_name}: download failed]") + else: + content_parts.append(f"[file: {file_name}: download failed]") + + elif msg_type == "mixed": + # Mixed content contains multiple message items + msg_items = body.get("mixed", {}).get("item", []) + for item in msg_items: + item_type = item.get("type", "") + if item_type == "text": + text = item.get("text", {}).get("content", "") + if text: + content_parts.append(text) + else: + content_parts.append(MSG_TYPE_MAP.get(item_type, f"[{item_type}]")) + + else: + content_parts.append(MSG_TYPE_MAP.get(msg_type, f"[{msg_type}]")) + + content = "\n".join(content_parts) if content_parts else "" + + if not content: + return + + # Store frame for this chat to enable replies + self._chat_frames[chat_id] = frame + + # Forward to message bus + # Note: media paths are included in content for broader model compatibility + await self._handle_message( + sender_id=sender_id, + chat_id=chat_id, + content=content, + media=None, + metadata={ + "message_id": msg_id, + "msg_type": msg_type, + "chat_type": chat_type, + } + ) + + except Exception as e: + logger.error("Error processing WeCom message: {}", e) + + async def _download_and_save_media( + self, + file_url: str, + aes_key: str, + media_type: str, + filename: str | None = None, + ) -> str | None: + """ + Download and decrypt media from WeCom. 
+ + Returns: + file_path or None if download failed + """ + try: + data, fname = await self._client.download_file(file_url, aes_key) + + if not data: + logger.warning("Failed to download media from WeCom") + return None + + media_dir = get_media_dir("wecom") + if not filename: + filename = fname or f"{media_type}_{hash(file_url) % 100000}" + + file_path = media_dir / filename + file_path.write_bytes(data) + logger.debug("Downloaded {} to {}", media_type, file_path) + return str(file_path) + + except Exception as e: + logger.error("Error downloading media: {}", e) + return None + + async def send(self, msg: OutboundMessage) -> None: + """Send a message through WeCom.""" + if not self._client: + logger.warning("WeCom client not initialized") + return + + try: + content = msg.content.strip() + if not content: + return + + # Get the stored frame for this chat + frame = self._chat_frames.get(msg.chat_id) + if not frame: + logger.warning("No frame found for chat {}, cannot reply", msg.chat_id) + return + + # Use streaming reply for better UX + stream_id = self._generate_req_id("stream") + + # Send as streaming message with finish=True + await self._client.reply_stream( + frame, + stream_id, + content, + finish=True, + ) + + logger.debug("WeCom message sent to {}", msg.chat_id) + + except Exception as e: + logger.error("Error sending WeCom message: {}", e) diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index 803cb61..63eae48 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -199,7 +199,15 @@ class QQConfig(Base): ) # Allowed user openids (empty = public access) +class WecomConfig(Base): + """WeCom (Enterprise WeChat) AI Bot channel configuration.""" + enabled: bool = False + bot_id: str = "" # Bot ID from WeCom AI Bot platform + secret: str = "" # Bot Secret from WeCom AI Bot platform + allow_from: list[str] = Field(default_factory=list) # Allowed user IDs + welcome_message: str = "" # Welcome message for enter_chat event + 
react_emoji: str = "eyes" # Emoji for message reactions class ChannelsConfig(Base): """Configuration for chat channels.""" @@ -216,6 +224,7 @@ class ChannelsConfig(Base): slack: SlackConfig = Field(default_factory=SlackConfig) qq: QQConfig = Field(default_factory=QQConfig) matrix: MatrixConfig = Field(default_factory=MatrixConfig) + wecom: WecomConfig = Field(default_factory=WecomConfig) class AgentDefaults(Base): diff --git a/pyproject.toml b/pyproject.toml index 62cf616..fac53ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ dependencies = [ "json-repair>=0.57.0,<1.0.0", "chardet>=3.0.2,<6.0.0", "openai>=2.8.0", + "wecom-aibot-sdk-python>=0.1.2", ] [project.optional-dependencies] From 45c0eebae5a700cfa5da28c2ff31208f34180509 Mon Sep 17 00:00:00 2001 From: chengyongru <2755839590@qq.com> Date: Tue, 10 Mar 2026 00:53:23 +0800 Subject: [PATCH 08/28] docs(wecom): add wecom configuration guide in readme --- README.md | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/README.md b/README.md index d3401ea..3d5fb63 100644 --- a/README.md +++ b/README.md @@ -207,6 +207,7 @@ Connect nanobot to your favorite chat platform. | **Slack** | Bot token + App-Level token | | **Email** | IMAP/SMTP credentials | | **QQ** | App ID + App Secret | +| **Wecom** | Bot ID + App Secret |
Telegram (Recommended) @@ -676,6 +677,44 @@ nanobot gateway
+
+Wecom (企业微信) + +Uses **WebSocket** long connection — no public IP required. + +**1. Create a WeCom bot** + +In the WeCom client's workspace, click "Intelligent Robot" to create a robot, choosing API mode. +Select the "long connection" mode, then copy the Bot ID and Secret. + +**2. Configure** + +```json +{ + "channels": { + "wecom": { + "enabled": true, + "botId": "your_bot_id", + "secret": "your_secret", + "allowFrom": [ + "your_id" + ] + } + } +} +``` + +**3. Run** + +```bash +nanobot gateway +``` + +> [!TIP] +> WeCom uses WebSocket to receive messages — no webhook or public IP needed! + +
+ ## 🌐 Agent Social Network 🐈 nanobot is capable of linking to the agent social network (agent community). **Just send one message and your nanobot joins automatically!** From 2ffeb9295bdb4a5ef308498f60f45b2448ab48d2 Mon Sep 17 00:00:00 2001 From: lailoo Date: Wed, 11 Mar 2026 00:47:09 +0800 Subject: [PATCH 09/28] fix(subagent): preserve reasoning_content in assistant messages Subagent's _run_subagent() was dropping reasoning_content and thinking_blocks when building assistant messages for the conversation history. Providers like Deepseek Reasoner require reasoning_content on every assistant message when thinking mode is active, causing a 400 BadRequestError on the second LLM round-trip. Align with the main AgentLoop which already preserves these fields via ContextBuilder.add_assistant_message(). Closes #1834 --- nanobot/agent/subagent.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py index f9eda1f..308e67d 100644 --- a/nanobot/agent/subagent.py +++ b/nanobot/agent/subagent.py @@ -145,11 +145,19 @@ class SubagentManager: } for tc in response.tool_calls ] - messages.append({ + assistant_msg: dict[str, Any] = { "role": "assistant", "content": response.content or "", "tool_calls": tool_call_dicts, - }) + } + # Preserve reasoning_content for providers that require it + # (e.g. Deepseek Reasoner mandates this field on every + # assistant message when thinking mode is active). 
+ if response.reasoning_content is not None: + assistant_msg["reasoning_content"] = response.reasoning_content + if response.thinking_blocks: + assistant_msg["thinking_blocks"] = response.thinking_blocks + messages.append(assistant_msg) # Execute tools for tool_call in response.tool_calls: From 62ccda43b980d53c5ac7a79adf8edf43294f1fdb Mon Sep 17 00:00:00 2001 From: Re-bin Date: Tue, 10 Mar 2026 19:55:06 +0000 Subject: [PATCH 10/28] refactor(memory): switch consolidation to token-based context windows Move consolidation policy into MemoryConsolidator, keep backward compatibility for legacy config, and compress history by token budget instead of message count. --- nanobot/agent/loop.py | 544 ++--------------------- nanobot/agent/memory.py | 243 +++++++--- nanobot/cli/commands.py | 26 +- nanobot/config/schema.py | 32 +- nanobot/session/manager.py | 20 +- nanobot/utils/helpers.py | 85 ++++ pyproject.toml | 1 + tests/test_commands.py | 33 ++ tests/test_config_migration.py | 88 ++++ tests/test_consolidate_offset.py | 297 ++----------- tests/test_loop_consolidation_tokens.py | 190 ++++++++ tests/test_memory_consolidation_types.py | 51 +-- tests/test_message_tool_suppress.py | 10 +- 13 files changed, 709 insertions(+), 911 deletions(-) create mode 100644 tests/test_config_migration.py create mode 100644 tests/test_loop_consolidation_tokens.py diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index ba35a23..8605a09 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -11,18 +11,12 @@ from typing import TYPE_CHECKING, Any, Awaitable, Callable from loguru import logger -try: - import tiktoken # type: ignore -except Exception: # pragma: no cover - optional dependency - tiktoken = None - from nanobot.agent.context import ContextBuilder +from nanobot.agent.memory import MemoryConsolidator from nanobot.agent.subagent import SubagentManager from nanobot.agent.tools.cron import CronTool from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, 
ReadFileTool, WriteFileTool -from nanobot.agent.tools.huggingface import HuggingFaceModelSearchTool from nanobot.agent.tools.message import MessageTool -from nanobot.agent.tools.model_config import ValidateDeployJSONTool, ValidateUsageYAMLTool from nanobot.agent.tools.registry import ToolRegistry from nanobot.agent.tools.shell import ExecTool from nanobot.agent.tools.spawn import SpawnTool @@ -60,11 +54,8 @@ class AgentLoop: max_iterations: int = 40, temperature: float = 0.1, max_tokens: int = 4096, - memory_window: int | None = None, # backward-compat only (unused) reasoning_effort: str | None = None, - max_tokens_input: int = 128_000, - compression_start_ratio: float = 0.7, - compression_target_ratio: float = 0.4, + context_window_tokens: int = 65_536, brave_api_key: str | None = None, web_proxy: str | None = None, exec_config: ExecToolConfig | None = None, @@ -82,18 +73,9 @@ class AgentLoop: self.model = model or provider.get_default_model() self.max_iterations = max_iterations self.temperature = temperature - # max_tokens: per-call output token cap (maxTokensOutput in config) self.max_tokens = max_tokens - # Keep legacy attribute for older call sites/tests; compression no longer uses it. 
- self.memory_window = memory_window self.reasoning_effort = reasoning_effort - # max_tokens_input: model native context window (maxTokensInput in config) - self.max_tokens_input = max_tokens_input - # Token-based compression watermarks (fractions of available input budget) - self.compression_start_ratio = compression_start_ratio - self.compression_target_ratio = compression_target_ratio - # Reserve tokens for safety margin - self._reserve_tokens = 1000 + self.context_window_tokens = context_window_tokens self.brave_api_key = brave_api_key self.web_proxy = web_proxy self.exec_config = exec_config or ExecToolConfig() @@ -123,382 +105,23 @@ class AgentLoop: self._mcp_connected = False self._mcp_connecting = False self._active_tasks: dict[str, list[asyncio.Task]] = {} # session_key -> tasks - self._compression_tasks: dict[str, asyncio.Task] = {} # session_key -> task - self._last_turn_prompt_tokens: int = 0 - self._last_turn_prompt_source: str = "none" self._processing_lock = asyncio.Lock() + self.memory_consolidator = MemoryConsolidator( + workspace=workspace, + provider=provider, + model=self.model, + sessions=self.sessions, + context_window_tokens=context_window_tokens, + build_messages=self.context.build_messages, + get_tool_definitions=self.tools.get_definitions, + ) self._register_default_tools() - @staticmethod - def _estimate_prompt_tokens( - messages: list[dict[str, Any]], - tools: list[dict[str, Any]] | None = None, - ) -> int: - """Estimate prompt tokens with tiktoken (fallback only).""" - if tiktoken is None: - return 0 - - try: - enc = tiktoken.get_encoding("cl100k_base") - parts: list[str] = [] - for msg in messages: - content = msg.get("content") - if isinstance(content, str): - parts.append(content) - elif isinstance(content, list): - for part in content: - if isinstance(part, dict) and part.get("type") == "text": - txt = part.get("text", "") - if txt: - parts.append(txt) - if tools: - parts.append(json.dumps(tools, ensure_ascii=False)) - return 
len(enc.encode("\n".join(parts))) - except Exception: - return 0 - - def _estimate_prompt_tokens_chain( - self, - messages: list[dict[str, Any]], - tools: list[dict[str, Any]] | None = None, - ) -> tuple[int, str]: - """Unified prompt-token estimation: provider counter -> tiktoken.""" - provider_counter = getattr(self.provider, "estimate_prompt_tokens", None) - if callable(provider_counter): - try: - tokens, source = provider_counter(messages, tools, self.model) - if isinstance(tokens, (int, float)) and tokens > 0: - return int(tokens), str(source or "provider_counter") - except Exception: - logger.debug("Provider token counter failed; fallback to tiktoken") - - estimated = self._estimate_prompt_tokens(messages, tools) - if estimated > 0: - return int(estimated), "tiktoken" - return 0, "none" - - @staticmethod - def _estimate_completion_tokens(content: str) -> int: - """Estimate completion tokens with tiktoken (fallback only).""" - if tiktoken is None: - return 0 - try: - enc = tiktoken.get_encoding("cl100k_base") - return len(enc.encode(content or "")) - except Exception: - return 0 - - def _get_compressed_until(self, session: Session) -> int: - """Read/normalize compressed boundary and migrate old metadata format.""" - raw = session.metadata.get("_compressed_until", 0) - try: - compressed_until = int(raw) - except (TypeError, ValueError): - compressed_until = 0 - - if compressed_until <= 0: - ranges = session.metadata.get("_compressed_ranges") - if isinstance(ranges, list): - inferred = 0 - for item in ranges: - if not isinstance(item, (list, tuple)) or len(item) != 2: - continue - try: - inferred = max(inferred, int(item[1])) - except (TypeError, ValueError): - continue - compressed_until = inferred - - compressed_until = max(0, min(compressed_until, len(session.messages))) - session.metadata["_compressed_until"] = compressed_until - # 兼容旧版本:一旦迁移出连续边界,就可以清理旧字段 - session.metadata.pop("_compressed_ranges", None) - # 注意:不要删除 _cumulative_tokens,压缩逻辑需要它来跟踪累积 token 计数 
- return compressed_until - - def _set_compressed_until(self, session: Session, idx: int) -> None: - """Persist a contiguous compressed boundary.""" - session.metadata["_compressed_until"] = max(0, min(int(idx), len(session.messages))) - session.metadata.pop("_compressed_ranges", None) - # 注意:不要删除 _cumulative_tokens,压缩逻辑需要它来跟踪累积 token 计数 - - @staticmethod - def _estimate_message_tokens(message: dict[str, Any]) -> int: - """Rough token estimate for a single persisted message.""" - content = message.get("content") - parts: list[str] = [] - if isinstance(content, str): - parts.append(content) - elif isinstance(content, list): - for part in content: - if isinstance(part, dict) and part.get("type") == "text": - txt = part.get("text", "") - if txt: - parts.append(txt) - else: - parts.append(json.dumps(part, ensure_ascii=False)) - elif content is not None: - parts.append(json.dumps(content, ensure_ascii=False)) - - for key in ("name", "tool_call_id"): - val = message.get(key) - if isinstance(val, str) and val: - parts.append(val) - if message.get("tool_calls"): - parts.append(json.dumps(message["tool_calls"], ensure_ascii=False)) - - payload = "\n".join(parts) - if not payload: - return 1 - if tiktoken is not None: - try: - enc = tiktoken.get_encoding("cl100k_base") - return max(1, len(enc.encode(payload))) - except Exception: - pass - return max(1, len(payload) // 4) - - def _pick_compression_chunk_by_tokens( - self, - session: Session, - reduction_tokens: int, - *, - tail_keep: int = 12, - ) -> tuple[int, int, int] | None: - """ - Pick one contiguous old chunk so its estimated size is roughly enough - to reduce `reduction_tokens`. 
- """ - messages = session.messages - start = self._get_compressed_until(session) - if len(messages) - start <= tail_keep + 2: - return None - - end_limit = len(messages) - tail_keep - if end_limit - start < 2: - return None - - target = max(1, reduction_tokens) - end = start - collected = 0 - while end < end_limit and collected < target: - collected += self._estimate_message_tokens(messages[end]) - end += 1 - - if end - start < 2: - end = min(end_limit, start + 2) - collected = sum(self._estimate_message_tokens(m) for m in messages[start:end]) - if end - start < 2: - return None - return start, end, collected - - def _estimate_session_prompt_tokens(self, session: Session) -> tuple[int, str]: - """ - Estimate current full prompt tokens for this session view - (system + compressed history view + runtime/user placeholder + tools). - """ - history = self._build_compressed_history_view(session) - channel, chat_id = (session.key.split(":", 1) if ":" in session.key else (None, None)) - probe_messages = self.context.build_messages( - history=history, - current_message="[token-probe]", - channel=channel, - chat_id=chat_id, - ) - return self._estimate_prompt_tokens_chain(probe_messages, self.tools.get_definitions()) - - async def _maybe_compress_history( - self, - session: Session, - ) -> None: - """ - End-of-turn policy: - - Estimate current prompt usage from persisted session view. - - If above start ratio, perform one best-effort compression chunk. - """ - if not session.messages: - self._set_compressed_until(session, 0) - return - - budget = max(1, self.max_tokens_input - self.max_tokens - self._reserve_tokens) - start_threshold = int(budget * self.compression_start_ratio) - target_threshold = int(budget * self.compression_target_ratio) - if target_threshold >= start_threshold: - target_threshold = max(0, start_threshold - 1) - - # Prefer provider usage prompt tokens from the turn-ending call. - # If unavailable, fall back to estimator chain. 
- raw_prompt_tokens = session.metadata.get("_last_prompt_tokens") - if isinstance(raw_prompt_tokens, (int, float)) and raw_prompt_tokens > 0: - current_tokens = int(raw_prompt_tokens) - token_source = str(session.metadata.get("_last_prompt_source") or "usage_prompt") - else: - current_tokens, token_source = self._estimate_session_prompt_tokens(session) - - current_ratio = current_tokens / budget if budget else 0.0 - if current_tokens <= 0: - logger.debug("Compression skip {}: token estimate unavailable", session.key) - return - if current_tokens < start_threshold: - logger.debug( - "Compression idle {}: {}/{} ({:.1%}) via {}", - session.key, - current_tokens, - budget, - current_ratio, - token_source, - ) - return - logger.info( - "Compression trigger {}: {}/{} ({:.1%}) via {}", - session.key, - current_tokens, - budget, - current_ratio, - token_source, - ) - - reduction_by_target = max(0, current_tokens - target_threshold) - reduction_by_delta = max(1, start_threshold - target_threshold) - reduction_need = max(reduction_by_target, reduction_by_delta) - - chunk_range = self._pick_compression_chunk_by_tokens(session, reduction_need, tail_keep=10) - if chunk_range is None: - logger.info("Compression skipped for {}: no compressible chunk", session.key) - return - - start_idx, end_idx, estimated_chunk_tokens = chunk_range - chunk = session.messages[start_idx:end_idx] - if len(chunk) < 2: - return - - logger.info( - "Compression chunk {}: msgs {}-{} (count={}, est~{}, need~{})", - session.key, - start_idx, - end_idx - 1, - len(chunk), - estimated_chunk_tokens, - reduction_need, - ) - success, _ = await self.context.memory.consolidate_chunk( - chunk, - self.provider, - self.model, - ) - if not success: - logger.warning("Compression aborted for {}: consolidation failed", session.key) - return - - self._set_compressed_until(session, end_idx) - self.sessions.save(session) - - after_tokens, after_source = self._estimate_session_prompt_tokens(session) - after_ratio = 
after_tokens / budget if budget else 0.0 - reduced = max(0, current_tokens - after_tokens) - reduced_ratio = (reduced / current_tokens) if current_tokens > 0 else 0.0 - logger.info( - "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%})", - session.key, - after_tokens, - budget, - after_ratio, - after_source, - reduced, - reduced_ratio, - ) - - def _schedule_background_compression(self, session_key: str) -> None: - """Schedule best-effort background compression for a session.""" - existing = self._compression_tasks.get(session_key) - if existing is not None and not existing.done(): - return - - async def _runner() -> None: - session = self.sessions.get_or_create(session_key) - try: - await self._maybe_compress_history(session) - except Exception: - logger.exception("Background compression failed for {}", session_key) - - task = asyncio.create_task(_runner()) - self._compression_tasks[session_key] = task - - def _cleanup(t: asyncio.Task) -> None: - cur = self._compression_tasks.get(session_key) - if cur is t: - self._compression_tasks.pop(session_key, None) - try: - t.result() - except BaseException: - pass - - task.add_done_callback(_cleanup) - - async def wait_for_background_compression(self, timeout_s: float | None = None) -> None: - """Wait for currently scheduled compression tasks.""" - pending = [t for t in self._compression_tasks.values() if not t.done()] - if not pending: - return - - logger.info("Waiting for {} background compression task(s)", len(pending)) - waiter = asyncio.gather(*pending, return_exceptions=True) - if timeout_s is None: - await waiter - return - - try: - await asyncio.wait_for(waiter, timeout=timeout_s) - except asyncio.TimeoutError: - logger.warning( - "Background compression wait timed out after {}s ({} task(s) still running)", - timeout_s, - len([t for t in self._compression_tasks.values() if not t.done()]), - ) - - def _build_compressed_history_view( - self, - session: Session, - ) -> list[dict]: - """Build 
non-destructive history view using the compressed boundary.""" - compressed_until = self._get_compressed_until(session) - if compressed_until <= 0: - return session.get_history(max_messages=0) - - notice_msg: dict[str, Any] = { - "role": "assistant", - "content": ( - "As your assistant, I have compressed earlier context. " - "If you need details, please check memory/HISTORY.md." - ), - } - - tail: list[dict[str, Any]] = [] - for msg in session.messages[compressed_until:]: - entry: dict[str, Any] = {"role": msg["role"], "content": msg.get("content", "")} - for k in ("tool_calls", "tool_call_id", "name"): - if k in msg: - entry[k] = msg[k] - tail.append(entry) - - # Drop leading non-user entries from tail to avoid orphan tool blocks. - for i, m in enumerate(tail): - if m.get("role") == "user": - tail = tail[i:] - break - else: - tail = [] - - return [notice_msg, *tail] - def _register_default_tools(self) -> None: """Register the default set of tools.""" allowed_dir = self.workspace if self.restrict_to_workspace else None for cls in (ReadFileTool, WriteFileTool, EditFileTool, ListDirTool): self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir)) - self.tools.register(ValidateDeployJSONTool()) - self.tools.register(ValidateUsageYAMLTool()) - self.tools.register(HuggingFaceModelSearchTool()) self.tools.register(ExecTool( working_dir=str(self.workspace), timeout=self.exec_config.timeout, @@ -563,24 +186,12 @@ class AgentLoop: self, initial_messages: list[dict], on_progress: Callable[..., Awaitable[None]] | None = None, - ) -> tuple[str | None, list[str], list[dict], int, str]: - """ - Run the agent iteration loop. 
- - Returns: - (final_content, tools_used, messages, total_tokens_this_turn, token_source) - total_tokens_this_turn: total tokens (prompt + completion) for this turn - token_source: provider_total / provider_sum / provider_prompt / - provider_counter+tiktoken_completion / tiktoken / none - """ + ) -> tuple[str | None, list[str], list[dict]]: + """Run the agent iteration loop.""" messages = initial_messages iteration = 0 final_content = None tools_used: list[str] = [] - total_tokens_this_turn = 0 - token_source = "none" - self._last_turn_prompt_tokens = 0 - self._last_turn_prompt_source = "none" while iteration < self.max_iterations: iteration += 1 @@ -596,63 +207,6 @@ class AgentLoop: reasoning_effort=self.reasoning_effort, ) - # Prefer provider usage from the turn-ending model call; fallback to tiktoken. - # Calculate total tokens (prompt + completion) for this turn. - usage = response.usage or {} - t_tokens = usage.get("total_tokens") - p_tokens = usage.get("prompt_tokens") - c_tokens = usage.get("completion_tokens") - - if isinstance(t_tokens, (int, float)) and t_tokens > 0: - total_tokens_this_turn = int(t_tokens) - token_source = "provider_total" - if isinstance(p_tokens, (int, float)) and p_tokens > 0: - self._last_turn_prompt_tokens = int(p_tokens) - self._last_turn_prompt_source = "usage_prompt" - elif isinstance(c_tokens, (int, float)): - prompt_derived = int(t_tokens) - int(c_tokens) - if prompt_derived > 0: - self._last_turn_prompt_tokens = prompt_derived - self._last_turn_prompt_source = "usage_total_minus_completion" - elif isinstance(p_tokens, (int, float)) and isinstance(c_tokens, (int, float)): - # If we have both prompt and completion tokens, sum them - total_tokens_this_turn = int(p_tokens) + int(c_tokens) - token_source = "provider_sum" - if p_tokens > 0: - self._last_turn_prompt_tokens = int(p_tokens) - self._last_turn_prompt_source = "usage_prompt" - elif isinstance(p_tokens, (int, float)) and p_tokens > 0: - # Fallback: use prompt tokens only 
(completion might be 0 for tool calls) - total_tokens_this_turn = int(p_tokens) - token_source = "provider_prompt" - self._last_turn_prompt_tokens = int(p_tokens) - self._last_turn_prompt_source = "usage_prompt" - else: - # Estimate with unified chain (provider counter -> tiktoken), plus completion tiktoken. - estimated_prompt, prompt_source = self._estimate_prompt_tokens_chain(messages, tool_defs) - estimated_completion = self._estimate_completion_tokens(response.content or "") - total_tokens_this_turn = estimated_prompt + estimated_completion - if estimated_prompt > 0: - self._last_turn_prompt_tokens = int(estimated_prompt) - self._last_turn_prompt_source = str(prompt_source or "tiktoken") - if total_tokens_this_turn > 0: - token_source = ( - "tiktoken" - if prompt_source == "tiktoken" - else f"{prompt_source}+tiktoken_completion" - ) - if total_tokens_this_turn <= 0: - total_tokens_this_turn = 0 - token_source = "none" - - logger.debug( - "Turn token usage: source={}, total={}, prompt={}, completion={}", - token_source, - total_tokens_this_turn, - p_tokens if isinstance(p_tokens, (int, float)) else None, - c_tokens if isinstance(c_tokens, (int, float)) else None, - ) - if response.has_tool_calls: if on_progress: thought = self._strip_think(response.content) @@ -707,7 +261,7 @@ class AgentLoop: "without completing the task. You can try breaking the task into smaller steps." 
) - return final_content, tools_used, messages, total_tokens_this_turn, token_source + return final_content, tools_used, messages async def run(self) -> None: """Run the agent loop, dispatching messages as tasks to stay responsive to /stop.""" @@ -732,9 +286,6 @@ class AgentLoop: """Cancel all active tasks and subagents for the session.""" tasks = self._active_tasks.pop(msg.session_key, []) cancelled = sum(1 for t in tasks if not t.done() and t.cancel()) - comp = self._compression_tasks.get(msg.session_key) - if comp is not None and not comp.done() and comp.cancel(): - cancelled += 1 for t in tasks: try: await t @@ -781,9 +332,6 @@ class AgentLoop: def stop(self) -> None: """Stop the agent loop.""" self._running = False - for task in list(self._compression_tasks.values()): - if not task.done(): - task.cancel() logger.info("Agent loop stopping") async def _process_message( @@ -800,22 +348,17 @@ class AgentLoop: logger.info("Processing system message from {}", msg.sender_id) key = f"{channel}:{chat_id}" session = self.sessions.get_or_create(key) + await self.memory_consolidator.maybe_consolidate_by_tokens(session) self._set_tool_context(channel, chat_id, msg.metadata.get("message_id")) - history = self._build_compressed_history_view(session) + history = session.get_history(max_messages=0) messages = self.context.build_messages( history=history, current_message=msg.content, channel=channel, chat_id=chat_id, ) - final_content, _, all_msgs, _, _ = await self._run_agent_loop(messages) - if self._last_turn_prompt_tokens > 0: - session.metadata["_last_prompt_tokens"] = self._last_turn_prompt_tokens - session.metadata["_last_prompt_source"] = self._last_turn_prompt_source - else: - session.metadata.pop("_last_prompt_tokens", None) - session.metadata.pop("_last_prompt_source", None) + final_content, _, all_msgs = await self._run_agent_loop(messages) self._save_turn(session, all_msgs, 1 + len(history)) self.sessions.save(session) - 
self._schedule_background_compression(session.key) + await self.memory_consolidator.maybe_consolidate_by_tokens(session) return OutboundMessage(channel=channel, chat_id=chat_id, content=final_content or "Background task completed.") @@ -829,19 +372,12 @@ class AgentLoop: cmd = msg.content.strip().lower() if cmd == "/new": try: - # 在清空会话前,将当前完整对话做一次归档压缩到 MEMORY/HISTORY 中 - if session.messages: - ok, _ = await self.context.memory.consolidate_chunk( - session.messages, - self.provider, - self.model, + if not await self.memory_consolidator.archive_unconsolidated(session): + return OutboundMessage( + channel=msg.channel, + chat_id=msg.chat_id, + content="Memory archival failed, session not cleared. Please try again.", ) - if not ok: - return OutboundMessage( - channel=msg.channel, - chat_id=msg.chat_id, - content="Memory archival failed, session not cleared. Please try again.", - ) except Exception: logger.exception("/new archival failed for {}", session.key) return OutboundMessage( @@ -859,23 +395,20 @@ class AgentLoop: return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id, content="🐈 nanobot commands:\n/new — Start a new conversation\n/stop — Stop the current task\n/help — Show available commands") + await self.memory_consolidator.maybe_consolidate_by_tokens(session) + self._set_tool_context(msg.channel, msg.chat_id, msg.metadata.get("message_id")) if message_tool := self.tools.get("message"): if isinstance(message_tool, MessageTool): message_tool.start_turn() - # 正常对话:使用压缩后的历史视图(压缩在回合结束后进行) - history = self._build_compressed_history_view(session) + history = session.get_history(max_messages=0) initial_messages = self.context.build_messages( history=history, current_message=msg.content, media=msg.media if msg.media else None, channel=msg.channel, chat_id=msg.chat_id, ) - # Add [CRON JOB] identifier for cron sessions (session_key starts with "cron:") - if session_key and session_key.startswith("cron:"): - if initial_messages and 
initial_messages[0].get("role") == "system": - initial_messages[0]["content"] = f"[CRON JOB] {initial_messages[0]['content']}" async def _bus_progress(content: str, *, tool_hint: bool = False) -> None: meta = dict(msg.metadata or {}) @@ -885,23 +418,16 @@ class AgentLoop: channel=msg.channel, chat_id=msg.chat_id, content=content, metadata=meta, )) - final_content, _, all_msgs, total_tokens_this_turn, token_source = await self._run_agent_loop( + final_content, _, all_msgs = await self._run_agent_loop( initial_messages, on_progress=on_progress or _bus_progress, ) if final_content is None: final_content = "I've completed processing but have no response to give." - if self._last_turn_prompt_tokens > 0: - session.metadata["_last_prompt_tokens"] = self._last_turn_prompt_tokens - session.metadata["_last_prompt_source"] = self._last_turn_prompt_source - else: - session.metadata.pop("_last_prompt_tokens", None) - session.metadata.pop("_last_prompt_source", None) - - self._save_turn(session, all_msgs, 1 + len(history), total_tokens_this_turn) + self._save_turn(session, all_msgs, 1 + len(history)) self.sessions.save(session) - self._schedule_background_compression(session.key) + await self.memory_consolidator.maybe_consolidate_by_tokens(session) if (mt := self.tools.get("message")) and isinstance(mt, MessageTool) and mt._sent_in_turn: return None @@ -913,7 +439,7 @@ class AgentLoop: metadata=msg.metadata or {}, ) - def _save_turn(self, session: Session, messages: list[dict], skip: int, total_tokens_this_turn: int = 0) -> None: + def _save_turn(self, session: Session, messages: list[dict], skip: int) -> None: """Save new-turn messages into session, truncating large tool results.""" from datetime import datetime for m in messages[skip:]: @@ -947,14 +473,6 @@ class AgentLoop: entry.setdefault("timestamp", datetime.now().isoformat()) session.messages.append(entry) session.updated_at = datetime.now() - - # Update cumulative token count for compression tracking - if 
total_tokens_this_turn > 0: - current_cumulative = session.metadata.get("_cumulative_tokens", 0) - if isinstance(current_cumulative, (int, float)): - session.metadata["_cumulative_tokens"] = int(current_cumulative) + total_tokens_this_turn - else: - session.metadata["_cumulative_tokens"] = total_tokens_this_turn async def process_direct( self, diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py index e29788a..cd5f54f 100644 --- a/nanobot/agent/memory.py +++ b/nanobot/agent/memory.py @@ -2,17 +2,19 @@ from __future__ import annotations +import asyncio import json +import weakref from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Callable from loguru import logger -from nanobot.utils.helpers import ensure_dir +from nanobot.utils.helpers import ensure_dir, estimate_message_tokens, estimate_prompt_tokens_chain if TYPE_CHECKING: from nanobot.providers.base import LLMProvider - from nanobot.session.manager import Session + from nanobot.session.manager import Session, SessionManager _SAVE_MEMORY_TOOL = [ @@ -26,7 +28,7 @@ _SAVE_MEMORY_TOOL = [ "properties": { "history_entry": { "type": "string", - "description": "A paragraph (2-5 sentences) summarizing key events/decisions/topics. " + "description": "A paragraph summarizing key events/decisions/topics. " "Start with [YYYY-MM-DD HH:MM]. 
Include detail useful for grep search.", }, "memory_update": { @@ -42,6 +44,20 @@ _SAVE_MEMORY_TOOL = [ ] +def _ensure_text(value: Any) -> str: + """Normalize tool-call payload values to text for file storage.""" + return value if isinstance(value, str) else json.dumps(value, ensure_ascii=False) + + +def _normalize_save_memory_args(args: Any) -> dict[str, Any] | None: + """Normalize provider tool-call arguments to the expected dict shape.""" + if isinstance(args, str): + args = json.loads(args) + if isinstance(args, list): + return args[0] if args and isinstance(args[0], dict) else None + return args if isinstance(args, dict) else None + + class MemoryStore: """Two-layer memory: MEMORY.md (long-term facts) + HISTORY.md (grep-searchable log).""" @@ -66,29 +82,27 @@ class MemoryStore: long_term = self.read_long_term() return f"## Long-term Memory\n{long_term}" if long_term else "" - async def consolidate_chunk( + @staticmethod + def _format_messages(messages: list[dict]) -> str: + lines = [] + for message in messages: + if not message.get("content"): + continue + tools = f" [tools: {', '.join(message['tools_used'])}]" if message.get("tools_used") else "" + lines.append( + f"[{message.get('timestamp', '?')[:16]}] {message['role'].upper()}{tools}: {message['content']}" + ) + return "\n".join(lines) + + async def consolidate( self, messages: list[dict], provider: LLMProvider, model: str, - ) -> tuple[bool, str | None]: - """Consolidate a chunk of messages into MEMORY.md + HISTORY.md via LLM tool call. - - Returns (success, None). - - - success: True on success (including no-op), False on failure. - - The second return value is reserved for future use (e.g. RAG-style summaries) and is - always None in the current implementation. 
- """ + ) -> bool: + """Consolidate the provided message chunk into MEMORY.md + HISTORY.md.""" if not messages: - return True, None - - lines = [] - for m in messages: - if not m.get("content"): - continue - tools = f" [tools: {', '.join(m['tools_used'])}]" if m.get("tools_used") else "" - lines.append(f"[{m.get('timestamp', '?')[:16]}] {m['role'].upper()}{tools}: {m['content']}") + return True current_memory = self.read_long_term() prompt = f"""Process this conversation and call the save_memory tool with your consolidation. @@ -97,24 +111,12 @@ class MemoryStore: {current_memory or "(empty)"} ## Conversation to Process -{chr(10).join(lines)}""" +{self._format_messages(messages)}""" try: response = await provider.chat_with_retry( messages=[ - { - "role": "system", - "content": ( - "You are a memory consolidation agent.\n" - "Your job is to:\n" - "1) Append a concise but grep-friendly entry to HISTORY.md summarizing key events, decisions and topics.\n" - " - Write 1 paragraph of 2–5 sentences that starts with [YYYY-MM-DD HH:MM].\n" - " - Include concrete names, IDs and numbers so it is easy to search with grep.\n" - "2) Update long-term MEMORY.md with stable facts and user preferences as markdown, including all existing facts plus new ones.\n" - "3) Optionally return a short context_summary (1–3 sentences) that will replace the raw messages in future dialogue history.\n\n" - "Always call the save_memory tool with history_entry, memory_update and (optionally) context_summary." - ), - }, + {"role": "system", "content": "You are a memory consolidation agent. 
Call the save_memory tool with your consolidation of the conversation."}, {"role": "user", "content": prompt}, ], tools=_SAVE_MEMORY_TOOL, @@ -123,35 +125,160 @@ class MemoryStore: if not response.has_tool_calls: logger.warning("Memory consolidation: LLM did not call save_memory, skipping") - return False, None + return False - args = response.tool_calls[0].arguments - # Some providers return arguments as a JSON string instead of dict - if isinstance(args, str): - args = json.loads(args) - # Some providers return arguments as a list (handle edge case) - if isinstance(args, list): - if args and isinstance(args[0], dict): - args = args[0] - else: - logger.warning("Memory consolidation: unexpected arguments as empty or non-dict list") - return False, None - if not isinstance(args, dict): - logger.warning("Memory consolidation: unexpected arguments type {}", type(args).__name__) - return False, None + args = _normalize_save_memory_args(response.tool_calls[0].arguments) + if args is None: + logger.warning("Memory consolidation: unexpected save_memory arguments") + return False if entry := args.get("history_entry"): - if not isinstance(entry, str): - entry = json.dumps(entry, ensure_ascii=False) - self.append_history(entry) + self.append_history(_ensure_text(entry)) if update := args.get("memory_update"): - if not isinstance(update, str): - update = json.dumps(update, ensure_ascii=False) + update = _ensure_text(update) if update != current_memory: self.write_long_term(update) logger.info("Memory consolidation done for {} messages", len(messages)) - return True, None + return True except Exception: logger.exception("Memory consolidation failed") - return False, None + return False + + +class MemoryConsolidator: + """Owns consolidation policy, locking, and session offset updates.""" + + _MAX_CONSOLIDATION_ROUNDS = 5 + + def __init__( + self, + workspace: Path, + provider: LLMProvider, + model: str, + sessions: SessionManager, + context_window_tokens: int, + build_messages: 
Callable[..., list[dict[str, Any]]], + get_tool_definitions: Callable[[], list[dict[str, Any]]], + ): + self.store = MemoryStore(workspace) + self.provider = provider + self.model = model + self.sessions = sessions + self.context_window_tokens = context_window_tokens + self._build_messages = build_messages + self._get_tool_definitions = get_tool_definitions + self._locks: weakref.WeakValueDictionary[str, asyncio.Lock] = weakref.WeakValueDictionary() + + def get_lock(self, session_key: str) -> asyncio.Lock: + """Return the shared consolidation lock for one session.""" + return self._locks.setdefault(session_key, asyncio.Lock()) + + async def consolidate_messages(self, messages: list[dict[str, object]]) -> bool: + """Archive a selected message chunk into persistent memory.""" + return await self.store.consolidate(messages, self.provider, self.model) + + def pick_consolidation_boundary( + self, + session: Session, + tokens_to_remove: int, + ) -> tuple[int, int] | None: + """Pick a user-turn boundary that removes enough old prompt tokens.""" + start = session.last_consolidated + if start >= len(session.messages) or tokens_to_remove <= 0: + return None + + removed_tokens = 0 + last_boundary: tuple[int, int] | None = None + for idx in range(start, len(session.messages)): + message = session.messages[idx] + if idx > start and message.get("role") == "user": + last_boundary = (idx, removed_tokens) + if removed_tokens >= tokens_to_remove: + return last_boundary + removed_tokens += estimate_message_tokens(message) + + return last_boundary + + def estimate_session_prompt_tokens(self, session: Session) -> tuple[int, str]: + """Estimate current prompt size for the normal session history view.""" + history = session.get_history(max_messages=0) + channel, chat_id = (session.key.split(":", 1) if ":" in session.key else (None, None)) + probe_messages = self._build_messages( + history=history, + current_message="[token-probe]", + channel=channel, + chat_id=chat_id, + ) + return 
estimate_prompt_tokens_chain( + self.provider, + self.model, + probe_messages, + self._get_tool_definitions(), + ) + + async def archive_unconsolidated(self, session: Session) -> bool: + """Archive the full unconsolidated tail for /new-style session rollover.""" + lock = self.get_lock(session.key) + async with lock: + snapshot = session.messages[session.last_consolidated:] + if not snapshot: + return True + return await self.consolidate_messages(snapshot) + + async def maybe_consolidate_by_tokens(self, session: Session) -> None: + """Loop: archive old messages until prompt fits within half the context window.""" + if not session.messages or self.context_window_tokens <= 0: + return + + lock = self.get_lock(session.key) + async with lock: + target = self.context_window_tokens // 2 + estimated, source = self.estimate_session_prompt_tokens(session) + if estimated <= 0: + return + if estimated < self.context_window_tokens: + logger.debug( + "Token consolidation idle {}: {}/{} via {}", + session.key, + estimated, + self.context_window_tokens, + source, + ) + return + + for round_num in range(self._MAX_CONSOLIDATION_ROUNDS): + if estimated <= target: + return + + boundary = self.pick_consolidation_boundary(session, max(1, estimated - target)) + if boundary is None: + logger.debug( + "Token consolidation: no safe boundary for {} (round {})", + session.key, + round_num, + ) + return + + end_idx = boundary[0] + chunk = session.messages[session.last_consolidated:end_idx] + if not chunk: + return + + logger.info( + "Token consolidation round {} for {}: {}/{} via {}, chunk={} msgs", + round_num, + session.key, + estimated, + self.context_window_tokens, + source, + len(chunk), + ) + if not await self.consolidate_messages(chunk): + return + session.last_consolidated = end_idx + self.sessions.save(session) + + estimated, source = self.estimate_session_prompt_tokens(session) + if estimated <= 0: + return diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 
36e2a53..cf69450 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -191,6 +191,8 @@ def onboard(): save_config(Config()) console.print(f"[green]✓[/green] Created config at {config_path}") + console.print("[dim]Config template now uses `maxTokens` + `contextWindowTokens`; `memoryWindow` is no longer a runtime setting.[/dim]") + # Create workspace workspace = get_workspace_path() @@ -283,6 +285,16 @@ def _load_runtime_config(config: str | None = None, workspace: str | None = None return loaded +def _print_deprecated_memory_window_notice(config: Config) -> None: + """Warn when running with old memoryWindow-only config.""" + if config.agents.defaults.should_warn_deprecated_memory_window: + console.print( + "[yellow]Hint:[/yellow] Detected deprecated `memoryWindow` without " + "`contextWindowTokens`. `memoryWindow` is ignored; run " + "[cyan]nanobot onboard[/cyan] to refresh your config template." + ) + + # ============================================================================ # Gateway / Server # ============================================================================ @@ -310,6 +322,7 @@ def gateway( logging.basicConfig(level=logging.DEBUG) config = _load_runtime_config(config, workspace) + _print_deprecated_memory_window_notice(config) port = port if port is not None else config.gateway.port console.print(f"{__logo__} Starting nanobot gateway on port {port}...") @@ -329,12 +342,10 @@ def gateway( workspace=config.workspace_path, model=config.agents.defaults.model, temperature=config.agents.defaults.temperature, - max_tokens=config.agents.defaults.max_tokens_output, + max_tokens=config.agents.defaults.max_tokens, max_iterations=config.agents.defaults.max_tool_iterations, reasoning_effort=config.agents.defaults.reasoning_effort, - max_tokens_input=config.agents.defaults.max_tokens_input, - compression_start_ratio=config.agents.defaults.compression_start_ratio, - compression_target_ratio=config.agents.defaults.compression_target_ratio, + 
context_window_tokens=config.agents.defaults.context_window_tokens, brave_api_key=config.tools.web.search.api_key or None, web_proxy=config.tools.web.proxy or None, exec_config=config.tools.exec, @@ -496,6 +507,7 @@ def agent( from nanobot.cron.service import CronService config = _load_runtime_config(config, workspace) + _print_deprecated_memory_window_notice(config) sync_workspace_templates(config.workspace_path) bus = MessageBus() @@ -516,12 +528,10 @@ def agent( workspace=config.workspace_path, model=config.agents.defaults.model, temperature=config.agents.defaults.temperature, - max_tokens=config.agents.defaults.max_tokens_output, + max_tokens=config.agents.defaults.max_tokens, max_iterations=config.agents.defaults.max_tool_iterations, reasoning_effort=config.agents.defaults.reasoning_effort, - max_tokens_input=config.agents.defaults.max_tokens_input, - compression_start_ratio=config.agents.defaults.compression_start_ratio, - compression_target_ratio=config.agents.defaults.compression_target_ratio, + context_window_tokens=config.agents.defaults.context_window_tokens, brave_api_key=config.tools.web.search.api_key or None, web_proxy=config.tools.web.proxy or None, exec_config=config.tools.exec, diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index 0e41d12..a2de239 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -190,22 +190,11 @@ class SlackConfig(Base): class QQConfig(Base): - """QQ channel configuration. - - Supports two implementations: - 1. Official botpy SDK: requires app_id and secret - 2. OneBot protocol: requires api_url (and optionally ws_reverse_url, bot_qq, access_token) - """ + """QQ channel configuration using botpy SDK.""" enabled: bool = False - # Official botpy SDK fields app_id: str = "" # 机器人 ID (AppID) from q.qq.com secret: str = "" # 机器人密钥 (AppSecret) from q.qq.com - # OneBot protocol fields - api_url: str = "" # OneBot HTTP API URL (e.g. 
"http://localhost:5700") - ws_reverse_url: str = "" # OneBot WebSocket reverse URL (e.g. "ws://localhost:8080/ws/reverse") - bot_qq: int | None = None # Bot's QQ number (for filtering self messages) - access_token: str = "" # Optional access token for OneBot API allow_from: list[str] = Field( default_factory=list ) # Allowed user openids (empty = public access) @@ -238,20 +227,19 @@ class AgentDefaults(Base): provider: str = ( "auto" # Provider name (e.g. "anthropic", "openrouter") or "auto" for auto-detection ) - # 原生上下文最大窗口(通常对应模型的 max_input_tokens / max_context_tokens) - # 默认按照主流大模型(如 GPT-4o、Claude 3.x 等)的 128k 上下文给一个宽松上限,实际应根据所选模型文档手动调整。 - max_tokens_input: int = 128_000 - # 默认单次回复的最大输出 token 上限(调用时可按需要再做截断或比例分配) - # 8192 足以覆盖大多数实际对话/工具使用场景,同样可按需手动调整。 - max_tokens_output: int = 8192 - # 会话历史压缩触发比例:当估算的输入 token 使用量 >= maxTokensInput * compressionStartRatio 时开始压缩。 - compression_start_ratio: float = 0.7 - # 会话历史压缩目标比例:每轮压缩后尽量把估算的输入 token 使用量压到 maxTokensInput * compressionTargetRatio 附近。 - compression_target_ratio: float = 0.4 + max_tokens: int = 8192 + context_window_tokens: int = 65_536 temperature: float = 0.1 max_tool_iterations: int = 40 + # Deprecated compatibility field: accepted from old configs but ignored at runtime. 
+ memory_window: int | None = Field(default=None, exclude=True) reasoning_effort: str | None = None # low / medium / high — enables LLM thinking mode + @property + def should_warn_deprecated_memory_window(self) -> bool: + """Return True when old memoryWindow is present without contextWindowTokens.""" + return self.memory_window is not None and "context_window_tokens" not in self.model_fields_set + class AgentsConfig(Base): """Agent configuration.""" diff --git a/nanobot/session/manager.py b/nanobot/session/manager.py index 1cb8a51..f0a6484 100644 --- a/nanobot/session/manager.py +++ b/nanobot/session/manager.py @@ -9,6 +9,7 @@ from typing import Any from loguru import logger +from nanobot.config.paths import get_legacy_sessions_dir from nanobot.utils.helpers import ensure_dir, safe_filename @@ -29,6 +30,7 @@ class Session: created_at: datetime = field(default_factory=datetime.now) updated_at: datetime = field(default_factory=datetime.now) metadata: dict[str, Any] = field(default_factory=dict) + last_consolidated: int = 0 # Number of messages already consolidated to files def add_message(self, role: str, content: str, **kwargs: Any) -> None: """Add a message to the session.""" @@ -42,13 +44,9 @@ class Session: self.updated_at = datetime.now() def get_history(self, max_messages: int = 500) -> list[dict[str, Any]]: - """ - Return messages for LLM input, aligned to a user turn. 
- - - max_messages > 0 时只保留最近 max_messages 条; - - max_messages <= 0 时不做条数截断,返回全部消息。 - """ - sliced = self.messages if max_messages <= 0 else self.messages[-max_messages:] + """Return unconsolidated messages for LLM input, aligned to a user turn.""" + unconsolidated = self.messages[self.last_consolidated:] + sliced = unconsolidated[-max_messages:] # Drop leading non-user messages to avoid orphaned tool_result blocks for i, m in enumerate(sliced): @@ -68,7 +66,7 @@ class Session: def clear(self) -> None: """Clear all messages and reset session to initial state.""" self.messages = [] - self.metadata = {} + self.last_consolidated = 0 self.updated_at = datetime.now() @@ -82,7 +80,7 @@ class SessionManager: def __init__(self, workspace: Path): self.workspace = workspace self.sessions_dir = ensure_dir(self.workspace / "sessions") - self.legacy_sessions_dir = Path.home() / ".nanobot" / "sessions" + self.legacy_sessions_dir = get_legacy_sessions_dir() self._cache: dict[str, Session] = {} def _get_session_path(self, key: str) -> Path: @@ -134,6 +132,7 @@ class SessionManager: messages = [] metadata = {} created_at = None + last_consolidated = 0 with open(path, encoding="utf-8") as f: for line in f: @@ -146,6 +145,7 @@ class SessionManager: if data.get("_type") == "metadata": metadata = data.get("metadata", {}) created_at = datetime.fromisoformat(data["created_at"]) if data.get("created_at") else None + last_consolidated = data.get("last_consolidated", 0) else: messages.append(data) @@ -154,6 +154,7 @@ class SessionManager: messages=messages, created_at=created_at or datetime.now(), metadata=metadata, + last_consolidated=last_consolidated ) except Exception as e: logger.warning("Failed to load session {}: {}", key, e) @@ -170,6 +171,7 @@ class SessionManager: "created_at": session.created_at.isoformat(), "updated_at": session.updated_at.isoformat(), "metadata": session.metadata, + "last_consolidated": session.last_consolidated } f.write(json.dumps(metadata_line, 
ensure_ascii=False) + "\n") for msg in session.messages: diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py index 57c60dc..9242ba6 100644 --- a/nanobot/utils/helpers.py +++ b/nanobot/utils/helpers.py @@ -1,8 +1,12 @@ """Utility functions for nanobot.""" +import json import re from datetime import datetime from pathlib import Path +from typing import Any + +import tiktoken def detect_image_mime(data: bytes) -> str | None: @@ -68,6 +72,87 @@ def split_message(content: str, max_len: int = 2000) -> list[str]: return chunks +def estimate_prompt_tokens( + messages: list[dict[str, Any]], + tools: list[dict[str, Any]] | None = None, +) -> int: + """Estimate prompt tokens with tiktoken.""" + try: + enc = tiktoken.get_encoding("cl100k_base") + parts: list[str] = [] + for msg in messages: + content = msg.get("content") + if isinstance(content, str): + parts.append(content) + elif isinstance(content, list): + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + txt = part.get("text", "") + if txt: + parts.append(txt) + if tools: + parts.append(json.dumps(tools, ensure_ascii=False)) + return len(enc.encode("\n".join(parts))) + except Exception: + return 0 + + +def estimate_message_tokens(message: dict[str, Any]) -> int: + """Estimate prompt tokens contributed by one persisted message.""" + content = message.get("content") + parts: list[str] = [] + if isinstance(content, str): + parts.append(content) + elif isinstance(content, list): + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + text = part.get("text", "") + if text: + parts.append(text) + else: + parts.append(json.dumps(part, ensure_ascii=False)) + elif content is not None: + parts.append(json.dumps(content, ensure_ascii=False)) + + for key in ("name", "tool_call_id"): + value = message.get(key) + if isinstance(value, str) and value: + parts.append(value) + if message.get("tool_calls"): + parts.append(json.dumps(message["tool_calls"], 
ensure_ascii=False)) + + payload = "\n".join(parts) + if not payload: + return 1 + try: + enc = tiktoken.get_encoding("cl100k_base") + return max(1, len(enc.encode(payload))) + except Exception: + return max(1, len(payload) // 4) + + +def estimate_prompt_tokens_chain( + provider: Any, + model: str | None, + messages: list[dict[str, Any]], + tools: list[dict[str, Any]] | None = None, +) -> tuple[int, str]: + """Estimate prompt tokens via provider counter first, then tiktoken fallback.""" + provider_counter = getattr(provider, "estimate_prompt_tokens", None) + if callable(provider_counter): + try: + tokens, source = provider_counter(messages, tools, model) + if isinstance(tokens, (int, float)) and tokens > 0: + return int(tokens), str(source or "provider_counter") + except Exception: + pass + + estimated = estimate_prompt_tokens(messages, tools) + if estimated > 0: + return int(estimated), "tiktoken" + return 0, "none" + + def sync_workspace_templates(workspace: Path, silent: bool = False) -> list[str]: """Sync bundled templates to workspace. 
Only creates missing files.""" from importlib.resources import files as pkg_files diff --git a/pyproject.toml b/pyproject.toml index 62cf616..0344348 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ dependencies = [ "json-repair>=0.57.0,<1.0.0", "chardet>=3.0.2,<6.0.0", "openai>=2.8.0", + "tiktoken>=0.12.0,<1.0.0", ] [project.optional-dependencies] diff --git a/tests/test_commands.py b/tests/test_commands.py index 5e3760a..1375a3a 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -267,6 +267,16 @@ def test_agent_workspace_override_wins_over_config_workspace(mock_agent_runtime, assert mock_agent_runtime["agent_loop_cls"].call_args.kwargs["workspace"] == workspace_path +def test_agent_warns_about_deprecated_memory_window(mock_agent_runtime): + mock_agent_runtime["config"].agents.defaults.memory_window = 100 + + result = runner.invoke(app, ["agent", "-m", "hello"]) + + assert result.exit_code == 0 + assert "memoryWindow" in result.stdout + assert "contextWindowTokens" in result.stdout + + def test_gateway_uses_workspace_from_config_by_default(monkeypatch, tmp_path: Path) -> None: config_file = tmp_path / "instance" / "config.json" config_file.parent.mkdir(parents=True) @@ -327,6 +337,29 @@ def test_gateway_workspace_option_overrides_config(monkeypatch, tmp_path: Path) assert seen["workspace"] == override assert config.workspace_path == override + +def test_gateway_warns_about_deprecated_memory_window(monkeypatch, tmp_path: Path) -> None: + config_file = tmp_path / "instance" / "config.json" + config_file.parent.mkdir(parents=True) + config_file.write_text("{}") + + config = Config() + config.agents.defaults.memory_window = 100 + + monkeypatch.setattr("nanobot.config.loader.set_config_path", lambda _path: None) + monkeypatch.setattr("nanobot.config.loader.load_config", lambda _path=None: config) + monkeypatch.setattr("nanobot.cli.commands.sync_workspace_templates", lambda _path: None) + monkeypatch.setattr( + 
"nanobot.cli.commands._make_provider", + lambda _config: (_ for _ in ()).throw(_StopGateway("stop")), + ) + + result = runner.invoke(app, ["gateway", "--config", str(config_file)]) + + assert isinstance(result.exception, _StopGateway) + assert "memoryWindow" in result.stdout + assert "contextWindowTokens" in result.stdout + def test_gateway_uses_config_directory_for_cron_store(monkeypatch, tmp_path: Path) -> None: config_file = tmp_path / "instance" / "config.json" config_file.parent.mkdir(parents=True) diff --git a/tests/test_config_migration.py b/tests/test_config_migration.py new file mode 100644 index 0000000..62e601e --- /dev/null +++ b/tests/test_config_migration.py @@ -0,0 +1,88 @@ +import json + +from typer.testing import CliRunner + +from nanobot.cli.commands import app +from nanobot.config.loader import load_config, save_config + +runner = CliRunner() + + +def test_load_config_keeps_max_tokens_and_warns_on_legacy_memory_window(tmp_path) -> None: + config_path = tmp_path / "config.json" + config_path.write_text( + json.dumps( + { + "agents": { + "defaults": { + "maxTokens": 1234, + "memoryWindow": 42, + } + } + } + ), + encoding="utf-8", + ) + + config = load_config(config_path) + + assert config.agents.defaults.max_tokens == 1234 + assert config.agents.defaults.context_window_tokens == 65_536 + assert config.agents.defaults.should_warn_deprecated_memory_window is True + + +def test_save_config_writes_context_window_tokens_but_not_memory_window(tmp_path) -> None: + config_path = tmp_path / "config.json" + config_path.write_text( + json.dumps( + { + "agents": { + "defaults": { + "maxTokens": 2222, + "memoryWindow": 30, + } + } + } + ), + encoding="utf-8", + ) + + config = load_config(config_path) + save_config(config, config_path) + saved = json.loads(config_path.read_text(encoding="utf-8")) + defaults = saved["agents"]["defaults"] + + assert defaults["maxTokens"] == 2222 + assert defaults["contextWindowTokens"] == 65_536 + assert "memoryWindow" not in 
defaults + + +def test_onboard_refresh_rewrites_legacy_config_template(tmp_path, monkeypatch) -> None: + config_path = tmp_path / "config.json" + workspace = tmp_path / "workspace" + config_path.write_text( + json.dumps( + { + "agents": { + "defaults": { + "maxTokens": 3333, + "memoryWindow": 50, + } + } + } + ), + encoding="utf-8", + ) + + monkeypatch.setattr("nanobot.config.loader.get_config_path", lambda: config_path) + monkeypatch.setattr("nanobot.cli.commands.get_workspace_path", lambda: workspace) + + result = runner.invoke(app, ["onboard"], input="n\n") + + assert result.exit_code == 0 + assert "contextWindowTokens" in result.stdout + saved = json.loads(config_path.read_text(encoding="utf-8")) + defaults = saved["agents"]["defaults"] + assert defaults["maxTokens"] == 3333 + assert defaults["contextWindowTokens"] == 65_536 + assert "memoryWindow" not in defaults diff --git a/tests/test_consolidate_offset.py b/tests/test_consolidate_offset.py index a3213dd..7d12338 100644 --- a/tests/test_consolidate_offset.py +++ b/tests/test_consolidate_offset.py @@ -480,226 +480,35 @@ class TestEmptyAndBoundarySessions: assert_messages_content(old_messages, 10, 34) -class TestConsolidationDeduplicationGuard: - """Test that consolidation tasks are deduplicated and serialized.""" +class TestNewCommandArchival: + """Test /new archival behavior with the simplified consolidation flow.""" - @pytest.mark.asyncio - async def test_consolidation_guard_prevents_duplicate_tasks(self, tmp_path: Path) -> None: - """Concurrent messages above memory_window spawn only one consolidation task.""" + @staticmethod + def _make_loop(tmp_path: Path): from nanobot.agent.loop import AgentLoop - from nanobot.bus.events import InboundMessage from nanobot.bus.queue import MessageBus from nanobot.providers.base import LLMResponse bus = MessageBus() provider = MagicMock() provider.get_default_model.return_value = "test-model" + provider.estimate_prompt_tokens.return_value = (10_000, "test") loop = 
AgentLoop( - bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10 + bus=bus, + provider=provider, + workspace=tmp_path, + model="test-model", + context_window_tokens=1, ) - - loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[])) + loop.provider.chat_with_retry = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[])) loop.tools.get_definitions = MagicMock(return_value=[]) - - session = loop.sessions.get_or_create("cli:test") - for i in range(15): - session.add_message("user", f"msg{i}") - session.add_message("assistant", f"resp{i}") - loop.sessions.save(session) - - consolidation_calls = 0 - - async def _fake_consolidate(_session, archive_all: bool = False) -> None: - nonlocal consolidation_calls - consolidation_calls += 1 - await asyncio.sleep(0.05) - - loop._consolidate_memory = _fake_consolidate # type: ignore[method-assign] - - msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello") - await loop._process_message(msg) - await loop._process_message(msg) - await asyncio.sleep(0.1) - - assert consolidation_calls == 1, ( - f"Expected exactly 1 consolidation, got {consolidation_calls}" - ) - - @pytest.mark.asyncio - async def test_new_command_guard_prevents_concurrent_consolidation( - self, tmp_path: Path - ) -> None: - """/new command does not run consolidation concurrently with in-flight consolidation.""" - from nanobot.agent.loop import AgentLoop - from nanobot.bus.events import InboundMessage - from nanobot.bus.queue import MessageBus - from nanobot.providers.base import LLMResponse - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - loop = AgentLoop( - bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10 - ) - - loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[])) - loop.tools.get_definitions = MagicMock(return_value=[]) - - session = 
loop.sessions.get_or_create("cli:test") - for i in range(15): - session.add_message("user", f"msg{i}") - session.add_message("assistant", f"resp{i}") - loop.sessions.save(session) - - consolidation_calls = 0 - active = 0 - max_active = 0 - - async def _fake_consolidate(_session, archive_all: bool = False) -> None: - nonlocal consolidation_calls, active, max_active - consolidation_calls += 1 - active += 1 - max_active = max(max_active, active) - await asyncio.sleep(0.05) - active -= 1 - - loop._consolidate_memory = _fake_consolidate # type: ignore[method-assign] - - msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello") - await loop._process_message(msg) - - new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new") - await loop._process_message(new_msg) - await asyncio.sleep(0.1) - - assert consolidation_calls == 2, ( - f"Expected normal + /new consolidations, got {consolidation_calls}" - ) - assert max_active == 1, ( - f"Expected serialized consolidation, observed concurrency={max_active}" - ) - - @pytest.mark.asyncio - async def test_consolidation_tasks_are_referenced(self, tmp_path: Path) -> None: - """create_task results are tracked in _consolidation_tasks while in flight.""" - from nanobot.agent.loop import AgentLoop - from nanobot.bus.events import InboundMessage - from nanobot.bus.queue import MessageBus - from nanobot.providers.base import LLMResponse - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - loop = AgentLoop( - bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10 - ) - - loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[])) - loop.tools.get_definitions = MagicMock(return_value=[]) - - session = loop.sessions.get_or_create("cli:test") - for i in range(15): - session.add_message("user", f"msg{i}") - session.add_message("assistant", f"resp{i}") - 
loop.sessions.save(session) - - started = asyncio.Event() - - async def _slow_consolidate(_session, archive_all: bool = False) -> None: - started.set() - await asyncio.sleep(0.1) - - loop._consolidate_memory = _slow_consolidate # type: ignore[method-assign] - - msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello") - await loop._process_message(msg) - - await started.wait() - assert len(loop._consolidation_tasks) == 1, "Task must be referenced while in-flight" - - await asyncio.sleep(0.15) - assert len(loop._consolidation_tasks) == 0, ( - "Task reference must be removed after completion" - ) - - @pytest.mark.asyncio - async def test_new_waits_for_inflight_consolidation_and_preserves_messages( - self, tmp_path: Path - ) -> None: - """/new waits for in-flight consolidation and archives before clear.""" - from nanobot.agent.loop import AgentLoop - from nanobot.bus.events import InboundMessage - from nanobot.bus.queue import MessageBus - from nanobot.providers.base import LLMResponse - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - loop = AgentLoop( - bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10 - ) - - loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[])) - loop.tools.get_definitions = MagicMock(return_value=[]) - - session = loop.sessions.get_or_create("cli:test") - for i in range(15): - session.add_message("user", f"msg{i}") - session.add_message("assistant", f"resp{i}") - loop.sessions.save(session) - - started = asyncio.Event() - release = asyncio.Event() - archived_count = 0 - - async def _fake_consolidate(sess, archive_all: bool = False) -> bool: - nonlocal archived_count - if archive_all: - archived_count = len(sess.messages) - return True - started.set() - await release.wait() - return True - - loop._consolidate_memory = _fake_consolidate # type: ignore[method-assign] - - msg = 
InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello") - await loop._process_message(msg) - await started.wait() - - new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new") - pending_new = asyncio.create_task(loop._process_message(new_msg)) - - await asyncio.sleep(0.02) - assert not pending_new.done(), "/new should wait while consolidation is in-flight" - - release.set() - response = await pending_new - assert response is not None - assert "new session started" in response.content.lower() - assert archived_count > 0, "Expected /new archival to process a non-empty snapshot" - - session_after = loop.sessions.get_or_create("cli:test") - assert session_after.messages == [], "Session should be cleared after successful archival" + return loop @pytest.mark.asyncio async def test_new_does_not_clear_session_when_archive_fails(self, tmp_path: Path) -> None: - """/new must keep session data if archive step reports failure.""" - from nanobot.agent.loop import AgentLoop from nanobot.bus.events import InboundMessage - from nanobot.bus.queue import MessageBus - from nanobot.providers.base import LLMResponse - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - loop = AgentLoop( - bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10 - ) - - loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[])) - loop.tools.get_definitions = MagicMock(return_value=[]) + loop = self._make_loop(tmp_path) session = loop.sessions.get_or_create("cli:test") for i in range(5): session.add_message("user", f"msg{i}") @@ -707,111 +516,61 @@ class TestConsolidationDeduplicationGuard: loop.sessions.save(session) before_count = len(session.messages) - async def _failing_consolidate(sess, archive_all: bool = False) -> bool: - if archive_all: - return False - return True + async def _failing_consolidate(_messages) -> bool: + return False - 
loop._consolidate_memory = _failing_consolidate # type: ignore[method-assign] + loop.memory_consolidator.consolidate_messages = _failing_consolidate # type: ignore[method-assign] new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new") response = await loop._process_message(new_msg) assert response is not None assert "failed" in response.content.lower() - session_after = loop.sessions.get_or_create("cli:test") - assert len(session_after.messages) == before_count, ( - "Session must remain intact when /new archival fails" - ) + assert len(loop.sessions.get_or_create("cli:test").messages) == before_count @pytest.mark.asyncio - async def test_new_archives_only_unconsolidated_messages_after_inflight_task( - self, tmp_path: Path - ) -> None: - """/new should archive only messages not yet consolidated by prior task.""" - from nanobot.agent.loop import AgentLoop + async def test_new_archives_only_unconsolidated_messages(self, tmp_path: Path) -> None: from nanobot.bus.events import InboundMessage - from nanobot.bus.queue import MessageBus - from nanobot.providers.base import LLMResponse - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - loop = AgentLoop( - bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10 - ) - - loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[])) - loop.tools.get_definitions = MagicMock(return_value=[]) + loop = self._make_loop(tmp_path) session = loop.sessions.get_or_create("cli:test") for i in range(15): session.add_message("user", f"msg{i}") session.add_message("assistant", f"resp{i}") + session.last_consolidated = len(session.messages) - 3 loop.sessions.save(session) - started = asyncio.Event() - release = asyncio.Event() archived_count = -1 - async def _fake_consolidate(sess, archive_all: bool = False) -> bool: + async def _fake_consolidate(messages) -> bool: nonlocal archived_count - if 
archive_all: - archived_count = len(sess.messages) - return True - - started.set() - await release.wait() - sess.last_consolidated = len(sess.messages) - 3 + archived_count = len(messages) return True - loop._consolidate_memory = _fake_consolidate # type: ignore[method-assign] - - msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello") - await loop._process_message(msg) - await started.wait() + loop.memory_consolidator.consolidate_messages = _fake_consolidate # type: ignore[method-assign] new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new") - pending_new = asyncio.create_task(loop._process_message(new_msg)) - await asyncio.sleep(0.02) - assert not pending_new.done() - - release.set() - response = await pending_new + response = await loop._process_message(new_msg) assert response is not None assert "new session started" in response.content.lower() - assert archived_count == 3, ( - f"Expected only unconsolidated tail to archive, got {archived_count}" - ) + assert archived_count == 3 @pytest.mark.asyncio async def test_new_clears_session_and_responds(self, tmp_path: Path) -> None: - """/new clears session and returns confirmation.""" - from nanobot.agent.loop import AgentLoop from nanobot.bus.events import InboundMessage - from nanobot.bus.queue import MessageBus - from nanobot.providers.base import LLMResponse - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - loop = AgentLoop( - bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10 - ) - loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[])) - loop.tools.get_definitions = MagicMock(return_value=[]) + loop = self._make_loop(tmp_path) session = loop.sessions.get_or_create("cli:test") for i in range(3): session.add_message("user", f"msg{i}") session.add_message("assistant", f"resp{i}") loop.sessions.save(session) - async def 
_ok_consolidate(sess, archive_all: bool = False) -> bool: + async def _ok_consolidate(_messages) -> bool: return True - loop._consolidate_memory = _ok_consolidate # type: ignore[method-assign] + loop.memory_consolidator.consolidate_messages = _ok_consolidate # type: ignore[method-assign] new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new") response = await loop._process_message(new_msg) diff --git a/tests/test_loop_consolidation_tokens.py b/tests/test_loop_consolidation_tokens.py new file mode 100644 index 0000000..b0f3dda --- /dev/null +++ b/tests/test_loop_consolidation_tokens.py @@ -0,0 +1,190 @@ +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from nanobot.agent.loop import AgentLoop +import nanobot.agent.memory as memory_module +from nanobot.bus.queue import MessageBus +from nanobot.providers.base import LLMResponse + + +def _make_loop(tmp_path, *, estimated_tokens: int, context_window_tokens: int) -> AgentLoop: + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + provider.estimate_prompt_tokens.return_value = (estimated_tokens, "test-counter") + provider.chat_with_retry = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[])) + + loop = AgentLoop( + bus=MessageBus(), + provider=provider, + workspace=tmp_path, + model="test-model", + context_window_tokens=context_window_tokens, + ) + loop.tools.get_definitions = MagicMock(return_value=[]) + return loop + + +@pytest.mark.asyncio +async def test_prompt_below_threshold_does_not_consolidate(tmp_path) -> None: + loop = _make_loop(tmp_path, estimated_tokens=100, context_window_tokens=200) + loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True) # type: ignore[method-assign] + + await loop.process_direct("hello", session_key="cli:test") + + loop.memory_consolidator.consolidate_messages.assert_not_awaited() + + +@pytest.mark.asyncio +async def 
test_prompt_above_threshold_triggers_consolidation(tmp_path, monkeypatch) -> None: + loop = _make_loop(tmp_path, estimated_tokens=1000, context_window_tokens=200) + loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True) # type: ignore[method-assign] + session = loop.sessions.get_or_create("cli:test") + session.messages = [ + {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"}, + {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"}, + {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"}, + ] + loop.sessions.save(session) + monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _message: 500) + + await loop.process_direct("hello", session_key="cli:test") + + assert loop.memory_consolidator.consolidate_messages.await_count >= 1 + + +@pytest.mark.asyncio +async def test_prompt_above_threshold_archives_until_next_user_boundary(tmp_path, monkeypatch) -> None: + loop = _make_loop(tmp_path, estimated_tokens=1000, context_window_tokens=200) + loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True) # type: ignore[method-assign] + + session = loop.sessions.get_or_create("cli:test") + session.messages = [ + {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"}, + {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"}, + {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"}, + {"role": "assistant", "content": "a2", "timestamp": "2026-01-01T00:00:03"}, + {"role": "user", "content": "u3", "timestamp": "2026-01-01T00:00:04"}, + ] + loop.sessions.save(session) + + token_map = {"u1": 120, "a1": 120, "u2": 120, "a2": 120, "u3": 120} + monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda message: token_map[message["content"]]) + + await loop.memory_consolidator.maybe_consolidate_by_tokens(session) + + archived_chunk = loop.memory_consolidator.consolidate_messages.await_args.args[0] + assert 
[message["content"] for message in archived_chunk] == ["u1", "a1", "u2", "a2"] + assert session.last_consolidated == 4 + + +@pytest.mark.asyncio +async def test_consolidation_loops_until_target_met(tmp_path, monkeypatch) -> None: + """Verify maybe_consolidate_by_tokens keeps looping until under threshold.""" + loop = _make_loop(tmp_path, estimated_tokens=0, context_window_tokens=200) + loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True) # type: ignore[method-assign] + + session = loop.sessions.get_or_create("cli:test") + session.messages = [ + {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"}, + {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"}, + {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"}, + {"role": "assistant", "content": "a2", "timestamp": "2026-01-01T00:00:03"}, + {"role": "user", "content": "u3", "timestamp": "2026-01-01T00:00:04"}, + {"role": "assistant", "content": "a3", "timestamp": "2026-01-01T00:00:05"}, + {"role": "user", "content": "u4", "timestamp": "2026-01-01T00:00:06"}, + ] + loop.sessions.save(session) + + call_count = [0] + def mock_estimate(_session): + call_count[0] += 1 + if call_count[0] == 1: + return (500, "test") + if call_count[0] == 2: + return (300, "test") + return (80, "test") + + loop.memory_consolidator.estimate_session_prompt_tokens = mock_estimate # type: ignore[method-assign] + monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _m: 100) + + await loop.memory_consolidator.maybe_consolidate_by_tokens(session) + + assert loop.memory_consolidator.consolidate_messages.await_count == 2 + assert session.last_consolidated == 6 + + +@pytest.mark.asyncio +async def test_consolidation_continues_below_trigger_until_half_target(tmp_path, monkeypatch) -> None: + """Once triggered, consolidation should continue until it drops below half threshold.""" + loop = _make_loop(tmp_path, estimated_tokens=0, context_window_tokens=200) + 
loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True) # type: ignore[method-assign] + + session = loop.sessions.get_or_create("cli:test") + session.messages = [ + {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"}, + {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"}, + {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"}, + {"role": "assistant", "content": "a2", "timestamp": "2026-01-01T00:00:03"}, + {"role": "user", "content": "u3", "timestamp": "2026-01-01T00:00:04"}, + {"role": "assistant", "content": "a3", "timestamp": "2026-01-01T00:00:05"}, + {"role": "user", "content": "u4", "timestamp": "2026-01-01T00:00:06"}, + ] + loop.sessions.save(session) + + call_count = [0] + + def mock_estimate(_session): + call_count[0] += 1 + if call_count[0] == 1: + return (500, "test") + if call_count[0] == 2: + return (150, "test") + return (80, "test") + + loop.memory_consolidator.estimate_session_prompt_tokens = mock_estimate # type: ignore[method-assign] + monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _m: 100) + + await loop.memory_consolidator.maybe_consolidate_by_tokens(session) + + assert loop.memory_consolidator.consolidate_messages.await_count == 2 + assert session.last_consolidated == 6 + + +@pytest.mark.asyncio +async def test_preflight_consolidation_before_llm_call(tmp_path, monkeypatch) -> None: + """Verify preflight consolidation runs before the LLM call in process_direct.""" + order: list[str] = [] + + loop = _make_loop(tmp_path, estimated_tokens=0, context_window_tokens=200) + + async def track_consolidate(messages): + order.append("consolidate") + return True + loop.memory_consolidator.consolidate_messages = track_consolidate # type: ignore[method-assign] + + async def track_llm(*args, **kwargs): + order.append("llm") + return LLMResponse(content="ok", tool_calls=[]) + loop.provider.chat_with_retry = track_llm + + session = 
loop.sessions.get_or_create("cli:test") + session.messages = [ + {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"}, + {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"}, + {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"}, + ] + loop.sessions.save(session) + monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _m: 500) + + call_count = [0] + def mock_estimate(_session): + call_count[0] += 1 + return (1000 if call_count[0] <= 1 else 80, "test") + loop.memory_consolidator.estimate_session_prompt_tokens = mock_estimate # type: ignore[method-assign] + + await loop.process_direct("hello", session_key="cli:test") + + assert "consolidate" in order + assert "llm" in order + assert order.index("consolidate") < order.index("llm") diff --git a/tests/test_memory_consolidation_types.py b/tests/test_memory_consolidation_types.py index 2605bf7..0263f01 100644 --- a/tests/test_memory_consolidation_types.py +++ b/tests/test_memory_consolidation_types.py @@ -7,7 +7,7 @@ tool call response, it should serialize them to JSON instead of raising TypeErro import json from pathlib import Path -from unittest.mock import AsyncMock, MagicMock +from unittest.mock import AsyncMock import pytest @@ -15,15 +15,12 @@ from nanobot.agent.memory import MemoryStore from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest -def _make_session(message_count: int = 30, memory_window: int = 50): - """Create a mock session with messages.""" - session = MagicMock() - session.messages = [ +def _make_messages(message_count: int = 30): + """Create a list of mock messages.""" + return [ {"role": "user", "content": f"msg{i}", "timestamp": "2026-01-01 00:00"} for i in range(message_count) ] - session.last_consolidated = 0 - return session def _make_tool_response(history_entry, memory_update): @@ -74,9 +71,9 @@ class TestMemoryConsolidationTypeHandling: ) ) provider.chat_with_retry = provider.chat - session = 
_make_session(message_count=60) + messages = _make_messages(message_count=60) - result = await store.consolidate(session, provider, "test-model", memory_window=50) + result = await store.consolidate(messages, provider, "test-model") assert result is True assert store.history_file.exists() @@ -95,9 +92,9 @@ class TestMemoryConsolidationTypeHandling: ) ) provider.chat_with_retry = provider.chat - session = _make_session(message_count=60) + messages = _make_messages(message_count=60) - result = await store.consolidate(session, provider, "test-model", memory_window=50) + result = await store.consolidate(messages, provider, "test-model") assert result is True assert store.history_file.exists() @@ -131,9 +128,9 @@ class TestMemoryConsolidationTypeHandling: ) provider.chat = AsyncMock(return_value=response) provider.chat_with_retry = provider.chat - session = _make_session(message_count=60) + messages = _make_messages(message_count=60) - result = await store.consolidate(session, provider, "test-model", memory_window=50) + result = await store.consolidate(messages, provider, "test-model") assert result is True assert "User discussed testing." 
in store.history_file.read_text() @@ -147,22 +144,22 @@ class TestMemoryConsolidationTypeHandling: return_value=LLMResponse(content="I summarized the conversation.", tool_calls=[]) ) provider.chat_with_retry = provider.chat - session = _make_session(message_count=60) + messages = _make_messages(message_count=60) - result = await store.consolidate(session, provider, "test-model", memory_window=50) + result = await store.consolidate(messages, provider, "test-model") assert result is False assert not store.history_file.exists() @pytest.mark.asyncio - async def test_skips_when_few_messages(self, tmp_path: Path) -> None: - """Consolidation should be a no-op when messages < keep_count.""" + async def test_skips_when_message_chunk_is_empty(self, tmp_path: Path) -> None: + """Consolidation should be a no-op when the selected chunk is empty.""" store = MemoryStore(tmp_path) provider = AsyncMock() provider.chat_with_retry = provider.chat - session = _make_session(message_count=10) + messages: list[dict] = [] - result = await store.consolidate(session, provider, "test-model", memory_window=50) + result = await store.consolidate(messages, provider, "test-model") assert result is True provider.chat.assert_not_called() @@ -189,9 +186,9 @@ class TestMemoryConsolidationTypeHandling: ) provider.chat = AsyncMock(return_value=response) provider.chat_with_retry = provider.chat - session = _make_session(message_count=60) + messages = _make_messages(message_count=60) - result = await store.consolidate(session, provider, "test-model", memory_window=50) + result = await store.consolidate(messages, provider, "test-model") assert result is True assert "User discussed testing." 
in store.history_file.read_text() @@ -215,9 +212,9 @@ class TestMemoryConsolidationTypeHandling: ) provider.chat = AsyncMock(return_value=response) provider.chat_with_retry = provider.chat - session = _make_session(message_count=60) + messages = _make_messages(message_count=60) - result = await store.consolidate(session, provider, "test-model", memory_window=50) + result = await store.consolidate(messages, provider, "test-model") assert result is False @@ -239,9 +236,9 @@ class TestMemoryConsolidationTypeHandling: ) provider.chat = AsyncMock(return_value=response) provider.chat_with_retry = provider.chat - session = _make_session(message_count=60) + messages = _make_messages(message_count=60) - result = await store.consolidate(session, provider, "test-model", memory_window=50) + result = await store.consolidate(messages, provider, "test-model") assert result is False @@ -255,7 +252,7 @@ class TestMemoryConsolidationTypeHandling: memory_update="# Memory\nUser likes testing.", ), ]) - session = _make_session(message_count=60) + messages = _make_messages(message_count=60) delays: list[int] = [] async def _fake_sleep(delay: int) -> None: @@ -263,7 +260,7 @@ class TestMemoryConsolidationTypeHandling: monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep) - result = await store.consolidate(session, provider, "test-model", memory_window=50) + result = await store.consolidate(messages, provider, "test-model") assert result is True assert provider.calls == 2 diff --git a/tests/test_message_tool_suppress.py b/tests/test_message_tool_suppress.py index 63b0fd1..1091de4 100644 --- a/tests/test_message_tool_suppress.py +++ b/tests/test_message_tool_suppress.py @@ -16,7 +16,7 @@ def _make_loop(tmp_path: Path) -> AgentLoop: bus = MessageBus() provider = MagicMock() provider.get_default_model.return_value = "test-model" - return AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10) + return AgentLoop(bus=bus, 
provider=provider, workspace=tmp_path, model="test-model") class TestMessageToolSuppressLogic: @@ -33,7 +33,7 @@ class TestMessageToolSuppressLogic: LLMResponse(content="", tool_calls=[tool_call]), LLMResponse(content="Done", tool_calls=[]), ]) - loop.provider.chat = AsyncMock(side_effect=lambda *a, **kw: next(calls)) + loop.provider.chat_with_retry = AsyncMock(side_effect=lambda *a, **kw: next(calls)) loop.tools.get_definitions = MagicMock(return_value=[]) sent: list[OutboundMessage] = [] @@ -58,7 +58,7 @@ class TestMessageToolSuppressLogic: LLMResponse(content="", tool_calls=[tool_call]), LLMResponse(content="I've sent the email.", tool_calls=[]), ]) - loop.provider.chat = AsyncMock(side_effect=lambda *a, **kw: next(calls)) + loop.provider.chat_with_retry = AsyncMock(side_effect=lambda *a, **kw: next(calls)) loop.tools.get_definitions = MagicMock(return_value=[]) sent: list[OutboundMessage] = [] @@ -77,7 +77,7 @@ class TestMessageToolSuppressLogic: @pytest.mark.asyncio async def test_not_suppress_when_no_message_tool_used(self, tmp_path: Path) -> None: loop = _make_loop(tmp_path) - loop.provider.chat = AsyncMock(return_value=LLMResponse(content="Hello!", tool_calls=[])) + loop.provider.chat_with_retry = AsyncMock(return_value=LLMResponse(content="Hello!", tool_calls=[])) loop.tools.get_definitions = MagicMock(return_value=[]) msg = InboundMessage(channel="feishu", sender_id="user1", chat_id="chat123", content="Hi") @@ -98,7 +98,7 @@ class TestMessageToolSuppressLogic: ), LLMResponse(content="Done", tool_calls=[]), ]) - loop.provider.chat = AsyncMock(side_effect=lambda *a, **kw: next(calls)) + loop.provider.chat_with_retry = AsyncMock(side_effect=lambda *a, **kw: next(calls)) loop.tools.get_definitions = MagicMock(return_value=[]) loop.tools.execute = AsyncMock(return_value="ok") From a44ee115d1188a62012d3d7cc38077ff5013f4ee Mon Sep 17 00:00:00 2001 From: greyishsong Date: Wed, 11 Mar 2026 09:02:28 +0800 Subject: [PATCH 11/28] fix: bump litellm version to 1.82.1 
for Moonshot provider support see issue #1628 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 62cf616..7127354 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ classifiers = [ dependencies = [ "typer>=0.20.0,<1.0.0", - "litellm>=1.81.5,<2.0.0", + "litellm>=1.82.1,<2.0.0", "pydantic>=2.12.0,<3.0.0", "pydantic-settings>=2.12.0,<3.0.0", "websockets>=16.0,<17.0", From d1df53aaf783d44394d3d335948b5eaf31af803f Mon Sep 17 00:00:00 2001 From: YinAnPing Date: Wed, 11 Mar 2026 09:30:33 +0800 Subject: [PATCH 12/28] fix: exclude hidden files when syncing workspace templates Skip files starting with '.' (e.g., macOS extended attributes like ._AGENTS.md) to prevent UnicodeDecodeError during template synchronization. --- nanobot/utils/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 nanobot/utils/helpers.py diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py old mode 100644 new mode 100755 index 57c60dc..a387b79 --- a/nanobot/utils/helpers.py +++ b/nanobot/utils/helpers.py @@ -88,7 +88,7 @@ def sync_workspace_templates(workspace: Path, silent: bool = False) -> list[str] added.append(str(dest.relative_to(workspace))) for item in tpl.iterdir(): - if item.name.endswith(".md"): + if item.name.endswith(".md") and not item.name.startswith("."): _write(item, workspace / item.name) _write(tpl / "memory" / "MEMORY.md", workspace / "memory" / "MEMORY.md") _write(None, workspace / "memory" / "HISTORY.md") From 35d811c99790b71ef34c5908b23168eeb526ca6b Mon Sep 17 00:00:00 2001 From: dingyanyi2019 Date: Wed, 11 Mar 2026 10:19:43 +0800 Subject: [PATCH 13/28] feat: support retrieving DingTalk voice recognition text --- nanobot/channels/dingtalk.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nanobot/channels/dingtalk.py b/nanobot/channels/dingtalk.py index 3c301a9..cdcba57 100644 --- a/nanobot/channels/dingtalk.py +++ 
b/nanobot/channels/dingtalk.py @@ -57,6 +57,8 @@ class NanobotDingTalkHandler(CallbackHandler): content = "" if chatbot_msg.text: content = chatbot_msg.text.content.strip() + elif chatbot_msg.extensions.get("content", {}).get("recognition"): + content = chatbot_msg.extensions["content"]["recognition"].strip() if not content: content = message.data.get("text", {}).get("content", "").strip() From 91f17cad00b14b7a550f154791be3fc8eb12b746 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Wed, 11 Mar 2026 03:40:33 +0000 Subject: [PATCH 14/28] feat(dingtalk): support voice recognition text fallback Read DingTalk recognition text when text.content is empty, and add a handler-level regression test for voice transcript delivery. --- tests/test_dingtalk_channel.py | 47 +++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/tests/test_dingtalk_channel.py b/tests/test_dingtalk_channel.py index 7595a33..6051014 100644 --- a/tests/test_dingtalk_channel.py +++ b/tests/test_dingtalk_channel.py @@ -1,9 +1,11 @@ +import asyncio from types import SimpleNamespace import pytest from nanobot.bus.queue import MessageBus -from nanobot.channels.dingtalk import DingTalkChannel +import nanobot.channels.dingtalk as dingtalk_module +from nanobot.channels.dingtalk import DingTalkChannel, NanobotDingTalkHandler from nanobot.config.schema import DingTalkConfig @@ -64,3 +66,46 @@ async def test_group_send_uses_group_messages_api() -> None: assert call["url"] == "https://api.dingtalk.com/v1.0/robot/groupMessages/send" assert call["json"]["openConversationId"] == "conv123" assert call["json"]["msgKey"] == "sampleMarkdown" + + +@pytest.mark.asyncio +async def test_handler_uses_voice_recognition_text_when_text_is_empty(monkeypatch) -> None: + bus = MessageBus() + channel = DingTalkChannel( + DingTalkConfig(client_id="app", client_secret="secret", allow_from=["user1"]), + bus, + ) + handler = NanobotDingTalkHandler(channel) + + class _FakeChatbotMessage: + text = None 
+ extensions = {"content": {"recognition": "voice transcript"}} + sender_staff_id = "user1" + sender_id = "fallback-user" + sender_nick = "Alice" + message_type = "audio" + + @staticmethod + def from_dict(_data): + return _FakeChatbotMessage() + + monkeypatch.setattr(dingtalk_module, "ChatbotMessage", _FakeChatbotMessage) + monkeypatch.setattr(dingtalk_module, "AckMessage", SimpleNamespace(STATUS_OK="OK")) + + status, body = await handler.process( + SimpleNamespace( + data={ + "conversationType": "2", + "conversationId": "conv123", + "text": {"content": ""}, + } + ) + ) + + await asyncio.gather(*list(channel._background_tasks)) + msg = await bus.consume_inbound() + + assert (status, body) == ("OK", "OK") + assert msg.content == "voice transcript" + assert msg.sender_id == "user1" + assert msg.chat_id == "group:conv123" From ddccf25bb1be8529d453d2344eea21bd593021c2 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Wed, 11 Mar 2026 03:47:24 +0000 Subject: [PATCH 15/28] fix(subagent): preserve reasoning fields across tool turns Share assistant message construction between the main agent and subagents, and add a regression test to keep reasoning_content and thinking_blocks in follow-up tool rounds. --- nanobot/agent/context.py | 16 +++++++-------- nanobot/agent/subagent.py | 21 +++++++------------ nanobot/utils/helpers.py | 17 ++++++++++++++++ tests/test_task_cancel.py | 43 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 74 insertions(+), 23 deletions(-) diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py index 2c648eb..e47fcb8 100644 --- a/nanobot/agent/context.py +++ b/nanobot/agent/context.py @@ -10,7 +10,7 @@ from typing import Any from nanobot.agent.memory import MemoryStore from nanobot.agent.skills import SkillsLoader -from nanobot.utils.helpers import detect_image_mime +from nanobot.utils.helpers import build_assistant_message, detect_image_mime class ContextBuilder: @@ -182,12 +182,10 @@ Reply directly with text for conversations. 
Only use the 'message' tool to send thinking_blocks: list[dict] | None = None, ) -> list[dict[str, Any]]: """Add an assistant message to the message list.""" - msg: dict[str, Any] = {"role": "assistant", "content": content} - if tool_calls: - msg["tool_calls"] = tool_calls - if reasoning_content is not None: - msg["reasoning_content"] = reasoning_content - if thinking_blocks: - msg["thinking_blocks"] = thinking_blocks - messages.append(msg) + messages.append(build_assistant_message( + content, + tool_calls=tool_calls, + reasoning_content=reasoning_content, + thinking_blocks=thinking_blocks, + )) return messages diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py index 308e67d..eff0b4f 100644 --- a/nanobot/agent/subagent.py +++ b/nanobot/agent/subagent.py @@ -16,6 +16,7 @@ from nanobot.bus.events import InboundMessage from nanobot.bus.queue import MessageBus from nanobot.config.schema import ExecToolConfig from nanobot.providers.base import LLMProvider +from nanobot.utils.helpers import build_assistant_message class SubagentManager: @@ -133,7 +134,6 @@ class SubagentManager: ) if response.has_tool_calls: - # Add assistant message with tool calls tool_call_dicts = [ { "id": tc.id, @@ -145,19 +145,12 @@ class SubagentManager: } for tc in response.tool_calls ] - assistant_msg: dict[str, Any] = { - "role": "assistant", - "content": response.content or "", - "tool_calls": tool_call_dicts, - } - # Preserve reasoning_content for providers that require it - # (e.g. Deepseek Reasoner mandates this field on every - # assistant message when thinking mode is active). 
- if response.reasoning_content is not None: - assistant_msg["reasoning_content"] = response.reasoning_content - if response.thinking_blocks: - assistant_msg["thinking_blocks"] = response.thinking_blocks - messages.append(assistant_msg) + messages.append(build_assistant_message( + response.content or "", + tool_calls=tool_call_dicts, + reasoning_content=response.reasoning_content, + thinking_blocks=response.thinking_blocks, + )) # Execute tools for tool_call in response.tool_calls: diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py index 9242ba6..6d2c670 100644 --- a/nanobot/utils/helpers.py +++ b/nanobot/utils/helpers.py @@ -72,6 +72,23 @@ def split_message(content: str, max_len: int = 2000) -> list[str]: return chunks +def build_assistant_message( + content: str | None, + tool_calls: list[dict[str, Any]] | None = None, + reasoning_content: str | None = None, + thinking_blocks: list[dict] | None = None, +) -> dict[str, Any]: + """Build a provider-safe assistant message with optional reasoning fields.""" + msg: dict[str, Any] = {"role": "assistant", "content": content} + if tool_calls: + msg["tool_calls"] = tool_calls + if reasoning_content is not None: + msg["reasoning_content"] = reasoning_content + if thinking_blocks: + msg["thinking_blocks"] = thinking_blocks + return msg + + def estimate_prompt_tokens( messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None, diff --git a/tests/test_task_cancel.py b/tests/test_task_cancel.py index 27a2d73..62ab2cc 100644 --- a/tests/test_task_cancel.py +++ b/tests/test_task_cancel.py @@ -165,3 +165,46 @@ class TestSubagentCancellation: provider.get_default_model.return_value = "test-model" mgr = SubagentManager(provider=provider, workspace=MagicMock(), bus=bus) assert await mgr.cancel_by_session("nonexistent") == 0 + + @pytest.mark.asyncio + async def test_subagent_preserves_reasoning_fields_in_tool_turn(self, monkeypatch, tmp_path): + from nanobot.agent.subagent import SubagentManager + from 
nanobot.bus.queue import MessageBus + from nanobot.providers.base import LLMResponse, ToolCallRequest + + bus = MessageBus() + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + + captured_second_call: list[dict] = [] + + call_count = {"n": 0} + + async def scripted_chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + if call_count["n"] == 1: + return LLMResponse( + content="thinking", + tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={})], + reasoning_content="hidden reasoning", + thinking_blocks=[{"type": "thinking", "thinking": "step"}], + ) + captured_second_call[:] = messages + return LLMResponse(content="done", tool_calls=[]) + provider.chat_with_retry = scripted_chat_with_retry + mgr = SubagentManager(provider=provider, workspace=tmp_path, bus=bus) + + async def fake_execute(self, name, arguments): + return "tool result" + + monkeypatch.setattr("nanobot.agent.tools.registry.ToolRegistry.execute", fake_execute) + + await mgr._run_subagent("sub-1", "do task", "label", {"channel": "test", "chat_id": "c1"}) + + assistant_messages = [ + msg for msg in captured_second_call + if msg.get("role") == "assistant" and msg.get("tool_calls") + ] + assert len(assistant_messages) == 1 + assert assistant_messages[0]["reasoning_content"] == "hidden reasoning" + assert assistant_messages[0]["thinking_blocks"] == [{"type": "thinking", "thinking": "step"}] From 76c6063141f84d8bde3f3a95896c36e4e673c5c7 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Wed, 11 Mar 2026 03:50:54 +0000 Subject: [PATCH 16/28] chore: normalize helpers.py file mode --- nanobot/utils/helpers.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 nanobot/utils/helpers.py diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py old mode 100755 new mode 100644 From dee4f27dce4a8837eea4b97b882314c50a2b74e3 Mon Sep 17 00:00:00 2001 From: "Jerome Sonnet (letzdoo)" Date: Wed, 11 Mar 2026 07:43:28 +0400 Subject: [PATCH 
17/28] feat: add Ollama as a local LLM provider Add native Ollama support so local models (e.g. nemotron-3-nano) can be used without an API key. Adds ProviderSpec with ollama_chat LiteLLM prefix, ProvidersConfig field, and skips API key validation for local providers. Co-Authored-By: Claude Opus 4.6 --- nanobot/cli/commands.py | 2 +- nanobot/config/schema.py | 5 +++-- nanobot/providers/registry.py | 17 +++++++++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index cf69450..8387b28 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -252,7 +252,7 @@ def _make_provider(config: Config): from nanobot.providers.litellm_provider import LiteLLMProvider from nanobot.providers.registry import find_by_name spec = find_by_name(provider_name) - if not model.startswith("bedrock/") and not (p and p.api_key) and not (spec and spec.is_oauth): + if not model.startswith("bedrock/") and not (p and p.api_key) and not (spec and (spec.is_oauth or spec.is_local)): console.print("[red]Error: No API key configured.[/red]") console.print("Set one in ~/.nanobot/config.json under providers section") raise typer.Exit(1) diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index a2de239..9b5821b 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -272,6 +272,7 @@ class ProvidersConfig(Base): moonshot: ProviderConfig = Field(default_factory=ProviderConfig) minimax: ProviderConfig = Field(default_factory=ProviderConfig) aihubmix: ProviderConfig = Field(default_factory=ProviderConfig) # AiHubMix API gateway + ollama: ProviderConfig = Field(default_factory=ProviderConfig) # Ollama local models siliconflow: ProviderConfig = Field(default_factory=ProviderConfig) # SiliconFlow (硅基流动) volcengine: ProviderConfig = Field(default_factory=ProviderConfig) # VolcEngine (火山引擎) openai_codex: ProviderConfig = Field(default_factory=ProviderConfig) # OpenAI Codex (OAuth) @@ -375,14 +376,14 
@@ class Config(BaseSettings): for spec in PROVIDERS: p = getattr(self.providers, spec.name, None) if p and model_prefix and normalized_prefix == spec.name: - if spec.is_oauth or p.api_key: + if spec.is_oauth or spec.is_local or p.api_key: return p, spec.name # Match by keyword (order follows PROVIDERS registry) for spec in PROVIDERS: p = getattr(self.providers, spec.name, None) if p and any(_kw_matches(kw) for kw in spec.keywords): - if spec.is_oauth or p.api_key: + if spec.is_oauth or spec.is_local or p.api_key: return p, spec.name # Fallback: gateways first, then others (follows registry order) diff --git a/nanobot/providers/registry.py b/nanobot/providers/registry.py index 3ba1a0e..c4bcfe2 100644 --- a/nanobot/providers/registry.py +++ b/nanobot/providers/registry.py @@ -360,6 +360,23 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( strip_model_prefix=False, model_overrides=(), ), + # === Ollama (local, OpenAI-compatible) =================================== + ProviderSpec( + name="ollama", + keywords=("ollama", "nemotron"), + env_key="OLLAMA_API_KEY", + display_name="Ollama", + litellm_prefix="ollama_chat", # model → ollama_chat/model + skip_prefixes=("ollama/", "ollama_chat/"), + env_extras=(), + is_gateway=False, + is_local=True, + detect_by_key_prefix="", + detect_by_base_keyword="11434", + default_api_base="http://localhost:11434", + strip_model_prefix=False, + model_overrides=(), + ), # === Auxiliary (not a primary LLM provider) ============================ # Groq: mainly used for Whisper voice transcription, also usable for LLM. # Needs "groq/" prefix for LiteLLM routing. Placed last — it rarely wins fallback. From c7e2622ee1cb313ca3f7a4a31779813cc3ebc27b Mon Sep 17 00:00:00 2001 From: ethanclaw Date: Wed, 11 Mar 2026 12:25:28 +0800 Subject: [PATCH 18/28] fix(subagent): pass reasoning_content and thinking_blocks in subagent messages Fix issue #1834: Spawn/subagent tool fails with Deepseek Reasoner due to missing reasoning_content field when using thinking mode. 
The subagent was not including reasoning_content and thinking_blocks in assistant messages with tool calls, causing the Deepseek API to reject subsequent requests. - Add reasoning_content to assistant message when subagent makes tool calls - Add thinking_blocks to assistant message for Anthropic extended thinking - Add tests to verify both fields are properly passed Fixes #1834 --- nanobot/agent/subagent.py | 2 + tests/test_subagent_reasoning.py | 144 +++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+) create mode 100644 tests/test_subagent_reasoning.py diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py index f9eda1f..6163a52 100644 --- a/nanobot/agent/subagent.py +++ b/nanobot/agent/subagent.py @@ -149,6 +149,8 @@ class SubagentManager: "role": "assistant", "content": response.content or "", "tool_calls": tool_call_dicts, + "reasoning_content": response.reasoning_content, + "thinking_blocks": response.thinking_blocks, }) # Execute tools diff --git a/tests/test_subagent_reasoning.py b/tests/test_subagent_reasoning.py new file mode 100644 index 0000000..5e70506 --- /dev/null +++ b/tests/test_subagent_reasoning.py @@ -0,0 +1,144 @@ +"""Tests for subagent reasoning_content and thinking_blocks handling.""" + +from __future__ import annotations + +import asyncio +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + + +class TestSubagentReasoningContent: + """Test that subagent properly handles reasoning_content and thinking_blocks.""" + + @pytest.mark.asyncio + async def test_subagent_message_includes_reasoning_content(self): + """Verify reasoning_content is included in assistant messages with tool calls. + + This is the fix for issue #1834: Spawn/subagent tool fails with + Deepseek Reasoner due to missing reasoning_content field. 
+ """ + from nanobot.agent.subagent import SubagentManager + from nanobot.bus.queue import MessageBus + from nanobot.providers.base import LLMResponse, ToolCallRequest + + bus = MessageBus() + provider = MagicMock() + provider.get_default_model.return_value = "deepseek-reasoner" + + # Create a real Path object for workspace + workspace = Path("/tmp/test_workspace") + workspace.mkdir(parents=True, exist_ok=True) + + # Capture messages that are sent to the provider + captured_messages = [] + + async def mock_chat(*args, **kwargs): + captured_messages.append(kwargs.get("messages", [])) + # Return response with tool calls and reasoning_content + tool_call = ToolCallRequest( + id="test-1", + name="read_file", + arguments={"path": "/test.txt"}, + ) + return LLMResponse( + content="", + tool_calls=[tool_call], + reasoning_content="I need to read this file first", + ) + + provider.chat_with_retry = AsyncMock(side_effect=mock_chat) + + mgr = SubagentManager(provider=provider, workspace=workspace, bus=bus) + + # Mock the tools registry + with patch("nanobot.agent.subagent.ToolRegistry") as MockToolRegistry: + mock_registry = MagicMock() + mock_registry.get_definitions.return_value = [] + mock_registry.execute = AsyncMock(return_value="file content") + MockToolRegistry.return_value = mock_registry + + result = await mgr.spawn( + task="Read a file", + label="test", + origin_channel="cli", + origin_chat_id="direct", + session_key="cli:direct", + ) + + # Wait for the task to complete + await asyncio.sleep(0.5) + + # Check the captured messages + assert len(captured_messages) >= 1 + # Find the assistant message with tool_calls + found = False + for msg_list in captured_messages: + for msg in msg_list: + if msg.get("role") == "assistant" and msg.get("tool_calls"): + assert "reasoning_content" in msg, "reasoning_content should be in assistant message with tool_calls" + assert msg["reasoning_content"] == "I need to read this file first" + found = True + assert found, "Should have 
found an assistant message with tool_calls" + + @pytest.mark.asyncio + async def test_subagent_message_includes_thinking_blocks(self): + """Verify thinking_blocks is included in assistant messages with tool calls.""" + from nanobot.agent.subagent import SubagentManager + from nanobot.bus.queue import MessageBus + from nanobot.providers.base import LLMResponse, ToolCallRequest + + bus = MessageBus() + provider = MagicMock() + provider.get_default_model.return_value = "claude-sonnet" + + workspace = Path("/tmp/test_workspace2") + workspace.mkdir(parents=True, exist_ok=True) + + captured_messages = [] + + async def mock_chat(*args, **kwargs): + captured_messages.append(kwargs.get("messages", [])) + tool_call = ToolCallRequest( + id="test-2", + name="read_file", + arguments={"path": "/test.txt"}, + ) + return LLMResponse( + content="", + tool_calls=[tool_call], + thinking_blocks=[ + {"signature": "sig1", "thought": "thinking step 1"}, + {"signature": "sig2", "thought": "thinking step 2"}, + ], + ) + + provider.chat_with_retry = AsyncMock(side_effect=mock_chat) + + mgr = SubagentManager(provider=provider, workspace=workspace, bus=bus) + + with patch("nanobot.agent.subagent.ToolRegistry") as MockToolRegistry: + mock_registry = MagicMock() + mock_registry.get_definitions.return_value = [] + mock_registry.execute = AsyncMock(return_value="file content") + MockToolRegistry.return_value = mock_registry + + result = await mgr.spawn( + task="Read a file", + label="test", + origin_channel="cli", + origin_chat_id="direct", + ) + + await asyncio.sleep(0.5) + + # Check the captured messages + found = False + for msg_list in captured_messages: + for msg in msg_list: + if msg.get("role") == "assistant" and msg.get("tool_calls"): + assert "thinking_blocks" in msg, "thinking_blocks should be in assistant message with tool_calls" + assert len(msg["thinking_blocks"]) == 2 + found = True + assert found, "Should have found an assistant message with tool_calls" From 
12104c8d46c0b688e0db21617b23d54f012970ba Mon Sep 17 00:00:00 2001 From: ethanclaw Date: Wed, 11 Mar 2026 14:22:33 +0800 Subject: [PATCH 19/28] fix(memory): pass temperature, max_tokens and reasoning_effort to memory consolidation Fix issue #1823: Memory consolidation does not inherit agent temperature and maxTokens configuration. The agent's configured generation parameters were not being passed through to the memory consolidation call, causing it to fall back to default values. This resulted in the consolidation response being truncated before the save_memory tool call was emitted. - Pass temperature, max_tokens, reasoning_effort from AgentLoop to MemoryConsolidator and then to MemoryStore.consolidate() - Forward these parameters to the provider.chat_with_retry() call Fixes #1823 --- nanobot/agent/loop.py | 3 +++ nanobot/agent/memory.py | 21 ++++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index 8605a09..edf1e8e 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -114,6 +114,9 @@ class AgentLoop: context_window_tokens=context_window_tokens, build_messages=self.context.build_messages, get_tool_definitions=self.tools.get_definitions, + temperature=self.temperature, + max_tokens=self.max_tokens, + reasoning_effort=self.reasoning_effort, ) self._register_default_tools() diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py index cd5f54f..d79887b 100644 --- a/nanobot/agent/memory.py +++ b/nanobot/agent/memory.py @@ -99,6 +99,9 @@ class MemoryStore: messages: list[dict], provider: LLMProvider, model: str, + temperature: float | None = None, + max_tokens: int | None = None, + reasoning_effort: str | None = None, ) -> bool: """Consolidate the provided message chunk into MEMORY.md + HISTORY.md.""" if not messages: @@ -121,6 +124,9 @@ class MemoryStore: ], tools=_SAVE_MEMORY_TOOL, model=model, + temperature=temperature, + max_tokens=max_tokens, + 
reasoning_effort=reasoning_effort, ) if not response.has_tool_calls: @@ -160,6 +166,9 @@ class MemoryConsolidator: context_window_tokens: int, build_messages: Callable[..., list[dict[str, Any]]], get_tool_definitions: Callable[[], list[dict[str, Any]]], + temperature: float | None = None, + max_tokens: int | None = None, + reasoning_effort: str | None = None, ): self.store = MemoryStore(workspace) self.provider = provider @@ -168,6 +177,9 @@ class MemoryConsolidator: self.context_window_tokens = context_window_tokens self._build_messages = build_messages self._get_tool_definitions = get_tool_definitions + self._temperature = temperature + self._max_tokens = max_tokens + self._reasoning_effort = reasoning_effort self._locks: weakref.WeakValueDictionary[str, asyncio.Lock] = weakref.WeakValueDictionary() def get_lock(self, session_key: str) -> asyncio.Lock: @@ -176,7 +188,14 @@ class MemoryConsolidator: async def consolidate_messages(self, messages: list[dict[str, object]]) -> bool: """Archive a selected message chunk into persistent memory.""" - return await self.store.consolidate(messages, self.provider, self.model) + return await self.store.consolidate( + messages, + self.provider, + self.model, + temperature=self._temperature, + max_tokens=self._max_tokens, + reasoning_effort=self._reasoning_effort, + ) def pick_consolidation_boundary( self, From d0b4f0d70d025ba3ffa0a9127b280d8325bb698f Mon Sep 17 00:00:00 2001 From: Re-bin Date: Wed, 11 Mar 2026 07:57:12 +0000 Subject: [PATCH 20/28] feat(wecom): add WeCom channel with SDK pinned to GitHub tag v0.1.2 --- README.md | 25 ++++++++++++++----------- nanobot/channels/manager.py | 1 - nanobot/channels/wecom.py | 8 ++++---- nanobot/config/schema.py | 2 +- pyproject.toml | 4 +++- 5 files changed, 22 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 5be0ce5..6e8211e 100644 --- a/README.md +++ b/README.md @@ -208,7 +208,7 @@ Connect nanobot to your favorite chat platform. 
| **Slack** | Bot token + App-Level token | | **Email** | IMAP/SMTP credentials | | **QQ** | App ID + App Secret | -| **Wecom** | Bot ID + App Secret | +| **Wecom** | Bot ID + Bot Secret |
Telegram (Recommended) @@ -683,12 +683,17 @@ nanobot gateway Uses **WebSocket** long connection — no public IP required. -**1. Create a wecom bot** +**1. Install the optional dependency** -In the client's workspace, click on "Intelligent Robot" to create a robot and choose API mode for creation. -Select to create in "long connection" mode, and obtain Bot ID and Secret. +```bash +pip install nanobot-ai[wecom] +``` -**2. Configure** +**2. Create a WeCom AI Bot** + +Go to the WeCom admin console → Intelligent Robot → Create Robot → select **API mode** with **long connection**. Copy the Bot ID and Secret. + +**3. Configure** ```json { @@ -696,23 +701,21 @@ Select to create in "long connection" mode, and obtain Bot ID and Secret. "wecom": { "enabled": true, "botId": "your_bot_id", - "secret": "your_secret", - "allowFrom": [ - "your_id" - ] + "secret": "your_bot_secret", + "allowFrom": ["your_id"] } } } ``` -**3. Run** +**4. Run** ```bash nanobot gateway ``` > [!TIP] -> wecom uses WebSocket to receive messages — no webhook or public IP needed! +> WeCom uses WebSocket to receive messages — no webhook or public IP needed!
diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py index 369795a..2c5cd3f 100644 --- a/nanobot/channels/manager.py +++ b/nanobot/channels/manager.py @@ -156,7 +156,6 @@ class ChannelManager: self.channels["wecom"] = WecomChannel( self.config.channels.wecom, self.bus, - groq_api_key=self.config.providers.groq.api_key, ) logger.info("WeCom channel enabled") except ImportError as e: diff --git a/nanobot/channels/wecom.py b/nanobot/channels/wecom.py index dc97311..1c44451 100644 --- a/nanobot/channels/wecom.py +++ b/nanobot/channels/wecom.py @@ -2,6 +2,7 @@ import asyncio import importlib.util +import os from collections import OrderedDict from typing import Any @@ -36,10 +37,9 @@ class WecomChannel(BaseChannel): name = "wecom" - def __init__(self, config: WecomConfig, bus: MessageBus, groq_api_key: str = ""): + def __init__(self, config: WecomConfig, bus: MessageBus): super().__init__(config, bus) self.config: WecomConfig = config - self.groq_api_key = groq_api_key self._client: Any = None self._processed_message_ids: OrderedDict[str, None] = OrderedDict() self._loop: asyncio.AbstractEventLoop | None = None @@ -50,7 +50,7 @@ class WecomChannel(BaseChannel): async def start(self) -> None: """Start the WeCom bot with WebSocket long connection.""" if not WECOM_AVAILABLE: - logger.error("WeCom SDK not installed. Run: pip install wecom-aibot-sdk-python") + logger.error("WeCom SDK not installed. 
Run: pip install nanobot-ai[wecom]") return if not self.config.bot_id or not self.config.secret: @@ -213,7 +213,6 @@ class WecomChannel(BaseChannel): if file_url and aes_key: file_path = await self._download_and_save_media(file_url, aes_key, "image") if file_path: - import os filename = os.path.basename(file_path) content_parts.append(f"[image: {filename}]\n[Image: source: {file_path}]") else: @@ -308,6 +307,7 @@ class WecomChannel(BaseChannel): media_dir = get_media_dir("wecom") if not filename: filename = fname or f"{media_type}_{hash(file_url) % 100000}" + filename = os.path.basename(filename) file_path = media_dir / filename file_path.write_bytes(data) diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index b772d18..bb0d286 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -208,7 +208,7 @@ class WecomConfig(Base): secret: str = "" # Bot Secret from WeCom AI Bot platform allow_from: list[str] = Field(default_factory=list) # Allowed user IDs welcome_message: str = "" # Welcome message for enter_chat event - react_emoji: str = "eyes" # Emoji for message reactions + class ChannelsConfig(Base): """Configuration for chat channels.""" diff --git a/pyproject.toml b/pyproject.toml index 0582be6..9868513 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,11 +44,13 @@ dependencies = [ "json-repair>=0.57.0,<1.0.0", "chardet>=3.0.2,<6.0.0", "openai>=2.8.0", - "wecom-aibot-sdk-python>=0.1.2", "tiktoken>=0.12.0,<1.0.0", ] [project.optional-dependencies] +wecom = [ + "wecom-aibot-sdk-python @ git+https://github.com/chengyongru/wecom_aibot_sdk.git@v0.1.2", +] matrix = [ "matrix-nio[e2e]>=0.25.2", "mistune>=3.0.0,<4.0.0", From 7ceddcded643432f0f4b78aa22de7ad107b61f3a Mon Sep 17 00:00:00 2001 From: Re-bin Date: Wed, 11 Mar 2026 08:04:14 +0000 Subject: [PATCH 21/28] fix(wecom): await async disconnect, add SDK attribution in README --- README.md | 7 +++---- nanobot/channels/wecom.py | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) 
diff --git a/README.md b/README.md index 6e8211e..2a49214 100644 --- a/README.md +++ b/README.md @@ -681,7 +681,9 @@ nanobot gateway
Wecom (企业微信) -Uses **WebSocket** long connection — no public IP required. +> Here we use [wecom-aibot-sdk-python](https://github.com/chengyongru/wecom_aibot_sdk) (community Python version of the official [@wecom/aibot-node-sdk](https://www.npmjs.com/package/@wecom/aibot-node-sdk)). +> +> Uses **WebSocket** long connection — no public IP required. **1. Install the optional dependency** @@ -714,9 +716,6 @@ Go to the WeCom admin console → Intelligent Robot → Create Robot → select nanobot gateway ``` -> [!TIP] -> WeCom uses WebSocket to receive messages — no webhook or public IP needed! -
## 🌐 Agent Social Network diff --git a/nanobot/channels/wecom.py b/nanobot/channels/wecom.py index 1c44451..72be9e2 100644 --- a/nanobot/channels/wecom.py +++ b/nanobot/channels/wecom.py @@ -98,7 +98,7 @@ class WecomChannel(BaseChannel): """Stop the WeCom bot.""" self._running = False if self._client: - self._client.disconnect() + await self._client.disconnect() logger.info("WeCom bot stopped") async def _on_connected(self, frame: Any) -> None: From 486df1ddbd8db4fb248115851254b8fbb03c09f0 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Wed, 11 Mar 2026 08:10:38 +0000 Subject: [PATCH 22/28] docs: update table of contents in README --- README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/README.md b/README.md index 2a49214..ed4e8e7 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,25 @@ 📏 Real-time line count: run `bash core_agent_lines.sh` to verify anytime. +## Table of Contents + +- [News](#-news) +- [Key Features](#key-features-of-nanobot) +- [Architecture](#️-architecture) +- [Features](#-features) +- [Install](#-install) +- [Quick Start](#-quick-start) +- [Chat Apps](#-chat-apps) +- [Agent Social Network](#-agent-social-network) +- [Configuration](#️-configuration) +- [Multiple Instances](#-multiple-instances) +- [CLI Reference](#-cli-reference) +- [Docker](#-docker) +- [Linux Service](#-linux-service) +- [Project Structure](#-project-structure) +- [Contribute & Roadmap](#-contribute--roadmap) +- [Star History](#-star-history) + ## 📢 News - **2026-03-08** 🚀 Released **v0.1.4.post4** — a reliability-packed release with safer defaults, better multi-instance support, sturdier MCP, and major channel and provider improvements. Please see [release notes](https://github.com/HKUDS/nanobot/releases/tag/v0.1.4.post4) for details. 
From ec87946c04ccf4d453ffea02febcb747139c415c Mon Sep 17 00:00:00 2001 From: Re-bin Date: Wed, 11 Mar 2026 08:11:28 +0000 Subject: [PATCH 23/28] docs: update table of contents position --- README.md | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index ed4e8e7..f0e1a6b 100644 --- a/README.md +++ b/README.md @@ -18,25 +18,6 @@ 📏 Real-time line count: run `bash core_agent_lines.sh` to verify anytime. -## Table of Contents - -- [News](#-news) -- [Key Features](#key-features-of-nanobot) -- [Architecture](#️-architecture) -- [Features](#-features) -- [Install](#-install) -- [Quick Start](#-quick-start) -- [Chat Apps](#-chat-apps) -- [Agent Social Network](#-agent-social-network) -- [Configuration](#️-configuration) -- [Multiple Instances](#-multiple-instances) -- [CLI Reference](#-cli-reference) -- [Docker](#-docker) -- [Linux Service](#-linux-service) -- [Project Structure](#-project-structure) -- [Contribute & Roadmap](#-contribute--roadmap) -- [Star History](#-star-history) - ## 📢 News - **2026-03-08** 🚀 Released **v0.1.4.post4** — a reliability-packed release with safer defaults, better multi-instance support, sturdier MCP, and major channel and provider improvements. Please see [release notes](https://github.com/HKUDS/nanobot/releases/tag/v0.1.4.post4) for details. @@ -97,6 +78,25 @@ nanobot architecture

+## Table of Contents + +- [News](#-news) +- [Key Features](#key-features-of-nanobot) +- [Architecture](#️-architecture) +- [Features](#-features) +- [Install](#-install) +- [Quick Start](#-quick-start) +- [Chat Apps](#-chat-apps) +- [Agent Social Network](#-agent-social-network) +- [Configuration](#️-configuration) +- [Multiple Instances](#-multiple-instances) +- [CLI Reference](#-cli-reference) +- [Docker](#-docker) +- [Linux Service](#-linux-service) +- [Project Structure](#-project-structure) +- [Contribute & Roadmap](#-contribute--roadmap) +- [Star History](#-star-history) + ## ✨ Features From 4478838424496b6c233c5402d7fa205f33c683e6 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Wed, 11 Mar 2026 08:42:12 +0000 Subject: [PATCH 24/28] fix(pr-1863): complete Ollama provider routing and README docs --- README.md | 32 ++++++++++++++++++++++++++++++++ nano.2091796.save | 2 ++ nano.2095802.save | 2 ++ nanobot/config/schema.py | 13 +++++++++++-- tests/test_commands.py | 29 +++++++++++++++++++++++++++++ 5 files changed, 76 insertions(+), 2 deletions(-) create mode 100644 nano.2091796.save create mode 100644 nano.2095802.save diff --git a/README.md b/README.md index f0e1a6b..8dba2d7 100644 --- a/README.md +++ b/README.md @@ -778,6 +778,7 @@ Config file: `~/.nanobot/config.json` | `dashscope` | LLM (Qwen) | [dashscope.console.aliyun.com](https://dashscope.console.aliyun.com) | | `moonshot` | LLM (Moonshot/Kimi) | [platform.moonshot.cn](https://platform.moonshot.cn) | | `zhipu` | LLM (Zhipu GLM) | [open.bigmodel.cn](https://open.bigmodel.cn) | +| `ollama` | LLM (local, Ollama) | — | | `vllm` | LLM (local, any OpenAI-compatible server) | — | | `openai_codex` | LLM (Codex, OAuth) | `nanobot provider login openai-codex` | | `github_copilot` | LLM (GitHub Copilot, OAuth) | `nanobot provider login github-copilot` | @@ -843,6 +844,37 @@ Connects directly to any OpenAI-compatible endpoint — LM Studio, llama.cpp, To +
+Ollama (local) + +Run a local model with Ollama, then add to config: + +**1. Start Ollama** (example): +```bash +ollama run llama3.2 +``` + +**2. Add to config** (partial — merge into `~/.nanobot/config.json`): +```json +{ + "providers": { + "ollama": { + "apiBase": "http://localhost:11434" + } + }, + "agents": { + "defaults": { + "provider": "ollama", + "model": "llama3.2" + } + } +} +``` + +> `provider: "auto"` also works when `providers.ollama.apiBase` is configured, but setting `"provider": "ollama"` is the clearest option. + +
+
vLLM (local / OpenAI-compatible) diff --git a/nano.2091796.save b/nano.2091796.save new file mode 100644 index 0000000..6953168 --- /dev/null +++ b/nano.2091796.save @@ -0,0 +1,2 @@ +da activate base + diff --git a/nano.2095802.save b/nano.2095802.save new file mode 100644 index 0000000..6953168 --- /dev/null +++ b/nano.2095802.save @@ -0,0 +1,2 @@ +da activate base + diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index d2ef713..1b26dd7 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -395,6 +395,15 @@ class Config(BaseSettings): if spec.is_oauth or spec.is_local or p.api_key: return p, spec.name + # Fallback: configured local providers can route models without + # provider-specific keywords (for example plain "llama3.2" on Ollama). + for spec in PROVIDERS: + if not spec.is_local: + continue + p = getattr(self.providers, spec.name, None) + if p and p.api_base: + return p, spec.name + # Fallback: gateways first, then others (follows registry order) # OAuth providers are NOT valid fallbacks — they require explicit model selection for spec in PROVIDERS: @@ -421,7 +430,7 @@ class Config(BaseSettings): return p.api_key if p else None def get_api_base(self, model: str | None = None) -> str | None: - """Get API base URL for the given model. Applies default URLs for known gateways.""" + """Get API base URL for the given model. Applies default URLs for gateway/local providers.""" from nanobot.providers.registry import find_by_name p, name = self._match_provider(model) @@ -432,7 +441,7 @@ class Config(BaseSettings): # to avoid polluting the global litellm.api_base. 
if name: spec = find_by_name(name) - if spec and spec.is_gateway and spec.default_api_base: + if spec and (spec.is_gateway or spec.is_local) and spec.default_api_base: return spec.default_api_base return None diff --git a/tests/test_commands.py b/tests/test_commands.py index 1375a3a..583ef6f 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -114,6 +114,35 @@ def test_config_matches_openai_codex_with_hyphen_prefix(): assert config.get_provider_name() == "openai_codex" +def test_config_matches_explicit_ollama_prefix_without_api_key(): + config = Config() + config.agents.defaults.model = "ollama/llama3.2" + + assert config.get_provider_name() == "ollama" + assert config.get_api_base() == "http://localhost:11434" + + +def test_config_explicit_ollama_provider_uses_default_localhost_api_base(): + config = Config() + config.agents.defaults.provider = "ollama" + config.agents.defaults.model = "llama3.2" + + assert config.get_provider_name() == "ollama" + assert config.get_api_base() == "http://localhost:11434" + + +def test_config_auto_detects_ollama_from_local_api_base(): + config = Config.model_validate( + { + "agents": {"defaults": {"provider": "auto", "model": "llama3.2"}}, + "providers": {"ollama": {"apiBase": "http://localhost:11434"}}, + } + ) + + assert config.get_provider_name() == "ollama" + assert config.get_api_base() == "http://localhost:11434" + + def test_find_by_model_prefers_explicit_prefix_over_generic_codex_keyword(): spec = find_by_model("github-copilot/gpt-5.3-codex") From 89eff6f573d52af025ae9cb7e9db6ea8a0ad698f Mon Sep 17 00:00:00 2001 From: Re-bin Date: Wed, 11 Mar 2026 08:44:38 +0000 Subject: [PATCH 25/28] chore: remove stray nano backup files --- .gitignore | 1 + nano.2091796.save | 2 -- nano.2095802.save | 2 -- 3 files changed, 1 insertion(+), 4 deletions(-) delete mode 100644 nano.2091796.save delete mode 100644 nano.2095802.save diff --git a/.gitignore b/.gitignore index 374875a..c50cab8 100644 --- a/.gitignore +++ 
b/.gitignore @@ -20,4 +20,5 @@ __pycache__/ poetry.lock .pytest_cache/ botpy.log +nano.*.save diff --git a/nano.2091796.save b/nano.2091796.save deleted file mode 100644 index 6953168..0000000 --- a/nano.2091796.save +++ /dev/null @@ -1,2 +0,0 @@ -da activate base - diff --git a/nano.2095802.save b/nano.2095802.save deleted file mode 100644 index 6953168..0000000 --- a/nano.2095802.save +++ /dev/null @@ -1,2 +0,0 @@ -da activate base - From c72c2ce7e2b84fda1fd5933fc28d90137f936d03 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Wed, 11 Mar 2026 09:47:04 +0000 Subject: [PATCH 26/28] refactor: move generation settings to provider level, eliminate parameter passthrough --- nanobot/agent/loop.py | 15 --- nanobot/agent/memory.py | 22 +--- nanobot/agent/subagent.py | 9 -- nanobot/cli/commands.py | 57 +++++---- nanobot/providers/base.py | 38 +++++- tests/test_memory_consolidation_types.py | 23 ++++ tests/test_provider_retry.py | 35 +++++- tests/test_subagent_reasoning.py | 144 ----------------------- 8 files changed, 120 insertions(+), 223 deletions(-) delete mode 100644 tests/test_subagent_reasoning.py diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index edf1e8e..b1bfd2f 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -52,9 +52,6 @@ class AgentLoop: workspace: Path, model: str | None = None, max_iterations: int = 40, - temperature: float = 0.1, - max_tokens: int = 4096, - reasoning_effort: str | None = None, context_window_tokens: int = 65_536, brave_api_key: str | None = None, web_proxy: str | None = None, @@ -72,9 +69,6 @@ class AgentLoop: self.workspace = workspace self.model = model or provider.get_default_model() self.max_iterations = max_iterations - self.temperature = temperature - self.max_tokens = max_tokens - self.reasoning_effort = reasoning_effort self.context_window_tokens = context_window_tokens self.brave_api_key = brave_api_key self.web_proxy = web_proxy @@ -90,9 +84,6 @@ class AgentLoop: workspace=workspace, bus=bus, 
model=self.model, - temperature=self.temperature, - max_tokens=self.max_tokens, - reasoning_effort=reasoning_effort, brave_api_key=brave_api_key, web_proxy=web_proxy, exec_config=self.exec_config, @@ -114,9 +105,6 @@ class AgentLoop: context_window_tokens=context_window_tokens, build_messages=self.context.build_messages, get_tool_definitions=self.tools.get_definitions, - temperature=self.temperature, - max_tokens=self.max_tokens, - reasoning_effort=self.reasoning_effort, ) self._register_default_tools() @@ -205,9 +193,6 @@ class AgentLoop: messages=messages, tools=tool_defs, model=self.model, - temperature=self.temperature, - max_tokens=self.max_tokens, - reasoning_effort=self.reasoning_effort, ) if response.has_tool_calls: diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py index d79887b..59ba40e 100644 --- a/nanobot/agent/memory.py +++ b/nanobot/agent/memory.py @@ -57,7 +57,6 @@ def _normalize_save_memory_args(args: Any) -> dict[str, Any] | None: return args[0] if args and isinstance(args[0], dict) else None return args if isinstance(args, dict) else None - class MemoryStore: """Two-layer memory: MEMORY.md (long-term facts) + HISTORY.md (grep-searchable log).""" @@ -99,9 +98,6 @@ class MemoryStore: messages: list[dict], provider: LLMProvider, model: str, - temperature: float | None = None, - max_tokens: int | None = None, - reasoning_effort: str | None = None, ) -> bool: """Consolidate the provided message chunk into MEMORY.md + HISTORY.md.""" if not messages: @@ -124,9 +120,6 @@ class MemoryStore: ], tools=_SAVE_MEMORY_TOOL, model=model, - temperature=temperature, - max_tokens=max_tokens, - reasoning_effort=reasoning_effort, ) if not response.has_tool_calls: @@ -166,9 +159,6 @@ class MemoryConsolidator: context_window_tokens: int, build_messages: Callable[..., list[dict[str, Any]]], get_tool_definitions: Callable[[], list[dict[str, Any]]], - temperature: float | None = None, - max_tokens: int | None = None, - reasoning_effort: str | None = None, ): 
self.store = MemoryStore(workspace) self.provider = provider @@ -177,9 +167,6 @@ class MemoryConsolidator: self.context_window_tokens = context_window_tokens self._build_messages = build_messages self._get_tool_definitions = get_tool_definitions - self._temperature = temperature - self._max_tokens = max_tokens - self._reasoning_effort = reasoning_effort self._locks: weakref.WeakValueDictionary[str, asyncio.Lock] = weakref.WeakValueDictionary() def get_lock(self, session_key: str) -> asyncio.Lock: @@ -188,14 +175,7 @@ class MemoryConsolidator: async def consolidate_messages(self, messages: list[dict[str, object]]) -> bool: """Archive a selected message chunk into persistent memory.""" - return await self.store.consolidate( - messages, - self.provider, - self.model, - temperature=self._temperature, - max_tokens=self._max_tokens, - reasoning_effort=self._reasoning_effort, - ) + return await self.store.consolidate(messages, self.provider, self.model) def pick_consolidation_boundary( self, diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py index eff0b4f..21b8b32 100644 --- a/nanobot/agent/subagent.py +++ b/nanobot/agent/subagent.py @@ -28,9 +28,6 @@ class SubagentManager: workspace: Path, bus: MessageBus, model: str | None = None, - temperature: float = 0.7, - max_tokens: int = 4096, - reasoning_effort: str | None = None, brave_api_key: str | None = None, web_proxy: str | None = None, exec_config: "ExecToolConfig | None" = None, @@ -41,9 +38,6 @@ class SubagentManager: self.workspace = workspace self.bus = bus self.model = model or provider.get_default_model() - self.temperature = temperature - self.max_tokens = max_tokens - self.reasoning_effort = reasoning_effort self.brave_api_key = brave_api_key self.web_proxy = web_proxy self.exec_config = exec_config or ExecToolConfig() @@ -128,9 +122,6 @@ class SubagentManager: messages=messages, tools=tools.get_definitions(), model=self.model, - temperature=self.temperature, - max_tokens=self.max_tokens, - 
reasoning_effort=self.reasoning_effort, ) if response.has_tool_calls: diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 8387b28..f5ac859 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -215,6 +215,7 @@ def onboard(): def _make_provider(config: Config): """Create the appropriate LLM provider from config.""" + from nanobot.providers.base import GenerationSettings from nanobot.providers.openai_codex_provider import OpenAICodexProvider from nanobot.providers.azure_openai_provider import AzureOpenAIProvider @@ -224,46 +225,50 @@ def _make_provider(config: Config): # OpenAI Codex (OAuth) if provider_name == "openai_codex" or model.startswith("openai-codex/"): - return OpenAICodexProvider(default_model=model) - + provider = OpenAICodexProvider(default_model=model) # Custom: direct OpenAI-compatible endpoint, bypasses LiteLLM - from nanobot.providers.custom_provider import CustomProvider - if provider_name == "custom": - return CustomProvider( + elif provider_name == "custom": + from nanobot.providers.custom_provider import CustomProvider + provider = CustomProvider( api_key=p.api_key if p else "no-key", api_base=config.get_api_base(model) or "http://localhost:8000/v1", default_model=model, ) - # Azure OpenAI: direct Azure OpenAI endpoint with deployment name - if provider_name == "azure_openai": + elif provider_name == "azure_openai": if not p or not p.api_key or not p.api_base: console.print("[red]Error: Azure OpenAI requires api_key and api_base.[/red]") console.print("Set them in ~/.nanobot/config.json under providers.azure_openai section") console.print("Use the model field to specify the deployment name.") raise typer.Exit(1) - - return AzureOpenAIProvider( + provider = AzureOpenAIProvider( api_key=p.api_key, api_base=p.api_base, default_model=model, ) + else: + from nanobot.providers.litellm_provider import LiteLLMProvider + from nanobot.providers.registry import find_by_name + spec = find_by_name(provider_name) + if not 
model.startswith("bedrock/") and not (p and p.api_key) and not (spec and (spec.is_oauth or spec.is_local)): + console.print("[red]Error: No API key configured.[/red]") + console.print("Set one in ~/.nanobot/config.json under providers section") + raise typer.Exit(1) + provider = LiteLLMProvider( + api_key=p.api_key if p else None, + api_base=config.get_api_base(model), + default_model=model, + extra_headers=p.extra_headers if p else None, + provider_name=provider_name, + ) - from nanobot.providers.litellm_provider import LiteLLMProvider - from nanobot.providers.registry import find_by_name - spec = find_by_name(provider_name) - if not model.startswith("bedrock/") and not (p and p.api_key) and not (spec and (spec.is_oauth or spec.is_local)): - console.print("[red]Error: No API key configured.[/red]") - console.print("Set one in ~/.nanobot/config.json under providers section") - raise typer.Exit(1) - - return LiteLLMProvider( - api_key=p.api_key if p else None, - api_base=config.get_api_base(model), - default_model=model, - extra_headers=p.extra_headers if p else None, - provider_name=provider_name, + defaults = config.agents.defaults + provider.generation = GenerationSettings( + temperature=defaults.temperature, + max_tokens=defaults.max_tokens, + reasoning_effort=defaults.reasoning_effort, ) + return provider def _load_runtime_config(config: str | None = None, workspace: str | None = None) -> Config: @@ -341,10 +346,7 @@ def gateway( provider=provider, workspace=config.workspace_path, model=config.agents.defaults.model, - temperature=config.agents.defaults.temperature, - max_tokens=config.agents.defaults.max_tokens, max_iterations=config.agents.defaults.max_tool_iterations, - reasoning_effort=config.agents.defaults.reasoning_effort, context_window_tokens=config.agents.defaults.context_window_tokens, brave_api_key=config.tools.web.search.api_key or None, web_proxy=config.tools.web.proxy or None, @@ -527,10 +529,7 @@ def agent( provider=provider, 
workspace=config.workspace_path, model=config.agents.defaults.model, - temperature=config.agents.defaults.temperature, - max_tokens=config.agents.defaults.max_tokens, max_iterations=config.agents.defaults.max_tool_iterations, - reasoning_effort=config.agents.defaults.reasoning_effort, context_window_tokens=config.agents.defaults.context_window_tokens, brave_api_key=config.tools.web.search.api_key or None, web_proxy=config.tools.web.proxy or None, diff --git a/nanobot/providers/base.py b/nanobot/providers/base.py index a3b6c47..d4ea60d 100644 --- a/nanobot/providers/base.py +++ b/nanobot/providers/base.py @@ -32,6 +32,21 @@ class LLMResponse: return len(self.tool_calls) > 0 +@dataclass(frozen=True) +class GenerationSettings: + """Default generation parameters for LLM calls. + + Stored on the provider so every call site inherits the same defaults + without having to pass temperature / max_tokens / reasoning_effort + through every layer. Individual call sites can still override by + passing explicit keyword arguments to chat() / chat_with_retry(). + """ + + temperature: float = 0.7 + max_tokens: int = 4096 + reasoning_effort: str | None = None + + class LLMProvider(ABC): """ Abstract base class for LLM providers. 
@@ -56,9 +71,12 @@ class LLMProvider(ABC): "temporarily unavailable", ) + _SENTINEL = object() + def __init__(self, api_key: str | None = None, api_base: str | None = None): self.api_key = api_key self.api_base = api_base + self.generation: GenerationSettings = GenerationSettings() @staticmethod def _sanitize_empty_content(messages: list[dict[str, Any]]) -> list[dict[str, Any]]: @@ -155,11 +173,23 @@ class LLMProvider(ABC): messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None, model: str | None = None, - max_tokens: int = 4096, - temperature: float = 0.7, - reasoning_effort: str | None = None, + max_tokens: object = _SENTINEL, + temperature: object = _SENTINEL, + reasoning_effort: object = _SENTINEL, ) -> LLMResponse: - """Call chat() with retry on transient provider failures.""" + """Call chat() with retry on transient provider failures. + + Parameters default to ``self.generation`` when not explicitly passed, + so callers no longer need to thread temperature / max_tokens / + reasoning_effort through every layer. 
+ """ + if max_tokens is self._SENTINEL: + max_tokens = self.generation.max_tokens + if temperature is self._SENTINEL: + temperature = self.generation.temperature + if reasoning_effort is self._SENTINEL: + reasoning_effort = self.generation.reasoning_effort + for attempt, delay in enumerate(self._CHAT_RETRY_DELAYS, start=1): try: response = await self.chat( diff --git a/tests/test_memory_consolidation_types.py b/tests/test_memory_consolidation_types.py index 0263f01..69be858 100644 --- a/tests/test_memory_consolidation_types.py +++ b/tests/test_memory_consolidation_types.py @@ -265,3 +265,26 @@ class TestMemoryConsolidationTypeHandling: assert result is True assert provider.calls == 2 assert delays == [1] + + @pytest.mark.asyncio + async def test_consolidation_delegates_to_provider_defaults(self, tmp_path: Path) -> None: + """Consolidation no longer passes generation params — the provider owns them.""" + store = MemoryStore(tmp_path) + provider = AsyncMock() + provider.chat_with_retry = AsyncMock( + return_value=_make_tool_response( + history_entry="[2026-01-01] User discussed testing.", + memory_update="# Memory\nUser likes testing.", + ) + ) + messages = _make_messages(message_count=60) + + result = await store.consolidate(messages, provider, "test-model") + + assert result is True + provider.chat_with_retry.assert_awaited_once() + _, kwargs = provider.chat_with_retry.await_args + assert kwargs["model"] == "test-model" + assert "temperature" not in kwargs + assert "max_tokens" not in kwargs + assert "reasoning_effort" not in kwargs diff --git a/tests/test_provider_retry.py b/tests/test_provider_retry.py index 751ecc3..2420399 100644 --- a/tests/test_provider_retry.py +++ b/tests/test_provider_retry.py @@ -2,7 +2,7 @@ import asyncio import pytest -from nanobot.providers.base import LLMProvider, LLMResponse +from nanobot.providers.base import GenerationSettings, LLMProvider, LLMResponse class ScriptedProvider(LLMProvider): @@ -10,9 +10,11 @@ class 
ScriptedProvider(LLMProvider): super().__init__() self._responses = list(responses) self.calls = 0 + self.last_kwargs: dict = {} async def chat(self, *args, **kwargs) -> LLMResponse: self.calls += 1 + self.last_kwargs = kwargs response = self._responses.pop(0) if isinstance(response, BaseException): raise response @@ -90,3 +92,34 @@ async def test_chat_with_retry_preserves_cancelled_error() -> None: with pytest.raises(asyncio.CancelledError): await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}]) + + +@pytest.mark.asyncio +async def test_chat_with_retry_uses_provider_generation_defaults() -> None: + """When callers omit generation params, provider.generation defaults are used.""" + provider = ScriptedProvider([LLMResponse(content="ok")]) + provider.generation = GenerationSettings(temperature=0.2, max_tokens=321, reasoning_effort="high") + + await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}]) + + assert provider.last_kwargs["temperature"] == 0.2 + assert provider.last_kwargs["max_tokens"] == 321 + assert provider.last_kwargs["reasoning_effort"] == "high" + + +@pytest.mark.asyncio +async def test_chat_with_retry_explicit_override_beats_defaults() -> None: + """Explicit kwargs should override provider.generation defaults.""" + provider = ScriptedProvider([LLMResponse(content="ok")]) + provider.generation = GenerationSettings(temperature=0.2, max_tokens=321, reasoning_effort="high") + + await provider.chat_with_retry( + messages=[{"role": "user", "content": "hello"}], + temperature=0.9, + max_tokens=9999, + reasoning_effort="low", + ) + + assert provider.last_kwargs["temperature"] == 0.9 + assert provider.last_kwargs["max_tokens"] == 9999 + assert provider.last_kwargs["reasoning_effort"] == "low" diff --git a/tests/test_subagent_reasoning.py b/tests/test_subagent_reasoning.py deleted file mode 100644 index 5e70506..0000000 --- a/tests/test_subagent_reasoning.py +++ /dev/null @@ -1,144 +0,0 @@ -"""Tests for subagent 
reasoning_content and thinking_blocks handling.""" - -from __future__ import annotations - -import asyncio -from pathlib import Path -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - - -class TestSubagentReasoningContent: - """Test that subagent properly handles reasoning_content and thinking_blocks.""" - - @pytest.mark.asyncio - async def test_subagent_message_includes_reasoning_content(self): - """Verify reasoning_content is included in assistant messages with tool calls. - - This is the fix for issue #1834: Spawn/subagent tool fails with - Deepseek Reasoner due to missing reasoning_content field. - """ - from nanobot.agent.subagent import SubagentManager - from nanobot.bus.queue import MessageBus - from nanobot.providers.base import LLMResponse, ToolCallRequest - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "deepseek-reasoner" - - # Create a real Path object for workspace - workspace = Path("/tmp/test_workspace") - workspace.mkdir(parents=True, exist_ok=True) - - # Capture messages that are sent to the provider - captured_messages = [] - - async def mock_chat(*args, **kwargs): - captured_messages.append(kwargs.get("messages", [])) - # Return response with tool calls and reasoning_content - tool_call = ToolCallRequest( - id="test-1", - name="read_file", - arguments={"path": "/test.txt"}, - ) - return LLMResponse( - content="", - tool_calls=[tool_call], - reasoning_content="I need to read this file first", - ) - - provider.chat_with_retry = AsyncMock(side_effect=mock_chat) - - mgr = SubagentManager(provider=provider, workspace=workspace, bus=bus) - - # Mock the tools registry - with patch("nanobot.agent.subagent.ToolRegistry") as MockToolRegistry: - mock_registry = MagicMock() - mock_registry.get_definitions.return_value = [] - mock_registry.execute = AsyncMock(return_value="file content") - MockToolRegistry.return_value = mock_registry - - result = await mgr.spawn( - task="Read a file", - 
label="test", - origin_channel="cli", - origin_chat_id="direct", - session_key="cli:direct", - ) - - # Wait for the task to complete - await asyncio.sleep(0.5) - - # Check the captured messages - assert len(captured_messages) >= 1 - # Find the assistant message with tool_calls - found = False - for msg_list in captured_messages: - for msg in msg_list: - if msg.get("role") == "assistant" and msg.get("tool_calls"): - assert "reasoning_content" in msg, "reasoning_content should be in assistant message with tool_calls" - assert msg["reasoning_content"] == "I need to read this file first" - found = True - assert found, "Should have found an assistant message with tool_calls" - - @pytest.mark.asyncio - async def test_subagent_message_includes_thinking_blocks(self): - """Verify thinking_blocks is included in assistant messages with tool calls.""" - from nanobot.agent.subagent import SubagentManager - from nanobot.bus.queue import MessageBus - from nanobot.providers.base import LLMResponse, ToolCallRequest - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "claude-sonnet" - - workspace = Path("/tmp/test_workspace2") - workspace.mkdir(parents=True, exist_ok=True) - - captured_messages = [] - - async def mock_chat(*args, **kwargs): - captured_messages.append(kwargs.get("messages", [])) - tool_call = ToolCallRequest( - id="test-2", - name="read_file", - arguments={"path": "/test.txt"}, - ) - return LLMResponse( - content="", - tool_calls=[tool_call], - thinking_blocks=[ - {"signature": "sig1", "thought": "thinking step 1"}, - {"signature": "sig2", "thought": "thinking step 2"}, - ], - ) - - provider.chat_with_retry = AsyncMock(side_effect=mock_chat) - - mgr = SubagentManager(provider=provider, workspace=workspace, bus=bus) - - with patch("nanobot.agent.subagent.ToolRegistry") as MockToolRegistry: - mock_registry = MagicMock() - mock_registry.get_definitions.return_value = [] - mock_registry.execute = AsyncMock(return_value="file 
content") - MockToolRegistry.return_value = mock_registry - - result = await mgr.spawn( - task="Read a file", - label="test", - origin_channel="cli", - origin_chat_id="direct", - ) - - await asyncio.sleep(0.5) - - # Check the captured messages - found = False - for msg_list in captured_messages: - for msg in msg_list: - if msg.get("role") == "assistant" and msg.get("tool_calls"): - assert "thinking_blocks" in msg, "thinking_blocks should be in assistant message with tool_calls" - assert len(msg["thinking_blocks"]) == 2 - found = True - assert found, "Should have found an assistant message with tool_calls" From 2c5226550d0083ceb41cf4042925682753e2adb5 Mon Sep 17 00:00:00 2001 From: for13to1 Date: Wed, 11 Mar 2026 20:35:04 +0800 Subject: [PATCH 27/28] feat: allow direct references in hatch metadata for wecom dep --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 9868513..a52c0c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,9 @@ nanobot = "nanobot.cli.commands:app" requires = ["hatchling"] build-backend = "hatchling.build" +[tool.hatch.metadata] +allow-direct-references = true + [tool.hatch.build.targets.wheel] packages = ["nanobot"] From 254cfd48babf74cca4bbe7baedda7b540b897cbb Mon Sep 17 00:00:00 2001 From: Re-bin Date: Wed, 11 Mar 2026 14:23:19 +0000 Subject: [PATCH 28/28] refactor: auto-discover channels via pkgutil, eliminate hardcoded registry --- nanobot/channels/base.py | 18 +++++ nanobot/channels/dingtalk.py | 1 + nanobot/channels/discord.py | 1 + nanobot/channels/email.py | 1 + nanobot/channels/feishu.py | 18 ++--- nanobot/channels/manager.py | 140 ++++------------------------------- nanobot/channels/matrix.py | 18 +++-- nanobot/channels/mochat.py | 1 + nanobot/channels/qq.py | 1 + nanobot/channels/registry.py | 35 +++++++++ nanobot/channels/slack.py | 1 + nanobot/channels/telegram.py | 16 +--- nanobot/channels/wecom.py | 1 + nanobot/channels/whatsapp.py | 1 + nanobot/cli/commands.py 
| 91 ++++------------------- 15 files changed, 111 insertions(+), 233 deletions(-) create mode 100644 nanobot/channels/registry.py diff --git a/nanobot/channels/base.py b/nanobot/channels/base.py index dc53ba4..74c540a 100644 --- a/nanobot/channels/base.py +++ b/nanobot/channels/base.py @@ -1,6 +1,9 @@ """Base channel interface for chat platforms.""" +from __future__ import annotations + from abc import ABC, abstractmethod +from pathlib import Path from typing import Any from loguru import logger @@ -18,6 +21,8 @@ class BaseChannel(ABC): """ name: str = "base" + display_name: str = "Base" + transcription_api_key: str = "" def __init__(self, config: Any, bus: MessageBus): """ @@ -31,6 +36,19 @@ class BaseChannel(ABC): self.bus = bus self._running = False + async def transcribe_audio(self, file_path: str | Path) -> str: + """Transcribe an audio file via Groq Whisper. Returns empty string on failure.""" + if not self.transcription_api_key: + return "" + try: + from nanobot.providers.transcription import GroqTranscriptionProvider + + provider = GroqTranscriptionProvider(api_key=self.transcription_api_key) + return await provider.transcribe(file_path) + except Exception as e: + logger.warning("{}: audio transcription failed: {}", self.name, e) + return "" + @abstractmethod async def start(self) -> None: """ diff --git a/nanobot/channels/dingtalk.py b/nanobot/channels/dingtalk.py index cdcba57..4626d95 100644 --- a/nanobot/channels/dingtalk.py +++ b/nanobot/channels/dingtalk.py @@ -114,6 +114,7 @@ class DingTalkChannel(BaseChannel): """ name = "dingtalk" + display_name = "DingTalk" _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp"} _AUDIO_EXTS = {".amr", ".mp3", ".wav", ".ogg", ".m4a", ".aac"} _VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm"} diff --git a/nanobot/channels/discord.py b/nanobot/channels/discord.py index 2ee4f77..afa20c9 100644 --- a/nanobot/channels/discord.py +++ b/nanobot/channels/discord.py @@ -25,6 +25,7 @@ class 
DiscordChannel(BaseChannel): """Discord channel using Gateway websocket.""" name = "discord" + display_name = "Discord" def __init__(self, config: DiscordConfig, bus: MessageBus): super().__init__(config, bus) diff --git a/nanobot/channels/email.py b/nanobot/channels/email.py index 16771fb..46c2103 100644 --- a/nanobot/channels/email.py +++ b/nanobot/channels/email.py @@ -35,6 +35,7 @@ class EmailChannel(BaseChannel): """ name = "email" + display_name = "Email" _IMAP_MONTHS = ( "Jan", "Feb", diff --git a/nanobot/channels/feishu.py b/nanobot/channels/feishu.py index 0409c32..160b9b4 100644 --- a/nanobot/channels/feishu.py +++ b/nanobot/channels/feishu.py @@ -244,11 +244,11 @@ class FeishuChannel(BaseChannel): """ name = "feishu" + display_name = "Feishu" - def __init__(self, config: FeishuConfig, bus: MessageBus, groq_api_key: str = ""): + def __init__(self, config: FeishuConfig, bus: MessageBus): super().__init__(config, bus) self.config: FeishuConfig = config - self.groq_api_key = groq_api_key self._client: Any = None self._ws_client: Any = None self._ws_thread: threading.Thread | None = None @@ -928,16 +928,10 @@ class FeishuChannel(BaseChannel): if file_path: media_paths.append(file_path) - # Transcribe audio using Groq Whisper - if msg_type == "audio" and file_path and self.groq_api_key: - try: - from nanobot.providers.transcription import GroqTranscriptionProvider - transcriber = GroqTranscriptionProvider(api_key=self.groq_api_key) - transcription = await transcriber.transcribe(file_path) - if transcription: - content_text = f"[transcription: {transcription}]" - except Exception as e: - logger.warning("Failed to transcribe audio: {}", e) + if msg_type == "audio" and file_path: + transcription = await self.transcribe_audio(file_path) + if transcription: + content_text = f"[transcription: {transcription}]" content_parts.append(content_text) diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py index 2c5cd3f..8288ad0 100644 --- 
a/nanobot/channels/manager.py +++ b/nanobot/channels/manager.py @@ -31,135 +31,23 @@ class ChannelManager: self._init_channels() def _init_channels(self) -> None: - """Initialize channels based on config.""" + """Initialize channels discovered via pkgutil scan.""" + from nanobot.channels.registry import discover_channel_names, load_channel_class - # Telegram channel - if self.config.channels.telegram.enabled: + groq_key = self.config.providers.groq.api_key + + for modname in discover_channel_names(): + section = getattr(self.config.channels, modname, None) + if not section or not getattr(section, "enabled", False): + continue try: - from nanobot.channels.telegram import TelegramChannel - self.channels["telegram"] = TelegramChannel( - self.config.channels.telegram, - self.bus, - groq_api_key=self.config.providers.groq.api_key, - ) - logger.info("Telegram channel enabled") + cls = load_channel_class(modname) + channel = cls(section, self.bus) + channel.transcription_api_key = groq_key + self.channels[modname] = channel + logger.info("{} channel enabled", cls.display_name) except ImportError as e: - logger.warning("Telegram channel not available: {}", e) - - # WhatsApp channel - if self.config.channels.whatsapp.enabled: - try: - from nanobot.channels.whatsapp import WhatsAppChannel - self.channels["whatsapp"] = WhatsAppChannel( - self.config.channels.whatsapp, self.bus - ) - logger.info("WhatsApp channel enabled") - except ImportError as e: - logger.warning("WhatsApp channel not available: {}", e) - - # Discord channel - if self.config.channels.discord.enabled: - try: - from nanobot.channels.discord import DiscordChannel - self.channels["discord"] = DiscordChannel( - self.config.channels.discord, self.bus - ) - logger.info("Discord channel enabled") - except ImportError as e: - logger.warning("Discord channel not available: {}", e) - - # Feishu channel - if self.config.channels.feishu.enabled: - try: - from nanobot.channels.feishu import FeishuChannel - 
self.channels["feishu"] = FeishuChannel( - self.config.channels.feishu, self.bus, - groq_api_key=self.config.providers.groq.api_key, - ) - logger.info("Feishu channel enabled") - except ImportError as e: - logger.warning("Feishu channel not available: {}", e) - - # Mochat channel - if self.config.channels.mochat.enabled: - try: - from nanobot.channels.mochat import MochatChannel - - self.channels["mochat"] = MochatChannel( - self.config.channels.mochat, self.bus - ) - logger.info("Mochat channel enabled") - except ImportError as e: - logger.warning("Mochat channel not available: {}", e) - - # DingTalk channel - if self.config.channels.dingtalk.enabled: - try: - from nanobot.channels.dingtalk import DingTalkChannel - self.channels["dingtalk"] = DingTalkChannel( - self.config.channels.dingtalk, self.bus - ) - logger.info("DingTalk channel enabled") - except ImportError as e: - logger.warning("DingTalk channel not available: {}", e) - - # Email channel - if self.config.channels.email.enabled: - try: - from nanobot.channels.email import EmailChannel - self.channels["email"] = EmailChannel( - self.config.channels.email, self.bus - ) - logger.info("Email channel enabled") - except ImportError as e: - logger.warning("Email channel not available: {}", e) - - # Slack channel - if self.config.channels.slack.enabled: - try: - from nanobot.channels.slack import SlackChannel - self.channels["slack"] = SlackChannel( - self.config.channels.slack, self.bus - ) - logger.info("Slack channel enabled") - except ImportError as e: - logger.warning("Slack channel not available: {}", e) - - # QQ channel - if self.config.channels.qq.enabled: - try: - from nanobot.channels.qq import QQChannel - self.channels["qq"] = QQChannel( - self.config.channels.qq, - self.bus, - ) - logger.info("QQ channel enabled") - except ImportError as e: - logger.warning("QQ channel not available: {}", e) - - # Matrix channel - if self.config.channels.matrix.enabled: - try: - from nanobot.channels.matrix import 
MatrixChannel - self.channels["matrix"] = MatrixChannel( - self.config.channels.matrix, - self.bus, - ) - logger.info("Matrix channel enabled") - except ImportError as e: - logger.warning("Matrix channel not available: {}", e) - - # WeCom channel - if self.config.channels.wecom.enabled: - try: - from nanobot.channels.wecom import WecomChannel - self.channels["wecom"] = WecomChannel( - self.config.channels.wecom, - self.bus, - ) - logger.info("WeCom channel enabled") - except ImportError as e: - logger.warning("WeCom channel not available: {}", e) + logger.warning("{} channel not available: {}", modname, e) self._validate_allow_from() diff --git a/nanobot/channels/matrix.py b/nanobot/channels/matrix.py index 63cb0ca..0d7a908 100644 --- a/nanobot/channels/matrix.py +++ b/nanobot/channels/matrix.py @@ -37,6 +37,7 @@ except ImportError as e: ) from e from nanobot.bus.events import OutboundMessage +from nanobot.bus.queue import MessageBus from nanobot.channels.base import BaseChannel from nanobot.config.paths import get_data_dir, get_media_dir from nanobot.utils.helpers import safe_filename @@ -146,15 +147,15 @@ class MatrixChannel(BaseChannel): """Matrix (Element) channel using long-polling sync.""" name = "matrix" + display_name = "Matrix" - def __init__(self, config: Any, bus, *, restrict_to_workspace: bool = False, - workspace: Path | None = None): + def __init__(self, config: Any, bus: MessageBus): super().__init__(config, bus) self.client: AsyncClient | None = None self._sync_task: asyncio.Task | None = None self._typing_tasks: dict[str, asyncio.Task] = {} - self._restrict_to_workspace = restrict_to_workspace - self._workspace = workspace.expanduser().resolve() if workspace else None + self._restrict_to_workspace = False + self._workspace: Path | None = None self._server_upload_limit_bytes: int | None = None self._server_upload_limit_checked = False @@ -677,7 +678,14 @@ class MatrixChannel(BaseChannel): parts: list[str] = [] if isinstance(body := getattr(event, 
"body", None), str) and body.strip(): parts.append(body.strip()) - if marker: + + if attachment and attachment.get("type") == "audio": + transcription = await self.transcribe_audio(attachment["path"]) + if transcription: + parts.append(f"[transcription: {transcription}]") + else: + parts.append(marker) + elif marker: parts.append(marker) await self._start_typing_keepalive(room.room_id) diff --git a/nanobot/channels/mochat.py b/nanobot/channels/mochat.py index 09e31c3..52e246f 100644 --- a/nanobot/channels/mochat.py +++ b/nanobot/channels/mochat.py @@ -216,6 +216,7 @@ class MochatChannel(BaseChannel): """Mochat channel using socket.io with fallback polling workers.""" name = "mochat" + display_name = "Mochat" def __init__(self, config: MochatConfig, bus: MessageBus): super().__init__(config, bus) diff --git a/nanobot/channels/qq.py b/nanobot/channels/qq.py index 5ac06e3..792cc12 100644 --- a/nanobot/channels/qq.py +++ b/nanobot/channels/qq.py @@ -54,6 +54,7 @@ class QQChannel(BaseChannel): """QQ channel using botpy SDK with WebSocket connection.""" name = "qq" + display_name = "QQ" def __init__(self, config: QQConfig, bus: MessageBus): super().__init__(config, bus) diff --git a/nanobot/channels/registry.py b/nanobot/channels/registry.py new file mode 100644 index 0000000..eb30ff7 --- /dev/null +++ b/nanobot/channels/registry.py @@ -0,0 +1,35 @@ +"""Auto-discovery for channel modules — no hardcoded registry.""" + +from __future__ import annotations + +import importlib +import pkgutil +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from nanobot.channels.base import BaseChannel + +_INTERNAL = frozenset({"base", "manager", "registry"}) + + +def discover_channel_names() -> list[str]: + """Return all channel module names by scanning the package (zero imports).""" + import nanobot.channels as pkg + + return [ + name + for _, name, ispkg in pkgutil.iter_modules(pkg.__path__) + if name not in _INTERNAL and not ispkg + ] + + +def load_channel_class(module_name: str) 
-> type[BaseChannel]: + """Import *module_name* and return the first BaseChannel subclass found.""" + from nanobot.channels.base import BaseChannel as _Base + + mod = importlib.import_module(f"nanobot.channels.{module_name}") + for attr in dir(mod): + obj = getattr(mod, attr) + if isinstance(obj, type) and issubclass(obj, _Base) and obj is not _Base: + return obj + raise ImportError(f"No BaseChannel subclass in nanobot.channels.{module_name}") diff --git a/nanobot/channels/slack.py b/nanobot/channels/slack.py index 0384d8d..5819212 100644 --- a/nanobot/channels/slack.py +++ b/nanobot/channels/slack.py @@ -21,6 +21,7 @@ class SlackChannel(BaseChannel): """Slack channel using Socket Mode.""" name = "slack" + display_name = "Slack" def __init__(self, config: SlackConfig, bus: MessageBus): super().__init__(config, bus) diff --git a/nanobot/channels/telegram.py b/nanobot/channels/telegram.py index 5b294cc..9f93843 100644 --- a/nanobot/channels/telegram.py +++ b/nanobot/channels/telegram.py @@ -155,6 +155,7 @@ class TelegramChannel(BaseChannel): """ name = "telegram" + display_name = "Telegram" # Commands registered with Telegram's command menu BOT_COMMANDS = [ @@ -164,15 +165,9 @@ class TelegramChannel(BaseChannel): BotCommand("help", "Show available commands"), ] - def __init__( - self, - config: TelegramConfig, - bus: MessageBus, - groq_api_key: str = "", - ): + def __init__(self, config: TelegramConfig, bus: MessageBus): super().__init__(config, bus) self.config: TelegramConfig = config - self.groq_api_key = groq_api_key self._app: Application | None = None self._chat_ids: dict[str, int] = {} # Map sender_id to chat_id for replies self._typing_tasks: dict[str, asyncio.Task] = {} # chat_id -> typing loop task @@ -615,11 +610,8 @@ class TelegramChannel(BaseChannel): media_paths.append(str(file_path)) - # Handle voice transcription - if media_type == "voice" or media_type == "audio": - from nanobot.providers.transcription import GroqTranscriptionProvider - transcriber = 
GroqTranscriptionProvider(api_key=self.groq_api_key) - transcription = await transcriber.transcribe(file_path) + if media_type in ("voice", "audio"): + transcription = await self.transcribe_audio(file_path) if transcription: logger.info("Transcribed {}: {}...", media_type, transcription[:50]) content_parts.append(f"[transcription: {transcription}]") diff --git a/nanobot/channels/wecom.py b/nanobot/channels/wecom.py index 72be9e2..e0f4ae0 100644 --- a/nanobot/channels/wecom.py +++ b/nanobot/channels/wecom.py @@ -36,6 +36,7 @@ class WecomChannel(BaseChannel): """ name = "wecom" + display_name = "WeCom" def __init__(self, config: WecomConfig, bus: MessageBus): super().__init__(config, bus) diff --git a/nanobot/channels/whatsapp.py b/nanobot/channels/whatsapp.py index 1307716..7fffb80 100644 --- a/nanobot/channels/whatsapp.py +++ b/nanobot/channels/whatsapp.py @@ -22,6 +22,7 @@ class WhatsAppChannel(BaseChannel): """ name = "whatsapp" + display_name = "WhatsApp" def __init__(self, config: WhatsAppConfig, bus: MessageBus): super().__init__(config, bus) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index f5ac859..dd5e60c 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -683,6 +683,7 @@ app.add_typer(channels_app, name="channels") @channels_app.command("status") def channels_status(): """Show channel status.""" + from nanobot.channels.registry import discover_channel_names, load_channel_class from nanobot.config.loader import load_config config = load_config() @@ -690,85 +691,19 @@ def channels_status(): table = Table(title="Channel Status") table.add_column("Channel", style="cyan") table.add_column("Enabled", style="green") - table.add_column("Configuration", style="yellow") - # WhatsApp - wa = config.channels.whatsapp - table.add_row( - "WhatsApp", - "✓" if wa.enabled else "✗", - wa.bridge_url - ) - - dc = config.channels.discord - table.add_row( - "Discord", - "✓" if dc.enabled else "✗", - dc.gateway_url - ) - - # Feishu - fs = 
config.channels.feishu - fs_config = f"app_id: {fs.app_id[:10]}..." if fs.app_id else "[dim]not configured[/dim]" - table.add_row( - "Feishu", - "✓" if fs.enabled else "✗", - fs_config - ) - - # Mochat - mc = config.channels.mochat - mc_base = mc.base_url or "[dim]not configured[/dim]" - table.add_row( - "Mochat", - "✓" if mc.enabled else "✗", - mc_base - ) - - # Telegram - tg = config.channels.telegram - tg_config = f"token: {tg.token[:10]}..." if tg.token else "[dim]not configured[/dim]" - table.add_row( - "Telegram", - "✓" if tg.enabled else "✗", - tg_config - ) - - # Slack - slack = config.channels.slack - slack_config = "socket" if slack.app_token and slack.bot_token else "[dim]not configured[/dim]" - table.add_row( - "Slack", - "✓" if slack.enabled else "✗", - slack_config - ) - - # DingTalk - dt = config.channels.dingtalk - dt_config = f"client_id: {dt.client_id[:10]}..." if dt.client_id else "[dim]not configured[/dim]" - table.add_row( - "DingTalk", - "✓" if dt.enabled else "✗", - dt_config - ) - - # QQ - qq = config.channels.qq - qq_config = f"app_id: {qq.app_id[:10]}..." if qq.app_id else "[dim]not configured[/dim]" - table.add_row( - "QQ", - "✓" if qq.enabled else "✗", - qq_config - ) - - # Email - em = config.channels.email - em_config = em.imap_host if em.imap_host else "[dim]not configured[/dim]" - table.add_row( - "Email", - "✓" if em.enabled else "✗", - em_config - ) + for modname in sorted(discover_channel_names()): + section = getattr(config.channels, modname, None) + enabled = section and getattr(section, "enabled", False) + try: + cls = load_channel_class(modname) + display = cls.display_name + except ImportError: + display = modname.title() + table.add_row( + display, + "[green]\u2713[/green]" if enabled else "[dim]\u2717[/dim]", + ) console.print(table)