From 43329018f78826770f28674bf0c01643414f65fe Mon Sep 17 00:00:00 2001 From: Sense_wang <167664334+haosenwang1018@users.noreply.github.com> Date: Sun, 1 Mar 2026 16:50:02 +0000 Subject: [PATCH 01/29] fix(telegram): add group_policy config for Telegram groups Add `group_policy` field to `TelegramConfig` with "open" (default) and "mention" options, consistent with Slack and Matrix channel configs. --- nanobot/config/schema.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index 6b80c81..69cf826 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -31,6 +31,7 @@ class TelegramConfig(Base): allow_from: list[str] = Field(default_factory=list) # Allowed user IDs or usernames proxy: str | None = None # HTTP/SOCKS5 proxy URL, e.g. "http://127.0.0.1:7890" or "socks5://127.0.0.1:1080" reply_to_message: bool = False # If true, bot replies quote the original message + group_policy: Literal["open", "mention"] = "open" # "open" responds to all, "mention" only when @mentioned or replied to class FeishuConfig(Base): From 521217a7f50f0a8de46a88e101c8e9bf16abae27 Mon Sep 17 00:00:00 2001 From: Sense_wang <167664334+haosenwang1018@users.noreply.github.com> Date: Sun, 1 Mar 2026 16:50:36 +0000 Subject: [PATCH 02/29] fix(telegram): enforce group_policy in _on_message When `group_policy` is set to "mention", skip messages in group chats unless the bot is @mentioned or the message is a reply to the bot. 
Fixes #1380 --- nanobot/channels/telegram.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/nanobot/channels/telegram.py b/nanobot/channels/telegram.py index c290535..3702666 100644 --- a/nanobot/channels/telegram.py +++ b/nanobot/channels/telegram.py @@ -341,6 +341,23 @@ class TelegramChannel(BaseChannel): # Store chat_id for replies self._chat_ids[sender_id] = chat_id + # Enforce group_policy: in group chats with "mention" policy, + # only respond when the bot is @mentioned or the message is a reply to the bot. + is_group = message.chat.type != "private" + if is_group and getattr(self.config, "group_policy", "open") == "mention": + bot_username = (await self._app.bot.get_me()).username if self._app else None + mentioned = False + # Check if bot is @mentioned in text + if bot_username and message.text: + mentioned = f"@{bot_username}" in message.text + # Check if the message is a reply to the bot + if not mentioned and message.reply_to_message and message.reply_to_message.from_user: + bot_id = (await self._app.bot.get_me()).id if self._app else None + if bot_id and message.reply_to_message.from_user.id == bot_id: + mentioned = True + if not mentioned: + return + # Build content from text and/or media content_parts = [] media_paths = [] From a7be0b3c9eaf967c0079ab1c1a08be4f2010fc09 Mon Sep 17 00:00:00 2001 From: Yan-ke Guo Date: Tue, 3 Mar 2026 18:14:26 +0800 Subject: [PATCH 03/29] sync missing scripts from upstream openclaw repository --- nanobot/skills/skill-creator/SKILL.md | 3 +- .../skill-creator/scripts/init_skill.py | 378 ++++++++++++++++++ .../skill-creator/scripts/package_skill.py | 139 +++++++ 3 files changed, 519 insertions(+), 1 deletion(-) create mode 100755 nanobot/skills/skill-creator/scripts/init_skill.py create mode 100755 nanobot/skills/skill-creator/scripts/package_skill.py diff --git a/nanobot/skills/skill-creator/SKILL.md b/nanobot/skills/skill-creator/SKILL.md index 9b5eb6f..f4d6e0b 100644 --- 
a/nanobot/skills/skill-creator/SKILL.md +++ b/nanobot/skills/skill-creator/SKILL.md @@ -349,7 +349,6 @@ scripts/package_skill.py ./dist The packaging script will: 1. **Validate** the skill automatically, checking: - - YAML frontmatter format and required fields - Skill naming conventions and directory structure - Description completeness and quality @@ -357,6 +356,8 @@ The packaging script will: 2. **Package** the skill if validation passes, creating a .skill file named after the skill (e.g., `my-skill.skill`) that includes all files and maintains the proper directory structure for distribution. The .skill file is a zip file with a .skill extension. + Security restriction: symlinks are rejected and packaging fails when any symlink is present. + If validation fails, the script will report the errors and exit without creating a package. Fix any validation errors and run the packaging command again. ### Step 6: Iterate diff --git a/nanobot/skills/skill-creator/scripts/init_skill.py b/nanobot/skills/skill-creator/scripts/init_skill.py new file mode 100755 index 0000000..8633fe9 --- /dev/null +++ b/nanobot/skills/skill-creator/scripts/init_skill.py @@ -0,0 +1,378 @@ +#!/usr/bin/env python3 +""" +Skill Initializer - Creates a new skill from template + +Usage: + init_skill.py <skill-name> --path <path> [--resources scripts,references,assets] [--examples] + +Examples: + init_skill.py my-new-skill --path skills/public + init_skill.py my-new-skill --path skills/public --resources scripts,references + init_skill.py my-api-helper --path skills/private --resources scripts --examples + init_skill.py custom-skill --path /custom/location +""" + +import argparse +import re +import sys +from pathlib import Path + +MAX_SKILL_NAME_LENGTH = 64 +ALLOWED_RESOURCES = {"scripts", "references", "assets"} + +SKILL_TEMPLATE = """--- +name: {skill_name} +description: [TODO: Complete and informative explanation of what the skill does and when to use it. 
Include WHEN to use this skill - specific scenarios, file types, or tasks that trigger it.] +--- + +# {skill_title} + +## Overview + +[TODO: 1-2 sentences explaining what this skill enables] + +## Structuring This Skill + +[TODO: Choose the structure that best fits this skill's purpose. Common patterns: + +**1. Workflow-Based** (best for sequential processes) +- Works well when there are clear step-by-step procedures +- Example: DOCX skill with "Workflow Decision Tree" -> "Reading" -> "Creating" -> "Editing" +- Structure: ## Overview -> ## Workflow Decision Tree -> ## Step 1 -> ## Step 2... + +**2. Task-Based** (best for tool collections) +- Works well when the skill offers different operations/capabilities +- Example: PDF skill with "Quick Start" -> "Merge PDFs" -> "Split PDFs" -> "Extract Text" +- Structure: ## Overview -> ## Quick Start -> ## Task Category 1 -> ## Task Category 2... + +**3. Reference/Guidelines** (best for standards or specifications) +- Works well for brand guidelines, coding standards, or requirements +- Example: Brand styling with "Brand Guidelines" -> "Colors" -> "Typography" -> "Features" +- Structure: ## Overview -> ## Guidelines -> ## Specifications -> ## Usage... + +**4. Capabilities-Based** (best for integrated systems) +- Works well when the skill provides multiple interrelated features +- Example: Product Management with "Core Capabilities" -> numbered capability list +- Structure: ## Overview -> ## Core Capabilities -> ### 1. Feature -> ### 2. Feature... + +Patterns can be mixed and matched as needed. Most skills combine patterns (e.g., start with task-based, add workflow for complex operations). + +Delete this entire "Structuring This Skill" section when done - it's just guidance.] + +## [TODO: Replace with the first main section based on chosen structure] + +[TODO: Add content here. 
See examples in existing skills: +- Code samples for technical skills +- Decision trees for complex workflows +- Concrete examples with realistic user requests +- References to scripts/templates/references as needed] + +## Resources (optional) + +Create only the resource directories this skill actually needs. Delete this section if no resources are required. + +### scripts/ +Executable code (Python/Bash/etc.) that can be run directly to perform specific operations. + +**Examples from other skills:** +- PDF skill: `fill_fillable_fields.py`, `extract_form_field_info.py` - utilities for PDF manipulation +- DOCX skill: `document.py`, `utilities.py` - Python modules for document processing + +**Appropriate for:** Python scripts, shell scripts, or any executable code that performs automation, data processing, or specific operations. + +**Note:** Scripts may be executed without loading into context, but can still be read by Codex for patching or environment adjustments. + +### references/ +Documentation and reference material intended to be loaded into context to inform Codex's process and thinking. + +**Examples from other skills:** +- Product management: `communication.md`, `context_building.md` - detailed workflow guides +- BigQuery: API reference documentation and query examples +- Finance: Schema documentation, company policies + +**Appropriate for:** In-depth documentation, API references, database schemas, comprehensive guides, or any detailed information that Codex should reference while working. + +### assets/ +Files not intended to be loaded into context, but rather used within the output Codex produces. + +**Examples from other skills:** +- Brand styling: PowerPoint template files (.pptx), logo files +- Frontend builder: HTML/React boilerplate project directories +- Typography: Font files (.ttf, .woff2) + +**Appropriate for:** Templates, boilerplate code, document templates, images, icons, fonts, or any files meant to be copied or used in the final output. 
+ +--- + +**Not every skill requires all three types of resources.** +""" + +EXAMPLE_SCRIPT = '''#!/usr/bin/env python3 +""" +Example helper script for {skill_name} + +This is a placeholder script that can be executed directly. +Replace with actual implementation or delete if not needed. + +Example real scripts from other skills: +- pdf/scripts/fill_fillable_fields.py - Fills PDF form fields +- pdf/scripts/convert_pdf_to_images.py - Converts PDF pages to images +""" + +def main(): + print("This is an example script for {skill_name}") + # TODO: Add actual script logic here + # This could be data processing, file conversion, API calls, etc. + +if __name__ == "__main__": + main() +''' + +EXAMPLE_REFERENCE = """# Reference Documentation for {skill_title} + +This is a placeholder for detailed reference documentation. +Replace with actual reference content or delete if not needed. + +Example real reference docs from other skills: +- product-management/references/communication.md - Comprehensive guide for status updates +- product-management/references/context_building.md - Deep-dive on gathering context +- bigquery/references/ - API references and query examples + +## When Reference Docs Are Useful + +Reference docs are ideal for: +- Comprehensive API documentation +- Detailed workflow guides +- Complex multi-step processes +- Information too lengthy for main SKILL.md +- Content that's only needed for specific use cases + +## Structure Suggestions + +### API Reference Example +- Overview +- Authentication +- Endpoints with examples +- Error codes +- Rate limits + +### Workflow Guide Example +- Prerequisites +- Step-by-step instructions +- Common patterns +- Troubleshooting +- Best practices +""" + +EXAMPLE_ASSET = """# Example Asset File + +This placeholder represents where asset files would be stored. +Replace with actual asset files (templates, images, fonts, etc.) or delete if not needed. 
+ +Asset files are NOT intended to be loaded into context, but rather used within +the output Codex produces. + +Example asset files from other skills: +- Brand guidelines: logo.png, slides_template.pptx +- Frontend builder: hello-world/ directory with HTML/React boilerplate +- Typography: custom-font.ttf, font-family.woff2 +- Data: sample_data.csv, test_dataset.json + +## Common Asset Types + +- Templates: .pptx, .docx, boilerplate directories +- Images: .png, .jpg, .svg, .gif +- Fonts: .ttf, .otf, .woff, .woff2 +- Boilerplate code: Project directories, starter files +- Icons: .ico, .svg +- Data files: .csv, .json, .xml, .yaml + +Note: This is a text placeholder. Actual assets can be any file type. +""" + + +def normalize_skill_name(skill_name): + """Normalize a skill name to lowercase hyphen-case.""" + normalized = skill_name.strip().lower() + normalized = re.sub(r"[^a-z0-9]+", "-", normalized) + normalized = normalized.strip("-") + normalized = re.sub(r"-{2,}", "-", normalized) + return normalized + + +def title_case_skill_name(skill_name): + """Convert hyphenated skill name to Title Case for display.""" + return " ".join(word.capitalize() for word in skill_name.split("-")) + + +def parse_resources(raw_resources): + if not raw_resources: + return [] + resources = [item.strip() for item in raw_resources.split(",") if item.strip()] + invalid = sorted({item for item in resources if item not in ALLOWED_RESOURCES}) + if invalid: + allowed = ", ".join(sorted(ALLOWED_RESOURCES)) + print(f"[ERROR] Unknown resource type(s): {', '.join(invalid)}") + print(f" Allowed: {allowed}") + sys.exit(1) + deduped = [] + seen = set() + for resource in resources: + if resource not in seen: + deduped.append(resource) + seen.add(resource) + return deduped + + +def create_resource_dirs(skill_dir, skill_name, skill_title, resources, include_examples): + for resource in resources: + resource_dir = skill_dir / resource + resource_dir.mkdir(exist_ok=True) + if resource == "scripts": + if 
include_examples: + example_script = resource_dir / "example.py" + example_script.write_text(EXAMPLE_SCRIPT.format(skill_name=skill_name)) + example_script.chmod(0o755) + print("[OK] Created scripts/example.py") + else: + print("[OK] Created scripts/") + elif resource == "references": + if include_examples: + example_reference = resource_dir / "api_reference.md" + example_reference.write_text(EXAMPLE_REFERENCE.format(skill_title=skill_title)) + print("[OK] Created references/api_reference.md") + else: + print("[OK] Created references/") + elif resource == "assets": + if include_examples: + example_asset = resource_dir / "example_asset.txt" + example_asset.write_text(EXAMPLE_ASSET) + print("[OK] Created assets/example_asset.txt") + else: + print("[OK] Created assets/") + + +def init_skill(skill_name, path, resources, include_examples): + """ + Initialize a new skill directory with template SKILL.md. + + Args: + skill_name: Name of the skill + path: Path where the skill directory should be created + resources: Resource directories to create + include_examples: Whether to create example files in resource directories + + Returns: + Path to created skill directory, or None if error + """ + # Determine skill directory path + skill_dir = Path(path).resolve() / skill_name + + # Check if directory already exists + if skill_dir.exists(): + print(f"[ERROR] Skill directory already exists: {skill_dir}") + return None + + # Create skill directory + try: + skill_dir.mkdir(parents=True, exist_ok=False) + print(f"[OK] Created skill directory: {skill_dir}") + except Exception as e: + print(f"[ERROR] Error creating directory: {e}") + return None + + # Create SKILL.md from template + skill_title = title_case_skill_name(skill_name) + skill_content = SKILL_TEMPLATE.format(skill_name=skill_name, skill_title=skill_title) + + skill_md_path = skill_dir / "SKILL.md" + try: + skill_md_path.write_text(skill_content) + print("[OK] Created SKILL.md") + except Exception as e: + print(f"[ERROR] 
Error creating SKILL.md: {e}") + return None + + # Create resource directories if requested + if resources: + try: + create_resource_dirs(skill_dir, skill_name, skill_title, resources, include_examples) + except Exception as e: + print(f"[ERROR] Error creating resource directories: {e}") + return None + + # Print next steps + print(f"\n[OK] Skill '{skill_name}' initialized successfully at {skill_dir}") + print("\nNext steps:") + print("1. Edit SKILL.md to complete the TODO items and update the description") + if resources: + if include_examples: + print("2. Customize or delete the example files in scripts/, references/, and assets/") + else: + print("2. Add resources to scripts/, references/, and assets/ as needed") + else: + print("2. Create resource directories only if needed (scripts/, references/, assets/)") + print("3. Run the validator when ready to check the skill structure") + + return skill_dir + + +def main(): + parser = argparse.ArgumentParser( + description="Create a new skill directory with a SKILL.md template.", + ) + parser.add_argument("skill_name", help="Skill name (normalized to hyphen-case)") + parser.add_argument("--path", required=True, help="Output directory for the skill") + parser.add_argument( + "--resources", + default="", + help="Comma-separated list: scripts,references,assets", + ) + parser.add_argument( + "--examples", + action="store_true", + help="Create example files inside the selected resource directories", + ) + args = parser.parse_args() + + raw_skill_name = args.skill_name + skill_name = normalize_skill_name(raw_skill_name) + if not skill_name: + print("[ERROR] Skill name must include at least one letter or digit.") + sys.exit(1) + if len(skill_name) > MAX_SKILL_NAME_LENGTH: + print( + f"[ERROR] Skill name '{skill_name}' is too long ({len(skill_name)} characters). " + f"Maximum is {MAX_SKILL_NAME_LENGTH} characters." 
+ ) + sys.exit(1) + if skill_name != raw_skill_name: + print(f"Note: Normalized skill name from '{raw_skill_name}' to '{skill_name}'.") + + resources = parse_resources(args.resources) + if args.examples and not resources: + print("[ERROR] --examples requires --resources to be set.") + sys.exit(1) + + path = args.path + + print(f"Initializing skill: {skill_name}") + print(f" Location: {path}") + if resources: + print(f" Resources: {', '.join(resources)}") + if args.examples: + print(" Examples: enabled") + else: + print(" Resources: none (create as needed)") + print() + + result = init_skill(skill_name, path, resources, args.examples) + + if result: + sys.exit(0) + else: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/nanobot/skills/skill-creator/scripts/package_skill.py b/nanobot/skills/skill-creator/scripts/package_skill.py new file mode 100755 index 0000000..aa4de89 --- /dev/null +++ b/nanobot/skills/skill-creator/scripts/package_skill.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +""" +Skill Packager - Creates a distributable .skill file of a skill folder + +Usage: + python utils/package_skill.py <path/to/skill-folder> [output-directory] + +Example: + python utils/package_skill.py skills/public/my-skill + python utils/package_skill.py skills/public/my-skill ./dist +""" + +import sys +import zipfile +from pathlib import Path + +from quick_validate import validate_skill + + +def _is_within(path: Path, root: Path) -> bool: + try: + path.relative_to(root) + return True + except ValueError: + return False + + +def package_skill(skill_path, output_dir=None): + """ + Package a skill folder into a .skill file. 
+ + Args: + skill_path: Path to the skill folder + output_dir: Optional output directory for the .skill file (defaults to current directory) + + Returns: + Path to the created .skill file, or None if error + """ + skill_path = Path(skill_path).resolve() + + # Validate skill folder exists + if not skill_path.exists(): + print(f"[ERROR] Skill folder not found: {skill_path}") + return None + + if not skill_path.is_dir(): + print(f"[ERROR] Path is not a directory: {skill_path}") + return None + + # Validate SKILL.md exists + skill_md = skill_path / "SKILL.md" + if not skill_md.exists(): + print(f"[ERROR] SKILL.md not found in {skill_path}") + return None + + # Run validation before packaging + print("Validating skill...") + valid, message = validate_skill(skill_path) + if not valid: + print(f"[ERROR] Validation failed: {message}") + print(" Please fix the validation errors before packaging.") + return None + print(f"[OK] {message}\n") + + # Determine output location + skill_name = skill_path.name + if output_dir: + output_path = Path(output_dir).resolve() + output_path.mkdir(parents=True, exist_ok=True) + else: + output_path = Path.cwd() + + skill_filename = output_path / f"{skill_name}.skill" + + EXCLUDED_DIRS = {".git", ".svn", ".hg", "__pycache__", "node_modules"} + + # Create the .skill file (zip format) + try: + with zipfile.ZipFile(skill_filename, "w", zipfile.ZIP_DEFLATED) as zipf: + # Walk through the skill directory + for file_path in skill_path.rglob("*"): + # Security: never follow or package symlinks. 
+ if file_path.is_symlink(): + print(f"[WARN] Skipping symlink: {file_path}") + continue + + rel_parts = file_path.relative_to(skill_path).parts + if any(part in EXCLUDED_DIRS for part in rel_parts): + continue + + if file_path.is_file(): + resolved_file = file_path.resolve() + if not _is_within(resolved_file, skill_path): + print(f"[ERROR] File escapes skill root: {file_path}") + return None + # If output lives under skill_path, avoid writing archive into itself. + if resolved_file == skill_filename.resolve(): + print(f"[WARN] Skipping output archive: {file_path}") + continue + + # Calculate the relative path within the zip. + arcname = Path(skill_name) / file_path.relative_to(skill_path) + zipf.write(file_path, arcname) + print(f" Added: {arcname}") + + print(f"\n[OK] Successfully packaged skill to: {skill_filename}") + return skill_filename + + except Exception as e: + print(f"[ERROR] Error creating .skill file: {e}") + return None + + +def main(): + if len(sys.argv) < 2: + print("Usage: python utils/package_skill.py [output-directory]") + print("\nExample:") + print(" python utils/package_skill.py skills/public/my-skill") + print(" python utils/package_skill.py skills/public/my-skill ./dist") + sys.exit(1) + + skill_path = sys.argv[1] + output_dir = sys.argv[2] if len(sys.argv) > 2 else None + + print(f"Packaging skill: {skill_path}") + if output_dir: + print(f" Output directory: {output_dir}") + print() + + result = package_skill(skill_path, output_dir) + + if result: + sys.exit(0) + else: + sys.exit(1) + + +if __name__ == "__main__": + main() From d0c647918616f4d5f133f5bf07032d477de3c8f0 Mon Sep 17 00:00:00 2001 From: Kiplangatkorir Date: Wed, 4 Mar 2026 11:20:50 +0300 Subject: [PATCH 04/29] feat: add LLM retry with exponential backoff for transient errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit provider.chat() had no retry logic — a transient 429 rate limit, 502 gateway error, or network timeout would 
permanently fail the entire message. For a system running cron jobs and heartbeats 24/7, even a brief provider blip causes lost tasks. Adds _chat_with_retry() that: - Retries up to 3 times with 1s/2s/4s exponential backoff - Only retries transient errors (429, 5xx, timeout, connection) - Returns immediately on permanent errors (400, 401, etc.) - Falls through to the final attempt if all retries exhaust --- nanobot/agent/loop.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index 65a62e5..9819a38 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -159,6 +159,33 @@ class AgentLoop: if hasattr(tool, "set_context"): tool.set_context(channel, chat_id, *([message_id] if name == "message" else [])) + _RETRY_DELAYS = (1, 2, 4) # seconds — exponential backoff for transient LLM errors + + async def _chat_with_retry(self, **kwargs: Any) -> Any: + """Call provider.chat() with retry on transient errors (429, 5xx, network).""" + from nanobot.providers.base import LLMResponse + + last_response: LLMResponse | None = None + for attempt, delay in enumerate(self._RETRY_DELAYS): + response = await self.provider.chat(**kwargs) + if response.finish_reason != "error": + return response + # Check if the error looks transient (rate limit, server error, network) + err = (response.content or "").lower() + is_transient = any(kw in err for kw in ( + "429", "rate limit", "500", "502", "503", "504", + "overloaded", "timeout", "connection", "server error", + )) + if not is_transient: + return response # permanent error (400, 401, etc.) 
— don't retry + last_response = response + logger.warning("LLM transient error (attempt {}/{}), retrying in {}s: {}", + attempt + 1, len(self._RETRY_DELAYS), delay, err[:120]) + await asyncio.sleep(delay) + # All retries exhausted — make one final attempt + response = await self.provider.chat(**kwargs) + return response if response.finish_reason != "error" else (last_response or response) + @staticmethod def _strip_think(text: str | None) -> str | None: """Remove blocks that some models embed in content.""" @@ -191,7 +218,7 @@ class AgentLoop: while iteration < self.max_iterations: iteration += 1 - response = await self.provider.chat( + response = await self._chat_with_retry( messages=messages, tools=self.tools.get_definitions(), model=self.model, From dbc518098e913d2f382121820dd58bbaf7a04234 Mon Sep 17 00:00:00 2001 From: VITOHJL Date: Sun, 8 Mar 2026 14:20:16 +0800 Subject: [PATCH 05/29] refactor: implement token-based context compression mechanism Major changes: - Replace message-count-based memory window with token-budget-based compression - Add max_tokens_input, compression_start_ratio, compression_target_ratio config - Implement _maybe_compress_history() that triggers based on prompt token usage - Use _build_compressed_history_view() to provide compressed history to LLM - Refactor MemoryStore.consolidate() -> consolidate_chunk() for chunk-based compression - Remove last_consolidated from Session, use _compressed_until metadata instead - Add background compression scheduling to avoid blocking message processing Key improvements: - Compression now based on actual token usage, not arbitrary message counts - Better handling of long conversations with large context windows - Non-destructive compression: old messages remain in session, but excluded from prompt - Automatic compression when history exceeds configured token thresholds --- nanobot/agent/loop.py | 521 +++++++++++++++++++++++++++++++++---- nanobot/agent/memory.py | 62 ++--- nanobot/config/schema.py | 25 
+- nanobot/session/manager.py | 20 +- 4 files changed, 529 insertions(+), 99 deletions(-) diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index ca9a06e..696e2a7 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -5,19 +5,24 @@ from __future__ import annotations import asyncio import json import re -import weakref from contextlib import AsyncExitStack from pathlib import Path from typing import TYPE_CHECKING, Any, Awaitable, Callable from loguru import logger +try: + import tiktoken # type: ignore +except Exception: # pragma: no cover - optional dependency + tiktoken = None + from nanobot.agent.context import ContextBuilder -from nanobot.agent.memory import MemoryStore from nanobot.agent.subagent import SubagentManager from nanobot.agent.tools.cron import CronTool from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, ReadFileTool, WriteFileTool +from nanobot.agent.tools.huggingface import HuggingFaceModelSearchTool from nanobot.agent.tools.message import MessageTool +from nanobot.agent.tools.model_config import ValidateDeployJSONTool, ValidateUsageYAMLTool from nanobot.agent.tools.registry import ToolRegistry from nanobot.agent.tools.shell import ExecTool from nanobot.agent.tools.spawn import SpawnTool @@ -55,8 +60,11 @@ class AgentLoop: max_iterations: int = 40, temperature: float = 0.1, max_tokens: int = 4096, - memory_window: int = 100, + memory_window: int | None = None, # backward-compat only (unused) reasoning_effort: str | None = None, + max_tokens_input: int = 128_000, + compression_start_ratio: float = 0.7, + compression_target_ratio: float = 0.4, brave_api_key: str | None = None, web_proxy: str | None = None, exec_config: ExecToolConfig | None = None, @@ -74,9 +82,18 @@ class AgentLoop: self.model = model or provider.get_default_model() self.max_iterations = max_iterations self.temperature = temperature + # max_tokens: per-call output token cap (maxTokensOutput in config) self.max_tokens = max_tokens + # Keep 
legacy attribute for older call sites/tests; compression no longer uses it. self.memory_window = memory_window self.reasoning_effort = reasoning_effort + # max_tokens_input: model native context window (maxTokensInput in config) + self.max_tokens_input = max_tokens_input + # Token-based compression watermarks (fractions of available input budget) + self.compression_start_ratio = compression_start_ratio + self.compression_target_ratio = compression_target_ratio + # Reserve tokens for safety margin + self._reserve_tokens = 1000 self.brave_api_key = brave_api_key self.web_proxy = web_proxy self.exec_config = exec_config or ExecToolConfig() @@ -105,18 +122,373 @@ class AgentLoop: self._mcp_stack: AsyncExitStack | None = None self._mcp_connected = False self._mcp_connecting = False - self._consolidating: set[str] = set() # Session keys with consolidation in progress - self._consolidation_tasks: set[asyncio.Task] = set() # Strong refs to in-flight tasks - self._consolidation_locks: weakref.WeakValueDictionary[str, asyncio.Lock] = weakref.WeakValueDictionary() self._active_tasks: dict[str, list[asyncio.Task]] = {} # session_key -> tasks + self._compression_tasks: dict[str, asyncio.Task] = {} # session_key -> task self._processing_lock = asyncio.Lock() self._register_default_tools() + @staticmethod + def _estimate_prompt_tokens( + messages: list[dict[str, Any]], + tools: list[dict[str, Any]] | None = None, + ) -> int: + """Estimate prompt tokens with tiktoken (fallback only).""" + if tiktoken is None: + return 0 + + try: + enc = tiktoken.get_encoding("cl100k_base") + parts: list[str] = [] + for msg in messages: + content = msg.get("content") + if isinstance(content, str): + parts.append(content) + elif isinstance(content, list): + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + txt = part.get("text", "") + if txt: + parts.append(txt) + if tools: + parts.append(json.dumps(tools, ensure_ascii=False)) + return 
len(enc.encode("\n".join(parts))) + except Exception: + return 0 + + def _estimate_prompt_tokens_chain( + self, + messages: list[dict[str, Any]], + tools: list[dict[str, Any]] | None = None, + ) -> tuple[int, str]: + """Unified prompt-token estimation: provider counter -> tiktoken.""" + provider_counter = getattr(self.provider, "estimate_prompt_tokens", None) + if callable(provider_counter): + try: + tokens, source = provider_counter(messages, tools, self.model) + if isinstance(tokens, (int, float)) and tokens > 0: + return int(tokens), str(source or "provider_counter") + except Exception: + logger.debug("Provider token counter failed; fallback to tiktoken") + + estimated = self._estimate_prompt_tokens(messages, tools) + if estimated > 0: + return int(estimated), "tiktoken" + return 0, "none" + + @staticmethod + def _estimate_completion_tokens(content: str) -> int: + """Estimate completion tokens with tiktoken (fallback only).""" + if tiktoken is None: + return 0 + try: + enc = tiktoken.get_encoding("cl100k_base") + return len(enc.encode(content or "")) + except Exception: + return 0 + + def _get_compressed_until(self, session: Session) -> int: + """Read/normalize compressed boundary and migrate old metadata format.""" + raw = session.metadata.get("_compressed_until", 0) + try: + compressed_until = int(raw) + except (TypeError, ValueError): + compressed_until = 0 + + if compressed_until <= 0: + ranges = session.metadata.get("_compressed_ranges") + if isinstance(ranges, list): + inferred = 0 + for item in ranges: + if not isinstance(item, (list, tuple)) or len(item) != 2: + continue + try: + inferred = max(inferred, int(item[1])) + except (TypeError, ValueError): + continue + compressed_until = inferred + + compressed_until = max(0, min(compressed_until, len(session.messages))) + session.metadata["_compressed_until"] = compressed_until + # Backward compatibility: once a contiguous boundary has been migrated, the legacy fields can be removed. 
+ session.metadata.pop("_compressed_ranges", None) + session.metadata.pop("_cumulative_tokens", None) + 
return compressed_until + + def _set_compressed_until(self, session: Session, idx: int) -> None: + """Persist a contiguous compressed boundary.""" + session.metadata["_compressed_until"] = max(0, min(int(idx), len(session.messages))) + session.metadata.pop("_compressed_ranges", None) + session.metadata.pop("_cumulative_tokens", None) + + @staticmethod + def _estimate_message_tokens(message: dict[str, Any]) -> int: + """Rough token estimate for a single persisted message.""" + content = message.get("content") + parts: list[str] = [] + if isinstance(content, str): + parts.append(content) + elif isinstance(content, list): + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + txt = part.get("text", "") + if txt: + parts.append(txt) + else: + parts.append(json.dumps(part, ensure_ascii=False)) + elif content is not None: + parts.append(json.dumps(content, ensure_ascii=False)) + + for key in ("name", "tool_call_id"): + val = message.get(key) + if isinstance(val, str) and val: + parts.append(val) + if message.get("tool_calls"): + parts.append(json.dumps(message["tool_calls"], ensure_ascii=False)) + + payload = "\n".join(parts) + if not payload: + return 1 + if tiktoken is not None: + try: + enc = tiktoken.get_encoding("cl100k_base") + return max(1, len(enc.encode(payload))) + except Exception: + pass + return max(1, len(payload) // 4) + + def _pick_compression_chunk_by_tokens( + self, + session: Session, + reduction_tokens: int, + *, + tail_keep: int = 12, + ) -> tuple[int, int, int] | None: + """ + Pick one contiguous old chunk so its estimated size is roughly enough + to reduce `reduction_tokens`. 
+ """ + messages = session.messages + start = self._get_compressed_until(session) + if len(messages) - start <= tail_keep + 2: + return None + + end_limit = len(messages) - tail_keep + if end_limit - start < 2: + return None + + target = max(1, reduction_tokens) + end = start + collected = 0 + while end < end_limit and collected < target: + collected += self._estimate_message_tokens(messages[end]) + end += 1 + + if end - start < 2: + end = min(end_limit, start + 2) + collected = sum(self._estimate_message_tokens(m) for m in messages[start:end]) + if end - start < 2: + return None + return start, end, collected + + def _estimate_session_prompt_tokens(self, session: Session) -> tuple[int, str]: + """ + Estimate current full prompt tokens for this session view + (system + compressed history view + runtime/user placeholder + tools). + """ + history = self._build_compressed_history_view(session) + channel, chat_id = (session.key.split(":", 1) if ":" in session.key else (None, None)) + probe_messages = self.context.build_messages( + history=history, + current_message="[token-probe]", + channel=channel, + chat_id=chat_id, + ) + return self._estimate_prompt_tokens_chain(probe_messages, self.tools.get_definitions()) + + async def _maybe_compress_history( + self, + session: Session, + ) -> None: + """ + End-of-turn policy: + - Estimate current prompt usage from persisted session view. + - If above start ratio, perform one best-effort compression chunk. 
+ """ + if not session.messages: + self._set_compressed_until(session, 0) + return + + budget = max(1, self.max_tokens_input - self.max_tokens - self._reserve_tokens) + start_threshold = int(budget * self.compression_start_ratio) + target_threshold = int(budget * self.compression_target_ratio) + if target_threshold >= start_threshold: + target_threshold = max(0, start_threshold - 1) + + current_tokens, token_source = self._estimate_session_prompt_tokens(session) + current_ratio = current_tokens / budget if budget else 0.0 + if current_tokens <= 0: + logger.debug("Compression skip {}: token estimate unavailable", session.key) + return + if current_tokens < start_threshold: + logger.debug( + "Compression idle {}: {}/{} ({:.1%}) via {}", + session.key, + current_tokens, + budget, + current_ratio, + token_source, + ) + return + logger.info( + "Compression trigger {}: {}/{} ({:.1%}) via {}", + session.key, + current_tokens, + budget, + current_ratio, + token_source, + ) + + reduction_by_target = max(0, current_tokens - target_threshold) + reduction_by_delta = max(1, start_threshold - target_threshold) + reduction_need = max(reduction_by_target, reduction_by_delta) + + chunk_range = self._pick_compression_chunk_by_tokens(session, reduction_need, tail_keep=10) + if chunk_range is None: + logger.info("Compression skipped for {}: no compressible chunk", session.key) + return + + start_idx, end_idx, estimated_chunk_tokens = chunk_range + chunk = session.messages[start_idx:end_idx] + if len(chunk) < 2: + return + + logger.info( + "Compression chunk {}: msgs {}-{} (count={}, est~{}, need~{})", + session.key, + start_idx, + end_idx - 1, + len(chunk), + estimated_chunk_tokens, + reduction_need, + ) + success, _ = await self.context.memory.consolidate_chunk( + chunk, + self.provider, + self.model, + ) + if not success: + logger.warning("Compression aborted for {}: consolidation failed", session.key) + return + + self._set_compressed_until(session, end_idx) + 
self.sessions.save(session) + + after_tokens, after_source = self._estimate_session_prompt_tokens(session) + after_ratio = after_tokens / budget if budget else 0.0 + reduced = max(0, current_tokens - after_tokens) + reduced_ratio = (reduced / current_tokens) if current_tokens > 0 else 0.0 + logger.info( + "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%})", + session.key, + after_tokens, + budget, + after_ratio, + after_source, + reduced, + reduced_ratio, + ) + + def _schedule_background_compression(self, session_key: str) -> None: + """Schedule best-effort background compression for a session.""" + existing = self._compression_tasks.get(session_key) + if existing is not None and not existing.done(): + return + + async def _runner() -> None: + session = self.sessions.get_or_create(session_key) + try: + await self._maybe_compress_history(session) + except Exception: + logger.exception("Background compression failed for {}", session_key) + + task = asyncio.create_task(_runner()) + self._compression_tasks[session_key] = task + + def _cleanup(t: asyncio.Task) -> None: + cur = self._compression_tasks.get(session_key) + if cur is t: + self._compression_tasks.pop(session_key, None) + try: + t.result() + except BaseException: + pass + + task.add_done_callback(_cleanup) + + async def wait_for_background_compression(self, timeout_s: float | None = None) -> None: + """Wait for currently scheduled compression tasks.""" + pending = [t for t in self._compression_tasks.values() if not t.done()] + if not pending: + return + + logger.info("Waiting for {} background compression task(s)", len(pending)) + waiter = asyncio.gather(*pending, return_exceptions=True) + if timeout_s is None: + await waiter + return + + try: + await asyncio.wait_for(waiter, timeout=timeout_s) + except asyncio.TimeoutError: + logger.warning( + "Background compression wait timed out after {}s ({} task(s) still running)", + timeout_s, + len([t for t in self._compression_tasks.values() if not 
t.done()]), + ) + + def _build_compressed_history_view( + self, + session: Session, + ) -> list[dict]: + """Build non-destructive history view using the compressed boundary.""" + compressed_until = self._get_compressed_until(session) + if compressed_until <= 0: + return session.get_history(max_messages=0) + + notice_msg: dict[str, Any] = { + "role": "assistant", + "content": ( + "As your assistant, I have compressed earlier context. " + "If you need details, please check memory/HISTORY.md." + ), + } + + tail: list[dict[str, Any]] = [] + for msg in session.messages[compressed_until:]: + entry: dict[str, Any] = {"role": msg["role"], "content": msg.get("content", "")} + for k in ("tool_calls", "tool_call_id", "name"): + if k in msg: + entry[k] = msg[k] + tail.append(entry) + + # Drop leading non-user entries from tail to avoid orphan tool blocks. + for i, m in enumerate(tail): + if m.get("role") == "user": + tail = tail[i:] + break + else: + tail = [] + + return [notice_msg, *tail] + def _register_default_tools(self) -> None: """Register the default set of tools.""" allowed_dir = self.workspace if self.restrict_to_workspace else None for cls in (ReadFileTool, WriteFileTool, EditFileTool, ListDirTool): self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir)) + self.tools.register(ValidateDeployJSONTool()) + self.tools.register(ValidateUsageYAMLTool()) + self.tools.register(HuggingFaceModelSearchTool()) self.tools.register(ExecTool( working_dir=str(self.workspace), timeout=self.exec_config.timeout, @@ -181,25 +553,78 @@ class AgentLoop: self, initial_messages: list[dict], on_progress: Callable[..., Awaitable[None]] | None = None, - ) -> tuple[str | None, list[str], list[dict]]: - """Run the agent iteration loop. Returns (final_content, tools_used, messages).""" + ) -> tuple[str | None, list[str], list[dict], int, str]: + """ + Run the agent iteration loop. 
+ + Returns: + (final_content, tools_used, messages, total_tokens_this_turn, token_source) + total_tokens_this_turn: total tokens (prompt + completion) for this turn + token_source: provider_total / provider_sum / provider_prompt / + provider_counter+tiktoken_completion / tiktoken / none + """ messages = initial_messages iteration = 0 final_content = None tools_used: list[str] = [] + total_tokens_this_turn = 0 + token_source = "none" while iteration < self.max_iterations: iteration += 1 + tool_defs = self.tools.get_definitions() + response = await self.provider.chat( messages=messages, - tools=self.tools.get_definitions(), + tools=tool_defs, model=self.model, temperature=self.temperature, max_tokens=self.max_tokens, reasoning_effort=self.reasoning_effort, ) + # Prefer provider usage from the turn-ending model call; fallback to tiktoken. + # Calculate total tokens (prompt + completion) for this turn. + usage = response.usage or {} + t_tokens = usage.get("total_tokens") + p_tokens = usage.get("prompt_tokens") + c_tokens = usage.get("completion_tokens") + + if isinstance(t_tokens, (int, float)) and t_tokens > 0: + total_tokens_this_turn = int(t_tokens) + token_source = "provider_total" + elif isinstance(p_tokens, (int, float)) and isinstance(c_tokens, (int, float)): + # If we have both prompt and completion tokens, sum them + total_tokens_this_turn = int(p_tokens) + int(c_tokens) + token_source = "provider_sum" + elif isinstance(p_tokens, (int, float)) and p_tokens > 0: + # Fallback: use prompt tokens only (completion might be 0 for tool calls) + total_tokens_this_turn = int(p_tokens) + token_source = "provider_prompt" + else: + # Estimate with unified chain (provider counter -> tiktoken), plus completion tiktoken. 
+ estimated_prompt, prompt_source = self._estimate_prompt_tokens_chain(messages, tool_defs) + estimated_completion = self._estimate_completion_tokens(response.content or "") + total_tokens_this_turn = estimated_prompt + estimated_completion + if total_tokens_this_turn > 0: + token_source = ( + "tiktoken" + if prompt_source == "tiktoken" + else f"{prompt_source}+tiktoken_completion" + ) + if total_tokens_this_turn <= 0: + total_tokens_this_turn = 0 + token_source = "none" + + logger.debug( + "Turn token usage: source={}, total={}, prompt={}, completion={}", + token_source, + total_tokens_this_turn, + p_tokens if isinstance(p_tokens, (int, float)) else None, + c_tokens if isinstance(c_tokens, (int, float)) else None, + ) + if response.has_tool_calls: if on_progress: thought = self._strip_think(response.content) @@ -254,7 +679,7 @@ class AgentLoop: "without completing the task. You can try breaking the task into smaller steps." ) - return final_content, tools_used, messages + return final_content, tools_used, messages, total_tokens_this_turn, token_source async def run(self) -> None: """Run the agent loop, dispatching messages as tasks to stay responsive to /stop.""" @@ -279,6 +704,9 @@ class AgentLoop: """Cancel all active tasks and subagents for the session.""" tasks = self._active_tasks.pop(msg.session_key, []) cancelled = sum(1 for t in tasks if not t.done() and t.cancel()) + comp = self._compression_tasks.get(msg.session_key) + if comp is not None and not comp.done() and comp.cancel(): + cancelled += 1 for t in tasks: try: await t @@ -325,6 +753,9 @@ class AgentLoop: def stop(self) -> None: """Stop the agent loop.""" self._running = False + for task in list(self._compression_tasks.values()): + if not task.done(): + task.cancel() logger.info("Agent loop stopping") async def _process_message( @@ -342,14 +773,15 @@ class AgentLoop: key = f"{channel}:{chat_id}" session = self.sessions.get_or_create(key) self._set_tool_context(channel, chat_id, 
msg.metadata.get("message_id")) - history = session.get_history(max_messages=self.memory_window) + history = self._build_compressed_history_view(session) messages = self.context.build_messages( history=history, current_message=msg.content, channel=channel, chat_id=chat_id, ) - final_content, _, all_msgs = await self._run_agent_loop(messages) + final_content, _, all_msgs, _, _ = await self._run_agent_loop(messages) self._save_turn(session, all_msgs, 1 + len(history)) self.sessions.save(session) + self._schedule_background_compression(session.key) return OutboundMessage(channel=channel, chat_id=chat_id, content=final_content or "Background task completed.") @@ -362,27 +794,27 @@ class AgentLoop: # Slash commands cmd = msg.content.strip().lower() if cmd == "/new": - lock = self._consolidation_locks.setdefault(session.key, asyncio.Lock()) - self._consolidating.add(session.key) try: - async with lock: - snapshot = session.messages[session.last_consolidated:] - if snapshot: - temp = Session(key=session.key) - temp.messages = list(snapshot) - if not await self._consolidate_memory(temp, archive_all=True): - return OutboundMessage( - channel=msg.channel, chat_id=msg.chat_id, - content="Memory archival failed, session not cleared. Please try again.", - ) + # 在清空会话前,将当前完整对话做一次归档压缩到 MEMORY/HISTORY 中 + if session.messages: + ok, _ = await self.context.memory.consolidate_chunk( + session.messages, + self.provider, + self.model, + ) + if not ok: + return OutboundMessage( + channel=msg.channel, + chat_id=msg.chat_id, + content="Memory archival failed, session not cleared. Please try again.", + ) except Exception: logger.exception("/new archival failed for {}", session.key) return OutboundMessage( - channel=msg.channel, chat_id=msg.chat_id, + channel=msg.channel, + chat_id=msg.chat_id, content="Memory archival failed, session not cleared. 
Please try again.", ) - finally: - self._consolidating.discard(session.key) session.clear() self.sessions.save(session) @@ -393,36 +825,23 @@ class AgentLoop: return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id, content="🐈 nanobot commands:\n/new — Start a new conversation\n/stop — Stop the current task\n/help — Show available commands") - unconsolidated = len(session.messages) - session.last_consolidated - if (unconsolidated >= self.memory_window and session.key not in self._consolidating): - self._consolidating.add(session.key) - lock = self._consolidation_locks.setdefault(session.key, asyncio.Lock()) - - async def _consolidate_and_unlock(): - try: - async with lock: - await self._consolidate_memory(session) - finally: - self._consolidating.discard(session.key) - _task = asyncio.current_task() - if _task is not None: - self._consolidation_tasks.discard(_task) - - _task = asyncio.create_task(_consolidate_and_unlock()) - self._consolidation_tasks.add(_task) - self._set_tool_context(msg.channel, msg.chat_id, msg.metadata.get("message_id")) if message_tool := self.tools.get("message"): if isinstance(message_tool, MessageTool): message_tool.start_turn() - history = session.get_history(max_messages=self.memory_window) + # 正常对话:使用压缩后的历史视图(压缩在回合结束后进行) + history = self._build_compressed_history_view(session) initial_messages = self.context.build_messages( history=history, current_message=msg.content, media=msg.media if msg.media else None, channel=msg.channel, chat_id=msg.chat_id, ) + # Add [CRON JOB] identifier for cron sessions (session_key starts with "cron:") + if session_key and session_key.startswith("cron:"): + if initial_messages and initial_messages[0].get("role") == "system": + initial_messages[0]["content"] = f"[CRON JOB] {initial_messages[0]['content']}" async def _bus_progress(content: str, *, tool_hint: bool = False) -> None: meta = dict(msg.metadata or {}) @@ -432,7 +851,7 @@ class AgentLoop: channel=msg.channel, chat_id=msg.chat_id, 
content=content, metadata=meta, )) - final_content, _, all_msgs = await self._run_agent_loop( + final_content, _, all_msgs, _, _ = await self._run_agent_loop( initial_messages, on_progress=on_progress or _bus_progress, ) @@ -441,6 +860,7 @@ class AgentLoop: self._save_turn(session, all_msgs, 1 + len(history)) self.sessions.save(session) + self._schedule_background_compression(session.key) if (mt := self.tools.get("message")) and isinstance(mt, MessageTool) and mt._sent_in_turn: return None @@ -487,13 +907,6 @@ class AgentLoop: session.messages.append(entry) session.updated_at = datetime.now() - async def _consolidate_memory(self, session, archive_all: bool = False) -> bool: - """Delegate to MemoryStore.consolidate(). Returns True on success.""" - return await MemoryStore(self.workspace).consolidate( - session, self.provider, self.model, - archive_all=archive_all, memory_window=self.memory_window, - ) - async def process_direct( self, content: str, diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py index 21fe77d..c8896c8 100644 --- a/nanobot/agent/memory.py +++ b/nanobot/agent/memory.py @@ -66,36 +66,25 @@ class MemoryStore: long_term = self.read_long_term() return f"## Long-term Memory\n{long_term}" if long_term else "" - async def consolidate( + async def consolidate_chunk( self, - session: Session, + messages: list[dict], provider: LLMProvider, model: str, - *, - archive_all: bool = False, - memory_window: int = 50, - ) -> bool: - """Consolidate old messages into MEMORY.md + HISTORY.md via LLM tool call. + ) -> tuple[bool, str | None]: + """Consolidate a chunk of messages into MEMORY.md + HISTORY.md via LLM tool call. - Returns True on success (including no-op), False on failure. + Returns (success, None). + + - success: True on success (including no-op), False on failure. + - The second return value is reserved for future use (e.g. RAG-style summaries) and is + always None in the current implementation. 
""" - if archive_all: - old_messages = session.messages - keep_count = 0 - logger.info("Memory consolidation (archive_all): {} messages", len(session.messages)) - else: - keep_count = memory_window // 2 - if len(session.messages) <= keep_count: - return True - if len(session.messages) - session.last_consolidated <= 0: - return True - old_messages = session.messages[session.last_consolidated:-keep_count] - if not old_messages: - return True - logger.info("Memory consolidation: {} to consolidate, {} keep", len(old_messages), keep_count) + if not messages: + return True, None lines = [] - for m in old_messages: + for m in messages: if not m.get("content"): continue tools = f" [tools: {', '.join(m['tools_used'])}]" if m.get("tools_used") else "" @@ -113,7 +102,19 @@ class MemoryStore: try: response = await provider.chat( messages=[ - {"role": "system", "content": "You are a memory consolidation agent. Call the save_memory tool with your consolidation of the conversation."}, + { + "role": "system", + "content": ( + "You are a memory consolidation agent.\n" + "Your job is to:\n" + "1) Append a concise but grep-friendly entry to HISTORY.md summarizing key events, decisions and topics.\n" + " - Write 1 paragraph of 2–5 sentences that starts with [YYYY-MM-DD HH:MM].\n" + " - Include concrete names, IDs and numbers so it is easy to search with grep.\n" + "2) Update long-term MEMORY.md with stable facts and user preferences as markdown, including all existing facts plus new ones.\n" + "3) Optionally return a short context_summary (1–3 sentences) that will replace the raw messages in future dialogue history.\n\n" + "Always call the save_memory tool with history_entry, memory_update and (optionally) context_summary." 
+ ), + }, {"role": "user", "content": prompt}, ], tools=_SAVE_MEMORY_TOOL, @@ -122,7 +123,7 @@ class MemoryStore: if not response.has_tool_calls: logger.warning("Memory consolidation: LLM did not call save_memory, skipping") - return False + return False, None args = response.tool_calls[0].arguments # Some providers return arguments as a JSON string instead of dict @@ -134,10 +135,10 @@ class MemoryStore: args = args[0] else: logger.warning("Memory consolidation: unexpected arguments as empty or non-dict list") - return False + return False, None if not isinstance(args, dict): logger.warning("Memory consolidation: unexpected arguments type {}", type(args).__name__) - return False + return False, None if entry := args.get("history_entry"): if not isinstance(entry, str): @@ -149,9 +150,8 @@ class MemoryStore: if update != current_memory: self.write_long_term(update) - session.last_consolidated = 0 if archive_all else len(session.messages) - keep_count - logger.info("Memory consolidation done: {} messages, last_consolidated={}", len(session.messages), session.last_consolidated) - return True + logger.info("Memory consolidation done for {} messages", len(messages)) + return True, None except Exception: logger.exception("Memory consolidation failed") - return False + return False, None diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index 803cb61..1ebde20 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -189,11 +189,22 @@ class SlackConfig(Base): class QQConfig(Base): - """QQ channel configuration using botpy SDK.""" + """QQ channel configuration. + + Supports two implementations: + 1. Official botpy SDK: requires app_id and secret + 2. 
OneBot protocol: requires api_url (and optionally ws_reverse_url, bot_qq, access_token) + """ enabled: bool = False + # Official botpy SDK fields app_id: str = "" # 机器人 ID (AppID) from q.qq.com secret: str = "" # 机器人密钥 (AppSecret) from q.qq.com + # OneBot protocol fields + api_url: str = "" # OneBot HTTP API URL (e.g. "http://localhost:5700") + ws_reverse_url: str = "" # OneBot WebSocket reverse URL (e.g. "ws://localhost:8080/ws/reverse") + bot_qq: int | None = None # Bot's QQ number (for filtering self messages) + access_token: str = "" # Optional access token for OneBot API allow_from: list[str] = Field( default_factory=list ) # Allowed user openids (empty = public access) @@ -226,10 +237,18 @@ class AgentDefaults(Base): provider: str = ( "auto" # Provider name (e.g. "anthropic", "openrouter") or "auto" for auto-detection ) - max_tokens: int = 8192 + # 原生上下文最大窗口(通常对应模型的 max_input_tokens / max_context_tokens) + # 默认按照主流大模型(如 GPT-4o、Claude 3.x 等)的 128k 上下文给一个宽松上限,实际应根据所选模型文档手动调整。 + max_tokens_input: int = 128_000 + # 默认单次回复的最大输出 token 上限(调用时可按需要再做截断或比例分配) + # 8192 足以覆盖大多数实际对话/工具使用场景,同样可按需手动调整。 + max_tokens_output: int = 8192 + # 会话历史压缩触发比例:当估算的输入 token 使用量 >= maxTokensInput * compressionStartRatio 时开始压缩。 + compression_start_ratio: float = 0.7 + # 会话历史压缩目标比例:每轮压缩后尽量把估算的输入 token 使用量压到 maxTokensInput * compressionTargetRatio 附近。 + compression_target_ratio: float = 0.4 temperature: float = 0.1 max_tool_iterations: int = 40 - memory_window: int = 100 reasoning_effort: str | None = None # low / medium / high — enables LLM thinking mode diff --git a/nanobot/session/manager.py b/nanobot/session/manager.py index f0a6484..1cb8a51 100644 --- a/nanobot/session/manager.py +++ b/nanobot/session/manager.py @@ -9,7 +9,6 @@ from typing import Any from loguru import logger -from nanobot.config.paths import get_legacy_sessions_dir from nanobot.utils.helpers import ensure_dir, safe_filename @@ -30,7 +29,6 @@ class Session: created_at: datetime = field(default_factory=datetime.now) 
updated_at: datetime = field(default_factory=datetime.now) metadata: dict[str, Any] = field(default_factory=dict) - last_consolidated: int = 0 # Number of messages already consolidated to files def add_message(self, role: str, content: str, **kwargs: Any) -> None: """Add a message to the session.""" @@ -44,9 +42,13 @@ class Session: self.updated_at = datetime.now() def get_history(self, max_messages: int = 500) -> list[dict[str, Any]]: - """Return unconsolidated messages for LLM input, aligned to a user turn.""" - unconsolidated = self.messages[self.last_consolidated:] - sliced = unconsolidated[-max_messages:] + """ + Return messages for LLM input, aligned to a user turn. + + - max_messages > 0 时只保留最近 max_messages 条; + - max_messages <= 0 时不做条数截断,返回全部消息。 + """ + sliced = self.messages if max_messages <= 0 else self.messages[-max_messages:] # Drop leading non-user messages to avoid orphaned tool_result blocks for i, m in enumerate(sliced): @@ -66,7 +68,7 @@ class Session: def clear(self) -> None: """Clear all messages and reset session to initial state.""" self.messages = [] - self.last_consolidated = 0 + self.metadata = {} self.updated_at = datetime.now() @@ -80,7 +82,7 @@ class SessionManager: def __init__(self, workspace: Path): self.workspace = workspace self.sessions_dir = ensure_dir(self.workspace / "sessions") - self.legacy_sessions_dir = get_legacy_sessions_dir() + self.legacy_sessions_dir = Path.home() / ".nanobot" / "sessions" self._cache: dict[str, Session] = {} def _get_session_path(self, key: str) -> Path: @@ -132,7 +134,6 @@ class SessionManager: messages = [] metadata = {} created_at = None - last_consolidated = 0 with open(path, encoding="utf-8") as f: for line in f: @@ -145,7 +146,6 @@ class SessionManager: if data.get("_type") == "metadata": metadata = data.get("metadata", {}) created_at = datetime.fromisoformat(data["created_at"]) if data.get("created_at") else None - last_consolidated = data.get("last_consolidated", 0) else: messages.append(data) 
@@ -154,7 +154,6 @@ class SessionManager: messages=messages, created_at=created_at or datetime.now(), metadata=metadata, - last_consolidated=last_consolidated ) except Exception as e: logger.warning("Failed to load session {}: {}", key, e) @@ -171,7 +170,6 @@ class SessionManager: "created_at": session.created_at.isoformat(), "updated_at": session.updated_at.isoformat(), "metadata": session.metadata, - "last_consolidated": session.last_consolidated } f.write(json.dumps(metadata_line, ensure_ascii=False) + "\n") for msg in session.messages: From 2dcb4de422ddec8c0f114dc6b0fdce06b9388b8f Mon Sep 17 00:00:00 2001 From: VITOHJL Date: Sun, 8 Mar 2026 15:04:38 +0800 Subject: [PATCH 06/29] fix(commands): update AgentLoop calls to use token-based compression parameters --- nanobot/cli/commands.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 2c8d6d3..cf29cc5 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -330,8 +330,10 @@ def gateway( temperature=config.agents.defaults.temperature, max_tokens=config.agents.defaults.max_tokens, max_iterations=config.agents.defaults.max_tool_iterations, - memory_window=config.agents.defaults.memory_window, reasoning_effort=config.agents.defaults.reasoning_effort, + max_tokens_input=config.agents.defaults.max_tokens_input, + compression_start_ratio=config.agents.defaults.compression_start_ratio, + compression_target_ratio=config.agents.defaults.compression_target_ratio, brave_api_key=config.tools.web.search.api_key or None, web_proxy=config.tools.web.proxy or None, exec_config=config.tools.exec, @@ -515,8 +517,10 @@ def agent( temperature=config.agents.defaults.temperature, max_tokens=config.agents.defaults.max_tokens, max_iterations=config.agents.defaults.max_tool_iterations, - memory_window=config.agents.defaults.memory_window, reasoning_effort=config.agents.defaults.reasoning_effort, + 
max_tokens_input=config.agents.defaults.max_tokens_input, + compression_start_ratio=config.agents.defaults.compression_start_ratio, + compression_target_ratio=config.agents.defaults.compression_target_ratio, brave_api_key=config.tools.web.search.api_key or None, web_proxy=config.tools.web.proxy or None, exec_config=config.tools.exec, From 2706d3c317be7325795e9dac74d07512e57112f4 Mon Sep 17 00:00:00 2001 From: VITOHJL Date: Sun, 8 Mar 2026 15:20:34 +0800 Subject: [PATCH 07/29] fix(commands): use max_tokens_output instead of max_tokens from AgentDefaults --- nanobot/cli/commands.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index cf29cc5..18c9d56 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -328,7 +328,7 @@ def gateway( workspace=config.workspace_path, model=config.agents.defaults.model, temperature=config.agents.defaults.temperature, - max_tokens=config.agents.defaults.max_tokens, + max_tokens=config.agents.defaults.max_tokens_output, max_iterations=config.agents.defaults.max_tool_iterations, reasoning_effort=config.agents.defaults.reasoning_effort, max_tokens_input=config.agents.defaults.max_tokens_input, @@ -515,7 +515,7 @@ def agent( workspace=config.workspace_path, model=config.agents.defaults.model, temperature=config.agents.defaults.temperature, - max_tokens=config.agents.defaults.max_tokens, + max_tokens=config.agents.defaults.max_tokens_output, max_iterations=config.agents.defaults.max_tool_iterations, reasoning_effort=config.agents.defaults.reasoning_effort, max_tokens_input=config.agents.defaults.max_tokens_input, From a984e0df3752f6a8883a0e9b6d8efee4abd7f9dd Mon Sep 17 00:00:00 2001 From: VITOHJL Date: Sun, 8 Mar 2026 15:23:55 +0800 Subject: [PATCH 08/29] feat(loop): add history message count logging in compression --- nanobot/agent/loop.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py 
index 696e2a7..5d316ea 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -362,6 +362,7 @@ class AgentLoop: if len(chunk) < 2: return + before_msg_count = len(session.messages) logger.info( "Compression chunk {}: msgs {}-{} (count={}, est~{}, need~{})", session.key, @@ -383,12 +384,13 @@ class AgentLoop: self._set_compressed_until(session, end_idx) self.sessions.save(session) + after_msg_count = len(session.messages) after_tokens, after_source = self._estimate_session_prompt_tokens(session) after_ratio = after_tokens / budget if budget else 0.0 reduced = max(0, current_tokens - after_tokens) reduced_ratio = (reduced / current_tokens) if current_tokens > 0 else 0.0 logger.info( - "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%})", + "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%}), history: {} -> {}", session.key, after_tokens, budget, @@ -396,6 +398,8 @@ class AgentLoop: after_source, reduced, reduced_ratio, + before_msg_count, + after_msg_count, ) def _schedule_background_compression(self, session_key: str) -> None: From 1b16d48390b3fded3438f4fdbc3f0ae0a0379878 Mon Sep 17 00:00:00 2001 From: VITOHJL Date: Sun, 8 Mar 2026 15:26:49 +0800 Subject: [PATCH 09/29] fix(loop): update _cumulative_tokens in _save_turn and preserve it in compression methods --- nanobot/agent/loop.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index 5d316ea..5e01b79 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -211,14 +211,14 @@ class AgentLoop: session.metadata["_compressed_until"] = compressed_until # 兼容旧版本:一旦迁移出连续边界,就可以清理旧字段 session.metadata.pop("_compressed_ranges", None) - session.metadata.pop("_cumulative_tokens", None) + # 注意:不要删除 _cumulative_tokens,压缩逻辑需要它来跟踪累积 token 计数 return compressed_until def _set_compressed_until(self, session: Session, idx: int) -> None: """Persist a contiguous compressed boundary.""" 
session.metadata["_compressed_until"] = max(0, min(int(idx), len(session.messages))) session.metadata.pop("_compressed_ranges", None) - session.metadata.pop("_cumulative_tokens", None) + # 注意:不要删除 _cumulative_tokens,压缩逻辑需要它来跟踪累积 token 计数 @staticmethod def _estimate_message_tokens(message: dict[str, Any]) -> int: @@ -362,7 +362,6 @@ class AgentLoop: if len(chunk) < 2: return - before_msg_count = len(session.messages) logger.info( "Compression chunk {}: msgs {}-{} (count={}, est~{}, need~{})", session.key, @@ -384,13 +383,12 @@ class AgentLoop: self._set_compressed_until(session, end_idx) self.sessions.save(session) - after_msg_count = len(session.messages) after_tokens, after_source = self._estimate_session_prompt_tokens(session) after_ratio = after_tokens / budget if budget else 0.0 reduced = max(0, current_tokens - after_tokens) reduced_ratio = (reduced / current_tokens) if current_tokens > 0 else 0.0 logger.info( - "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%}), history: {} -> {}", + "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%})", session.key, after_tokens, budget, @@ -398,8 +396,6 @@ class AgentLoop: after_source, reduced, reduced_ratio, - before_msg_count, - after_msg_count, ) def _schedule_background_compression(self, session_key: str) -> None: @@ -855,14 +851,14 @@ class AgentLoop: channel=msg.channel, chat_id=msg.chat_id, content=content, metadata=meta, )) - final_content, _, all_msgs, _, _ = await self._run_agent_loop( + final_content, _, all_msgs, total_tokens_this_turn, token_source = await self._run_agent_loop( initial_messages, on_progress=on_progress or _bus_progress, ) if final_content is None: final_content = "I've completed processing but have no response to give." 
- self._save_turn(session, all_msgs, 1 + len(history)) + self._save_turn(session, all_msgs, 1 + len(history), total_tokens_this_turn) self.sessions.save(session) self._schedule_background_compression(session.key) @@ -876,7 +872,7 @@ class AgentLoop: metadata=msg.metadata or {}, ) - def _save_turn(self, session: Session, messages: list[dict], skip: int) -> None: + def _save_turn(self, session: Session, messages: list[dict], skip: int, total_tokens_this_turn: int = 0) -> None: """Save new-turn messages into session, truncating large tool results.""" from datetime import datetime for m in messages[skip:]: @@ -910,6 +906,14 @@ class AgentLoop: entry.setdefault("timestamp", datetime.now().isoformat()) session.messages.append(entry) session.updated_at = datetime.now() + + # Update cumulative token count for compression tracking + if total_tokens_this_turn > 0: + current_cumulative = session.metadata.get("_cumulative_tokens", 0) + if isinstance(current_cumulative, (int, float)): + session.metadata["_cumulative_tokens"] = int(current_cumulative) + total_tokens_this_turn + else: + session.metadata["_cumulative_tokens"] = total_tokens_this_turn async def process_direct( self, From 274edc5451c1d0f79eda80c76127f497ec6923e9 Mon Sep 17 00:00:00 2001 From: VITOHJL Date: Sun, 8 Mar 2026 17:25:59 +0800 Subject: [PATCH 10/29] fix(compression): prefer provider prompt token usage --- nanobot/agent/loop.py | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index 5e01b79..4f6a051 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -124,6 +124,8 @@ class AgentLoop: self._mcp_connecting = False self._active_tasks: dict[str, list[asyncio.Task]] = {} # session_key -> tasks self._compression_tasks: dict[str, asyncio.Task] = {} # session_key -> task + self._last_turn_prompt_tokens: int = 0 + self._last_turn_prompt_source: str = "none" self._processing_lock = asyncio.Lock() 
self._register_default_tools() @@ -324,7 +326,15 @@ class AgentLoop: if target_threshold >= start_threshold: target_threshold = max(0, start_threshold - 1) - current_tokens, token_source = self._estimate_session_prompt_tokens(session) + # Prefer provider usage prompt tokens from the turn-ending call. + # If unavailable, fall back to estimator chain. + raw_prompt_tokens = session.metadata.get("_last_prompt_tokens") + if isinstance(raw_prompt_tokens, (int, float)) and raw_prompt_tokens > 0: + current_tokens = int(raw_prompt_tokens) + token_source = str(session.metadata.get("_last_prompt_source") or "usage_prompt") + else: + current_tokens, token_source = self._estimate_session_prompt_tokens(session) + current_ratio = current_tokens / budget if budget else 0.0 if current_tokens <= 0: logger.debug("Compression skip {}: token estimate unavailable", session.key) @@ -569,6 +579,8 @@ class AgentLoop: tools_used: list[str] = [] total_tokens_this_turn = 0 token_source = "none" + self._last_turn_prompt_tokens = 0 + self._last_turn_prompt_source = "none" while iteration < self.max_iterations: iteration += 1 @@ -594,19 +606,35 @@ class AgentLoop: if isinstance(t_tokens, (int, float)) and t_tokens > 0: total_tokens_this_turn = int(t_tokens) token_source = "provider_total" + if isinstance(p_tokens, (int, float)) and p_tokens > 0: + self._last_turn_prompt_tokens = int(p_tokens) + self._last_turn_prompt_source = "usage_prompt" + elif isinstance(c_tokens, (int, float)): + prompt_derived = int(t_tokens) - int(c_tokens) + if prompt_derived > 0: + self._last_turn_prompt_tokens = prompt_derived + self._last_turn_prompt_source = "usage_total_minus_completion" elif isinstance(p_tokens, (int, float)) and isinstance(c_tokens, (int, float)): # If we have both prompt and completion tokens, sum them total_tokens_this_turn = int(p_tokens) + int(c_tokens) token_source = "provider_sum" + if p_tokens > 0: + self._last_turn_prompt_tokens = int(p_tokens) + self._last_turn_prompt_source = 
"usage_prompt" elif isinstance(p_tokens, (int, float)) and p_tokens > 0: # Fallback: use prompt tokens only (completion might be 0 for tool calls) total_tokens_this_turn = int(p_tokens) token_source = "provider_prompt" + self._last_turn_prompt_tokens = int(p_tokens) + self._last_turn_prompt_source = "usage_prompt" else: # Estimate with unified chain (provider counter -> tiktoken), plus completion tiktoken. estimated_prompt, prompt_source = self._estimate_prompt_tokens_chain(messages, tool_defs) estimated_completion = self._estimate_completion_tokens(response.content or "") total_tokens_this_turn = estimated_prompt + estimated_completion + if estimated_prompt > 0: + self._last_turn_prompt_tokens = int(estimated_prompt) + self._last_turn_prompt_source = str(prompt_source or "tiktoken") if total_tokens_this_turn > 0: token_source = ( "tiktoken" @@ -779,6 +807,12 @@ class AgentLoop: current_message=msg.content, channel=channel, chat_id=chat_id, ) final_content, _, all_msgs, _, _ = await self._run_agent_loop(messages) + if self._last_turn_prompt_tokens > 0: + session.metadata["_last_prompt_tokens"] = self._last_turn_prompt_tokens + session.metadata["_last_prompt_source"] = self._last_turn_prompt_source + else: + session.metadata.pop("_last_prompt_tokens", None) + session.metadata.pop("_last_prompt_source", None) self._save_turn(session, all_msgs, 1 + len(history)) self.sessions.save(session) self._schedule_background_compression(session.key) @@ -858,6 +892,13 @@ class AgentLoop: if final_content is None: final_content = "I've completed processing but have no response to give." 
+ if self._last_turn_prompt_tokens > 0: + session.metadata["_last_prompt_tokens"] = self._last_turn_prompt_tokens + session.metadata["_last_prompt_source"] = self._last_turn_prompt_source + else: + session.metadata.pop("_last_prompt_tokens", None) + session.metadata.pop("_last_prompt_source", None) + self._save_turn(session, all_msgs, 1 + len(history), total_tokens_this_turn) self.sessions.save(session) self._schedule_background_compression(session.key) From 4147d0ff9d12f9faaa3aefe5be449b18461588d1 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Sun, 8 Mar 2026 17:00:09 +0000 Subject: [PATCH 11/29] docs: update v0.1.4.post4 release news --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d3401ea..2450b8c 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ ## 📢 News +- **2026-03-08** 🚀 Released **v0.1.4.post4** — a reliability-packed release with safer defaults, better multi-instance support, sturdier MCP/tooling, and major channel and provider improvements. Please see [release notes](https://github.com/HKUDS/nanobot/releases/tag/v0.1.4.post4) for details. - **2026-03-07** 🚀 Azure OpenAI provider, WhatsApp media, QQ group chats, and more Telegram/Feishu polish. - **2026-03-06** 🪄 Lighter providers, smarter media handling, and sturdier memory and CLI compatibility. - **2026-03-05** ⚡️ Telegram draft streaming, MCP SSE support, and broader channel reliability fixes. From f19cefb1b9b61dcf902afb5666aea80b1c362e46 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Sun, 8 Mar 2026 17:00:46 +0000 Subject: [PATCH 12/29] docs: update v0.1.4.post4 release news --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2450b8c..f169bd7 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ ## 📢 News -- **2026-03-08** 🚀 Released **v0.1.4.post4** — a reliability-packed release with safer defaults, better multi-instance support, sturdier MCP/tooling, and major channel and provider improvements. 
Please see [release notes](https://github.com/HKUDS/nanobot/releases/tag/v0.1.4.post4) for details. +- **2026-03-08** 🚀 Released **v0.1.4.post4** — a reliability-packed release with safer defaults, better multi-instance support, sturdier MCP, and major channel and provider improvements. Please see [release notes](https://github.com/HKUDS/nanobot/releases/tag/v0.1.4.post4) for details. - **2026-03-07** 🚀 Azure OpenAI provider, WhatsApp media, QQ group chats, and more Telegram/Feishu polish. - **2026-03-06** 🪄 Lighter providers, smarter media handling, and sturdier memory and CLI compatibility. - **2026-03-05** ⚡️ Telegram draft streaming, MCP SSE support, and broader channel reliability fixes. From 4044b85d4bfa9104b633f3cb408894f0459a0164 Mon Sep 17 00:00:00 2001 From: chengyongru <2755839590@qq.com> Date: Mon, 9 Mar 2026 01:32:10 +0800 Subject: [PATCH 13/29] fix: ensure feishu audio file has .opus extension for Groq Whisper compatibility --- nanobot/channels/feishu.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nanobot/channels/feishu.py b/nanobot/channels/feishu.py index a637025..0409c32 100644 --- a/nanobot/channels/feishu.py +++ b/nanobot/channels/feishu.py @@ -753,8 +753,9 @@ class FeishuChannel(BaseChannel): None, self._download_file_sync, message_id, file_key, msg_type ) if not filename: - ext = {"audio": ".opus", "media": ".mp4"}.get(msg_type, "") - filename = f"{file_key[:16]}{ext}" + filename = file_key[:16] + if msg_type == "audio" and not filename.endswith(".opus"): + filename = f"{filename}.opus" if data and filename: file_path = media_dir / filename From 620d7896c710748053257695d25c3391aa637dc5 Mon Sep 17 00:00:00 2001 From: ailuntz Date: Tue, 10 Mar 2026 00:14:34 +0800 Subject: [PATCH 14/29] fix(slack): define thread usage when sending messages --- nanobot/channels/slack.py | 2 +- tests/test_slack_channel.py | 88 +++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 
tests/test_slack_channel.py diff --git a/nanobot/channels/slack.py b/nanobot/channels/slack.py index a4e7324..e36c4c9 100644 --- a/nanobot/channels/slack.py +++ b/nanobot/channels/slack.py @@ -82,6 +82,7 @@ class SlackChannel(BaseChannel): thread_ts = slack_meta.get("thread_ts") channel_type = slack_meta.get("channel_type") # Only reply in thread for channel/group messages; DMs don't use threads + use_thread = bool(thread_ts and channel_type != "im") thread_ts_param = thread_ts if use_thread else None # Slack rejects empty text payloads. Keep media-only messages media-only, @@ -278,4 +279,3 @@ class SlackChannel(BaseChannel): if parts: rows.append(" · ".join(parts)) return "\n".join(rows) - diff --git a/tests/test_slack_channel.py b/tests/test_slack_channel.py new file mode 100644 index 0000000..18b96ef --- /dev/null +++ b/tests/test_slack_channel.py @@ -0,0 +1,88 @@ +from __future__ import annotations + +import pytest + +from nanobot.bus.events import OutboundMessage +from nanobot.bus.queue import MessageBus +from nanobot.channels.slack import SlackChannel +from nanobot.config.schema import SlackConfig + + +class _FakeAsyncWebClient: + def __init__(self) -> None: + self.chat_post_calls: list[dict[str, object | None]] = [] + self.file_upload_calls: list[dict[str, object | None]] = [] + + async def chat_postMessage( + self, + *, + channel: str, + text: str, + thread_ts: str | None = None, + ) -> None: + self.chat_post_calls.append( + { + "channel": channel, + "text": text, + "thread_ts": thread_ts, + } + ) + + async def files_upload_v2( + self, + *, + channel: str, + file: str, + thread_ts: str | None = None, + ) -> None: + self.file_upload_calls.append( + { + "channel": channel, + "file": file, + "thread_ts": thread_ts, + } + ) + + +@pytest.mark.asyncio +async def test_send_uses_thread_for_channel_messages() -> None: + channel = SlackChannel(SlackConfig(enabled=True), MessageBus()) + fake_web = _FakeAsyncWebClient() + channel._web_client = fake_web + + await 
channel.send( + OutboundMessage( + channel="slack", + chat_id="C123", + content="hello", + media=["/tmp/demo.txt"], + metadata={"slack": {"thread_ts": "1700000000.000100", "channel_type": "channel"}}, + ) + ) + + assert len(fake_web.chat_post_calls) == 1 + assert fake_web.chat_post_calls[0]["thread_ts"] == "1700000000.000100" + assert len(fake_web.file_upload_calls) == 1 + assert fake_web.file_upload_calls[0]["thread_ts"] == "1700000000.000100" + + +@pytest.mark.asyncio +async def test_send_omits_thread_for_dm_messages() -> None: + channel = SlackChannel(SlackConfig(enabled=True), MessageBus()) + fake_web = _FakeAsyncWebClient() + channel._web_client = fake_web + + await channel.send( + OutboundMessage( + channel="slack", + chat_id="D123", + content="hello", + media=["/tmp/demo.txt"], + metadata={"slack": {"thread_ts": "1700000000.000100", "channel_type": "im"}}, + ) + ) + + assert len(fake_web.chat_post_calls) == 1 + assert fake_web.chat_post_calls[0]["thread_ts"] is None + assert len(fake_web.file_upload_calls) == 1 + assert fake_web.file_upload_calls[0]["thread_ts"] is None From 9c88e40a616190aca65ce3d3149f4529865ca5d8 Mon Sep 17 00:00:00 2001 From: ailuntz Date: Tue, 10 Mar 2026 00:32:42 +0800 Subject: [PATCH 15/29] fix(cli): respect gateway port from config when --port omitted --- nanobot/cli/commands.py | 5 +++-- tests/test_commands.py | 44 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 2c8d6d3..a5906d2 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -290,7 +290,7 @@ def _load_runtime_config(config: str | None = None, workspace: str | None = None @app.command() def gateway( - port: int = typer.Option(18790, "--port", "-p", help="Gateway port"), + port: int | None = typer.Option(None, "--port", "-p", help="Gateway port"), workspace: str | None = typer.Option(None, "--workspace", "-w", help="Workspace directory"), verbose: bool 
= typer.Option(False, "--verbose", "-v", help="Verbose output"), config: str | None = typer.Option(None, "--config", "-c", help="Path to config file"), @@ -310,8 +310,9 @@ def gateway( logging.basicConfig(level=logging.DEBUG) config = _load_runtime_config(config, workspace) + selected_port = port if port is not None else config.gateway.port - console.print(f"{__logo__} Starting nanobot gateway on port {port}...") + console.print(f"{__logo__} Starting nanobot gateway on port {selected_port}...") sync_workspace_templates(config.workspace_path) bus = MessageBus() provider = _make_provider(config) diff --git a/tests/test_commands.py b/tests/test_commands.py index 19c1998..9479dad 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -328,6 +328,50 @@ def test_gateway_workspace_option_overrides_config(monkeypatch, tmp_path: Path) assert config.workspace_path == override +def test_gateway_uses_port_from_config_when_cli_port_is_omitted(monkeypatch, tmp_path: Path) -> None: + config_file = tmp_path / "instance" / "config.json" + config_file.parent.mkdir(parents=True) + config_file.write_text("{}") + + config = Config() + config.gateway.port = 18791 + + monkeypatch.setattr("nanobot.config.loader.set_config_path", lambda _path: None) + monkeypatch.setattr("nanobot.config.loader.load_config", lambda _path=None: config) + monkeypatch.setattr("nanobot.cli.commands.sync_workspace_templates", lambda _path: None) + monkeypatch.setattr( + "nanobot.cli.commands._make_provider", + lambda _config: (_ for _ in ()).throw(_StopGateway("stop")), + ) + + result = runner.invoke(app, ["gateway", "--config", str(config_file)]) + + assert isinstance(result.exception, _StopGateway) + assert "Starting nanobot gateway on port 18791" in result.stdout + + +def test_gateway_cli_port_overrides_config_port(monkeypatch, tmp_path: Path) -> None: + config_file = tmp_path / "instance" / "config.json" + config_file.parent.mkdir(parents=True) + config_file.write_text("{}") + + config = 
Config() + config.gateway.port = 18791 + + monkeypatch.setattr("nanobot.config.loader.set_config_path", lambda _path: None) + monkeypatch.setattr("nanobot.config.loader.load_config", lambda _path=None: config) + monkeypatch.setattr("nanobot.cli.commands.sync_workspace_templates", lambda _path: None) + monkeypatch.setattr( + "nanobot.cli.commands._make_provider", + lambda _config: (_ for _ in ()).throw(_StopGateway("stop")), + ) + + result = runner.invoke(app, ["gateway", "--config", str(config_file), "--port", "18801"]) + + assert isinstance(result.exception, _StopGateway) + assert "Starting nanobot gateway on port 18801" in result.stdout + + def test_gateway_uses_config_directory_for_cron_store(monkeypatch, tmp_path: Path) -> None: config_file = tmp_path / "instance" / "config.json" config_file.parent.mkdir(parents=True) From 28330940d0b2cefbfe740957ee8f51ed9349c24e Mon Sep 17 00:00:00 2001 From: Re-bin Date: Mon, 9 Mar 2026 17:18:10 +0000 Subject: [PATCH 16/29] fix(slack): skip thread_ts for direct messages --- nanobot/channels/slack.py | 5 ++--- tests/test_slack_channel.py | 2 ++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/nanobot/channels/slack.py b/nanobot/channels/slack.py index e36c4c9..0384d8d 100644 --- a/nanobot/channels/slack.py +++ b/nanobot/channels/slack.py @@ -81,9 +81,8 @@ class SlackChannel(BaseChannel): slack_meta = msg.metadata.get("slack", {}) if msg.metadata else {} thread_ts = slack_meta.get("thread_ts") channel_type = slack_meta.get("channel_type") - # Only reply in thread for channel/group messages; DMs don't use threads - use_thread = bool(thread_ts and channel_type != "im") - thread_ts_param = thread_ts if use_thread else None + # Slack DMs don't use threads; channel/group replies may keep thread_ts. + thread_ts_param = thread_ts if thread_ts and channel_type != "im" else None # Slack rejects empty text payloads. 
Keep media-only messages media-only, # but send a single blank message when the bot has no text or files to send. diff --git a/tests/test_slack_channel.py b/tests/test_slack_channel.py index 18b96ef..891f86a 100644 --- a/tests/test_slack_channel.py +++ b/tests/test_slack_channel.py @@ -61,6 +61,7 @@ async def test_send_uses_thread_for_channel_messages() -> None: ) assert len(fake_web.chat_post_calls) == 1 + assert fake_web.chat_post_calls[0]["text"] == "hello\n" assert fake_web.chat_post_calls[0]["thread_ts"] == "1700000000.000100" assert len(fake_web.file_upload_calls) == 1 assert fake_web.file_upload_calls[0]["thread_ts"] == "1700000000.000100" @@ -83,6 +84,7 @@ async def test_send_omits_thread_for_dm_messages() -> None: ) assert len(fake_web.chat_post_calls) == 1 + assert fake_web.chat_post_calls[0]["text"] == "hello\n" assert fake_web.chat_post_calls[0]["thread_ts"] is None assert len(fake_web.file_upload_calls) == 1 assert fake_web.file_upload_calls[0]["thread_ts"] is None From 1284c7217ea2c59a5a9e2786c5f550e9fb5ace1b Mon Sep 17 00:00:00 2001 From: Protocol Zero <257158451+Protocol-zero-0@users.noreply.github.com> Date: Mon, 9 Mar 2026 20:12:11 +0000 Subject: [PATCH 17/29] fix(cli): let gateway use config port by default Respect config.gateway.port when --port is omitted, while keeping CLI flags as the highest-precedence override. 
--- nanobot/cli/commands.py | 3 ++- tests/test_commands.py | 44 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 2c8d6d3..37f08b2 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -290,7 +290,7 @@ def _load_runtime_config(config: str | None = None, workspace: str | None = None @app.command() def gateway( - port: int = typer.Option(18790, "--port", "-p", help="Gateway port"), + port: int | None = typer.Option(None, "--port", "-p", help="Gateway port"), workspace: str | None = typer.Option(None, "--workspace", "-w", help="Workspace directory"), verbose: bool = typer.Option(False, "--verbose", "-v", help="Verbose output"), config: str | None = typer.Option(None, "--config", "-c", help="Path to config file"), @@ -310,6 +310,7 @@ def gateway( logging.basicConfig(level=logging.DEBUG) config = _load_runtime_config(config, workspace) + port = port if port is not None else config.gateway.port console.print(f"{__logo__} Starting nanobot gateway on port {port}...") sync_workspace_templates(config.workspace_path) diff --git a/tests/test_commands.py b/tests/test_commands.py index 19c1998..5d38942 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -356,3 +356,47 @@ def test_gateway_uses_config_directory_for_cron_store(monkeypatch, tmp_path: Pat assert isinstance(result.exception, _StopGateway) assert seen["cron_store"] == config_file.parent / "cron" / "jobs.json" + + +def test_gateway_uses_configured_port_when_cli_flag_is_missing(monkeypatch, tmp_path: Path) -> None: + config_file = tmp_path / "instance" / "config.json" + config_file.parent.mkdir(parents=True) + config_file.write_text("{}") + + config = Config() + config.gateway.port = 18791 + + monkeypatch.setattr("nanobot.config.loader.set_config_path", lambda _path: None) + monkeypatch.setattr("nanobot.config.loader.load_config", lambda _path=None: config) + 
monkeypatch.setattr("nanobot.cli.commands.sync_workspace_templates", lambda _path: None) + monkeypatch.setattr( + "nanobot.cli.commands._make_provider", + lambda _config: (_ for _ in ()).throw(_StopGateway("stop")), + ) + + result = runner.invoke(app, ["gateway", "--config", str(config_file)]) + + assert isinstance(result.exception, _StopGateway) + assert "port 18791" in result.stdout + + +def test_gateway_cli_port_overrides_configured_port(monkeypatch, tmp_path: Path) -> None: + config_file = tmp_path / "instance" / "config.json" + config_file.parent.mkdir(parents=True) + config_file.write_text("{}") + + config = Config() + config.gateway.port = 18791 + + monkeypatch.setattr("nanobot.config.loader.set_config_path", lambda _path: None) + monkeypatch.setattr("nanobot.config.loader.load_config", lambda _path=None: config) + monkeypatch.setattr("nanobot.cli.commands.sync_workspace_templates", lambda _path: None) + monkeypatch.setattr( + "nanobot.cli.commands._make_provider", + lambda _config: (_ for _ in ()).throw(_StopGateway("stop")), + ) + + result = runner.invoke(app, ["gateway", "--config", str(config_file), "--port", "18792"]) + + assert isinstance(result.exception, _StopGateway) + assert "port 18792" in result.stdout From 4f9857f85f1f8aeddceb019bc0062d3ba7cab032 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Tue, 10 Mar 2026 04:34:15 +0000 Subject: [PATCH 18/29] feat(telegram): add configurable group mention policy --- nanobot/channels/telegram.py | 86 ++++++++++++++---- nanobot/config/schema.py | 2 +- tests/test_telegram_channel.py | 156 ++++++++++++++++++++++++++++++++- 3 files changed, 226 insertions(+), 18 deletions(-) diff --git a/nanobot/channels/telegram.py b/nanobot/channels/telegram.py index 0821b7d..5b294cc 100644 --- a/nanobot/channels/telegram.py +++ b/nanobot/channels/telegram.py @@ -179,6 +179,8 @@ class TelegramChannel(BaseChannel): self._media_group_buffers: dict[str, dict] = {} self._media_group_tasks: dict[str, asyncio.Task] = {} 
self._message_threads: dict[tuple[str, int], int] = {} + self._bot_user_id: int | None = None + self._bot_username: str | None = None def is_allowed(self, sender_id: str) -> bool: """Preserve Telegram's legacy id|username allowlist matching.""" @@ -242,6 +244,8 @@ class TelegramChannel(BaseChannel): # Get bot info and register command menu bot_info = await self._app.bot.get_me() + self._bot_user_id = getattr(bot_info, "id", None) + self._bot_username = getattr(bot_info, "username", None) logger.info("Telegram bot @{} connected", bot_info.username) try: @@ -462,6 +466,70 @@ class TelegramChannel(BaseChannel): "is_forum": bool(getattr(message.chat, "is_forum", False)), } + async def _ensure_bot_identity(self) -> tuple[int | None, str | None]: + """Load bot identity once and reuse it for mention/reply checks.""" + if self._bot_user_id is not None or self._bot_username is not None: + return self._bot_user_id, self._bot_username + if not self._app: + return None, None + bot_info = await self._app.bot.get_me() + self._bot_user_id = getattr(bot_info, "id", None) + self._bot_username = getattr(bot_info, "username", None) + return self._bot_user_id, self._bot_username + + @staticmethod + def _has_mention_entity( + text: str, + entities, + bot_username: str, + bot_id: int | None, + ) -> bool: + """Check Telegram mention entities against the bot username.""" + handle = f"@{bot_username}".lower() + for entity in entities or []: + entity_type = getattr(entity, "type", None) + if entity_type == "text_mention": + user = getattr(entity, "user", None) + if user is not None and bot_id is not None and getattr(user, "id", None) == bot_id: + return True + continue + if entity_type != "mention": + continue + offset = getattr(entity, "offset", None) + length = getattr(entity, "length", None) + if offset is None or length is None: + continue + if text[offset : offset + length].lower() == handle: + return True + return handle in text.lower() + + async def _is_group_message_for_bot(self, 
message) -> bool: + """Allow group messages when policy is open, @mentioned, or replying to the bot.""" + if message.chat.type == "private" or self.config.group_policy == "open": + return True + + bot_id, bot_username = await self._ensure_bot_identity() + if bot_username: + text = message.text or "" + caption = message.caption or "" + if self._has_mention_entity( + text, + getattr(message, "entities", None), + bot_username, + bot_id, + ): + return True + if self._has_mention_entity( + caption, + getattr(message, "caption_entities", None), + bot_username, + bot_id, + ): + return True + + reply_user = getattr(getattr(message, "reply_to_message", None), "from_user", None) + return bool(bot_id and reply_user and reply_user.id == bot_id) + def _remember_thread_context(self, message) -> None: """Cache topic thread id by chat/message id for follow-up replies.""" message_thread_id = getattr(message, "message_thread_id", None) @@ -501,22 +569,8 @@ class TelegramChannel(BaseChannel): # Store chat_id for replies self._chat_ids[sender_id] = chat_id - # Enforce group_policy: in group chats with "mention" policy, - # only respond when the bot is @mentioned or the message is a reply to the bot. 
- is_group = message.chat.type != "private" - if is_group and getattr(self.config, "group_policy", "open") == "mention": - bot_username = (await self._app.bot.get_me()).username if self._app else None - mentioned = False - # Check if bot is @mentioned in text - if bot_username and message.text: - mentioned = f"@{bot_username}" in message.text - # Check if the message is a reply to the bot - if not mentioned and message.reply_to_message and message.reply_to_message.from_user: - bot_id = (await self._app.bot.get_me()).id if self._app else None - if bot_id and message.reply_to_message.from_user.id == bot_id: - mentioned = True - if not mentioned: - return + if not await self._is_group_message_for_bot(message): + return # Build content from text and/or media content_parts = [] diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index 3c5e315..8cfcad6 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -33,7 +33,7 @@ class TelegramConfig(Base): None # HTTP/SOCKS5 proxy URL, e.g. 
"http://127.0.0.1:7890" or "socks5://127.0.0.1:1080" ) reply_to_message: bool = False # If true, bot replies quote the original message - group_policy: Literal["open", "mention"] = "open" # "open" responds to all, "mention" only when @mentioned or replied to + group_policy: Literal["open", "mention"] = "mention" # "mention" responds when @mentioned or replied to, "open" responds to all class FeishuConfig(Base): diff --git a/tests/test_telegram_channel.py b/tests/test_telegram_channel.py index 88c3f54..678512d 100644 --- a/tests/test_telegram_channel.py +++ b/tests/test_telegram_channel.py @@ -27,9 +27,11 @@ class _FakeUpdater: class _FakeBot: def __init__(self) -> None: self.sent_messages: list[dict] = [] + self.get_me_calls = 0 async def get_me(self): - return SimpleNamespace(username="nanobot_test") + self.get_me_calls += 1 + return SimpleNamespace(id=999, username="nanobot_test") async def set_my_commands(self, commands) -> None: self.commands = commands @@ -37,6 +39,9 @@ class _FakeBot: async def send_message(self, **kwargs) -> None: self.sent_messages.append(kwargs) + async def send_chat_action(self, **kwargs) -> None: + pass + class _FakeApp: def __init__(self, on_start_polling) -> None: @@ -87,6 +92,35 @@ class _FakeBuilder: return self.app +def _make_telegram_update( + *, + chat_type: str = "group", + text: str | None = None, + caption: str | None = None, + entities=None, + caption_entities=None, + reply_to_message=None, +): + user = SimpleNamespace(id=12345, username="alice", first_name="Alice") + message = SimpleNamespace( + chat=SimpleNamespace(type=chat_type, is_forum=False), + chat_id=-100123, + text=text, + caption=caption, + entities=entities or [], + caption_entities=caption_entities or [], + reply_to_message=reply_to_message, + photo=None, + voice=None, + audio=None, + document=None, + media_group_id=None, + message_thread_id=None, + message_id=1, + ) + return SimpleNamespace(message=message, effective_user=user) + + @pytest.mark.asyncio async def 
test_start_uses_request_proxy_without_builder_proxy(monkeypatch) -> None: config = TelegramConfig( @@ -131,6 +165,10 @@ def test_get_extension_falls_back_to_original_filename() -> None: assert channel._get_extension("file", None, "archive.tar.gz") == ".tar.gz" +def test_telegram_group_policy_defaults_to_mention() -> None: + assert TelegramConfig().group_policy == "mention" + + def test_is_allowed_accepts_legacy_telegram_id_username_formats() -> None: channel = TelegramChannel(TelegramConfig(allow_from=["12345", "alice", "67890|bob"]), MessageBus()) @@ -182,3 +220,119 @@ async def test_send_reply_infers_topic_from_message_id_cache() -> None: assert channel._app.bot.sent_messages[0]["message_thread_id"] == 42 assert channel._app.bot.sent_messages[0]["reply_parameters"].message_id == 10 + + +@pytest.mark.asyncio +async def test_group_policy_mention_ignores_unmentioned_group_message() -> None: + channel = TelegramChannel( + TelegramConfig(enabled=True, token="123:abc", allow_from=["*"], group_policy="mention"), + MessageBus(), + ) + channel._app = _FakeApp(lambda: None) + + handled = [] + + async def capture_handle(**kwargs) -> None: + handled.append(kwargs) + + channel._handle_message = capture_handle + channel._start_typing = lambda _chat_id: None + + await channel._on_message(_make_telegram_update(text="hello everyone"), None) + + assert handled == [] + assert channel._app.bot.get_me_calls == 1 + + +@pytest.mark.asyncio +async def test_group_policy_mention_accepts_text_mention_and_caches_bot_identity() -> None: + channel = TelegramChannel( + TelegramConfig(enabled=True, token="123:abc", allow_from=["*"], group_policy="mention"), + MessageBus(), + ) + channel._app = _FakeApp(lambda: None) + + handled = [] + + async def capture_handle(**kwargs) -> None: + handled.append(kwargs) + + channel._handle_message = capture_handle + channel._start_typing = lambda _chat_id: None + + mention = SimpleNamespace(type="mention", offset=0, length=13) + await 
channel._on_message(_make_telegram_update(text="@nanobot_test hi", entities=[mention]), None) + await channel._on_message(_make_telegram_update(text="@nanobot_test again", entities=[mention]), None) + + assert len(handled) == 2 + assert channel._app.bot.get_me_calls == 1 + + +@pytest.mark.asyncio +async def test_group_policy_mention_accepts_caption_mention() -> None: + channel = TelegramChannel( + TelegramConfig(enabled=True, token="123:abc", allow_from=["*"], group_policy="mention"), + MessageBus(), + ) + channel._app = _FakeApp(lambda: None) + + handled = [] + + async def capture_handle(**kwargs) -> None: + handled.append(kwargs) + + channel._handle_message = capture_handle + channel._start_typing = lambda _chat_id: None + + mention = SimpleNamespace(type="mention", offset=0, length=13) + await channel._on_message( + _make_telegram_update(caption="@nanobot_test photo", caption_entities=[mention]), + None, + ) + + assert len(handled) == 1 + assert handled[0]["content"] == "@nanobot_test photo" + + +@pytest.mark.asyncio +async def test_group_policy_mention_accepts_reply_to_bot() -> None: + channel = TelegramChannel( + TelegramConfig(enabled=True, token="123:abc", allow_from=["*"], group_policy="mention"), + MessageBus(), + ) + channel._app = _FakeApp(lambda: None) + + handled = [] + + async def capture_handle(**kwargs) -> None: + handled.append(kwargs) + + channel._handle_message = capture_handle + channel._start_typing = lambda _chat_id: None + + reply = SimpleNamespace(from_user=SimpleNamespace(id=999)) + await channel._on_message(_make_telegram_update(text="reply", reply_to_message=reply), None) + + assert len(handled) == 1 + + +@pytest.mark.asyncio +async def test_group_policy_open_accepts_plain_group_message() -> None: + channel = TelegramChannel( + TelegramConfig(enabled=True, token="123:abc", allow_from=["*"], group_policy="open"), + MessageBus(), + ) + channel._app = _FakeApp(lambda: None) + + handled = [] + + async def capture_handle(**kwargs) -> None: + 
handled.append(kwargs) + + channel._handle_message = capture_handle + channel._start_typing = lambda _chat_id: None + + await channel._on_message(_make_telegram_update(text="hello group"), None) + + assert len(handled) == 1 + assert channel._app.bot.get_me_calls == 0 From b7ecc94c9b85aadc79e0d6598ea42ad7dbaa15f1 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Tue, 10 Mar 2026 09:16:23 +0000 Subject: [PATCH 19/29] fix(skill-creator): restore validation and align packaging docs --- nanobot/skills/skill-creator/SKILL.md | 10 +- .../skill-creator/scripts/package_skill.py | 77 ++++--- .../skill-creator/scripts/quick_validate.py | 213 ++++++++++++++++++ tests/test_skill_creator_scripts.py | 127 +++++++++++ 4 files changed, 392 insertions(+), 35 deletions(-) create mode 100644 nanobot/skills/skill-creator/scripts/quick_validate.py create mode 100644 tests/test_skill_creator_scripts.py diff --git a/nanobot/skills/skill-creator/SKILL.md b/nanobot/skills/skill-creator/SKILL.md index f4d6e0b..ea53abe 100644 --- a/nanobot/skills/skill-creator/SKILL.md +++ b/nanobot/skills/skill-creator/SKILL.md @@ -268,6 +268,8 @@ Skip this step only if the skill being developed already exists, and iteration o When creating a new skill from scratch, always run the `init_skill.py` script. The script conveniently generates a new template skill directory that automatically includes everything a skill requires, making the skill creation process much more efficient and reliable. +For `nanobot`, custom skills should live under the active workspace `skills/` directory so they can be discovered automatically at runtime (for example, `/skills/my-skill/SKILL.md`). 
+ Usage: ```bash @@ -277,9 +279,9 @@ scripts/init_skill.py <skill-name> --path <output-directory> [--resources script Examples: ```bash -scripts/init_skill.py my-skill --path skills/public -scripts/init_skill.py my-skill --path skills/public --resources scripts,references -scripts/init_skill.py my-skill --path skills/public --resources scripts --examples +scripts/init_skill.py my-skill --path ./workspace/skills +scripts/init_skill.py my-skill --path ./workspace/skills --resources scripts,references +scripts/init_skill.py my-skill --path ./workspace/skills --resources scripts --examples ``` The script: @@ -326,7 +328,7 @@ Write the YAML frontmatter with `name` and `description`: - Include all "when to use" information here - Not in the body. The body is only loaded after triggering, so "When to Use This Skill" sections in the body are not helpful to the agent. - Example description for a `docx` skill: "Comprehensive document creation, editing, and analysis with support for tracked changes, comments, formatting preservation, and text extraction. Use when the agent needs to work with professional documents (.docx files) for: (1) Creating new documents, (2) Modifying or editing content, (3) Working with tracked changes, (4) Adding comments, or any other document tasks" -Do not include any other fields in YAML frontmatter. +Keep frontmatter minimal. In `nanobot`, `metadata` and `always` are also supported when needed, but avoid adding extra fields unless they are actually required.
##### Body diff --git a/nanobot/skills/skill-creator/scripts/package_skill.py b/nanobot/skills/skill-creator/scripts/package_skill.py index aa4de89..48fcbbe 100755 --- a/nanobot/skills/skill-creator/scripts/package_skill.py +++ b/nanobot/skills/skill-creator/scripts/package_skill.py @@ -3,11 +3,11 @@ Skill Packager - Creates a distributable .skill file of a skill folder Usage: - python utils/package_skill.py [output-directory] + python package_skill.py [output-directory] Example: - python utils/package_skill.py skills/public/my-skill - python utils/package_skill.py skills/public/my-skill ./dist + python package_skill.py skills/public/my-skill + python package_skill.py skills/public/my-skill ./dist """ import sys @@ -25,6 +25,14 @@ def _is_within(path: Path, root: Path) -> bool: return False +def _cleanup_partial_archive(skill_filename: Path) -> None: + try: + if skill_filename.exists(): + skill_filename.unlink() + except OSError: + pass + + def package_skill(skill_path, output_dir=None): """ Package a skill folder into a .skill file. @@ -74,49 +82,56 @@ def package_skill(skill_path, output_dir=None): EXCLUDED_DIRS = {".git", ".svn", ".hg", "__pycache__", "node_modules"} + files_to_package = [] + resolved_archive = skill_filename.resolve() + + for file_path in skill_path.rglob("*"): + # Fail closed on symlinks so the packaged contents are explicit and predictable. + if file_path.is_symlink(): + print(f"[ERROR] Symlink not allowed in packaged skill: {file_path}") + _cleanup_partial_archive(skill_filename) + return None + + rel_parts = file_path.relative_to(skill_path).parts + if any(part in EXCLUDED_DIRS for part in rel_parts): + continue + + if file_path.is_file(): + resolved_file = file_path.resolve() + if not _is_within(resolved_file, skill_path): + print(f"[ERROR] File escapes skill root: {file_path}") + _cleanup_partial_archive(skill_filename) + return None + # If output lives under skill_path, avoid writing archive into itself. 
+ if resolved_file == resolved_archive: + print(f"[WARN] Skipping output archive: {file_path}") + continue + files_to_package.append(file_path) + # Create the .skill file (zip format) try: with zipfile.ZipFile(skill_filename, "w", zipfile.ZIP_DEFLATED) as zipf: - # Walk through the skill directory - for file_path in skill_path.rglob("*"): - # Security: never follow or package symlinks. - if file_path.is_symlink(): - print(f"[WARN] Skipping symlink: {file_path}") - continue - - rel_parts = file_path.relative_to(skill_path).parts - if any(part in EXCLUDED_DIRS for part in rel_parts): - continue - - if file_path.is_file(): - resolved_file = file_path.resolve() - if not _is_within(resolved_file, skill_path): - print(f"[ERROR] File escapes skill root: {file_path}") - return None - # If output lives under skill_path, avoid writing archive into itself. - if resolved_file == skill_filename.resolve(): - print(f"[WARN] Skipping output archive: {file_path}") - continue - - # Calculate the relative path within the zip. - arcname = Path(skill_name) / file_path.relative_to(skill_path) - zipf.write(file_path, arcname) - print(f" Added: {arcname}") + for file_path in files_to_package: + # Calculate the relative path within the zip. 
+ arcname = Path(skill_name) / file_path.relative_to(skill_path) + zipf.write(file_path, arcname) + print(f" Added: {arcname}") print(f"\n[OK] Successfully packaged skill to: {skill_filename}") return skill_filename except Exception as e: + _cleanup_partial_archive(skill_filename) print(f"[ERROR] Error creating .skill file: {e}") return None def main(): if len(sys.argv) < 2: - print("Usage: python utils/package_skill.py [output-directory]") + print("Usage: python package_skill.py [output-directory]") print("\nExample:") - print(" python utils/package_skill.py skills/public/my-skill") - print(" python utils/package_skill.py skills/public/my-skill ./dist") + print(" python package_skill.py skills/public/my-skill") + print(" python package_skill.py skills/public/my-skill ./dist") sys.exit(1) skill_path = sys.argv[1] diff --git a/nanobot/skills/skill-creator/scripts/quick_validate.py b/nanobot/skills/skill-creator/scripts/quick_validate.py new file mode 100644 index 0000000..03d246d --- /dev/null +++ b/nanobot/skills/skill-creator/scripts/quick_validate.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python3 +""" +Minimal validator for nanobot skill folders. 
+""" + +import re +import sys +from pathlib import Path +from typing import Optional + +try: + import yaml +except ModuleNotFoundError: + yaml = None + +MAX_SKILL_NAME_LENGTH = 64 +ALLOWED_FRONTMATTER_KEYS = { + "name", + "description", + "metadata", + "always", + "license", + "allowed-tools", +} +ALLOWED_RESOURCE_DIRS = {"scripts", "references", "assets"} +PLACEHOLDER_MARKERS = ("[todo", "todo:") + + +def _extract_frontmatter(content: str) -> Optional[str]: + lines = content.splitlines() + if not lines or lines[0].strip() != "---": + return None + for i in range(1, len(lines)): + if lines[i].strip() == "---": + return "\n".join(lines[1:i]) + return None + + +def _parse_simple_frontmatter(frontmatter_text: str) -> Optional[dict[str, str]]: + """Fallback parser for simple frontmatter when PyYAML is unavailable.""" + parsed: dict[str, str] = {} + current_key: Optional[str] = None + multiline_key: Optional[str] = None + + for raw_line in frontmatter_text.splitlines(): + stripped = raw_line.strip() + if not stripped or stripped.startswith("#"): + continue + + is_indented = raw_line[:1].isspace() + if is_indented: + if current_key is None: + return None + current_value = parsed[current_key] + parsed[current_key] = f"{current_value}\n{stripped}" if current_value else stripped + continue + + if ":" not in stripped: + return None + + key, value = stripped.split(":", 1) + key = key.strip() + value = value.strip() + if not key: + return None + + if value in {"|", ">"}: + parsed[key] = "" + current_key = key + multiline_key = key + continue + + if (value.startswith('"') and value.endswith('"')) or ( + value.startswith("'") and value.endswith("'") + ): + value = value[1:-1] + parsed[key] = value + current_key = key + multiline_key = None + + if multiline_key is not None and multiline_key not in parsed: + return None + return parsed + + +def _load_frontmatter(frontmatter_text: str) -> tuple[Optional[dict], Optional[str]]: + if yaml is not None: + try: + frontmatter = 
yaml.safe_load(frontmatter_text) + except yaml.YAMLError as exc: + return None, f"Invalid YAML in frontmatter: {exc}" + if not isinstance(frontmatter, dict): + return None, "Frontmatter must be a YAML dictionary" + return frontmatter, None + + frontmatter = _parse_simple_frontmatter(frontmatter_text) + if frontmatter is None: + return None, "Invalid YAML in frontmatter: unsupported syntax without PyYAML installed" + return frontmatter, None + + +def _validate_skill_name(name: str, folder_name: str) -> Optional[str]: + if not re.fullmatch(r"[a-z0-9]+(?:-[a-z0-9]+)*", name): + return ( + f"Name '{name}' should be hyphen-case " + "(lowercase letters, digits, and single hyphens only)" + ) + if len(name) > MAX_SKILL_NAME_LENGTH: + return ( + f"Name is too long ({len(name)} characters). " + f"Maximum is {MAX_SKILL_NAME_LENGTH} characters." + ) + if name != folder_name: + return f"Skill name '{name}' must match directory name '{folder_name}'" + return None + + +def _validate_description(description: str) -> Optional[str]: + trimmed = description.strip() + if not trimmed: + return "Description cannot be empty" + lowered = trimmed.lower() + if any(marker in lowered for marker in PLACEHOLDER_MARKERS): + return "Description still contains TODO placeholder text" + if "<" in trimmed or ">" in trimmed: + return "Description cannot contain angle brackets (< or >)" + if len(trimmed) > 1024: + return f"Description is too long ({len(trimmed)} characters). Maximum is 1024 characters." 
+ return None + + +def validate_skill(skill_path): + """Validate a skill folder structure and required frontmatter.""" + skill_path = Path(skill_path).resolve() + + if not skill_path.exists(): + return False, f"Skill folder not found: {skill_path}" + if not skill_path.is_dir(): + return False, f"Path is not a directory: {skill_path}" + + skill_md = skill_path / "SKILL.md" + if not skill_md.exists(): + return False, "SKILL.md not found" + + try: + content = skill_md.read_text(encoding="utf-8") + except OSError as exc: + return False, f"Could not read SKILL.md: {exc}" + + frontmatter_text = _extract_frontmatter(content) + if frontmatter_text is None: + return False, "Invalid frontmatter format" + + frontmatter, error = _load_frontmatter(frontmatter_text) + if error: + return False, error + + unexpected_keys = sorted(set(frontmatter.keys()) - ALLOWED_FRONTMATTER_KEYS) + if unexpected_keys: + allowed = ", ".join(sorted(ALLOWED_FRONTMATTER_KEYS)) + unexpected = ", ".join(unexpected_keys) + return ( + False, + f"Unexpected key(s) in SKILL.md frontmatter: {unexpected}. 
Allowed properties are: {allowed}", + ) + + if "name" not in frontmatter: + return False, "Missing 'name' in frontmatter" + if "description" not in frontmatter: + return False, "Missing 'description' in frontmatter" + + name = frontmatter["name"] + if not isinstance(name, str): + return False, f"Name must be a string, got {type(name).__name__}" + name_error = _validate_skill_name(name.strip(), skill_path.name) + if name_error: + return False, name_error + + description = frontmatter["description"] + if not isinstance(description, str): + return False, f"Description must be a string, got {type(description).__name__}" + description_error = _validate_description(description) + if description_error: + return False, description_error + + always = frontmatter.get("always") + if always is not None and not isinstance(always, bool): + return False, f"'always' must be a boolean, got {type(always).__name__}" + + for child in skill_path.iterdir(): + if child.name == "SKILL.md": + continue + if child.is_dir() and child.name in ALLOWED_RESOURCE_DIRS: + continue + if child.is_symlink(): + continue + return ( + False, + f"Unexpected file or directory in skill root: {child.name}. " + "Only SKILL.md, scripts/, references/, and assets/ are allowed.", + ) + + return True, "Skill is valid!" 
+ + +if __name__ == "__main__": + if len(sys.argv) != 2: + print("Usage: python quick_validate.py ") + sys.exit(1) + + valid, message = validate_skill(sys.argv[1]) + print(message) + sys.exit(0 if valid else 1) diff --git a/tests/test_skill_creator_scripts.py b/tests/test_skill_creator_scripts.py new file mode 100644 index 0000000..4207c6f --- /dev/null +++ b/tests/test_skill_creator_scripts.py @@ -0,0 +1,127 @@ +import importlib +import shutil +import sys +import zipfile +from pathlib import Path + + +SCRIPT_DIR = Path("nanobot/skills/skill-creator/scripts").resolve() +if str(SCRIPT_DIR) not in sys.path: + sys.path.insert(0, str(SCRIPT_DIR)) + +init_skill = importlib.import_module("init_skill") +package_skill = importlib.import_module("package_skill") +quick_validate = importlib.import_module("quick_validate") + + +def test_init_skill_creates_expected_files(tmp_path: Path) -> None: + skill_dir = init_skill.init_skill( + "demo-skill", + tmp_path, + ["scripts", "references", "assets"], + include_examples=True, + ) + + assert skill_dir == tmp_path / "demo-skill" + assert (skill_dir / "SKILL.md").exists() + assert (skill_dir / "scripts" / "example.py").exists() + assert (skill_dir / "references" / "api_reference.md").exists() + assert (skill_dir / "assets" / "example_asset.txt").exists() + + +def test_validate_skill_accepts_existing_skill_creator() -> None: + valid, message = quick_validate.validate_skill( + Path("nanobot/skills/skill-creator").resolve() + ) + + assert valid, message + + +def test_validate_skill_rejects_placeholder_description(tmp_path: Path) -> None: + skill_dir = tmp_path / "placeholder-skill" + skill_dir.mkdir() + (skill_dir / "SKILL.md").write_text( + "---\n" + "name: placeholder-skill\n" + 'description: "[TODO: fill me in]"\n' + "---\n" + "# Placeholder\n", + encoding="utf-8", + ) + + valid, message = quick_validate.validate_skill(skill_dir) + + assert not valid + assert "TODO placeholder" in message + + +def 
test_validate_skill_rejects_root_files_outside_allowed_dirs(tmp_path: Path) -> None: + skill_dir = tmp_path / "bad-root-skill" + skill_dir.mkdir() + (skill_dir / "SKILL.md").write_text( + "---\n" + "name: bad-root-skill\n" + "description: Valid description\n" + "---\n" + "# Skill\n", + encoding="utf-8", + ) + (skill_dir / "README.md").write_text("extra\n", encoding="utf-8") + + valid, message = quick_validate.validate_skill(skill_dir) + + assert not valid + assert "Unexpected file or directory in skill root" in message + + +def test_package_skill_creates_archive(tmp_path: Path) -> None: + skill_dir = tmp_path / "package-me" + skill_dir.mkdir() + (skill_dir / "SKILL.md").write_text( + "---\n" + "name: package-me\n" + "description: Package this skill.\n" + "---\n" + "# Skill\n", + encoding="utf-8", + ) + scripts_dir = skill_dir / "scripts" + scripts_dir.mkdir() + (scripts_dir / "helper.py").write_text("print('ok')\n", encoding="utf-8") + + archive_path = package_skill.package_skill(skill_dir, tmp_path / "dist") + + assert archive_path == (tmp_path / "dist" / "package-me.skill") + assert archive_path.exists() + with zipfile.ZipFile(archive_path, "r") as archive: + names = set(archive.namelist()) + assert "package-me/SKILL.md" in names + assert "package-me/scripts/helper.py" in names + + +def test_package_skill_rejects_symlink(tmp_path: Path) -> None: + skill_dir = tmp_path / "symlink-skill" + skill_dir.mkdir() + (skill_dir / "SKILL.md").write_text( + "---\n" + "name: symlink-skill\n" + "description: Reject symlinks during packaging.\n" + "---\n" + "# Skill\n", + encoding="utf-8", + ) + scripts_dir = skill_dir / "scripts" + scripts_dir.mkdir() + target = tmp_path / "outside.txt" + target.write_text("secret\n", encoding="utf-8") + link = scripts_dir / "outside.txt" + + try: + link.symlink_to(target) + except (OSError, NotImplementedError): + return + + archive_path = package_skill.package_skill(skill_dir, tmp_path / "dist") + + assert archive_path is None + assert not 
(tmp_path / "dist" / "symlink-skill.skill").exists() From b0a5435b8720a5968e683ce5aa82a8b16e614452 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Tue, 10 Mar 2026 10:10:37 +0000 Subject: [PATCH 20/29] refactor(llm): share transient retry across agent paths --- nanobot/agent/loop.py | 29 +------- nanobot/agent/memory.py | 2 +- nanobot/agent/subagent.py | 2 +- nanobot/heartbeat/service.py | 2 +- nanobot/providers/base.py | 84 ++++++++++++++++++++++ tests/test_heartbeat_service.py | 47 +++++++++++- tests/test_memory_consolidation_types.py | 50 ++++++++++++- tests/test_provider_retry.py | 92 ++++++++++++++++++++++++ 8 files changed, 274 insertions(+), 34 deletions(-) create mode 100644 tests/test_provider_retry.py diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index b67baae..fcbc880 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -159,33 +159,6 @@ class AgentLoop: if hasattr(tool, "set_context"): tool.set_context(channel, chat_id, *([message_id] if name == "message" else [])) - _RETRY_DELAYS = (1, 2, 4) # seconds — exponential backoff for transient LLM errors - - async def _chat_with_retry(self, **kwargs: Any) -> Any: - """Call provider.chat() with retry on transient errors (429, 5xx, network).""" - from nanobot.providers.base import LLMResponse - - last_response: LLMResponse | None = None - for attempt, delay in enumerate(self._RETRY_DELAYS): - response = await self.provider.chat(**kwargs) - if response.finish_reason != "error": - return response - # Check if the error looks transient (rate limit, server error, network) - err = (response.content or "").lower() - is_transient = any(kw in err for kw in ( - "429", "rate limit", "500", "502", "503", "504", - "overloaded", "timeout", "connection", "server error", - )) - if not is_transient: - return response # permanent error (400, 401, etc.) 
— don't retry - last_response = response - logger.warning("LLM transient error (attempt {}/{}), retrying in {}s: {}", - attempt + 1, len(self._RETRY_DELAYS), delay, err[:120]) - await asyncio.sleep(delay) - # All retries exhausted — make one final attempt - response = await self.provider.chat(**kwargs) - return response if response.finish_reason != "error" else (last_response or response) - @staticmethod def _strip_think(text: str | None) -> str | None: """Remove blocks that some models embed in content.""" @@ -218,7 +191,7 @@ class AgentLoop: while iteration < self.max_iterations: iteration += 1 - response = await self._chat_with_retry( + response = await self.provider.chat_with_retry( messages=messages, tools=self.tools.get_definitions(), model=self.model, diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py index 21fe77d..66efec2 100644 --- a/nanobot/agent/memory.py +++ b/nanobot/agent/memory.py @@ -111,7 +111,7 @@ class MemoryStore: {chr(10).join(lines)}""" try: - response = await provider.chat( + response = await provider.chat_with_retry( messages=[ {"role": "system", "content": "You are a memory consolidation agent. Call the save_memory tool with your consolidation of the conversation."}, {"role": "user", "content": prompt}, diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py index f2d6ee5..f9eda1f 100644 --- a/nanobot/agent/subagent.py +++ b/nanobot/agent/subagent.py @@ -123,7 +123,7 @@ class SubagentManager: while iteration < max_iterations: iteration += 1 - response = await self.provider.chat( + response = await self.provider.chat_with_retry( messages=messages, tools=tools.get_definitions(), model=self.model, diff --git a/nanobot/heartbeat/service.py b/nanobot/heartbeat/service.py index e534017..831ae85 100644 --- a/nanobot/heartbeat/service.py +++ b/nanobot/heartbeat/service.py @@ -87,7 +87,7 @@ class HeartbeatService: Returns (action, tasks) where action is 'skip' or 'run'. 
""" - response = await self.provider.chat( + response = await self.provider.chat_with_retry( messages=[ {"role": "system", "content": "You are a heartbeat agent. Call the heartbeat tool to report your decision."}, {"role": "user", "content": ( diff --git a/nanobot/providers/base.py b/nanobot/providers/base.py index 0f73544..a3b6c47 100644 --- a/nanobot/providers/base.py +++ b/nanobot/providers/base.py @@ -1,9 +1,12 @@ """Base LLM provider interface.""" +import asyncio from abc import ABC, abstractmethod from dataclasses import dataclass, field from typing import Any +from loguru import logger + @dataclass class ToolCallRequest: @@ -37,6 +40,22 @@ class LLMProvider(ABC): while maintaining a consistent interface. """ + _CHAT_RETRY_DELAYS = (1, 2, 4) + _TRANSIENT_ERROR_MARKERS = ( + "429", + "rate limit", + "500", + "502", + "503", + "504", + "overloaded", + "timeout", + "timed out", + "connection", + "server error", + "temporarily unavailable", + ) + def __init__(self, api_key: str | None = None, api_base: str | None = None): self.api_key = api_key self.api_base = api_base @@ -126,6 +145,71 @@ class LLMProvider(ABC): """ pass + @classmethod + def _is_transient_error(cls, content: str | None) -> bool: + err = (content or "").lower() + return any(marker in err for marker in cls._TRANSIENT_ERROR_MARKERS) + + async def chat_with_retry( + self, + messages: list[dict[str, Any]], + tools: list[dict[str, Any]] | None = None, + model: str | None = None, + max_tokens: int = 4096, + temperature: float = 0.7, + reasoning_effort: str | None = None, + ) -> LLMResponse: + """Call chat() with retry on transient provider failures.""" + for attempt, delay in enumerate(self._CHAT_RETRY_DELAYS, start=1): + try: + response = await self.chat( + messages=messages, + tools=tools, + model=model, + max_tokens=max_tokens, + temperature=temperature, + reasoning_effort=reasoning_effort, + ) + except asyncio.CancelledError: + raise + except Exception as exc: + response = LLMResponse( + 
content=f"Error calling LLM: {exc}", + finish_reason="error", + ) + + if response.finish_reason != "error": + return response + if not self._is_transient_error(response.content): + return response + + err = (response.content or "").lower() + logger.warning( + "LLM transient error (attempt {}/{}), retrying in {}s: {}", + attempt, + len(self._CHAT_RETRY_DELAYS), + delay, + err[:120], + ) + await asyncio.sleep(delay) + + try: + return await self.chat( + messages=messages, + tools=tools, + model=model, + max_tokens=max_tokens, + temperature=temperature, + reasoning_effort=reasoning_effort, + ) + except asyncio.CancelledError: + raise + except Exception as exc: + return LLMResponse( + content=f"Error calling LLM: {exc}", + finish_reason="error", + ) + @abstractmethod def get_default_model(self) -> str: """Get the default model for this provider.""" diff --git a/tests/test_heartbeat_service.py b/tests/test_heartbeat_service.py index c5478af..9ce8912 100644 --- a/tests/test_heartbeat_service.py +++ b/tests/test_heartbeat_service.py @@ -3,18 +3,24 @@ import asyncio import pytest from nanobot.heartbeat.service import HeartbeatService -from nanobot.providers.base import LLMResponse, ToolCallRequest +from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest -class DummyProvider: +class DummyProvider(LLMProvider): def __init__(self, responses: list[LLMResponse]): + super().__init__() self._responses = list(responses) + self.calls = 0 async def chat(self, *args, **kwargs) -> LLMResponse: + self.calls += 1 if self._responses: return self._responses.pop(0) return LLMResponse(content="", tool_calls=[]) + def get_default_model(self) -> str: + return "test-model" + @pytest.mark.asyncio async def test_start_is_idempotent(tmp_path) -> None: @@ -115,3 +121,40 @@ async def test_trigger_now_returns_none_when_decision_is_skip(tmp_path) -> None: ) assert await service.trigger_now() is None + + +@pytest.mark.asyncio +async def 
test_decide_retries_transient_error_then_succeeds(tmp_path, monkeypatch) -> None: + provider = DummyProvider([ + LLMResponse(content="429 rate limit", finish_reason="error"), + LLMResponse( + content="", + tool_calls=[ + ToolCallRequest( + id="hb_1", + name="heartbeat", + arguments={"action": "run", "tasks": "check open tasks"}, + ) + ], + ), + ]) + + delays: list[int] = [] + + async def _fake_sleep(delay: int) -> None: + delays.append(delay) + + monkeypatch.setattr(asyncio, "sleep", _fake_sleep) + + service = HeartbeatService( + workspace=tmp_path, + provider=provider, + model="openai/gpt-4o-mini", + ) + + action, tasks = await service._decide("heartbeat content") + + assert action == "run" + assert tasks == "check open tasks" + assert provider.calls == 2 + assert delays == [1] diff --git a/tests/test_memory_consolidation_types.py b/tests/test_memory_consolidation_types.py index ff15584..2605bf7 100644 --- a/tests/test_memory_consolidation_types.py +++ b/tests/test_memory_consolidation_types.py @@ -12,7 +12,7 @@ from unittest.mock import AsyncMock, MagicMock import pytest from nanobot.agent.memory import MemoryStore -from nanobot.providers.base import LLMResponse, ToolCallRequest +from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest def _make_session(message_count: int = 30, memory_window: int = 50): @@ -43,6 +43,22 @@ def _make_tool_response(history_entry, memory_update): ) +class ScriptedProvider(LLMProvider): + def __init__(self, responses: list[LLMResponse]): + super().__init__() + self._responses = list(responses) + self.calls = 0 + + async def chat(self, *args, **kwargs) -> LLMResponse: + self.calls += 1 + if self._responses: + return self._responses.pop(0) + return LLMResponse(content="", tool_calls=[]) + + def get_default_model(self) -> str: + return "test-model" + + class TestMemoryConsolidationTypeHandling: """Test that consolidation handles various argument types correctly.""" @@ -57,6 +73,7 @@ class 
TestMemoryConsolidationTypeHandling: memory_update="# Memory\nUser likes testing.", ) ) + provider.chat_with_retry = provider.chat session = _make_session(message_count=60) result = await store.consolidate(session, provider, "test-model", memory_window=50) @@ -77,6 +94,7 @@ class TestMemoryConsolidationTypeHandling: memory_update={"facts": ["User likes testing"], "topics": ["testing"]}, ) ) + provider.chat_with_retry = provider.chat session = _make_session(message_count=60) result = await store.consolidate(session, provider, "test-model", memory_window=50) @@ -112,6 +130,7 @@ class TestMemoryConsolidationTypeHandling: ], ) provider.chat = AsyncMock(return_value=response) + provider.chat_with_retry = provider.chat session = _make_session(message_count=60) result = await store.consolidate(session, provider, "test-model", memory_window=50) @@ -127,6 +146,7 @@ class TestMemoryConsolidationTypeHandling: provider.chat = AsyncMock( return_value=LLMResponse(content="I summarized the conversation.", tool_calls=[]) ) + provider.chat_with_retry = provider.chat session = _make_session(message_count=60) result = await store.consolidate(session, provider, "test-model", memory_window=50) @@ -139,6 +159,7 @@ class TestMemoryConsolidationTypeHandling: """Consolidation should be a no-op when messages < keep_count.""" store = MemoryStore(tmp_path) provider = AsyncMock() + provider.chat_with_retry = provider.chat session = _make_session(message_count=10) result = await store.consolidate(session, provider, "test-model", memory_window=50) @@ -167,6 +188,7 @@ class TestMemoryConsolidationTypeHandling: ], ) provider.chat = AsyncMock(return_value=response) + provider.chat_with_retry = provider.chat session = _make_session(message_count=60) result = await store.consolidate(session, provider, "test-model", memory_window=50) @@ -192,6 +214,7 @@ class TestMemoryConsolidationTypeHandling: ], ) provider.chat = AsyncMock(return_value=response) + provider.chat_with_retry = provider.chat session = 
_make_session(message_count=60) result = await store.consolidate(session, provider, "test-model", memory_window=50) @@ -215,8 +238,33 @@ class TestMemoryConsolidationTypeHandling: ], ) provider.chat = AsyncMock(return_value=response) + provider.chat_with_retry = provider.chat session = _make_session(message_count=60) result = await store.consolidate(session, provider, "test-model", memory_window=50) assert result is False + + @pytest.mark.asyncio + async def test_retries_transient_error_then_succeeds(self, tmp_path: Path, monkeypatch) -> None: + store = MemoryStore(tmp_path) + provider = ScriptedProvider([ + LLMResponse(content="503 server error", finish_reason="error"), + _make_tool_response( + history_entry="[2026-01-01] User discussed testing.", + memory_update="# Memory\nUser likes testing.", + ), + ]) + session = _make_session(message_count=60) + delays: list[int] = [] + + async def _fake_sleep(delay: int) -> None: + delays.append(delay) + + monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep) + + result = await store.consolidate(session, provider, "test-model", memory_window=50) + + assert result is True + assert provider.calls == 2 + assert delays == [1] diff --git a/tests/test_provider_retry.py b/tests/test_provider_retry.py new file mode 100644 index 0000000..751ecc3 --- /dev/null +++ b/tests/test_provider_retry.py @@ -0,0 +1,92 @@ +import asyncio + +import pytest + +from nanobot.providers.base import LLMProvider, LLMResponse + + +class ScriptedProvider(LLMProvider): + def __init__(self, responses): + super().__init__() + self._responses = list(responses) + self.calls = 0 + + async def chat(self, *args, **kwargs) -> LLMResponse: + self.calls += 1 + response = self._responses.pop(0) + if isinstance(response, BaseException): + raise response + return response + + def get_default_model(self) -> str: + return "test-model" + + +@pytest.mark.asyncio +async def test_chat_with_retry_retries_transient_error_then_succeeds(monkeypatch) -> None: + 
provider = ScriptedProvider([ + LLMResponse(content="429 rate limit", finish_reason="error"), + LLMResponse(content="ok"), + ]) + delays: list[int] = [] + + async def _fake_sleep(delay: int) -> None: + delays.append(delay) + + monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep) + + response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}]) + + assert response.finish_reason == "stop" + assert response.content == "ok" + assert provider.calls == 2 + assert delays == [1] + + +@pytest.mark.asyncio +async def test_chat_with_retry_does_not_retry_non_transient_error(monkeypatch) -> None: + provider = ScriptedProvider([ + LLMResponse(content="401 unauthorized", finish_reason="error"), + ]) + delays: list[int] = [] + + async def _fake_sleep(delay: int) -> None: + delays.append(delay) + + monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep) + + response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}]) + + assert response.content == "401 unauthorized" + assert provider.calls == 1 + assert delays == [] + + +@pytest.mark.asyncio +async def test_chat_with_retry_returns_final_error_after_retries(monkeypatch) -> None: + provider = ScriptedProvider([ + LLMResponse(content="429 rate limit a", finish_reason="error"), + LLMResponse(content="429 rate limit b", finish_reason="error"), + LLMResponse(content="429 rate limit c", finish_reason="error"), + LLMResponse(content="503 final server error", finish_reason="error"), + ]) + delays: list[int] = [] + + async def _fake_sleep(delay: int) -> None: + delays.append(delay) + + monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep) + + response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}]) + + assert response.content == "503 final server error" + assert provider.calls == 4 + assert delays == [1, 2, 4] + + +@pytest.mark.asyncio +async def test_chat_with_retry_preserves_cancelled_error() 
-> None: + provider = ScriptedProvider([asyncio.CancelledError()]) + + with pytest.raises(asyncio.CancelledError): + await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}]) From 947ed508ad876bdc227c27fd1b008b163ea830b3 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Tue, 10 Mar 2026 10:13:46 +0000 Subject: [PATCH 21/29] chore: exclude skills from core agent line count --- core_agent_lines.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core_agent_lines.sh b/core_agent_lines.sh index 3f5301a..df32394 100755 --- a/core_agent_lines.sh +++ b/core_agent_lines.sh @@ -15,7 +15,7 @@ root=$(cat nanobot/__init__.py nanobot/__main__.py | wc -l) printf " %-16s %5s lines\n" "(root)" "$root" echo "" -total=$(find nanobot -name "*.py" ! -path "*/channels/*" ! -path "*/cli/*" ! -path "*/providers/*" | xargs cat | wc -l) +total=$(find nanobot -name "*.py" ! -path "*/channels/*" ! -path "*/cli/*" ! -path "*/providers/*" ! -path "*/skills/*" | xargs cat | wc -l) echo " Core total: $total lines" echo "" -echo " (excludes: channels/, cli/, providers/)" +echo " (excludes: channels/, cli/, providers/, skills/)" From 2ffeb9295bdb4a5ef308498f60f45b2448ab48d2 Mon Sep 17 00:00:00 2001 From: lailoo Date: Wed, 11 Mar 2026 00:47:09 +0800 Subject: [PATCH 22/29] fix(subagent): preserve reasoning_content in assistant messages Subagent's _run_subagent() was dropping reasoning_content and thinking_blocks when building assistant messages for the conversation history. Providers like Deepseek Reasoner require reasoning_content on every assistant message when thinking mode is active, causing a 400 BadRequestError on the second LLM round-trip. Align with the main AgentLoop which already preserves these fields via ContextBuilder.add_assistant_message(). 
Closes #1834 --- nanobot/agent/subagent.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py index f9eda1f..308e67d 100644 --- a/nanobot/agent/subagent.py +++ b/nanobot/agent/subagent.py @@ -145,11 +145,19 @@ class SubagentManager: } for tc in response.tool_calls ] - messages.append({ + assistant_msg: dict[str, Any] = { "role": "assistant", "content": response.content or "", "tool_calls": tool_call_dicts, - }) + } + # Preserve reasoning_content for providers that require it + # (e.g. Deepseek Reasoner mandates this field on every + # assistant message when thinking mode is active). + if response.reasoning_content is not None: + assistant_msg["reasoning_content"] = response.reasoning_content + if response.thinking_blocks: + assistant_msg["thinking_blocks"] = response.thinking_blocks + messages.append(assistant_msg) # Execute tools for tool_call in response.tool_calls: From 62ccda43b980d53c5ac7a79adf8edf43294f1fdb Mon Sep 17 00:00:00 2001 From: Re-bin Date: Tue, 10 Mar 2026 19:55:06 +0000 Subject: [PATCH 23/29] refactor(memory): switch consolidation to token-based context windows Move consolidation policy into MemoryConsolidator, keep backward compatibility for legacy config, and compress history by token budget instead of message count. 
--- nanobot/agent/loop.py | 544 ++--------------------- nanobot/agent/memory.py | 243 +++++++--- nanobot/cli/commands.py | 26 +- nanobot/config/schema.py | 32 +- nanobot/session/manager.py | 20 +- nanobot/utils/helpers.py | 85 ++++ pyproject.toml | 1 + tests/test_commands.py | 33 ++ tests/test_config_migration.py | 88 ++++ tests/test_consolidate_offset.py | 297 ++----------- tests/test_loop_consolidation_tokens.py | 190 ++++++++ tests/test_memory_consolidation_types.py | 51 +-- tests/test_message_tool_suppress.py | 10 +- 13 files changed, 709 insertions(+), 911 deletions(-) create mode 100644 tests/test_config_migration.py create mode 100644 tests/test_loop_consolidation_tokens.py diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index ba35a23..8605a09 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -11,18 +11,12 @@ from typing import TYPE_CHECKING, Any, Awaitable, Callable from loguru import logger -try: - import tiktoken # type: ignore -except Exception: # pragma: no cover - optional dependency - tiktoken = None - from nanobot.agent.context import ContextBuilder +from nanobot.agent.memory import MemoryConsolidator from nanobot.agent.subagent import SubagentManager from nanobot.agent.tools.cron import CronTool from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, ReadFileTool, WriteFileTool -from nanobot.agent.tools.huggingface import HuggingFaceModelSearchTool from nanobot.agent.tools.message import MessageTool -from nanobot.agent.tools.model_config import ValidateDeployJSONTool, ValidateUsageYAMLTool from nanobot.agent.tools.registry import ToolRegistry from nanobot.agent.tools.shell import ExecTool from nanobot.agent.tools.spawn import SpawnTool @@ -60,11 +54,8 @@ class AgentLoop: max_iterations: int = 40, temperature: float = 0.1, max_tokens: int = 4096, - memory_window: int | None = None, # backward-compat only (unused) reasoning_effort: str | None = None, - max_tokens_input: int = 128_000, - compression_start_ratio: 
float = 0.7, - compression_target_ratio: float = 0.4, + context_window_tokens: int = 65_536, brave_api_key: str | None = None, web_proxy: str | None = None, exec_config: ExecToolConfig | None = None, @@ -82,18 +73,9 @@ class AgentLoop: self.model = model or provider.get_default_model() self.max_iterations = max_iterations self.temperature = temperature - # max_tokens: per-call output token cap (maxTokensOutput in config) self.max_tokens = max_tokens - # Keep legacy attribute for older call sites/tests; compression no longer uses it. - self.memory_window = memory_window self.reasoning_effort = reasoning_effort - # max_tokens_input: model native context window (maxTokensInput in config) - self.max_tokens_input = max_tokens_input - # Token-based compression watermarks (fractions of available input budget) - self.compression_start_ratio = compression_start_ratio - self.compression_target_ratio = compression_target_ratio - # Reserve tokens for safety margin - self._reserve_tokens = 1000 + self.context_window_tokens = context_window_tokens self.brave_api_key = brave_api_key self.web_proxy = web_proxy self.exec_config = exec_config or ExecToolConfig() @@ -123,382 +105,23 @@ class AgentLoop: self._mcp_connected = False self._mcp_connecting = False self._active_tasks: dict[str, list[asyncio.Task]] = {} # session_key -> tasks - self._compression_tasks: dict[str, asyncio.Task] = {} # session_key -> task - self._last_turn_prompt_tokens: int = 0 - self._last_turn_prompt_source: str = "none" self._processing_lock = asyncio.Lock() + self.memory_consolidator = MemoryConsolidator( + workspace=workspace, + provider=provider, + model=self.model, + sessions=self.sessions, + context_window_tokens=context_window_tokens, + build_messages=self.context.build_messages, + get_tool_definitions=self.tools.get_definitions, + ) self._register_default_tools() - @staticmethod - def _estimate_prompt_tokens( - messages: list[dict[str, Any]], - tools: list[dict[str, Any]] | None = None, - ) -> int: - 
"""Estimate prompt tokens with tiktoken (fallback only).""" - if tiktoken is None: - return 0 - - try: - enc = tiktoken.get_encoding("cl100k_base") - parts: list[str] = [] - for msg in messages: - content = msg.get("content") - if isinstance(content, str): - parts.append(content) - elif isinstance(content, list): - for part in content: - if isinstance(part, dict) and part.get("type") == "text": - txt = part.get("text", "") - if txt: - parts.append(txt) - if tools: - parts.append(json.dumps(tools, ensure_ascii=False)) - return len(enc.encode("\n".join(parts))) - except Exception: - return 0 - - def _estimate_prompt_tokens_chain( - self, - messages: list[dict[str, Any]], - tools: list[dict[str, Any]] | None = None, - ) -> tuple[int, str]: - """Unified prompt-token estimation: provider counter -> tiktoken.""" - provider_counter = getattr(self.provider, "estimate_prompt_tokens", None) - if callable(provider_counter): - try: - tokens, source = provider_counter(messages, tools, self.model) - if isinstance(tokens, (int, float)) and tokens > 0: - return int(tokens), str(source or "provider_counter") - except Exception: - logger.debug("Provider token counter failed; fallback to tiktoken") - - estimated = self._estimate_prompt_tokens(messages, tools) - if estimated > 0: - return int(estimated), "tiktoken" - return 0, "none" - - @staticmethod - def _estimate_completion_tokens(content: str) -> int: - """Estimate completion tokens with tiktoken (fallback only).""" - if tiktoken is None: - return 0 - try: - enc = tiktoken.get_encoding("cl100k_base") - return len(enc.encode(content or "")) - except Exception: - return 0 - - def _get_compressed_until(self, session: Session) -> int: - """Read/normalize compressed boundary and migrate old metadata format.""" - raw = session.metadata.get("_compressed_until", 0) - try: - compressed_until = int(raw) - except (TypeError, ValueError): - compressed_until = 0 - - if compressed_until <= 0: - ranges = 
session.metadata.get("_compressed_ranges") - if isinstance(ranges, list): - inferred = 0 - for item in ranges: - if not isinstance(item, (list, tuple)) or len(item) != 2: - continue - try: - inferred = max(inferred, int(item[1])) - except (TypeError, ValueError): - continue - compressed_until = inferred - - compressed_until = max(0, min(compressed_until, len(session.messages))) - session.metadata["_compressed_until"] = compressed_until - # 兼容旧版本:一旦迁移出连续边界,就可以清理旧字段 - session.metadata.pop("_compressed_ranges", None) - # 注意:不要删除 _cumulative_tokens,压缩逻辑需要它来跟踪累积 token 计数 - return compressed_until - - def _set_compressed_until(self, session: Session, idx: int) -> None: - """Persist a contiguous compressed boundary.""" - session.metadata["_compressed_until"] = max(0, min(int(idx), len(session.messages))) - session.metadata.pop("_compressed_ranges", None) - # 注意:不要删除 _cumulative_tokens,压缩逻辑需要它来跟踪累积 token 计数 - - @staticmethod - def _estimate_message_tokens(message: dict[str, Any]) -> int: - """Rough token estimate for a single persisted message.""" - content = message.get("content") - parts: list[str] = [] - if isinstance(content, str): - parts.append(content) - elif isinstance(content, list): - for part in content: - if isinstance(part, dict) and part.get("type") == "text": - txt = part.get("text", "") - if txt: - parts.append(txt) - else: - parts.append(json.dumps(part, ensure_ascii=False)) - elif content is not None: - parts.append(json.dumps(content, ensure_ascii=False)) - - for key in ("name", "tool_call_id"): - val = message.get(key) - if isinstance(val, str) and val: - parts.append(val) - if message.get("tool_calls"): - parts.append(json.dumps(message["tool_calls"], ensure_ascii=False)) - - payload = "\n".join(parts) - if not payload: - return 1 - if tiktoken is not None: - try: - enc = tiktoken.get_encoding("cl100k_base") - return max(1, len(enc.encode(payload))) - except Exception: - pass - return max(1, len(payload) // 4) - - def _pick_compression_chunk_by_tokens( 
- self, - session: Session, - reduction_tokens: int, - *, - tail_keep: int = 12, - ) -> tuple[int, int, int] | None: - """ - Pick one contiguous old chunk so its estimated size is roughly enough - to reduce `reduction_tokens`. - """ - messages = session.messages - start = self._get_compressed_until(session) - if len(messages) - start <= tail_keep + 2: - return None - - end_limit = len(messages) - tail_keep - if end_limit - start < 2: - return None - - target = max(1, reduction_tokens) - end = start - collected = 0 - while end < end_limit and collected < target: - collected += self._estimate_message_tokens(messages[end]) - end += 1 - - if end - start < 2: - end = min(end_limit, start + 2) - collected = sum(self._estimate_message_tokens(m) for m in messages[start:end]) - if end - start < 2: - return None - return start, end, collected - - def _estimate_session_prompt_tokens(self, session: Session) -> tuple[int, str]: - """ - Estimate current full prompt tokens for this session view - (system + compressed history view + runtime/user placeholder + tools). - """ - history = self._build_compressed_history_view(session) - channel, chat_id = (session.key.split(":", 1) if ":" in session.key else (None, None)) - probe_messages = self.context.build_messages( - history=history, - current_message="[token-probe]", - channel=channel, - chat_id=chat_id, - ) - return self._estimate_prompt_tokens_chain(probe_messages, self.tools.get_definitions()) - - async def _maybe_compress_history( - self, - session: Session, - ) -> None: - """ - End-of-turn policy: - - Estimate current prompt usage from persisted session view. - - If above start ratio, perform one best-effort compression chunk. 
- """ - if not session.messages: - self._set_compressed_until(session, 0) - return - - budget = max(1, self.max_tokens_input - self.max_tokens - self._reserve_tokens) - start_threshold = int(budget * self.compression_start_ratio) - target_threshold = int(budget * self.compression_target_ratio) - if target_threshold >= start_threshold: - target_threshold = max(0, start_threshold - 1) - - # Prefer provider usage prompt tokens from the turn-ending call. - # If unavailable, fall back to estimator chain. - raw_prompt_tokens = session.metadata.get("_last_prompt_tokens") - if isinstance(raw_prompt_tokens, (int, float)) and raw_prompt_tokens > 0: - current_tokens = int(raw_prompt_tokens) - token_source = str(session.metadata.get("_last_prompt_source") or "usage_prompt") - else: - current_tokens, token_source = self._estimate_session_prompt_tokens(session) - - current_ratio = current_tokens / budget if budget else 0.0 - if current_tokens <= 0: - logger.debug("Compression skip {}: token estimate unavailable", session.key) - return - if current_tokens < start_threshold: - logger.debug( - "Compression idle {}: {}/{} ({:.1%}) via {}", - session.key, - current_tokens, - budget, - current_ratio, - token_source, - ) - return - logger.info( - "Compression trigger {}: {}/{} ({:.1%}) via {}", - session.key, - current_tokens, - budget, - current_ratio, - token_source, - ) - - reduction_by_target = max(0, current_tokens - target_threshold) - reduction_by_delta = max(1, start_threshold - target_threshold) - reduction_need = max(reduction_by_target, reduction_by_delta) - - chunk_range = self._pick_compression_chunk_by_tokens(session, reduction_need, tail_keep=10) - if chunk_range is None: - logger.info("Compression skipped for {}: no compressible chunk", session.key) - return - - start_idx, end_idx, estimated_chunk_tokens = chunk_range - chunk = session.messages[start_idx:end_idx] - if len(chunk) < 2: - return - - logger.info( - "Compression chunk {}: msgs {}-{} (count={}, est~{}, 
need~{})", - session.key, - start_idx, - end_idx - 1, - len(chunk), - estimated_chunk_tokens, - reduction_need, - ) - success, _ = await self.context.memory.consolidate_chunk( - chunk, - self.provider, - self.model, - ) - if not success: - logger.warning("Compression aborted for {}: consolidation failed", session.key) - return - - self._set_compressed_until(session, end_idx) - self.sessions.save(session) - - after_tokens, after_source = self._estimate_session_prompt_tokens(session) - after_ratio = after_tokens / budget if budget else 0.0 - reduced = max(0, current_tokens - after_tokens) - reduced_ratio = (reduced / current_tokens) if current_tokens > 0 else 0.0 - logger.info( - "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%})", - session.key, - after_tokens, - budget, - after_ratio, - after_source, - reduced, - reduced_ratio, - ) - - def _schedule_background_compression(self, session_key: str) -> None: - """Schedule best-effort background compression for a session.""" - existing = self._compression_tasks.get(session_key) - if existing is not None and not existing.done(): - return - - async def _runner() -> None: - session = self.sessions.get_or_create(session_key) - try: - await self._maybe_compress_history(session) - except Exception: - logger.exception("Background compression failed for {}", session_key) - - task = asyncio.create_task(_runner()) - self._compression_tasks[session_key] = task - - def _cleanup(t: asyncio.Task) -> None: - cur = self._compression_tasks.get(session_key) - if cur is t: - self._compression_tasks.pop(session_key, None) - try: - t.result() - except BaseException: - pass - - task.add_done_callback(_cleanup) - - async def wait_for_background_compression(self, timeout_s: float | None = None) -> None: - """Wait for currently scheduled compression tasks.""" - pending = [t for t in self._compression_tasks.values() if not t.done()] - if not pending: - return - - logger.info("Waiting for {} background compression task(s)", 
len(pending)) - waiter = asyncio.gather(*pending, return_exceptions=True) - if timeout_s is None: - await waiter - return - - try: - await asyncio.wait_for(waiter, timeout=timeout_s) - except asyncio.TimeoutError: - logger.warning( - "Background compression wait timed out after {}s ({} task(s) still running)", - timeout_s, - len([t for t in self._compression_tasks.values() if not t.done()]), - ) - - def _build_compressed_history_view( - self, - session: Session, - ) -> list[dict]: - """Build non-destructive history view using the compressed boundary.""" - compressed_until = self._get_compressed_until(session) - if compressed_until <= 0: - return session.get_history(max_messages=0) - - notice_msg: dict[str, Any] = { - "role": "assistant", - "content": ( - "As your assistant, I have compressed earlier context. " - "If you need details, please check memory/HISTORY.md." - ), - } - - tail: list[dict[str, Any]] = [] - for msg in session.messages[compressed_until:]: - entry: dict[str, Any] = {"role": msg["role"], "content": msg.get("content", "")} - for k in ("tool_calls", "tool_call_id", "name"): - if k in msg: - entry[k] = msg[k] - tail.append(entry) - - # Drop leading non-user entries from tail to avoid orphan tool blocks. 
- for i, m in enumerate(tail): - if m.get("role") == "user": - tail = tail[i:] - break - else: - tail = [] - - return [notice_msg, *tail] - def _register_default_tools(self) -> None: """Register the default set of tools.""" allowed_dir = self.workspace if self.restrict_to_workspace else None for cls in (ReadFileTool, WriteFileTool, EditFileTool, ListDirTool): self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir)) - self.tools.register(ValidateDeployJSONTool()) - self.tools.register(ValidateUsageYAMLTool()) - self.tools.register(HuggingFaceModelSearchTool()) self.tools.register(ExecTool( working_dir=str(self.workspace), timeout=self.exec_config.timeout, @@ -563,24 +186,12 @@ class AgentLoop: self, initial_messages: list[dict], on_progress: Callable[..., Awaitable[None]] | None = None, - ) -> tuple[str | None, list[str], list[dict], int, str]: - """ - Run the agent iteration loop. - - Returns: - (final_content, tools_used, messages, total_tokens_this_turn, token_source) - total_tokens_this_turn: total tokens (prompt + completion) for this turn - token_source: provider_total / provider_sum / provider_prompt / - provider_counter+tiktoken_completion / tiktoken / none - """ + ) -> tuple[str | None, list[str], list[dict]]: + """Run the agent iteration loop.""" messages = initial_messages iteration = 0 final_content = None tools_used: list[str] = [] - total_tokens_this_turn = 0 - token_source = "none" - self._last_turn_prompt_tokens = 0 - self._last_turn_prompt_source = "none" while iteration < self.max_iterations: iteration += 1 @@ -596,63 +207,6 @@ class AgentLoop: reasoning_effort=self.reasoning_effort, ) - # Prefer provider usage from the turn-ending model call; fallback to tiktoken. - # Calculate total tokens (prompt + completion) for this turn. 
- usage = response.usage or {} - t_tokens = usage.get("total_tokens") - p_tokens = usage.get("prompt_tokens") - c_tokens = usage.get("completion_tokens") - - if isinstance(t_tokens, (int, float)) and t_tokens > 0: - total_tokens_this_turn = int(t_tokens) - token_source = "provider_total" - if isinstance(p_tokens, (int, float)) and p_tokens > 0: - self._last_turn_prompt_tokens = int(p_tokens) - self._last_turn_prompt_source = "usage_prompt" - elif isinstance(c_tokens, (int, float)): - prompt_derived = int(t_tokens) - int(c_tokens) - if prompt_derived > 0: - self._last_turn_prompt_tokens = prompt_derived - self._last_turn_prompt_source = "usage_total_minus_completion" - elif isinstance(p_tokens, (int, float)) and isinstance(c_tokens, (int, float)): - # If we have both prompt and completion tokens, sum them - total_tokens_this_turn = int(p_tokens) + int(c_tokens) - token_source = "provider_sum" - if p_tokens > 0: - self._last_turn_prompt_tokens = int(p_tokens) - self._last_turn_prompt_source = "usage_prompt" - elif isinstance(p_tokens, (int, float)) and p_tokens > 0: - # Fallback: use prompt tokens only (completion might be 0 for tool calls) - total_tokens_this_turn = int(p_tokens) - token_source = "provider_prompt" - self._last_turn_prompt_tokens = int(p_tokens) - self._last_turn_prompt_source = "usage_prompt" - else: - # Estimate with unified chain (provider counter -> tiktoken), plus completion tiktoken. 
- estimated_prompt, prompt_source = self._estimate_prompt_tokens_chain(messages, tool_defs) - estimated_completion = self._estimate_completion_tokens(response.content or "") - total_tokens_this_turn = estimated_prompt + estimated_completion - if estimated_prompt > 0: - self._last_turn_prompt_tokens = int(estimated_prompt) - self._last_turn_prompt_source = str(prompt_source or "tiktoken") - if total_tokens_this_turn > 0: - token_source = ( - "tiktoken" - if prompt_source == "tiktoken" - else f"{prompt_source}+tiktoken_completion" - ) - if total_tokens_this_turn <= 0: - total_tokens_this_turn = 0 - token_source = "none" - - logger.debug( - "Turn token usage: source={}, total={}, prompt={}, completion={}", - token_source, - total_tokens_this_turn, - p_tokens if isinstance(p_tokens, (int, float)) else None, - c_tokens if isinstance(c_tokens, (int, float)) else None, - ) - if response.has_tool_calls: if on_progress: thought = self._strip_think(response.content) @@ -707,7 +261,7 @@ class AgentLoop: "without completing the task. You can try breaking the task into smaller steps." 
) - return final_content, tools_used, messages, total_tokens_this_turn, token_source + return final_content, tools_used, messages async def run(self) -> None: """Run the agent loop, dispatching messages as tasks to stay responsive to /stop.""" @@ -732,9 +286,6 @@ class AgentLoop: """Cancel all active tasks and subagents for the session.""" tasks = self._active_tasks.pop(msg.session_key, []) cancelled = sum(1 for t in tasks if not t.done() and t.cancel()) - comp = self._compression_tasks.get(msg.session_key) - if comp is not None and not comp.done() and comp.cancel(): - cancelled += 1 for t in tasks: try: await t @@ -781,9 +332,6 @@ class AgentLoop: def stop(self) -> None: """Stop the agent loop.""" self._running = False - for task in list(self._compression_tasks.values()): - if not task.done(): - task.cancel() logger.info("Agent loop stopping") async def _process_message( @@ -800,22 +348,17 @@ class AgentLoop: logger.info("Processing system message from {}", msg.sender_id) key = f"{channel}:{chat_id}" session = self.sessions.get_or_create(key) + await self.memory_consolidator.maybe_consolidate_by_tokens(session) self._set_tool_context(channel, chat_id, msg.metadata.get("message_id")) - history = self._build_compressed_history_view(session) + history = session.get_history(max_messages=0) messages = self.context.build_messages( history=history, current_message=msg.content, channel=channel, chat_id=chat_id, ) - final_content, _, all_msgs, _, _ = await self._run_agent_loop(messages) - if self._last_turn_prompt_tokens > 0: - session.metadata["_last_prompt_tokens"] = self._last_turn_prompt_tokens - session.metadata["_last_prompt_source"] = self._last_turn_prompt_source - else: - session.metadata.pop("_last_prompt_tokens", None) - session.metadata.pop("_last_prompt_source", None) + final_content, _, all_msgs = await self._run_agent_loop(messages) self._save_turn(session, all_msgs, 1 + len(history)) self.sessions.save(session) - 
self._schedule_background_compression(session.key) + await self.memory_consolidator.maybe_consolidate_by_tokens(session) return OutboundMessage(channel=channel, chat_id=chat_id, content=final_content or "Background task completed.") @@ -829,19 +372,12 @@ class AgentLoop: cmd = msg.content.strip().lower() if cmd == "/new": try: - # 在清空会话前,将当前完整对话做一次归档压缩到 MEMORY/HISTORY 中 - if session.messages: - ok, _ = await self.context.memory.consolidate_chunk( - session.messages, - self.provider, - self.model, + if not await self.memory_consolidator.archive_unconsolidated(session): + return OutboundMessage( + channel=msg.channel, + chat_id=msg.chat_id, + content="Memory archival failed, session not cleared. Please try again.", ) - if not ok: - return OutboundMessage( - channel=msg.channel, - chat_id=msg.chat_id, - content="Memory archival failed, session not cleared. Please try again.", - ) except Exception: logger.exception("/new archival failed for {}", session.key) return OutboundMessage( @@ -859,23 +395,20 @@ class AgentLoop: return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id, content="🐈 nanobot commands:\n/new — Start a new conversation\n/stop — Stop the current task\n/help — Show available commands") + await self.memory_consolidator.maybe_consolidate_by_tokens(session) + self._set_tool_context(msg.channel, msg.chat_id, msg.metadata.get("message_id")) if message_tool := self.tools.get("message"): if isinstance(message_tool, MessageTool): message_tool.start_turn() - # 正常对话:使用压缩后的历史视图(压缩在回合结束后进行) - history = self._build_compressed_history_view(session) + history = session.get_history(max_messages=0) initial_messages = self.context.build_messages( history=history, current_message=msg.content, media=msg.media if msg.media else None, channel=msg.channel, chat_id=msg.chat_id, ) - # Add [CRON JOB] identifier for cron sessions (session_key starts with "cron:") - if session_key and session_key.startswith("cron:"): - if initial_messages and 
initial_messages[0].get("role") == "system": - initial_messages[0]["content"] = f"[CRON JOB] {initial_messages[0]['content']}" async def _bus_progress(content: str, *, tool_hint: bool = False) -> None: meta = dict(msg.metadata or {}) @@ -885,23 +418,16 @@ class AgentLoop: channel=msg.channel, chat_id=msg.chat_id, content=content, metadata=meta, )) - final_content, _, all_msgs, total_tokens_this_turn, token_source = await self._run_agent_loop( + final_content, _, all_msgs = await self._run_agent_loop( initial_messages, on_progress=on_progress or _bus_progress, ) if final_content is None: final_content = "I've completed processing but have no response to give." - if self._last_turn_prompt_tokens > 0: - session.metadata["_last_prompt_tokens"] = self._last_turn_prompt_tokens - session.metadata["_last_prompt_source"] = self._last_turn_prompt_source - else: - session.metadata.pop("_last_prompt_tokens", None) - session.metadata.pop("_last_prompt_source", None) - - self._save_turn(session, all_msgs, 1 + len(history), total_tokens_this_turn) + self._save_turn(session, all_msgs, 1 + len(history)) self.sessions.save(session) - self._schedule_background_compression(session.key) + await self.memory_consolidator.maybe_consolidate_by_tokens(session) if (mt := self.tools.get("message")) and isinstance(mt, MessageTool) and mt._sent_in_turn: return None @@ -913,7 +439,7 @@ class AgentLoop: metadata=msg.metadata or {}, ) - def _save_turn(self, session: Session, messages: list[dict], skip: int, total_tokens_this_turn: int = 0) -> None: + def _save_turn(self, session: Session, messages: list[dict], skip: int) -> None: """Save new-turn messages into session, truncating large tool results.""" from datetime import datetime for m in messages[skip:]: @@ -947,14 +473,6 @@ class AgentLoop: entry.setdefault("timestamp", datetime.now().isoformat()) session.messages.append(entry) session.updated_at = datetime.now() - - # Update cumulative token count for compression tracking - if 
total_tokens_this_turn > 0: - current_cumulative = session.metadata.get("_cumulative_tokens", 0) - if isinstance(current_cumulative, (int, float)): - session.metadata["_cumulative_tokens"] = int(current_cumulative) + total_tokens_this_turn - else: - session.metadata["_cumulative_tokens"] = total_tokens_this_turn async def process_direct( self, diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py index e29788a..cd5f54f 100644 --- a/nanobot/agent/memory.py +++ b/nanobot/agent/memory.py @@ -2,17 +2,19 @@ from __future__ import annotations +import asyncio import json +import weakref from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Callable from loguru import logger -from nanobot.utils.helpers import ensure_dir +from nanobot.utils.helpers import ensure_dir, estimate_message_tokens, estimate_prompt_tokens_chain if TYPE_CHECKING: from nanobot.providers.base import LLMProvider - from nanobot.session.manager import Session + from nanobot.session.manager import Session, SessionManager _SAVE_MEMORY_TOOL = [ @@ -26,7 +28,7 @@ _SAVE_MEMORY_TOOL = [ "properties": { "history_entry": { "type": "string", - "description": "A paragraph (2-5 sentences) summarizing key events/decisions/topics. " + "description": "A paragraph summarizing key events/decisions/topics. " "Start with [YYYY-MM-DD HH:MM]. 
Include detail useful for grep search.", }, "memory_update": { @@ -42,6 +44,20 @@ _SAVE_MEMORY_TOOL = [ ] +def _ensure_text(value: Any) -> str: + """Normalize tool-call payload values to text for file storage.""" + return value if isinstance(value, str) else json.dumps(value, ensure_ascii=False) + + +def _normalize_save_memory_args(args: Any) -> dict[str, Any] | None: + """Normalize provider tool-call arguments to the expected dict shape.""" + if isinstance(args, str): + args = json.loads(args) + if isinstance(args, list): + return args[0] if args and isinstance(args[0], dict) else None + return args if isinstance(args, dict) else None + + class MemoryStore: """Two-layer memory: MEMORY.md (long-term facts) + HISTORY.md (grep-searchable log).""" @@ -66,29 +82,27 @@ class MemoryStore: long_term = self.read_long_term() return f"## Long-term Memory\n{long_term}" if long_term else "" - async def consolidate_chunk( + @staticmethod + def _format_messages(messages: list[dict]) -> str: + lines = [] + for message in messages: + if not message.get("content"): + continue + tools = f" [tools: {', '.join(message['tools_used'])}]" if message.get("tools_used") else "" + lines.append( + f"[{message.get('timestamp', '?')[:16]}] {message['role'].upper()}{tools}: {message['content']}" + ) + return "\n".join(lines) + + async def consolidate( self, messages: list[dict], provider: LLMProvider, model: str, - ) -> tuple[bool, str | None]: - """Consolidate a chunk of messages into MEMORY.md + HISTORY.md via LLM tool call. - - Returns (success, None). - - - success: True on success (including no-op), False on failure. - - The second return value is reserved for future use (e.g. RAG-style summaries) and is - always None in the current implementation. 
- """ + ) -> bool: + """Consolidate the provided message chunk into MEMORY.md + HISTORY.md.""" if not messages: - return True, None - - lines = [] - for m in messages: - if not m.get("content"): - continue - tools = f" [tools: {', '.join(m['tools_used'])}]" if m.get("tools_used") else "" - lines.append(f"[{m.get('timestamp', '?')[:16]}] {m['role'].upper()}{tools}: {m['content']}") + return True current_memory = self.read_long_term() prompt = f"""Process this conversation and call the save_memory tool with your consolidation. @@ -97,24 +111,12 @@ class MemoryStore: {current_memory or "(empty)"} ## Conversation to Process -{chr(10).join(lines)}""" +{self._format_messages(messages)}""" try: response = await provider.chat_with_retry( messages=[ - { - "role": "system", - "content": ( - "You are a memory consolidation agent.\n" - "Your job is to:\n" - "1) Append a concise but grep-friendly entry to HISTORY.md summarizing key events, decisions and topics.\n" - " - Write 1 paragraph of 2–5 sentences that starts with [YYYY-MM-DD HH:MM].\n" - " - Include concrete names, IDs and numbers so it is easy to search with grep.\n" - "2) Update long-term MEMORY.md with stable facts and user preferences as markdown, including all existing facts plus new ones.\n" - "3) Optionally return a short context_summary (1–3 sentences) that will replace the raw messages in future dialogue history.\n\n" - "Always call the save_memory tool with history_entry, memory_update and (optionally) context_summary." - ), - }, + {"role": "system", "content": "You are a memory consolidation agent. 
Call the save_memory tool with your consolidation of the conversation."}, {"role": "user", "content": prompt}, ], tools=_SAVE_MEMORY_TOOL, @@ -123,35 +125,160 @@ class MemoryStore: if not response.has_tool_calls: logger.warning("Memory consolidation: LLM did not call save_memory, skipping") - return False, None + return False - args = response.tool_calls[0].arguments - # Some providers return arguments as a JSON string instead of dict - if isinstance(args, str): - args = json.loads(args) - # Some providers return arguments as a list (handle edge case) - if isinstance(args, list): - if args and isinstance(args[0], dict): - args = args[0] - else: - logger.warning("Memory consolidation: unexpected arguments as empty or non-dict list") - return False, None - if not isinstance(args, dict): - logger.warning("Memory consolidation: unexpected arguments type {}", type(args).__name__) - return False, None + args = _normalize_save_memory_args(response.tool_calls[0].arguments) + if args is None: + logger.warning("Memory consolidation: unexpected save_memory arguments") + return False if entry := args.get("history_entry"): - if not isinstance(entry, str): - entry = json.dumps(entry, ensure_ascii=False) - self.append_history(entry) + self.append_history(_ensure_text(entry)) if update := args.get("memory_update"): - if not isinstance(update, str): - update = json.dumps(update, ensure_ascii=False) + update = _ensure_text(update) if update != current_memory: self.write_long_term(update) logger.info("Memory consolidation done for {} messages", len(messages)) - return True, None + return True except Exception: logger.exception("Memory consolidation failed") - return False, None + return False + + +class MemoryConsolidator: + """Owns consolidation policy, locking, and session offset updates.""" + + _MAX_CONSOLIDATION_ROUNDS = 5 + + def __init__( + self, + workspace: Path, + provider: LLMProvider, + model: str, + sessions: SessionManager, + context_window_tokens: int, + build_messages: 
Callable[..., list[dict[str, Any]]], + get_tool_definitions: Callable[[], list[dict[str, Any]]], + ): + self.store = MemoryStore(workspace) + self.provider = provider + self.model = model + self.sessions = sessions + self.context_window_tokens = context_window_tokens + self._build_messages = build_messages + self._get_tool_definitions = get_tool_definitions + self._locks: weakref.WeakValueDictionary[str, asyncio.Lock] = weakref.WeakValueDictionary() + + def get_lock(self, session_key: str) -> asyncio.Lock: + """Return the shared consolidation lock for one session.""" + return self._locks.setdefault(session_key, asyncio.Lock()) + + async def consolidate_messages(self, messages: list[dict[str, object]]) -> bool: + """Archive a selected message chunk into persistent memory.""" + return await self.store.consolidate(messages, self.provider, self.model) + + def pick_consolidation_boundary( + self, + session: Session, + tokens_to_remove: int, + ) -> tuple[int, int] | None: + """Pick a user-turn boundary that removes enough old prompt tokens.""" + start = session.last_consolidated + if start >= len(session.messages) or tokens_to_remove <= 0: + return None + + removed_tokens = 0 + last_boundary: tuple[int, int] | None = None + for idx in range(start, len(session.messages)): + message = session.messages[idx] + if idx > start and message.get("role") == "user": + last_boundary = (idx, removed_tokens) + if removed_tokens >= tokens_to_remove: + return last_boundary + removed_tokens += estimate_message_tokens(message) + + return last_boundary + + def estimate_session_prompt_tokens(self, session: Session) -> tuple[int, str]: + """Estimate current prompt size for the normal session history view.""" + history = session.get_history(max_messages=0) + channel, chat_id = (session.key.split(":", 1) if ":" in session.key else (None, None)) + probe_messages = self._build_messages( + history=history, + current_message="[token-probe]", + channel=channel, + chat_id=chat_id, + ) + return 
estimate_prompt_tokens_chain( + self.provider, + self.model, + probe_messages, + self._get_tool_definitions(), + ) + + async def archive_unconsolidated(self, session: Session) -> bool: + """Archive the full unconsolidated tail for /new-style session rollover.""" + lock = self.get_lock(session.key) + async with lock: + snapshot = session.messages[session.last_consolidated:] + if not snapshot: + return True + return await self.consolidate_messages(snapshot) + + async def maybe_consolidate_by_tokens(self, session: Session) -> None: + """Loop: archive old messages until prompt fits within half the context window.""" + if not session.messages or self.context_window_tokens <= 0: + return + + lock = self.get_lock(session.key) + async with lock: + target = self.context_window_tokens // 2 + estimated, source = self.estimate_session_prompt_tokens(session) + if estimated <= 0: + return + if estimated < self.context_window_tokens: + logger.debug( + "Token consolidation idle {}: {}/{} via {}", + session.key, + estimated, + self.context_window_tokens, + source, + ) + return + + for round_num in range(self._MAX_CONSOLIDATION_ROUNDS): + if estimated <= target: + return + + boundary = self.pick_consolidation_boundary(session, max(1, estimated - target)) + if boundary is None: + logger.debug( + "Token consolidation: no safe boundary for {} (round {})", + session.key, + round_num, + ) + return + + end_idx = boundary[0] + chunk = session.messages[session.last_consolidated:end_idx] + if not chunk: + return + + logger.info( + "Token consolidation round {} for {}: {}/{} via {}, chunk={} msgs", + round_num, + session.key, + estimated, + self.context_window_tokens, + source, + len(chunk), + ) + if not await self.consolidate_messages(chunk): + return + session.last_consolidated = end_idx + self.sessions.save(session) + + estimated, source = self.estimate_session_prompt_tokens(session) + if estimated <= 0: + return diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 
36e2a53..cf69450 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -191,6 +191,8 @@ def onboard(): save_config(Config()) console.print(f"[green]✓[/green] Created config at {config_path}") + console.print("[dim]Config template now uses `maxTokens` + `contextWindowTokens`; `memoryWindow` is no longer a runtime setting.[/dim]") + # Create workspace workspace = get_workspace_path() @@ -283,6 +285,16 @@ def _load_runtime_config(config: str | None = None, workspace: str | None = None return loaded +def _print_deprecated_memory_window_notice(config: Config) -> None: + """Warn when running with old memoryWindow-only config.""" + if config.agents.defaults.should_warn_deprecated_memory_window: + console.print( + "[yellow]Hint:[/yellow] Detected deprecated `memoryWindow` without " + "`contextWindowTokens`. `memoryWindow` is ignored; run " + "[cyan]nanobot onboard[/cyan] to refresh your config template." + ) + + # ============================================================================ # Gateway / Server # ============================================================================ @@ -310,6 +322,7 @@ def gateway( logging.basicConfig(level=logging.DEBUG) config = _load_runtime_config(config, workspace) + _print_deprecated_memory_window_notice(config) port = port if port is not None else config.gateway.port console.print(f"{__logo__} Starting nanobot gateway on port {port}...") @@ -329,12 +342,10 @@ def gateway( workspace=config.workspace_path, model=config.agents.defaults.model, temperature=config.agents.defaults.temperature, - max_tokens=config.agents.defaults.max_tokens_output, + max_tokens=config.agents.defaults.max_tokens, max_iterations=config.agents.defaults.max_tool_iterations, reasoning_effort=config.agents.defaults.reasoning_effort, - max_tokens_input=config.agents.defaults.max_tokens_input, - compression_start_ratio=config.agents.defaults.compression_start_ratio, - compression_target_ratio=config.agents.defaults.compression_target_ratio, + 
context_window_tokens=config.agents.defaults.context_window_tokens, brave_api_key=config.tools.web.search.api_key or None, web_proxy=config.tools.web.proxy or None, exec_config=config.tools.exec, @@ -496,6 +507,7 @@ def agent( from nanobot.cron.service import CronService config = _load_runtime_config(config, workspace) + _print_deprecated_memory_window_notice(config) sync_workspace_templates(config.workspace_path) bus = MessageBus() @@ -516,12 +528,10 @@ def agent( workspace=config.workspace_path, model=config.agents.defaults.model, temperature=config.agents.defaults.temperature, - max_tokens=config.agents.defaults.max_tokens_output, + max_tokens=config.agents.defaults.max_tokens, max_iterations=config.agents.defaults.max_tool_iterations, reasoning_effort=config.agents.defaults.reasoning_effort, - max_tokens_input=config.agents.defaults.max_tokens_input, - compression_start_ratio=config.agents.defaults.compression_start_ratio, - compression_target_ratio=config.agents.defaults.compression_target_ratio, + context_window_tokens=config.agents.defaults.context_window_tokens, brave_api_key=config.tools.web.search.api_key or None, web_proxy=config.tools.web.proxy or None, exec_config=config.tools.exec, diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index 0e41d12..a2de239 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -190,22 +190,11 @@ class SlackConfig(Base): class QQConfig(Base): - """QQ channel configuration. - - Supports two implementations: - 1. Official botpy SDK: requires app_id and secret - 2. OneBot protocol: requires api_url (and optionally ws_reverse_url, bot_qq, access_token) - """ + """QQ channel configuration using botpy SDK.""" enabled: bool = False - # Official botpy SDK fields app_id: str = "" # 机器人 ID (AppID) from q.qq.com secret: str = "" # 机器人密钥 (AppSecret) from q.qq.com - # OneBot protocol fields - api_url: str = "" # OneBot HTTP API URL (e.g. 
"http://localhost:5700") - ws_reverse_url: str = "" # OneBot WebSocket reverse URL (e.g. "ws://localhost:8080/ws/reverse") - bot_qq: int | None = None # Bot's QQ number (for filtering self messages) - access_token: str = "" # Optional access token for OneBot API allow_from: list[str] = Field( default_factory=list ) # Allowed user openids (empty = public access) @@ -238,20 +227,19 @@ class AgentDefaults(Base): provider: str = ( "auto" # Provider name (e.g. "anthropic", "openrouter") or "auto" for auto-detection ) - # 原生上下文最大窗口(通常对应模型的 max_input_tokens / max_context_tokens) - # 默认按照主流大模型(如 GPT-4o、Claude 3.x 等)的 128k 上下文给一个宽松上限,实际应根据所选模型文档手动调整。 - max_tokens_input: int = 128_000 - # 默认单次回复的最大输出 token 上限(调用时可按需要再做截断或比例分配) - # 8192 足以覆盖大多数实际对话/工具使用场景,同样可按需手动调整。 - max_tokens_output: int = 8192 - # 会话历史压缩触发比例:当估算的输入 token 使用量 >= maxTokensInput * compressionStartRatio 时开始压缩。 - compression_start_ratio: float = 0.7 - # 会话历史压缩目标比例:每轮压缩后尽量把估算的输入 token 使用量压到 maxTokensInput * compressionTargetRatio 附近。 - compression_target_ratio: float = 0.4 + max_tokens: int = 8192 + context_window_tokens: int = 65_536 temperature: float = 0.1 max_tool_iterations: int = 40 + # Deprecated compatibility field: accepted from old configs but ignored at runtime. 
+ memory_window: int | None = Field(default=None, exclude=True) reasoning_effort: str | None = None # low / medium / high — enables LLM thinking mode + @property + def should_warn_deprecated_memory_window(self) -> bool: + """Return True when old memoryWindow is present without contextWindowTokens.""" + return self.memory_window is not None and "context_window_tokens" not in self.model_fields_set + class AgentsConfig(Base): """Agent configuration.""" diff --git a/nanobot/session/manager.py b/nanobot/session/manager.py index 1cb8a51..f0a6484 100644 --- a/nanobot/session/manager.py +++ b/nanobot/session/manager.py @@ -9,6 +9,7 @@ from typing import Any from loguru import logger +from nanobot.config.paths import get_legacy_sessions_dir from nanobot.utils.helpers import ensure_dir, safe_filename @@ -29,6 +30,7 @@ class Session: created_at: datetime = field(default_factory=datetime.now) updated_at: datetime = field(default_factory=datetime.now) metadata: dict[str, Any] = field(default_factory=dict) + last_consolidated: int = 0 # Number of messages already consolidated to files def add_message(self, role: str, content: str, **kwargs: Any) -> None: """Add a message to the session.""" @@ -42,13 +44,9 @@ class Session: self.updated_at = datetime.now() def get_history(self, max_messages: int = 500) -> list[dict[str, Any]]: - """ - Return messages for LLM input, aligned to a user turn. 
- - - max_messages > 0 时只保留最近 max_messages 条; - - max_messages <= 0 时不做条数截断,返回全部消息。 - """ - sliced = self.messages if max_messages <= 0 else self.messages[-max_messages:] + """Return unconsolidated messages for LLM input, aligned to a user turn.""" + unconsolidated = self.messages[self.last_consolidated:] + sliced = unconsolidated[-max_messages:] # Drop leading non-user messages to avoid orphaned tool_result blocks for i, m in enumerate(sliced): @@ -68,7 +66,7 @@ class Session: def clear(self) -> None: """Clear all messages and reset session to initial state.""" self.messages = [] - self.metadata = {} + self.last_consolidated = 0 self.updated_at = datetime.now() @@ -82,7 +80,7 @@ class SessionManager: def __init__(self, workspace: Path): self.workspace = workspace self.sessions_dir = ensure_dir(self.workspace / "sessions") - self.legacy_sessions_dir = Path.home() / ".nanobot" / "sessions" + self.legacy_sessions_dir = get_legacy_sessions_dir() self._cache: dict[str, Session] = {} def _get_session_path(self, key: str) -> Path: @@ -134,6 +132,7 @@ class SessionManager: messages = [] metadata = {} created_at = None + last_consolidated = 0 with open(path, encoding="utf-8") as f: for line in f: @@ -146,6 +145,7 @@ class SessionManager: if data.get("_type") == "metadata": metadata = data.get("metadata", {}) created_at = datetime.fromisoformat(data["created_at"]) if data.get("created_at") else None + last_consolidated = data.get("last_consolidated", 0) else: messages.append(data) @@ -154,6 +154,7 @@ class SessionManager: messages=messages, created_at=created_at or datetime.now(), metadata=metadata, + last_consolidated=last_consolidated ) except Exception as e: logger.warning("Failed to load session {}: {}", key, e) @@ -170,6 +171,7 @@ class SessionManager: "created_at": session.created_at.isoformat(), "updated_at": session.updated_at.isoformat(), "metadata": session.metadata, + "last_consolidated": session.last_consolidated } f.write(json.dumps(metadata_line, 
ensure_ascii=False) + "\n") for msg in session.messages: diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py index 57c60dc..9242ba6 100644 --- a/nanobot/utils/helpers.py +++ b/nanobot/utils/helpers.py @@ -1,8 +1,12 @@ """Utility functions for nanobot.""" +import json import re from datetime import datetime from pathlib import Path +from typing import Any + +import tiktoken def detect_image_mime(data: bytes) -> str | None: @@ -68,6 +72,87 @@ def split_message(content: str, max_len: int = 2000) -> list[str]: return chunks +def estimate_prompt_tokens( + messages: list[dict[str, Any]], + tools: list[dict[str, Any]] | None = None, +) -> int: + """Estimate prompt tokens with tiktoken.""" + try: + enc = tiktoken.get_encoding("cl100k_base") + parts: list[str] = [] + for msg in messages: + content = msg.get("content") + if isinstance(content, str): + parts.append(content) + elif isinstance(content, list): + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + txt = part.get("text", "") + if txt: + parts.append(txt) + if tools: + parts.append(json.dumps(tools, ensure_ascii=False)) + return len(enc.encode("\n".join(parts))) + except Exception: + return 0 + + +def estimate_message_tokens(message: dict[str, Any]) -> int: + """Estimate prompt tokens contributed by one persisted message.""" + content = message.get("content") + parts: list[str] = [] + if isinstance(content, str): + parts.append(content) + elif isinstance(content, list): + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + text = part.get("text", "") + if text: + parts.append(text) + else: + parts.append(json.dumps(part, ensure_ascii=False)) + elif content is not None: + parts.append(json.dumps(content, ensure_ascii=False)) + + for key in ("name", "tool_call_id"): + value = message.get(key) + if isinstance(value, str) and value: + parts.append(value) + if message.get("tool_calls"): + parts.append(json.dumps(message["tool_calls"], 
ensure_ascii=False)) + + payload = "\n".join(parts) + if not payload: + return 1 + try: + enc = tiktoken.get_encoding("cl100k_base") + return max(1, len(enc.encode(payload))) + except Exception: + return max(1, len(payload) // 4) + + +def estimate_prompt_tokens_chain( + provider: Any, + model: str | None, + messages: list[dict[str, Any]], + tools: list[dict[str, Any]] | None = None, +) -> tuple[int, str]: + """Estimate prompt tokens via provider counter first, then tiktoken fallback.""" + provider_counter = getattr(provider, "estimate_prompt_tokens", None) + if callable(provider_counter): + try: + tokens, source = provider_counter(messages, tools, model) + if isinstance(tokens, (int, float)) and tokens > 0: + return int(tokens), str(source or "provider_counter") + except Exception: + pass + + estimated = estimate_prompt_tokens(messages, tools) + if estimated > 0: + return int(estimated), "tiktoken" + return 0, "none" + + def sync_workspace_templates(workspace: Path, silent: bool = False) -> list[str]: """Sync bundled templates to workspace. 
Only creates missing files.""" from importlib.resources import files as pkg_files diff --git a/pyproject.toml b/pyproject.toml index 62cf616..0344348 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ dependencies = [ "json-repair>=0.57.0,<1.0.0", "chardet>=3.0.2,<6.0.0", "openai>=2.8.0", + "tiktoken>=0.12.0,<1.0.0", ] [project.optional-dependencies] diff --git a/tests/test_commands.py b/tests/test_commands.py index 5e3760a..1375a3a 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -267,6 +267,16 @@ def test_agent_workspace_override_wins_over_config_workspace(mock_agent_runtime, assert mock_agent_runtime["agent_loop_cls"].call_args.kwargs["workspace"] == workspace_path +def test_agent_warns_about_deprecated_memory_window(mock_agent_runtime): + mock_agent_runtime["config"].agents.defaults.memory_window = 100 + + result = runner.invoke(app, ["agent", "-m", "hello"]) + + assert result.exit_code == 0 + assert "memoryWindow" in result.stdout + assert "contextWindowTokens" in result.stdout + + def test_gateway_uses_workspace_from_config_by_default(monkeypatch, tmp_path: Path) -> None: config_file = tmp_path / "instance" / "config.json" config_file.parent.mkdir(parents=True) @@ -327,6 +337,29 @@ def test_gateway_workspace_option_overrides_config(monkeypatch, tmp_path: Path) assert seen["workspace"] == override assert config.workspace_path == override + +def test_gateway_warns_about_deprecated_memory_window(monkeypatch, tmp_path: Path) -> None: + config_file = tmp_path / "instance" / "config.json" + config_file.parent.mkdir(parents=True) + config_file.write_text("{}") + + config = Config() + config.agents.defaults.memory_window = 100 + + monkeypatch.setattr("nanobot.config.loader.set_config_path", lambda _path: None) + monkeypatch.setattr("nanobot.config.loader.load_config", lambda _path=None: config) + monkeypatch.setattr("nanobot.cli.commands.sync_workspace_templates", lambda _path: None) + monkeypatch.setattr( + 
"nanobot.cli.commands._make_provider", + lambda _config: (_ for _ in ()).throw(_StopGateway("stop")), + ) + + result = runner.invoke(app, ["gateway", "--config", str(config_file)]) + + assert isinstance(result.exception, _StopGateway) + assert "memoryWindow" in result.stdout + assert "contextWindowTokens" in result.stdout + def test_gateway_uses_config_directory_for_cron_store(monkeypatch, tmp_path: Path) -> None: config_file = tmp_path / "instance" / "config.json" config_file.parent.mkdir(parents=True) diff --git a/tests/test_config_migration.py b/tests/test_config_migration.py new file mode 100644 index 0000000..62e601e --- /dev/null +++ b/tests/test_config_migration.py @@ -0,0 +1,88 @@ +import json + +from typer.testing import CliRunner + +from nanobot.cli.commands import app +from nanobot.config.loader import load_config, save_config + +runner = CliRunner() + + +def test_load_config_keeps_max_tokens_and_warns_on_legacy_memory_window(tmp_path) -> None: + config_path = tmp_path / "config.json" + config_path.write_text( + json.dumps( + { + "agents": { + "defaults": { + "maxTokens": 1234, + "memoryWindow": 42, + } + } + } + ), + encoding="utf-8", + ) + + config = load_config(config_path) + + assert config.agents.defaults.max_tokens == 1234 + assert config.agents.defaults.context_window_tokens == 65_536 + assert config.agents.defaults.should_warn_deprecated_memory_window is True + + +def test_save_config_writes_context_window_tokens_but_not_memory_window(tmp_path) -> None: + config_path = tmp_path / "config.json" + config_path.write_text( + json.dumps( + { + "agents": { + "defaults": { + "maxTokens": 2222, + "memoryWindow": 30, + } + } + } + ), + encoding="utf-8", + ) + + config = load_config(config_path) + save_config(config, config_path) + saved = json.loads(config_path.read_text(encoding="utf-8")) + defaults = saved["agents"]["defaults"] + + assert defaults["maxTokens"] == 2222 + assert defaults["contextWindowTokens"] == 65_536 + assert "memoryWindow" not in 
defaults + + +def test_onboard_refresh_rewrites_legacy_config_template(tmp_path, monkeypatch) -> None: + config_path = tmp_path / "config.json" + workspace = tmp_path / "workspace" + config_path.write_text( + json.dumps( + { + "agents": { + "defaults": { + "maxTokens": 3333, + "memoryWindow": 50, + } + } + } + ), + encoding="utf-8", + ) + + monkeypatch.setattr("nanobot.config.loader.get_config_path", lambda: config_path) + monkeypatch.setattr("nanobot.cli.commands.get_workspace_path", lambda: workspace) + + result = runner.invoke(app, ["onboard"], input="n\n") + + assert result.exit_code == 0 + assert "contextWindowTokens" in result.stdout + saved = json.loads(config_path.read_text(encoding="utf-8")) + defaults = saved["agents"]["defaults"] + assert defaults["maxTokens"] == 3333 + assert defaults["contextWindowTokens"] == 65_536 + assert "memoryWindow" not in defaults diff --git a/tests/test_consolidate_offset.py b/tests/test_consolidate_offset.py index a3213dd..7d12338 100644 --- a/tests/test_consolidate_offset.py +++ b/tests/test_consolidate_offset.py @@ -480,226 +480,35 @@ class TestEmptyAndBoundarySessions: assert_messages_content(old_messages, 10, 34) -class TestConsolidationDeduplicationGuard: - """Test that consolidation tasks are deduplicated and serialized.""" +class TestNewCommandArchival: + """Test /new archival behavior with the simplified consolidation flow.""" - @pytest.mark.asyncio - async def test_consolidation_guard_prevents_duplicate_tasks(self, tmp_path: Path) -> None: - """Concurrent messages above memory_window spawn only one consolidation task.""" + @staticmethod + def _make_loop(tmp_path: Path): from nanobot.agent.loop import AgentLoop - from nanobot.bus.events import InboundMessage from nanobot.bus.queue import MessageBus from nanobot.providers.base import LLMResponse bus = MessageBus() provider = MagicMock() provider.get_default_model.return_value = "test-model" + provider.estimate_prompt_tokens.return_value = (10_000, "test") loop = 
AgentLoop( - bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10 + bus=bus, + provider=provider, + workspace=tmp_path, + model="test-model", + context_window_tokens=1, ) - - loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[])) + loop.provider.chat_with_retry = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[])) loop.tools.get_definitions = MagicMock(return_value=[]) - - session = loop.sessions.get_or_create("cli:test") - for i in range(15): - session.add_message("user", f"msg{i}") - session.add_message("assistant", f"resp{i}") - loop.sessions.save(session) - - consolidation_calls = 0 - - async def _fake_consolidate(_session, archive_all: bool = False) -> None: - nonlocal consolidation_calls - consolidation_calls += 1 - await asyncio.sleep(0.05) - - loop._consolidate_memory = _fake_consolidate # type: ignore[method-assign] - - msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello") - await loop._process_message(msg) - await loop._process_message(msg) - await asyncio.sleep(0.1) - - assert consolidation_calls == 1, ( - f"Expected exactly 1 consolidation, got {consolidation_calls}" - ) - - @pytest.mark.asyncio - async def test_new_command_guard_prevents_concurrent_consolidation( - self, tmp_path: Path - ) -> None: - """/new command does not run consolidation concurrently with in-flight consolidation.""" - from nanobot.agent.loop import AgentLoop - from nanobot.bus.events import InboundMessage - from nanobot.bus.queue import MessageBus - from nanobot.providers.base import LLMResponse - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - loop = AgentLoop( - bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10 - ) - - loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[])) - loop.tools.get_definitions = MagicMock(return_value=[]) - - session = 
loop.sessions.get_or_create("cli:test") - for i in range(15): - session.add_message("user", f"msg{i}") - session.add_message("assistant", f"resp{i}") - loop.sessions.save(session) - - consolidation_calls = 0 - active = 0 - max_active = 0 - - async def _fake_consolidate(_session, archive_all: bool = False) -> None: - nonlocal consolidation_calls, active, max_active - consolidation_calls += 1 - active += 1 - max_active = max(max_active, active) - await asyncio.sleep(0.05) - active -= 1 - - loop._consolidate_memory = _fake_consolidate # type: ignore[method-assign] - - msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello") - await loop._process_message(msg) - - new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new") - await loop._process_message(new_msg) - await asyncio.sleep(0.1) - - assert consolidation_calls == 2, ( - f"Expected normal + /new consolidations, got {consolidation_calls}" - ) - assert max_active == 1, ( - f"Expected serialized consolidation, observed concurrency={max_active}" - ) - - @pytest.mark.asyncio - async def test_consolidation_tasks_are_referenced(self, tmp_path: Path) -> None: - """create_task results are tracked in _consolidation_tasks while in flight.""" - from nanobot.agent.loop import AgentLoop - from nanobot.bus.events import InboundMessage - from nanobot.bus.queue import MessageBus - from nanobot.providers.base import LLMResponse - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - loop = AgentLoop( - bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10 - ) - - loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[])) - loop.tools.get_definitions = MagicMock(return_value=[]) - - session = loop.sessions.get_or_create("cli:test") - for i in range(15): - session.add_message("user", f"msg{i}") - session.add_message("assistant", f"resp{i}") - 
loop.sessions.save(session) - - started = asyncio.Event() - - async def _slow_consolidate(_session, archive_all: bool = False) -> None: - started.set() - await asyncio.sleep(0.1) - - loop._consolidate_memory = _slow_consolidate # type: ignore[method-assign] - - msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello") - await loop._process_message(msg) - - await started.wait() - assert len(loop._consolidation_tasks) == 1, "Task must be referenced while in-flight" - - await asyncio.sleep(0.15) - assert len(loop._consolidation_tasks) == 0, ( - "Task reference must be removed after completion" - ) - - @pytest.mark.asyncio - async def test_new_waits_for_inflight_consolidation_and_preserves_messages( - self, tmp_path: Path - ) -> None: - """/new waits for in-flight consolidation and archives before clear.""" - from nanobot.agent.loop import AgentLoop - from nanobot.bus.events import InboundMessage - from nanobot.bus.queue import MessageBus - from nanobot.providers.base import LLMResponse - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - loop = AgentLoop( - bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10 - ) - - loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[])) - loop.tools.get_definitions = MagicMock(return_value=[]) - - session = loop.sessions.get_or_create("cli:test") - for i in range(15): - session.add_message("user", f"msg{i}") - session.add_message("assistant", f"resp{i}") - loop.sessions.save(session) - - started = asyncio.Event() - release = asyncio.Event() - archived_count = 0 - - async def _fake_consolidate(sess, archive_all: bool = False) -> bool: - nonlocal archived_count - if archive_all: - archived_count = len(sess.messages) - return True - started.set() - await release.wait() - return True - - loop._consolidate_memory = _fake_consolidate # type: ignore[method-assign] - - msg = 
InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello") - await loop._process_message(msg) - await started.wait() - - new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new") - pending_new = asyncio.create_task(loop._process_message(new_msg)) - - await asyncio.sleep(0.02) - assert not pending_new.done(), "/new should wait while consolidation is in-flight" - - release.set() - response = await pending_new - assert response is not None - assert "new session started" in response.content.lower() - assert archived_count > 0, "Expected /new archival to process a non-empty snapshot" - - session_after = loop.sessions.get_or_create("cli:test") - assert session_after.messages == [], "Session should be cleared after successful archival" + return loop @pytest.mark.asyncio async def test_new_does_not_clear_session_when_archive_fails(self, tmp_path: Path) -> None: - """/new must keep session data if archive step reports failure.""" - from nanobot.agent.loop import AgentLoop from nanobot.bus.events import InboundMessage - from nanobot.bus.queue import MessageBus - from nanobot.providers.base import LLMResponse - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - loop = AgentLoop( - bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10 - ) - - loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[])) - loop.tools.get_definitions = MagicMock(return_value=[]) + loop = self._make_loop(tmp_path) session = loop.sessions.get_or_create("cli:test") for i in range(5): session.add_message("user", f"msg{i}") @@ -707,111 +516,61 @@ class TestConsolidationDeduplicationGuard: loop.sessions.save(session) before_count = len(session.messages) - async def _failing_consolidate(sess, archive_all: bool = False) -> bool: - if archive_all: - return False - return True + async def _failing_consolidate(_messages) -> bool: + return False - 
loop._consolidate_memory = _failing_consolidate # type: ignore[method-assign] + loop.memory_consolidator.consolidate_messages = _failing_consolidate # type: ignore[method-assign] new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new") response = await loop._process_message(new_msg) assert response is not None assert "failed" in response.content.lower() - session_after = loop.sessions.get_or_create("cli:test") - assert len(session_after.messages) == before_count, ( - "Session must remain intact when /new archival fails" - ) + assert len(loop.sessions.get_or_create("cli:test").messages) == before_count @pytest.mark.asyncio - async def test_new_archives_only_unconsolidated_messages_after_inflight_task( - self, tmp_path: Path - ) -> None: - """/new should archive only messages not yet consolidated by prior task.""" - from nanobot.agent.loop import AgentLoop + async def test_new_archives_only_unconsolidated_messages(self, tmp_path: Path) -> None: from nanobot.bus.events import InboundMessage - from nanobot.bus.queue import MessageBus - from nanobot.providers.base import LLMResponse - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - loop = AgentLoop( - bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10 - ) - - loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[])) - loop.tools.get_definitions = MagicMock(return_value=[]) + loop = self._make_loop(tmp_path) session = loop.sessions.get_or_create("cli:test") for i in range(15): session.add_message("user", f"msg{i}") session.add_message("assistant", f"resp{i}") + session.last_consolidated = len(session.messages) - 3 loop.sessions.save(session) - started = asyncio.Event() - release = asyncio.Event() archived_count = -1 - async def _fake_consolidate(sess, archive_all: bool = False) -> bool: + async def _fake_consolidate(messages) -> bool: nonlocal archived_count - if 
archive_all: - archived_count = len(sess.messages) - return True - - started.set() - await release.wait() - sess.last_consolidated = len(sess.messages) - 3 + archived_count = len(messages) return True - loop._consolidate_memory = _fake_consolidate # type: ignore[method-assign] - - msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello") - await loop._process_message(msg) - await started.wait() + loop.memory_consolidator.consolidate_messages = _fake_consolidate # type: ignore[method-assign] new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new") - pending_new = asyncio.create_task(loop._process_message(new_msg)) - await asyncio.sleep(0.02) - assert not pending_new.done() - - release.set() - response = await pending_new + response = await loop._process_message(new_msg) assert response is not None assert "new session started" in response.content.lower() - assert archived_count == 3, ( - f"Expected only unconsolidated tail to archive, got {archived_count}" - ) + assert archived_count == 3 @pytest.mark.asyncio async def test_new_clears_session_and_responds(self, tmp_path: Path) -> None: - """/new clears session and returns confirmation.""" - from nanobot.agent.loop import AgentLoop from nanobot.bus.events import InboundMessage - from nanobot.bus.queue import MessageBus - from nanobot.providers.base import LLMResponse - - bus = MessageBus() - provider = MagicMock() - provider.get_default_model.return_value = "test-model" - loop = AgentLoop( - bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10 - ) - loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[])) - loop.tools.get_definitions = MagicMock(return_value=[]) + loop = self._make_loop(tmp_path) session = loop.sessions.get_or_create("cli:test") for i in range(3): session.add_message("user", f"msg{i}") session.add_message("assistant", f"resp{i}") loop.sessions.save(session) - async def 
_ok_consolidate(sess, archive_all: bool = False) -> bool: + async def _ok_consolidate(_messages) -> bool: return True - loop._consolidate_memory = _ok_consolidate # type: ignore[method-assign] + loop.memory_consolidator.consolidate_messages = _ok_consolidate # type: ignore[method-assign] new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new") response = await loop._process_message(new_msg) diff --git a/tests/test_loop_consolidation_tokens.py b/tests/test_loop_consolidation_tokens.py new file mode 100644 index 0000000..b0f3dda --- /dev/null +++ b/tests/test_loop_consolidation_tokens.py @@ -0,0 +1,190 @@ +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from nanobot.agent.loop import AgentLoop +import nanobot.agent.memory as memory_module +from nanobot.bus.queue import MessageBus +from nanobot.providers.base import LLMResponse + + +def _make_loop(tmp_path, *, estimated_tokens: int, context_window_tokens: int) -> AgentLoop: + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + provider.estimate_prompt_tokens.return_value = (estimated_tokens, "test-counter") + provider.chat_with_retry = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[])) + + loop = AgentLoop( + bus=MessageBus(), + provider=provider, + workspace=tmp_path, + model="test-model", + context_window_tokens=context_window_tokens, + ) + loop.tools.get_definitions = MagicMock(return_value=[]) + return loop + + +@pytest.mark.asyncio +async def test_prompt_below_threshold_does_not_consolidate(tmp_path) -> None: + loop = _make_loop(tmp_path, estimated_tokens=100, context_window_tokens=200) + loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True) # type: ignore[method-assign] + + await loop.process_direct("hello", session_key="cli:test") + + loop.memory_consolidator.consolidate_messages.assert_not_awaited() + + +@pytest.mark.asyncio +async def 
test_prompt_above_threshold_triggers_consolidation(tmp_path, monkeypatch) -> None: + loop = _make_loop(tmp_path, estimated_tokens=1000, context_window_tokens=200) + loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True) # type: ignore[method-assign] + session = loop.sessions.get_or_create("cli:test") + session.messages = [ + {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"}, + {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"}, + {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"}, + ] + loop.sessions.save(session) + monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _message: 500) + + await loop.process_direct("hello", session_key="cli:test") + + assert loop.memory_consolidator.consolidate_messages.await_count >= 1 + + +@pytest.mark.asyncio +async def test_prompt_above_threshold_archives_until_next_user_boundary(tmp_path, monkeypatch) -> None: + loop = _make_loop(tmp_path, estimated_tokens=1000, context_window_tokens=200) + loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True) # type: ignore[method-assign] + + session = loop.sessions.get_or_create("cli:test") + session.messages = [ + {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"}, + {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"}, + {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"}, + {"role": "assistant", "content": "a2", "timestamp": "2026-01-01T00:00:03"}, + {"role": "user", "content": "u3", "timestamp": "2026-01-01T00:00:04"}, + ] + loop.sessions.save(session) + + token_map = {"u1": 120, "a1": 120, "u2": 120, "a2": 120, "u3": 120} + monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda message: token_map[message["content"]]) + + await loop.memory_consolidator.maybe_consolidate_by_tokens(session) + + archived_chunk = loop.memory_consolidator.consolidate_messages.await_args.args[0] + assert 
[message["content"] for message in archived_chunk] == ["u1", "a1", "u2", "a2"] + assert session.last_consolidated == 4 + + +@pytest.mark.asyncio +async def test_consolidation_loops_until_target_met(tmp_path, monkeypatch) -> None: + """Verify maybe_consolidate_by_tokens keeps looping until under threshold.""" + loop = _make_loop(tmp_path, estimated_tokens=0, context_window_tokens=200) + loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True) # type: ignore[method-assign] + + session = loop.sessions.get_or_create("cli:test") + session.messages = [ + {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"}, + {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"}, + {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"}, + {"role": "assistant", "content": "a2", "timestamp": "2026-01-01T00:00:03"}, + {"role": "user", "content": "u3", "timestamp": "2026-01-01T00:00:04"}, + {"role": "assistant", "content": "a3", "timestamp": "2026-01-01T00:00:05"}, + {"role": "user", "content": "u4", "timestamp": "2026-01-01T00:00:06"}, + ] + loop.sessions.save(session) + + call_count = [0] + def mock_estimate(_session): + call_count[0] += 1 + if call_count[0] == 1: + return (500, "test") + if call_count[0] == 2: + return (300, "test") + return (80, "test") + + loop.memory_consolidator.estimate_session_prompt_tokens = mock_estimate # type: ignore[method-assign] + monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _m: 100) + + await loop.memory_consolidator.maybe_consolidate_by_tokens(session) + + assert loop.memory_consolidator.consolidate_messages.await_count == 2 + assert session.last_consolidated == 6 + + +@pytest.mark.asyncio +async def test_consolidation_continues_below_trigger_until_half_target(tmp_path, monkeypatch) -> None: + """Once triggered, consolidation should continue until it drops below half threshold.""" + loop = _make_loop(tmp_path, estimated_tokens=0, context_window_tokens=200) + 
loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True) # type: ignore[method-assign] + + session = loop.sessions.get_or_create("cli:test") + session.messages = [ + {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"}, + {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"}, + {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"}, + {"role": "assistant", "content": "a2", "timestamp": "2026-01-01T00:00:03"}, + {"role": "user", "content": "u3", "timestamp": "2026-01-01T00:00:04"}, + {"role": "assistant", "content": "a3", "timestamp": "2026-01-01T00:00:05"}, + {"role": "user", "content": "u4", "timestamp": "2026-01-01T00:00:06"}, + ] + loop.sessions.save(session) + + call_count = [0] + + def mock_estimate(_session): + call_count[0] += 1 + if call_count[0] == 1: + return (500, "test") + if call_count[0] == 2: + return (150, "test") + return (80, "test") + + loop.memory_consolidator.estimate_session_prompt_tokens = mock_estimate # type: ignore[method-assign] + monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _m: 100) + + await loop.memory_consolidator.maybe_consolidate_by_tokens(session) + + assert loop.memory_consolidator.consolidate_messages.await_count == 2 + assert session.last_consolidated == 6 + + +@pytest.mark.asyncio +async def test_preflight_consolidation_before_llm_call(tmp_path, monkeypatch) -> None: + """Verify preflight consolidation runs before the LLM call in process_direct.""" + order: list[str] = [] + + loop = _make_loop(tmp_path, estimated_tokens=0, context_window_tokens=200) + + async def track_consolidate(messages): + order.append("consolidate") + return True + loop.memory_consolidator.consolidate_messages = track_consolidate # type: ignore[method-assign] + + async def track_llm(*args, **kwargs): + order.append("llm") + return LLMResponse(content="ok", tool_calls=[]) + loop.provider.chat_with_retry = track_llm + + session = 
loop.sessions.get_or_create("cli:test") + session.messages = [ + {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"}, + {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"}, + {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"}, + ] + loop.sessions.save(session) + monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _m: 500) + + call_count = [0] + def mock_estimate(_session): + call_count[0] += 1 + return (1000 if call_count[0] <= 1 else 80, "test") + loop.memory_consolidator.estimate_session_prompt_tokens = mock_estimate # type: ignore[method-assign] + + await loop.process_direct("hello", session_key="cli:test") + + assert "consolidate" in order + assert "llm" in order + assert order.index("consolidate") < order.index("llm") diff --git a/tests/test_memory_consolidation_types.py b/tests/test_memory_consolidation_types.py index 2605bf7..0263f01 100644 --- a/tests/test_memory_consolidation_types.py +++ b/tests/test_memory_consolidation_types.py @@ -7,7 +7,7 @@ tool call response, it should serialize them to JSON instead of raising TypeErro import json from pathlib import Path -from unittest.mock import AsyncMock, MagicMock +from unittest.mock import AsyncMock import pytest @@ -15,15 +15,12 @@ from nanobot.agent.memory import MemoryStore from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest -def _make_session(message_count: int = 30, memory_window: int = 50): - """Create a mock session with messages.""" - session = MagicMock() - session.messages = [ +def _make_messages(message_count: int = 30): + """Create a list of mock messages.""" + return [ {"role": "user", "content": f"msg{i}", "timestamp": "2026-01-01 00:00"} for i in range(message_count) ] - session.last_consolidated = 0 - return session def _make_tool_response(history_entry, memory_update): @@ -74,9 +71,9 @@ class TestMemoryConsolidationTypeHandling: ) ) provider.chat_with_retry = provider.chat - session = 
_make_session(message_count=60) + messages = _make_messages(message_count=60) - result = await store.consolidate(session, provider, "test-model", memory_window=50) + result = await store.consolidate(messages, provider, "test-model") assert result is True assert store.history_file.exists() @@ -95,9 +92,9 @@ class TestMemoryConsolidationTypeHandling: ) ) provider.chat_with_retry = provider.chat - session = _make_session(message_count=60) + messages = _make_messages(message_count=60) - result = await store.consolidate(session, provider, "test-model", memory_window=50) + result = await store.consolidate(messages, provider, "test-model") assert result is True assert store.history_file.exists() @@ -131,9 +128,9 @@ class TestMemoryConsolidationTypeHandling: ) provider.chat = AsyncMock(return_value=response) provider.chat_with_retry = provider.chat - session = _make_session(message_count=60) + messages = _make_messages(message_count=60) - result = await store.consolidate(session, provider, "test-model", memory_window=50) + result = await store.consolidate(messages, provider, "test-model") assert result is True assert "User discussed testing." 
in store.history_file.read_text() @@ -147,22 +144,22 @@ class TestMemoryConsolidationTypeHandling: return_value=LLMResponse(content="I summarized the conversation.", tool_calls=[]) ) provider.chat_with_retry = provider.chat - session = _make_session(message_count=60) + messages = _make_messages(message_count=60) - result = await store.consolidate(session, provider, "test-model", memory_window=50) + result = await store.consolidate(messages, provider, "test-model") assert result is False assert not store.history_file.exists() @pytest.mark.asyncio - async def test_skips_when_few_messages(self, tmp_path: Path) -> None: - """Consolidation should be a no-op when messages < keep_count.""" + async def test_skips_when_message_chunk_is_empty(self, tmp_path: Path) -> None: + """Consolidation should be a no-op when the selected chunk is empty.""" store = MemoryStore(tmp_path) provider = AsyncMock() provider.chat_with_retry = provider.chat - session = _make_session(message_count=10) + messages: list[dict] = [] - result = await store.consolidate(session, provider, "test-model", memory_window=50) + result = await store.consolidate(messages, provider, "test-model") assert result is True provider.chat.assert_not_called() @@ -189,9 +186,9 @@ class TestMemoryConsolidationTypeHandling: ) provider.chat = AsyncMock(return_value=response) provider.chat_with_retry = provider.chat - session = _make_session(message_count=60) + messages = _make_messages(message_count=60) - result = await store.consolidate(session, provider, "test-model", memory_window=50) + result = await store.consolidate(messages, provider, "test-model") assert result is True assert "User discussed testing." 
in store.history_file.read_text() @@ -215,9 +212,9 @@ class TestMemoryConsolidationTypeHandling: ) provider.chat = AsyncMock(return_value=response) provider.chat_with_retry = provider.chat - session = _make_session(message_count=60) + messages = _make_messages(message_count=60) - result = await store.consolidate(session, provider, "test-model", memory_window=50) + result = await store.consolidate(messages, provider, "test-model") assert result is False @@ -239,9 +236,9 @@ class TestMemoryConsolidationTypeHandling: ) provider.chat = AsyncMock(return_value=response) provider.chat_with_retry = provider.chat - session = _make_session(message_count=60) + messages = _make_messages(message_count=60) - result = await store.consolidate(session, provider, "test-model", memory_window=50) + result = await store.consolidate(messages, provider, "test-model") assert result is False @@ -255,7 +252,7 @@ class TestMemoryConsolidationTypeHandling: memory_update="# Memory\nUser likes testing.", ), ]) - session = _make_session(message_count=60) + messages = _make_messages(message_count=60) delays: list[int] = [] async def _fake_sleep(delay: int) -> None: @@ -263,7 +260,7 @@ class TestMemoryConsolidationTypeHandling: monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep) - result = await store.consolidate(session, provider, "test-model", memory_window=50) + result = await store.consolidate(messages, provider, "test-model") assert result is True assert provider.calls == 2 diff --git a/tests/test_message_tool_suppress.py b/tests/test_message_tool_suppress.py index 63b0fd1..1091de4 100644 --- a/tests/test_message_tool_suppress.py +++ b/tests/test_message_tool_suppress.py @@ -16,7 +16,7 @@ def _make_loop(tmp_path: Path) -> AgentLoop: bus = MessageBus() provider = MagicMock() provider.get_default_model.return_value = "test-model" - return AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10) + return AgentLoop(bus=bus, 
provider=provider, workspace=tmp_path, model="test-model") class TestMessageToolSuppressLogic: @@ -33,7 +33,7 @@ class TestMessageToolSuppressLogic: LLMResponse(content="", tool_calls=[tool_call]), LLMResponse(content="Done", tool_calls=[]), ]) - loop.provider.chat = AsyncMock(side_effect=lambda *a, **kw: next(calls)) + loop.provider.chat_with_retry = AsyncMock(side_effect=lambda *a, **kw: next(calls)) loop.tools.get_definitions = MagicMock(return_value=[]) sent: list[OutboundMessage] = [] @@ -58,7 +58,7 @@ class TestMessageToolSuppressLogic: LLMResponse(content="", tool_calls=[tool_call]), LLMResponse(content="I've sent the email.", tool_calls=[]), ]) - loop.provider.chat = AsyncMock(side_effect=lambda *a, **kw: next(calls)) + loop.provider.chat_with_retry = AsyncMock(side_effect=lambda *a, **kw: next(calls)) loop.tools.get_definitions = MagicMock(return_value=[]) sent: list[OutboundMessage] = [] @@ -77,7 +77,7 @@ class TestMessageToolSuppressLogic: @pytest.mark.asyncio async def test_not_suppress_when_no_message_tool_used(self, tmp_path: Path) -> None: loop = _make_loop(tmp_path) - loop.provider.chat = AsyncMock(return_value=LLMResponse(content="Hello!", tool_calls=[])) + loop.provider.chat_with_retry = AsyncMock(return_value=LLMResponse(content="Hello!", tool_calls=[])) loop.tools.get_definitions = MagicMock(return_value=[]) msg = InboundMessage(channel="feishu", sender_id="user1", chat_id="chat123", content="Hi") @@ -98,7 +98,7 @@ class TestMessageToolSuppressLogic: ), LLMResponse(content="Done", tool_calls=[]), ]) - loop.provider.chat = AsyncMock(side_effect=lambda *a, **kw: next(calls)) + loop.provider.chat_with_retry = AsyncMock(side_effect=lambda *a, **kw: next(calls)) loop.tools.get_definitions = MagicMock(return_value=[]) loop.tools.execute = AsyncMock(return_value="ok") From a44ee115d1188a62012d3d7cc38077ff5013f4ee Mon Sep 17 00:00:00 2001 From: greyishsong Date: Wed, 11 Mar 2026 09:02:28 +0800 Subject: [PATCH 24/29] fix: bump litellm version to 1.82.1 
for Moonshot provider support see issue #1628 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 62cf616..7127354 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ classifiers = [ dependencies = [ "typer>=0.20.0,<1.0.0", - "litellm>=1.81.5,<2.0.0", + "litellm>=1.82.1,<2.0.0", "pydantic>=2.12.0,<3.0.0", "pydantic-settings>=2.12.0,<3.0.0", "websockets>=16.0,<17.0", From d1df53aaf783d44394d3d335948b5eaf31af803f Mon Sep 17 00:00:00 2001 From: YinAnPing Date: Wed, 11 Mar 2026 09:30:33 +0800 Subject: [PATCH 25/29] fix: exclude hidden files when syncing workspace templates Skip files starting with '.' (e.g., macOS extended attributes like ._AGENTS.md) to prevent UnicodeDecodeError during template synchronization. --- nanobot/utils/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 nanobot/utils/helpers.py diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py old mode 100644 new mode 100755 index 57c60dc..a387b79 --- a/nanobot/utils/helpers.py +++ b/nanobot/utils/helpers.py @@ -88,7 +88,7 @@ def sync_workspace_templates(workspace: Path, silent: bool = False) -> list[str] added.append(str(dest.relative_to(workspace))) for item in tpl.iterdir(): - if item.name.endswith(".md"): + if item.name.endswith(".md") and not item.name.startswith("."): _write(item, workspace / item.name) _write(tpl / "memory" / "MEMORY.md", workspace / "memory" / "MEMORY.md") _write(None, workspace / "memory" / "HISTORY.md") From 35d811c99790b71ef34c5908b23168eeb526ca6b Mon Sep 17 00:00:00 2001 From: dingyanyi2019 Date: Wed, 11 Mar 2026 10:19:43 +0800 Subject: [PATCH 26/29] feat: support retrieving DingTalk voice recognition text --- nanobot/channels/dingtalk.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nanobot/channels/dingtalk.py b/nanobot/channels/dingtalk.py index 3c301a9..cdcba57 100644 --- a/nanobot/channels/dingtalk.py +++ 
b/nanobot/channels/dingtalk.py @@ -57,6 +57,8 @@ class NanobotDingTalkHandler(CallbackHandler): content = "" if chatbot_msg.text: content = chatbot_msg.text.content.strip() + elif chatbot_msg.extensions.get("content", {}).get("recognition"): + content = chatbot_msg.extensions["content"]["recognition"].strip() if not content: content = message.data.get("text", {}).get("content", "").strip() From 91f17cad00b14b7a550f154791be3fc8eb12b746 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Wed, 11 Mar 2026 03:40:33 +0000 Subject: [PATCH 27/29] feat(dingtalk): support voice recognition text fallback Read DingTalk recognition text when text.content is empty, and add a handler-level regression test for voice transcript delivery. --- tests/test_dingtalk_channel.py | 47 +++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/tests/test_dingtalk_channel.py b/tests/test_dingtalk_channel.py index 7595a33..6051014 100644 --- a/tests/test_dingtalk_channel.py +++ b/tests/test_dingtalk_channel.py @@ -1,9 +1,11 @@ +import asyncio from types import SimpleNamespace import pytest from nanobot.bus.queue import MessageBus -from nanobot.channels.dingtalk import DingTalkChannel +import nanobot.channels.dingtalk as dingtalk_module +from nanobot.channels.dingtalk import DingTalkChannel, NanobotDingTalkHandler from nanobot.config.schema import DingTalkConfig @@ -64,3 +66,46 @@ async def test_group_send_uses_group_messages_api() -> None: assert call["url"] == "https://api.dingtalk.com/v1.0/robot/groupMessages/send" assert call["json"]["openConversationId"] == "conv123" assert call["json"]["msgKey"] == "sampleMarkdown" + + +@pytest.mark.asyncio +async def test_handler_uses_voice_recognition_text_when_text_is_empty(monkeypatch) -> None: + bus = MessageBus() + channel = DingTalkChannel( + DingTalkConfig(client_id="app", client_secret="secret", allow_from=["user1"]), + bus, + ) + handler = NanobotDingTalkHandler(channel) + + class _FakeChatbotMessage: + text = None 
+ extensions = {"content": {"recognition": "voice transcript"}} + sender_staff_id = "user1" + sender_id = "fallback-user" + sender_nick = "Alice" + message_type = "audio" + + @staticmethod + def from_dict(_data): + return _FakeChatbotMessage() + + monkeypatch.setattr(dingtalk_module, "ChatbotMessage", _FakeChatbotMessage) + monkeypatch.setattr(dingtalk_module, "AckMessage", SimpleNamespace(STATUS_OK="OK")) + + status, body = await handler.process( + SimpleNamespace( + data={ + "conversationType": "2", + "conversationId": "conv123", + "text": {"content": ""}, + } + ) + ) + + await asyncio.gather(*list(channel._background_tasks)) + msg = await bus.consume_inbound() + + assert (status, body) == ("OK", "OK") + assert msg.content == "voice transcript" + assert msg.sender_id == "user1" + assert msg.chat_id == "group:conv123" From ddccf25bb1be8529d453d2344eea21bd593021c2 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Wed, 11 Mar 2026 03:47:24 +0000 Subject: [PATCH 28/29] fix(subagent): preserve reasoning fields across tool turns Share assistant message construction between the main agent and subagents, and add a regression test to keep reasoning_content and thinking_blocks in follow-up tool rounds. --- nanobot/agent/context.py | 16 +++++++-------- nanobot/agent/subagent.py | 21 +++++++------------ nanobot/utils/helpers.py | 17 ++++++++++++++++ tests/test_task_cancel.py | 43 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 74 insertions(+), 23 deletions(-) diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py index 2c648eb..e47fcb8 100644 --- a/nanobot/agent/context.py +++ b/nanobot/agent/context.py @@ -10,7 +10,7 @@ from typing import Any from nanobot.agent.memory import MemoryStore from nanobot.agent.skills import SkillsLoader -from nanobot.utils.helpers import detect_image_mime +from nanobot.utils.helpers import build_assistant_message, detect_image_mime class ContextBuilder: @@ -182,12 +182,10 @@ Reply directly with text for conversations. 
Only use the 'message' tool to send thinking_blocks: list[dict] | None = None, ) -> list[dict[str, Any]]: """Add an assistant message to the message list.""" - msg: dict[str, Any] = {"role": "assistant", "content": content} - if tool_calls: - msg["tool_calls"] = tool_calls - if reasoning_content is not None: - msg["reasoning_content"] = reasoning_content - if thinking_blocks: - msg["thinking_blocks"] = thinking_blocks - messages.append(msg) + messages.append(build_assistant_message( + content, + tool_calls=tool_calls, + reasoning_content=reasoning_content, + thinking_blocks=thinking_blocks, + )) return messages diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py index 308e67d..eff0b4f 100644 --- a/nanobot/agent/subagent.py +++ b/nanobot/agent/subagent.py @@ -16,6 +16,7 @@ from nanobot.bus.events import InboundMessage from nanobot.bus.queue import MessageBus from nanobot.config.schema import ExecToolConfig from nanobot.providers.base import LLMProvider +from nanobot.utils.helpers import build_assistant_message class SubagentManager: @@ -133,7 +134,6 @@ class SubagentManager: ) if response.has_tool_calls: - # Add assistant message with tool calls tool_call_dicts = [ { "id": tc.id, @@ -145,19 +145,12 @@ class SubagentManager: } for tc in response.tool_calls ] - assistant_msg: dict[str, Any] = { - "role": "assistant", - "content": response.content or "", - "tool_calls": tool_call_dicts, - } - # Preserve reasoning_content for providers that require it - # (e.g. Deepseek Reasoner mandates this field on every - # assistant message when thinking mode is active). 
- if response.reasoning_content is not None: - assistant_msg["reasoning_content"] = response.reasoning_content - if response.thinking_blocks: - assistant_msg["thinking_blocks"] = response.thinking_blocks - messages.append(assistant_msg) + messages.append(build_assistant_message( + response.content or "", + tool_calls=tool_call_dicts, + reasoning_content=response.reasoning_content, + thinking_blocks=response.thinking_blocks, + )) # Execute tools for tool_call in response.tool_calls: diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py index 9242ba6..6d2c670 100644 --- a/nanobot/utils/helpers.py +++ b/nanobot/utils/helpers.py @@ -72,6 +72,23 @@ def split_message(content: str, max_len: int = 2000) -> list[str]: return chunks +def build_assistant_message( + content: str | None, + tool_calls: list[dict[str, Any]] | None = None, + reasoning_content: str | None = None, + thinking_blocks: list[dict] | None = None, +) -> dict[str, Any]: + """Build a provider-safe assistant message with optional reasoning fields.""" + msg: dict[str, Any] = {"role": "assistant", "content": content} + if tool_calls: + msg["tool_calls"] = tool_calls + if reasoning_content is not None: + msg["reasoning_content"] = reasoning_content + if thinking_blocks: + msg["thinking_blocks"] = thinking_blocks + return msg + + def estimate_prompt_tokens( messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None, diff --git a/tests/test_task_cancel.py b/tests/test_task_cancel.py index 27a2d73..62ab2cc 100644 --- a/tests/test_task_cancel.py +++ b/tests/test_task_cancel.py @@ -165,3 +165,46 @@ class TestSubagentCancellation: provider.get_default_model.return_value = "test-model" mgr = SubagentManager(provider=provider, workspace=MagicMock(), bus=bus) assert await mgr.cancel_by_session("nonexistent") == 0 + + @pytest.mark.asyncio + async def test_subagent_preserves_reasoning_fields_in_tool_turn(self, monkeypatch, tmp_path): + from nanobot.agent.subagent import SubagentManager + from 
nanobot.bus.queue import MessageBus + from nanobot.providers.base import LLMResponse, ToolCallRequest + + bus = MessageBus() + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + + captured_second_call: list[dict] = [] + + call_count = {"n": 0} + + async def scripted_chat_with_retry(*, messages, **kwargs): + call_count["n"] += 1 + if call_count["n"] == 1: + return LLMResponse( + content="thinking", + tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={})], + reasoning_content="hidden reasoning", + thinking_blocks=[{"type": "thinking", "thinking": "step"}], + ) + captured_second_call[:] = messages + return LLMResponse(content="done", tool_calls=[]) + provider.chat_with_retry = scripted_chat_with_retry + mgr = SubagentManager(provider=provider, workspace=tmp_path, bus=bus) + + async def fake_execute(self, name, arguments): + return "tool result" + + monkeypatch.setattr("nanobot.agent.tools.registry.ToolRegistry.execute", fake_execute) + + await mgr._run_subagent("sub-1", "do task", "label", {"channel": "test", "chat_id": "c1"}) + + assistant_messages = [ + msg for msg in captured_second_call + if msg.get("role") == "assistant" and msg.get("tool_calls") + ] + assert len(assistant_messages) == 1 + assert assistant_messages[0]["reasoning_content"] == "hidden reasoning" + assert assistant_messages[0]["thinking_blocks"] == [{"type": "thinking", "thinking": "step"}] From 76c6063141f84d8bde3f3a95896c36e4e673c5c7 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Wed, 11 Mar 2026 03:50:54 +0000 Subject: [PATCH 29/29] chore: normalize helpers.py file mode --- nanobot/utils/helpers.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 nanobot/utils/helpers.py diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py old mode 100755 new mode 100644