From 43329018f78826770f28674bf0c01643414f65fe Mon Sep 17 00:00:00 2001
From: Sense_wang <167664334+haosenwang1018@users.noreply.github.com>
Date: Sun, 1 Mar 2026 16:50:02 +0000
Subject: [PATCH 01/53] fix(telegram): add group_policy config for Telegram
 groups

Add `group_policy` field to `TelegramConfig` with "open" (default) and
"mention" options, consistent with Slack and Matrix channel configs.
---
 nanobot/config/schema.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index 6b80c81..69cf826 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -31,6 +31,7 @@ class TelegramConfig(Base):
     allow_from: list[str] = Field(default_factory=list)  # Allowed user IDs or usernames
     proxy: str | None = None  # HTTP/SOCKS5 proxy URL, e.g. "http://127.0.0.1:7890" or "socks5://127.0.0.1:1080"
     reply_to_message: bool = False  # If true, bot replies quote the original message
+    group_policy: Literal["open", "mention"] = "open"  # "open" responds to all, "mention" only when @mentioned or replied to
 
 
 class FeishuConfig(Base):

From 521217a7f50f0a8de46a88e101c8e9bf16abae27 Mon Sep 17 00:00:00 2001
From: Sense_wang <167664334+haosenwang1018@users.noreply.github.com>
Date: Sun, 1 Mar 2026 16:50:36 +0000
Subject: [PATCH 02/53] fix(telegram): enforce group_policy in _on_message

When `group_policy` is set to "mention", skip messages in group chats
unless the bot is @mentioned or the message is a reply to the bot.

Fixes #1380
---
 nanobot/channels/telegram.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/nanobot/channels/telegram.py b/nanobot/channels/telegram.py
index c290535..3702666 100644
--- a/nanobot/channels/telegram.py
+++ b/nanobot/channels/telegram.py
@@ -341,6 +341,23 @@ class TelegramChannel(BaseChannel):
         # Store chat_id for replies
         self._chat_ids[sender_id] = chat_id
 
+        # Enforce group_policy: with the "mention" policy, non-private chats are only
+        # answered when the bot is @mentioned or the message is a reply to the bot.
+        is_group = message.chat.type != "private"
+        if is_group and getattr(self.config, "group_policy", "open") == "mention":
+            # Fetch the bot identity once; get_me() was previously awaited twice per message.
+            me = await self._app.bot.get_me() if self._app else None
+            # Usernames are matched case-insensitively; captions count as text for media.
+            text = (message.text or message.caption or "").lower()
+            mentioned = bool(me and me.username and f"@{me.username.lower()}" in text)
+            # A reply to one of the bot's own messages also counts as addressing it.
+            reply = message.reply_to_message
+            if not mentioned and me and reply and reply.from_user:
+                mentioned = reply.from_user.id == me.id
+            if not mentioned:
+                # Neither mentioned nor replied to: stay silent in this chat.
+                return
+
         # Build content from text and/or media
         content_parts = []
         media_paths = []

From a7be0b3c9eaf967c0079ab1c1a08be4f2010fc09 Mon Sep 17 00:00:00 2001
From: Yan-ke Guo <hi@gyk.me>
Date: Tue, 3 Mar 2026 18:14:26 +0800
Subject: [PATCH 03/53] sync missing scripts from upstream openclaw repository

---
 nanobot/skills/skill-creator/SKILL.md         |   3 +-
 .../skill-creator/scripts/init_skill.py       | 378 ++++++++++++++++++
 .../skill-creator/scripts/package_skill.py    | 139 +++++++
 3 files changed, 519 insertions(+), 1 deletion(-)
 create mode 100755 nanobot/skills/skill-creator/scripts/init_skill.py
 create mode 100755 nanobot/skills/skill-creator/scripts/package_skill.py

diff --git a/nanobot/skills/skill-creator/SKILL.md b/nanobot/skills/skill-creator/SKILL.md
index 9b5eb6f..f4d6e0b 100644
--- a/nanobot/skills/skill-creator/SKILL.md
+++ b/nanobot/skills/skill-creator/SKILL.md
@@ -349,7 +349,6 @@ scripts/package_skill.py <path/to/skill-folder> ./dist
 The packaging script will:
 
 1. **Validate** the skill automatically, checking:
-
    - YAML frontmatter format and required fields
    - Skill naming conventions and directory structure
    - Description completeness and quality
@@ -357,6 +356,8 @@ The packaging script will:
 
 2. **Package** the skill if validation passes, creating a .skill file named after the skill (e.g., `my-skill.skill`) that includes all files and maintains the proper directory structure for distribution. The .skill file is a zip file with a .skill extension.
 
+   Security restriction: symlinks are never followed or packaged; each one is skipped with a warning.
+
 If validation fails, the script will report the errors and exit without creating a package. Fix any validation errors and run the packaging command again.
 
 ### Step 6: Iterate
diff --git a/nanobot/skills/skill-creator/scripts/init_skill.py b/nanobot/skills/skill-creator/scripts/init_skill.py
new file mode 100755
index 0000000..8633fe9
--- /dev/null
+++ b/nanobot/skills/skill-creator/scripts/init_skill.py
@@ -0,0 +1,378 @@
+#!/usr/bin/env python3
+"""
+Skill Initializer - Creates a new skill from template
+
+Usage:
+    init_skill.py <skill-name> --path <path> [--resources scripts,references,assets] [--examples]
+
+Examples:
+    init_skill.py my-new-skill --path skills/public
+    init_skill.py my-new-skill --path skills/public --resources scripts,references
+    init_skill.py my-api-helper --path skills/private --resources scripts --examples
+    init_skill.py custom-skill --path /custom/location
+"""
+
+import argparse
+import re
+import sys
+from pathlib import Path
+
+MAX_SKILL_NAME_LENGTH = 64
+ALLOWED_RESOURCES = {"scripts", "references", "assets"}
+
+SKILL_TEMPLATE = """---
+name: {skill_name}
+description: [TODO: Complete and informative explanation of what the skill does and when to use it. Include WHEN to use this skill - specific scenarios, file types, or tasks that trigger it.]
+---
+
+# {skill_title}
+
+## Overview
+
+[TODO: 1-2 sentences explaining what this skill enables]
+
+## Structuring This Skill
+
+[TODO: Choose the structure that best fits this skill's purpose. Common patterns:
+
+**1. Workflow-Based** (best for sequential processes)
+- Works well when there are clear step-by-step procedures
+- Example: DOCX skill with "Workflow Decision Tree" -> "Reading" -> "Creating" -> "Editing"
+- Structure: ## Overview -> ## Workflow Decision Tree -> ## Step 1 -> ## Step 2...
+
+**2. Task-Based** (best for tool collections)
+- Works well when the skill offers different operations/capabilities
+- Example: PDF skill with "Quick Start" -> "Merge PDFs" -> "Split PDFs" -> "Extract Text"
+- Structure: ## Overview -> ## Quick Start -> ## Task Category 1 -> ## Task Category 2...
+
+**3. Reference/Guidelines** (best for standards or specifications)
+- Works well for brand guidelines, coding standards, or requirements
+- Example: Brand styling with "Brand Guidelines" -> "Colors" -> "Typography" -> "Features"
+- Structure: ## Overview -> ## Guidelines -> ## Specifications -> ## Usage...
+
+**4. Capabilities-Based** (best for integrated systems)
+- Works well when the skill provides multiple interrelated features
+- Example: Product Management with "Core Capabilities" -> numbered capability list
+- Structure: ## Overview -> ## Core Capabilities -> ### 1. Feature -> ### 2. Feature...
+
+Patterns can be mixed and matched as needed. Most skills combine patterns (e.g., start with task-based, add workflow for complex operations).
+
+Delete this entire "Structuring This Skill" section when done - it's just guidance.]
+
+## [TODO: Replace with the first main section based on chosen structure]
+
+[TODO: Add content here. See examples in existing skills:
+- Code samples for technical skills
+- Decision trees for complex workflows
+- Concrete examples with realistic user requests
+- References to scripts/templates/references as needed]
+
+## Resources (optional)
+
+Create only the resource directories this skill actually needs. Delete this section if no resources are required.
+
+### scripts/
+Executable code (Python/Bash/etc.) that can be run directly to perform specific operations.
+
+**Examples from other skills:**
+- PDF skill: `fill_fillable_fields.py`, `extract_form_field_info.py` - utilities for PDF manipulation
+- DOCX skill: `document.py`, `utilities.py` - Python modules for document processing
+
+**Appropriate for:** Python scripts, shell scripts, or any executable code that performs automation, data processing, or specific operations.
+
+**Note:** Scripts may be executed without loading into context, but can still be read by Codex for patching or environment adjustments.
+
+### references/
+Documentation and reference material intended to be loaded into context to inform Codex's process and thinking.
+
+**Examples from other skills:**
+- Product management: `communication.md`, `context_building.md` - detailed workflow guides
+- BigQuery: API reference documentation and query examples
+- Finance: Schema documentation, company policies
+
+**Appropriate for:** In-depth documentation, API references, database schemas, comprehensive guides, or any detailed information that Codex should reference while working.
+
+### assets/
+Files not intended to be loaded into context, but rather used within the output Codex produces.
+
+**Examples from other skills:**
+- Brand styling: PowerPoint template files (.pptx), logo files
+- Frontend builder: HTML/React boilerplate project directories
+- Typography: Font files (.ttf, .woff2)
+
+**Appropriate for:** Templates, boilerplate code, document templates, images, icons, fonts, or any files meant to be copied or used in the final output.
+
+---
+
+**Not every skill requires all three types of resources.**
+"""
+
+EXAMPLE_SCRIPT = '''#!/usr/bin/env python3
+"""
+Example helper script for {skill_name}
+
+This is a placeholder script that can be executed directly.
+Replace with actual implementation or delete if not needed.
+
+Example real scripts from other skills:
+- pdf/scripts/fill_fillable_fields.py - Fills PDF form fields
+- pdf/scripts/convert_pdf_to_images.py - Converts PDF pages to images
+"""
+
+def main():
+    print("This is an example script for {skill_name}")
+    # TODO: Add actual script logic here
+    # This could be data processing, file conversion, API calls, etc.
+
+if __name__ == "__main__":
+    main()
+'''
+
+EXAMPLE_REFERENCE = """# Reference Documentation for {skill_title}
+
+This is a placeholder for detailed reference documentation.
+Replace with actual reference content or delete if not needed.
+
+Example real reference docs from other skills:
+- product-management/references/communication.md - Comprehensive guide for status updates
+- product-management/references/context_building.md - Deep-dive on gathering context
+- bigquery/references/ - API references and query examples
+
+## When Reference Docs Are Useful
+
+Reference docs are ideal for:
+- Comprehensive API documentation
+- Detailed workflow guides
+- Complex multi-step processes
+- Information too lengthy for main SKILL.md
+- Content that's only needed for specific use cases
+
+## Structure Suggestions
+
+### API Reference Example
+- Overview
+- Authentication
+- Endpoints with examples
+- Error codes
+- Rate limits
+
+### Workflow Guide Example
+- Prerequisites
+- Step-by-step instructions
+- Common patterns
+- Troubleshooting
+- Best practices
+"""
+
+EXAMPLE_ASSET = """# Example Asset File
+
+This placeholder represents where asset files would be stored.
+Replace with actual asset files (templates, images, fonts, etc.) or delete if not needed.
+
+Asset files are NOT intended to be loaded into context, but rather used within
+the output Codex produces.
+
+Example asset files from other skills:
+- Brand guidelines: logo.png, slides_template.pptx
+- Frontend builder: hello-world/ directory with HTML/React boilerplate
+- Typography: custom-font.ttf, font-family.woff2
+- Data: sample_data.csv, test_dataset.json
+
+## Common Asset Types
+
+- Templates: .pptx, .docx, boilerplate directories
+- Images: .png, .jpg, .svg, .gif
+- Fonts: .ttf, .otf, .woff, .woff2
+- Boilerplate code: Project directories, starter files
+- Icons: .ico, .svg
+- Data files: .csv, .json, .xml, .yaml
+
+Note: This is a text placeholder. Actual assets can be any file type.
+"""
+
+
+def normalize_skill_name(skill_name):
+    """Return *skill_name* lowercased with non-alphanumeric runs turned into hyphens."""
+    lowered = skill_name.strip().lower()
+    # Each run of characters outside [a-z0-9] becomes one hyphen.
+    hyphenated = re.sub(r"[^a-z0-9]+", "-", lowered)
+    # Trim edge hyphens, then collapse any run of hyphens to one.
+    return re.sub(r"-{2,}", "-", hyphenated.strip("-"))
+
+
+def title_case_skill_name(skill_name):
+    """Return the hyphen-separated *skill_name* as space-separated capitalized words."""
+    return " ".join(map(str.capitalize, skill_name.split("-")))
+
+
+def parse_resources(raw_resources):
+    """Parse a comma-separated resource list into unique, validated names.
+
+    Blank items are ignored and the first occurrence of each name keeps its
+    position. Prints an error and exits with status 1 when any name is not
+    in ALLOWED_RESOURCES. Returns [] for empty input.
+    """
+    if not raw_resources:
+        return []
+    requested = [piece.strip() for piece in raw_resources.split(",") if piece.strip()]
+    unknown = sorted(set(requested) - ALLOWED_RESOURCES)
+    if unknown:
+        print(f"[ERROR] Unknown resource type(s): {', '.join(unknown)}")
+        print(f"   Allowed: {', '.join(sorted(ALLOWED_RESOURCES))}")
+        sys.exit(1)
+    # dict.fromkeys drops duplicates while preserving first-seen order.
+    return list(dict.fromkeys(requested))
+
+
+def create_resource_dirs(skill_dir, skill_name, skill_title, resources, include_examples):
+    """Create the requested resource directories, optionally seeding each with a placeholder file."""
+    # Placeholder filename for each known resource type.
+    example_names = {
+        "scripts": "example.py",
+        "references": "api_reference.md",
+        "assets": "example_asset.txt",
+    }
+    for resource in resources:
+        target_dir = skill_dir / resource
+        target_dir.mkdir(exist_ok=True)
+        if resource not in example_names:
+            continue
+        if not include_examples:
+            print(f"[OK] Created {resource}/")
+            continue
+        example_path = target_dir / example_names[resource]
+        if resource == "scripts":
+            example_path.write_text(EXAMPLE_SCRIPT.format(skill_name=skill_name))
+            # The example script is meant to be run directly.
+            example_path.chmod(0o755)
+        elif resource == "references":
+            example_path.write_text(EXAMPLE_REFERENCE.format(skill_title=skill_title))
+        else:
+            example_path.write_text(EXAMPLE_ASSET)
+        print(f"[OK] Created {resource}/{example_path.name}")
+
+
+def init_skill(skill_name, path, resources, include_examples):
+    """
+    Scaffold a new skill directory containing a SKILL.md rendered from the template.
+
+    Args:
+        skill_name: Name used for the directory and the SKILL.md frontmatter
+        path: Parent directory; the skill is created at <path>/<skill_name>
+        resources: Resource directory names to create inside the skill
+        include_examples: Whether to write placeholder files into those directories
+
+    Returns:
+        Path to the created skill directory, or None if any step failed
+    """
+    target = Path(path).resolve() / skill_name
+
+    # Never write into a directory that is already there.
+    if target.exists():
+        print(f"[ERROR] Skill directory already exists: {target}")
+        return None
+
+    # Step 1: the skill directory itself (missing parents are created too).
+    try:
+        target.mkdir(parents=True, exist_ok=False)
+        print(f"[OK] Created skill directory: {target}")
+    except Exception as e:
+        print(f"[ERROR] Error creating directory: {e}")
+        return None
+
+    # Step 2: SKILL.md rendered from the template.
+    title = title_case_skill_name(skill_name)
+    rendered = SKILL_TEMPLATE.format(skill_name=skill_name, skill_title=title)
+
+    try:
+        (target / "SKILL.md").write_text(rendered)
+        print("[OK] Created SKILL.md")
+    except Exception as e:
+        print(f"[ERROR] Error creating SKILL.md: {e}")
+        return None
+
+    # Step 3: optional resource directories.
+    if resources:
+        try:
+            create_resource_dirs(target, skill_name, title, resources, include_examples)
+        except Exception as e:
+            print(f"[ERROR] Error creating resource directories: {e}")
+            return None
+
+    # The second hint depends on what was scaffolded.
+    if not resources:
+        second_step = "2. Create resource directories only if needed (scripts/, references/, assets/)"
+    elif include_examples:
+        second_step = "2. Customize or delete the example files in scripts/, references/, and assets/"
+    else:
+        second_step = "2. Add resources to scripts/, references/, and assets/ as needed"
+
+    # Summary and follow-up hints.
+    print(f"\n[OK] Skill '{skill_name}' initialized successfully at {target}")
+    print("\nNext steps:")
+    print("1. Edit SKILL.md to complete the TODO items and update the description")
+    print(second_step)
+    print("3. Run the validator when ready to check the skill structure")
+
+    return target
+
+
+def main():
+    """Command-line entry point: parse and validate arguments, then scaffold the skill."""
+    cli = argparse.ArgumentParser(
+        description="Create a new skill directory with a SKILL.md template.",
+    )
+    cli.add_argument("skill_name", help="Skill name (normalized to hyphen-case)")
+    cli.add_argument("--path", required=True, help="Output directory for the skill")
+    cli.add_argument(
+        "--resources",
+        default="",
+        help="Comma-separated list: scripts,references,assets",
+    )
+    cli.add_argument(
+        "--examples",
+        action="store_true",
+        help="Create example files inside the selected resource directories",
+    )
+    opts = cli.parse_args()
+
+    # Validate the normalized name before touching the filesystem.
+    requested_name = opts.skill_name
+    skill_name = normalize_skill_name(requested_name)
+    if not skill_name:
+        print("[ERROR] Skill name must include at least one letter or digit.")
+        sys.exit(1)
+    name_length = len(skill_name)
+    if name_length > MAX_SKILL_NAME_LENGTH:
+        print(
+            f"[ERROR] Skill name '{skill_name}' is too long ({name_length} characters). "
+            f"Maximum is {MAX_SKILL_NAME_LENGTH} characters."
+        )
+        sys.exit(1)
+    if skill_name != requested_name:
+        print(f"Note: Normalized skill name from '{requested_name}' to '{skill_name}'.")
+
+    # --examples is rejected unless at least one resource directory was requested.
+    resources = parse_resources(opts.resources)
+    if opts.examples and not resources:
+        print("[ERROR] --examples requires --resources to be set.")
+        sys.exit(1)
+
+    # Echo the effective settings.
+    print(f"Initializing skill: {skill_name}")
+    print(f"   Location: {opts.path}")
+    if not resources:
+        print("   Resources: none (create as needed)")
+    else:
+        print(f"   Resources: {', '.join(resources)}")
+        if opts.examples:
+            print("   Examples: enabled")
+    print()
+
+    # Exit status mirrors whether the skill directory was created.
+    created = init_skill(skill_name, opts.path, resources, opts.examples)
+    sys.exit(0 if created else 1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/nanobot/skills/skill-creator/scripts/package_skill.py b/nanobot/skills/skill-creator/scripts/package_skill.py
new file mode 100755
index 0000000..aa4de89
--- /dev/null
+++ b/nanobot/skills/skill-creator/scripts/package_skill.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+"""
+Skill Packager - Creates a distributable .skill file of a skill folder
+
+Usage:
+    python scripts/package_skill.py <path/to/skill-folder> [output-directory]
+
+Example:
+    python scripts/package_skill.py skills/public/my-skill
+    python scripts/package_skill.py skills/public/my-skill ./dist
+"""
+
+import sys
+import zipfile
+from pathlib import Path
+
+from quick_validate import validate_skill
+
+
+def _is_within(path: Path, root: Path) -> bool:
+    """Return True when *path* equals *root* or lies beneath it (lexical check only)."""
+    try:
+        return path.relative_to(root) is not None  # always True unless it raises
+    except ValueError:
+        return False
+
+
+def package_skill(skill_path, output_dir=None):
+    """
+    Package a skill folder into a .skill file (zip format); an archive left incomplete by a failure is deleted.
+
+    Args:
+        skill_path: Path to the skill folder
+        output_dir: Optional output directory for the .skill file (defaults to current directory)
+
+    Returns:
+        Path to the created .skill file, or None if error
+    """
+    skill_path = Path(skill_path).resolve()
+
+    # Validate skill folder exists
+    if not skill_path.exists():
+        print(f"[ERROR] Skill folder not found: {skill_path}")
+        return None
+    if not skill_path.is_dir():
+        print(f"[ERROR] Path is not a directory: {skill_path}")
+        return None
+
+    # Validate SKILL.md exists
+    if not (skill_path / "SKILL.md").exists():
+        print(f"[ERROR] SKILL.md not found in {skill_path}")
+        return None
+
+    # Run validation before packaging
+    print("Validating skill...")
+    valid, message = validate_skill(skill_path)
+    if not valid:
+        print(f"[ERROR] Validation failed: {message}")
+        print("   Please fix the validation errors before packaging.")
+        return None
+    print(f"[OK] {message}\n")
+
+    # Determine output location
+    skill_name = skill_path.name
+    if output_dir:
+        output_path = Path(output_dir).resolve()
+        output_path.mkdir(parents=True, exist_ok=True)
+    else:
+        output_path = Path.cwd()
+    skill_filename = output_path / f"{skill_name}.skill"
+    # Resolved once up front so the loop can cheaply detect the archive itself.
+    archive_resolved = skill_filename.resolve()
+    EXCLUDED_DIRS = {".git", ".svn", ".hg", "__pycache__", "node_modules"}
+
+    failure = None  # error text to report if packaging fails
+    archive_opened = False  # only delete an archive this call actually started writing
+    try:
+        with zipfile.ZipFile(skill_filename, "w", zipfile.ZIP_DEFLATED) as zipf:
+            archive_opened = True
+            for file_path in skill_path.rglob("*"):
+                # Security: never follow or package symlinks.
+                if file_path.is_symlink():
+                    print(f"[WARN] Skipping symlink: {file_path}")
+                    continue
+                rel_path = file_path.relative_to(skill_path)
+                if EXCLUDED_DIRS.intersection(rel_path.parts) or not file_path.is_file():
+                    continue
+                resolved_file = file_path.resolve()
+                if not _is_within(resolved_file, skill_path):
+                    failure = f"File escapes skill root: {file_path}"
+                    break
+                # If output lives under skill_path, avoid writing archive into itself.
+                if resolved_file == archive_resolved:
+                    print(f"[WARN] Skipping output archive: {file_path}")
+                    continue
+                arcname = Path(skill_name) / rel_path
+                zipf.write(file_path, arcname)
+                print(f"  Added: {arcname}")
+    except Exception as e:
+        failure = f"Error creating .skill file: {e}"
+
+    if failure is not None:
+        print(f"[ERROR] {failure}")
+        if archive_opened:
+            try:
+                skill_filename.unlink()  # best effort: never leave a truncated package behind
+            except OSError:
+                pass
+        return None
+    print(f"\n[OK] Successfully packaged skill to: {skill_filename}")
+    return skill_filename
+
+
+def main():
+    """Command-line entry point: package the skill named by argv[1]; exit 0 on success, else 1."""
+    cli_args = sys.argv[1:]
+    if not cli_args:
+        print("Usage: python scripts/package_skill.py <path/to/skill-folder> [output-directory]")
+        print("\nExample:")
+        print("  python scripts/package_skill.py skills/public/my-skill")
+        print("  python scripts/package_skill.py skills/public/my-skill ./dist")
+        sys.exit(1)
+
+    # Only the first two positional arguments are used; extras are ignored.
+    skill_path = cli_args[0]
+    output_dir = cli_args[1] if len(cli_args) > 1 else None
+
+    print(f"Packaging skill: {skill_path}")
+    if output_dir:
+        print(f"   Output directory: {output_dir}")
+    print()
+
+    packaged = package_skill(skill_path, output_dir)
+    # A None result means validation or packaging failed.
+    sys.exit(0 if packaged else 1)
+
+
+if __name__ == "__main__":
+    main()

From d0c647918616f4d5f133f5bf07032d477de3c8f0 Mon Sep 17 00:00:00 2001
From: Kiplangatkorir <korirkiplangat22@gmail.com>
Date: Wed, 4 Mar 2026 11:20:50 +0300
Subject: [PATCH 04/53] feat: add LLM retry with exponential backoff for
 transient errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

provider.chat() had no retry logic — a transient 429 rate limit,
502 gateway error, or network timeout would permanently fail the
entire message. For a system running cron jobs and heartbeats 24/7,
even a brief provider blip causes lost tasks.

Adds _chat_with_retry() that:
- Retries up to 3 times with 1s/2s/4s exponential backoff
- Only retries transient errors (429, 5xx, timeout, connection)
- Returns immediately on permanent errors (400, 401, etc.)
- Makes one final attempt once all retries are exhausted
---
 nanobot/agent/loop.py | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index 65a62e5..9819a38 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -159,6 +159,33 @@ class AgentLoop:
                 if hasattr(tool, "set_context"):
                     tool.set_context(channel, chat_id, *([message_id] if name == "message" else []))
 
+    _RETRY_DELAYS = (1, 2, 4)  # seconds — exponential backoff for transient LLM errors
+
+    async def _chat_with_retry(self, **kwargs: Any) -> Any:
+        """Call provider.chat(), retrying transient errors (429, 5xx, network) with backoff.
+
+        After the delays in _RETRY_DELAYS are used up, one final attempt is made and
+        its response is returned as-is, success or error.
+        """
+        transient_markers = (
+            "429", "rate limit", "500", "502", "503", "504",
+            "overloaded", "timeout", "connection", "server error",
+        )
+        total = len(self._RETRY_DELAYS)
+        for attempt, delay in enumerate(self._RETRY_DELAYS, start=1):
+            response = await self.provider.chat(**kwargs)
+            if response.finish_reason != "error":
+                return response
+            err = (response.content or "").lower()
+            if not any(marker in err for marker in transient_markers):
+                return response  # permanent error (400, 401, etc.) — don't retry
+            logger.warning("LLM transient error (attempt {}/{}), retrying in {}s: {}",
+                           attempt, total, delay, err[:120])
+            await asyncio.sleep(delay)
+        # Retries exhausted: the final attempt's outcome is returned as-is, so callers
+        # see the newest error rather than a stale one from an earlier attempt.
+        return await self.provider.chat(**kwargs)
+
     @staticmethod
     def _strip_think(text: str | None) -> str | None:
         """Remove <think>…</think> blocks that some models embed in content."""
@@ -191,7 +218,7 @@ class AgentLoop:
         while iteration < self.max_iterations:
             iteration += 1
 
-            response = await self.provider.chat(
+            response = await self._chat_with_retry(
                 messages=messages,
                 tools=self.tools.get_definitions(),
                 model=self.model,

From dbc518098e913d2f382121820dd58bbaf7a04234 Mon Sep 17 00:00:00 2001
From: VITOHJL <hejl2023@shanghaitech.edu.cn>
Date: Sun, 8 Mar 2026 14:20:16 +0800
Subject: [PATCH 05/53] refactor: implement token-based context compression
 mechanism

Major changes:
- Replace message-count-based memory window with token-budget-based compression
- Add max_tokens_input, compression_start_ratio, compression_target_ratio config
- Implement _maybe_compress_history() that triggers based on prompt token usage
- Use _build_compressed_history_view() to provide compressed history to LLM
- Refactor MemoryStore.consolidate() -> consolidate_chunk() for chunk-based compression
- Remove last_consolidated from Session, use _compressed_until metadata instead
- Add background compression scheduling to avoid blocking message processing

Key improvements:
- Compression now based on actual token usage, not arbitrary message counts
- Better handling of long conversations with large context windows
- Non-destructive compression: old messages remain in session, but excluded from prompt
- Automatic compression when history exceeds configured token thresholds
---
 nanobot/agent/loop.py      | 521 +++++++++++++++++++++++++++++++++----
 nanobot/agent/memory.py    |  62 ++---
 nanobot/config/schema.py   |  25 +-
 nanobot/session/manager.py |  20 +-
 4 files changed, 529 insertions(+), 99 deletions(-)

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index ca9a06e..696e2a7 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -5,19 +5,24 @@ from __future__ import annotations
 import asyncio
 import json
 import re
-import weakref
 from contextlib import AsyncExitStack
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Awaitable, Callable
 
 from loguru import logger
 
+try:
+    import tiktoken  # type: ignore
+except Exception:  # pragma: no cover - optional dependency
+    tiktoken = None
+
 from nanobot.agent.context import ContextBuilder
-from nanobot.agent.memory import MemoryStore
 from nanobot.agent.subagent import SubagentManager
 from nanobot.agent.tools.cron import CronTool
 from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, ReadFileTool, WriteFileTool
+from nanobot.agent.tools.huggingface import HuggingFaceModelSearchTool
 from nanobot.agent.tools.message import MessageTool
+from nanobot.agent.tools.model_config import ValidateDeployJSONTool, ValidateUsageYAMLTool
 from nanobot.agent.tools.registry import ToolRegistry
 from nanobot.agent.tools.shell import ExecTool
 from nanobot.agent.tools.spawn import SpawnTool
@@ -55,8 +60,11 @@ class AgentLoop:
         max_iterations: int = 40,
         temperature: float = 0.1,
         max_tokens: int = 4096,
-        memory_window: int = 100,
+        memory_window: int | None = None,  # backward-compat only (unused)
         reasoning_effort: str | None = None,
+        max_tokens_input: int = 128_000,
+        compression_start_ratio: float = 0.7,
+        compression_target_ratio: float = 0.4,
         brave_api_key: str | None = None,
         web_proxy: str | None = None,
         exec_config: ExecToolConfig | None = None,
@@ -74,9 +82,18 @@ class AgentLoop:
         self.model = model or provider.get_default_model()
         self.max_iterations = max_iterations
         self.temperature = temperature
+        # max_tokens: per-call output token cap (maxTokensOutput in config)
         self.max_tokens = max_tokens
+        # Keep legacy attribute for older call sites/tests; compression no longer uses it.
         self.memory_window = memory_window
         self.reasoning_effort = reasoning_effort
+        # max_tokens_input: model native context window (maxTokensInput in config)
+        self.max_tokens_input = max_tokens_input
+        # Token-based compression watermarks (fractions of available input budget)
+        self.compression_start_ratio = compression_start_ratio
+        self.compression_target_ratio = compression_target_ratio
+        # Reserve tokens for safety margin
+        self._reserve_tokens = 1000
         self.brave_api_key = brave_api_key
         self.web_proxy = web_proxy
         self.exec_config = exec_config or ExecToolConfig()
@@ -105,18 +122,373 @@ class AgentLoop:
         self._mcp_stack: AsyncExitStack | None = None
         self._mcp_connected = False
         self._mcp_connecting = False
-        self._consolidating: set[str] = set()  # Session keys with consolidation in progress
-        self._consolidation_tasks: set[asyncio.Task] = set()  # Strong refs to in-flight tasks
-        self._consolidation_locks: weakref.WeakValueDictionary[str, asyncio.Lock] = weakref.WeakValueDictionary()
         self._active_tasks: dict[str, list[asyncio.Task]] = {}  # session_key -> tasks
+        self._compression_tasks: dict[str, asyncio.Task] = {}  # session_key -> task
         self._processing_lock = asyncio.Lock()
         self._register_default_tools()
 
+    @staticmethod
+    def _estimate_prompt_tokens(
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]] | None = None,
+    ) -> int:
+        """Estimate prompt tokens with tiktoken (fallback only)."""
+        if tiktoken is None:
+            return 0
+
+        try:
+            enc = tiktoken.get_encoding("cl100k_base")
+            parts: list[str] = []
+            for msg in messages:
+                content = msg.get("content")
+                if isinstance(content, str):
+                    parts.append(content)
+                elif isinstance(content, list):
+                    for part in content:
+                        if isinstance(part, dict) and part.get("type") == "text":
+                            txt = part.get("text", "")
+                            if txt:
+                                parts.append(txt)
+            if tools:
+                parts.append(json.dumps(tools, ensure_ascii=False))
+            return len(enc.encode("\n".join(parts)))
+        except Exception:
+            return 0
+
+    def _estimate_prompt_tokens_chain(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]] | None = None,
+    ) -> tuple[int, str]:
+        """Unified prompt-token estimation: provider counter -> tiktoken."""
+        provider_counter = getattr(self.provider, "estimate_prompt_tokens", None)
+        if callable(provider_counter):
+            try:
+                tokens, source = provider_counter(messages, tools, self.model)
+                if isinstance(tokens, (int, float)) and tokens > 0:
+                    return int(tokens), str(source or "provider_counter")
+            except Exception:
+                logger.debug("Provider token counter failed; fallback to tiktoken")
+
+        estimated = self._estimate_prompt_tokens(messages, tools)
+        if estimated > 0:
+            return int(estimated), "tiktoken"
+        return 0, "none"
+
+    @staticmethod
+    def _estimate_completion_tokens(content: str) -> int:
+        """Estimate completion tokens with tiktoken (fallback only)."""
+        if tiktoken is None:
+            return 0
+        try:
+            enc = tiktoken.get_encoding("cl100k_base")
+            return len(enc.encode(content or ""))
+        except Exception:
+            return 0
+
+    def _get_compressed_until(self, session: Session) -> int:
+        """Read/normalize compressed boundary and migrate old metadata format."""
+        raw = session.metadata.get("_compressed_until", 0)
+        try:
+            compressed_until = int(raw)
+        except (TypeError, ValueError):
+            compressed_until = 0
+
+        if compressed_until <= 0:
+            ranges = session.metadata.get("_compressed_ranges")
+            if isinstance(ranges, list):
+                inferred = 0
+                for item in ranges:
+                    if not isinstance(item, (list, tuple)) or len(item) != 2:
+                        continue
+                    try:
+                        inferred = max(inferred, int(item[1]))
+                    except (TypeError, ValueError):
+                        continue
+                compressed_until = inferred
+
+        compressed_until = max(0, min(compressed_until, len(session.messages)))
+        session.metadata["_compressed_until"] = compressed_until
+        # Backward compat: once the contiguous boundary has been migrated, the legacy fields can be dropped.
+        session.metadata.pop("_compressed_ranges", None)
+        session.metadata.pop("_cumulative_tokens", None)
+        return compressed_until
+
+    def _set_compressed_until(self, session: Session, idx: int) -> None:
+        """Persist a contiguous compressed boundary."""
+        session.metadata["_compressed_until"] = max(0, min(int(idx), len(session.messages)))
+        session.metadata.pop("_compressed_ranges", None)
+        session.metadata.pop("_cumulative_tokens", None)
+
+    @staticmethod
+    def _estimate_message_tokens(message: dict[str, Any]) -> int:
+        """Rough token estimate for a single persisted message."""
+        content = message.get("content")
+        parts: list[str] = []
+        if isinstance(content, str):
+            parts.append(content)
+        elif isinstance(content, list):
+            for part in content:
+                if isinstance(part, dict) and part.get("type") == "text":
+                    txt = part.get("text", "")
+                    if txt:
+                        parts.append(txt)
+                else:
+                    parts.append(json.dumps(part, ensure_ascii=False))
+        elif content is not None:
+            parts.append(json.dumps(content, ensure_ascii=False))
+
+        for key in ("name", "tool_call_id"):
+            val = message.get(key)
+            if isinstance(val, str) and val:
+                parts.append(val)
+        if message.get("tool_calls"):
+            parts.append(json.dumps(message["tool_calls"], ensure_ascii=False))
+
+        payload = "\n".join(parts)
+        if not payload:
+            return 1
+        if tiktoken is not None:
+            try:
+                enc = tiktoken.get_encoding("cl100k_base")
+                return max(1, len(enc.encode(payload)))
+            except Exception:
+                pass
+        return max(1, len(payload) // 4)
+
+    def _pick_compression_chunk_by_tokens(
+        self,
+        session: Session,
+        reduction_tokens: int,
+        *,
+        tail_keep: int = 12,
+    ) -> tuple[int, int, int] | None:
+        """
+        Pick one contiguous old chunk so its estimated size is roughly enough
+        to reduce `reduction_tokens`.
+        """
+        messages = session.messages
+        start = self._get_compressed_until(session)
+        if len(messages) - start <= tail_keep + 2:
+            return None
+
+        end_limit = len(messages) - tail_keep
+        if end_limit - start < 2:
+            return None
+
+        target = max(1, reduction_tokens)
+        end = start
+        collected = 0
+        while end < end_limit and collected < target:
+            collected += self._estimate_message_tokens(messages[end])
+            end += 1
+
+        if end - start < 2:
+            end = min(end_limit, start + 2)
+            collected = sum(self._estimate_message_tokens(m) for m in messages[start:end])
+        if end - start < 2:
+            return None
+        return start, end, collected
+
+    def _estimate_session_prompt_tokens(self, session: Session) -> tuple[int, str]:
+        """
+        Estimate current full prompt tokens for this session view
+        (system + compressed history view + runtime/user placeholder + tools).
+        """
+        history = self._build_compressed_history_view(session)
+        channel, chat_id = (session.key.split(":", 1) if ":" in session.key else (None, None))
+        probe_messages = self.context.build_messages(
+            history=history,
+            current_message="[token-probe]",
+            channel=channel,
+            chat_id=chat_id,
+        )
+        return self._estimate_prompt_tokens_chain(probe_messages, self.tools.get_definitions())
+
+    async def _maybe_compress_history(
+        self,
+        session: Session,
+    ) -> None:
+        """
+        End-of-turn policy:
+        - Estimate current prompt usage from persisted session view.
+        - If above start ratio, perform one best-effort compression chunk.
+        """
+        if not session.messages:
+            self._set_compressed_until(session, 0)
+            return
+
+        budget = max(1, self.max_tokens_input - self.max_tokens - self._reserve_tokens)
+        start_threshold = int(budget * self.compression_start_ratio)
+        target_threshold = int(budget * self.compression_target_ratio)
+        if target_threshold >= start_threshold:
+            target_threshold = max(0, start_threshold - 1)
+
+        current_tokens, token_source = self._estimate_session_prompt_tokens(session)
+        current_ratio = current_tokens / budget if budget else 0.0
+        if current_tokens <= 0:
+            logger.debug("Compression skip {}: token estimate unavailable", session.key)
+            return
+        if current_tokens < start_threshold:
+            logger.debug(
+                "Compression idle {}: {}/{} ({:.1%}) via {}",
+                session.key,
+                current_tokens,
+                budget,
+                current_ratio,
+                token_source,
+            )
+            return
+        logger.info(
+            "Compression trigger {}: {}/{} ({:.1%}) via {}",
+            session.key,
+            current_tokens,
+            budget,
+            current_ratio,
+            token_source,
+        )
+
+        reduction_by_target = max(0, current_tokens - target_threshold)
+        reduction_by_delta = max(1, start_threshold - target_threshold)
+        reduction_need = max(reduction_by_target, reduction_by_delta)
+
+        chunk_range = self._pick_compression_chunk_by_tokens(session, reduction_need, tail_keep=10)
+        if chunk_range is None:
+            logger.info("Compression skipped for {}: no compressible chunk", session.key)
+            return
+
+        start_idx, end_idx, estimated_chunk_tokens = chunk_range
+        chunk = session.messages[start_idx:end_idx]
+        if len(chunk) < 2:
+            return
+
+        logger.info(
+            "Compression chunk {}: msgs {}-{} (count={}, est~{}, need~{})",
+            session.key,
+            start_idx,
+            end_idx - 1,
+            len(chunk),
+            estimated_chunk_tokens,
+            reduction_need,
+        )
+        success, _ = await self.context.memory.consolidate_chunk(
+            chunk,
+            self.provider,
+            self.model,
+        )
+        if not success:
+            logger.warning("Compression aborted for {}: consolidation failed", session.key)
+            return
+
+        self._set_compressed_until(session, end_idx)
+        self.sessions.save(session)
+
+        after_tokens, after_source = self._estimate_session_prompt_tokens(session)
+        after_ratio = after_tokens / budget if budget else 0.0
+        reduced = max(0, current_tokens - after_tokens)
+        reduced_ratio = (reduced / current_tokens) if current_tokens > 0 else 0.0
+        logger.info(
+            "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%})",
+            session.key,
+            after_tokens,
+            budget,
+            after_ratio,
+            after_source,
+            reduced,
+            reduced_ratio,
+        )
+
+    def _schedule_background_compression(self, session_key: str) -> None:
+        """Schedule best-effort background compression for a session."""
+        existing = self._compression_tasks.get(session_key)
+        if existing is not None and not existing.done():
+            return
+
+        async def _runner() -> None:
+            session = self.sessions.get_or_create(session_key)
+            try:
+                await self._maybe_compress_history(session)
+            except Exception:
+                logger.exception("Background compression failed for {}", session_key)
+
+        task = asyncio.create_task(_runner())
+        self._compression_tasks[session_key] = task
+
+        def _cleanup(t: asyncio.Task) -> None:
+            cur = self._compression_tasks.get(session_key)
+            if cur is t:
+                self._compression_tasks.pop(session_key, None)
+            try:
+                t.result()
+            except BaseException:
+                pass
+
+        task.add_done_callback(_cleanup)
+
+    async def wait_for_background_compression(self, timeout_s: float | None = None) -> None:
+        """Wait for currently scheduled compression tasks."""
+        pending = [t for t in self._compression_tasks.values() if not t.done()]
+        if not pending:
+            return
+
+        logger.info("Waiting for {} background compression task(s)", len(pending))
+        waiter = asyncio.gather(*pending, return_exceptions=True)
+        if timeout_s is None:
+            await waiter
+            return
+
+        try:
+            await asyncio.wait_for(waiter, timeout=timeout_s)
+        except asyncio.TimeoutError:
+            logger.warning(
+                "Background compression wait timed out after {}s ({} task(s) still running)",
+                timeout_s,
+                len([t for t in self._compression_tasks.values() if not t.done()]),
+            )
+
+    def _build_compressed_history_view(
+        self,
+        session: Session,
+    ) -> list[dict]:
+        """Build non-destructive history view using the compressed boundary."""
+        compressed_until = self._get_compressed_until(session)
+        if compressed_until <= 0:
+            return session.get_history(max_messages=0)
+
+        notice_msg: dict[str, Any] = {
+            "role": "assistant",
+            "content": (
+                "As your assistant, I have compressed earlier context. "
+                "If you need details, please check memory/HISTORY.md."
+            ),
+        }
+
+        tail: list[dict[str, Any]] = []
+        for msg in session.messages[compressed_until:]:
+            entry: dict[str, Any] = {"role": msg["role"], "content": msg.get("content", "")}
+            for k in ("tool_calls", "tool_call_id", "name"):
+                if k in msg:
+                    entry[k] = msg[k]
+            tail.append(entry)
+
+        # Drop leading non-user entries from tail to avoid orphan tool blocks.
+        for i, m in enumerate(tail):
+            if m.get("role") == "user":
+                tail = tail[i:]
+                break
+        else:
+            tail = []
+
+        return [notice_msg, *tail]
+
     def _register_default_tools(self) -> None:
         """Register the default set of tools."""
         allowed_dir = self.workspace if self.restrict_to_workspace else None
         for cls in (ReadFileTool, WriteFileTool, EditFileTool, ListDirTool):
             self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir))
+        self.tools.register(ValidateDeployJSONTool())
+        self.tools.register(ValidateUsageYAMLTool())
+        self.tools.register(HuggingFaceModelSearchTool())
         self.tools.register(ExecTool(
             working_dir=str(self.workspace),
             timeout=self.exec_config.timeout,
@@ -181,25 +553,78 @@ class AgentLoop:
         self,
         initial_messages: list[dict],
         on_progress: Callable[..., Awaitable[None]] | None = None,
-    ) -> tuple[str | None, list[str], list[dict]]:
-        """Run the agent iteration loop. Returns (final_content, tools_used, messages)."""
+    ) -> tuple[str | None, list[str], list[dict], int, str]:
+        """
+        Run the agent iteration loop.
+
+        Returns:
+            (final_content, tools_used, messages, total_tokens_this_turn, token_source)
+            total_tokens_this_turn: prompt + completion tokens of the last model call in this turn
+            token_source: provider_total / provider_sum / provider_prompt /
+                          provider_counter+tiktoken_completion / tiktoken / none
+        """
         messages = initial_messages
         iteration = 0
         final_content = None
         tools_used: list[str] = []
+        total_tokens_this_turn = 0
+        token_source = "none"
 
         while iteration < self.max_iterations:
             iteration += 1
 
+            tool_defs = self.tools.get_definitions()
+
             response = await self.provider.chat(
                 messages=messages,
-                tools=self.tools.get_definitions(),
+                tools=tool_defs,
                 model=self.model,
                 temperature=self.temperature,
                 max_tokens=self.max_tokens,
                 reasoning_effort=self.reasoning_effort,
             )
 
+            # Prefer provider usage from the turn-ending model call; fallback to tiktoken.
+            # The value is overwritten every iteration, so it reflects the latest call (prompt + completion).
+            usage = response.usage or {}
+            t_tokens = usage.get("total_tokens")
+            p_tokens = usage.get("prompt_tokens")
+            c_tokens = usage.get("completion_tokens")
+            
+            if isinstance(t_tokens, (int, float)) and t_tokens > 0:
+                total_tokens_this_turn = int(t_tokens)
+                token_source = "provider_total"
+            elif isinstance(p_tokens, (int, float)) and isinstance(c_tokens, (int, float)):
+                # If we have both prompt and completion tokens, sum them
+                total_tokens_this_turn = int(p_tokens) + int(c_tokens)
+                token_source = "provider_sum"
+            elif isinstance(p_tokens, (int, float)) and p_tokens > 0:
+                # Fallback: completion_tokens is missing or non-numeric, so count prompt tokens only
+                total_tokens_this_turn = int(p_tokens)
+                token_source = "provider_prompt"
+            else:
+                # Estimate with unified chain (provider counter -> tiktoken), plus completion tiktoken.
+                estimated_prompt, prompt_source = self._estimate_prompt_tokens_chain(messages, tool_defs)
+                estimated_completion = self._estimate_completion_tokens(response.content or "")
+                total_tokens_this_turn = estimated_prompt + estimated_completion
+                if total_tokens_this_turn > 0:
+                    token_source = (
+                        "tiktoken"
+                        if prompt_source == "tiktoken"
+                        else f"{prompt_source}+tiktoken_completion"
+                    )
+                if total_tokens_this_turn <= 0:
+                    total_tokens_this_turn = 0
+                    token_source = "none"
+
+            logger.debug(
+                "Turn token usage: source={}, total={}, prompt={}, completion={}",
+                token_source,
+                total_tokens_this_turn,
+                p_tokens if isinstance(p_tokens, (int, float)) else None,
+                c_tokens if isinstance(c_tokens, (int, float)) else None,
+            )
+
             if response.has_tool_calls:
                 if on_progress:
                     thought = self._strip_think(response.content)
@@ -254,7 +679,7 @@ class AgentLoop:
                 "without completing the task. You can try breaking the task into smaller steps."
             )
 
-        return final_content, tools_used, messages
+        return final_content, tools_used, messages, total_tokens_this_turn, token_source
 
     async def run(self) -> None:
         """Run the agent loop, dispatching messages as tasks to stay responsive to /stop."""
@@ -279,6 +704,9 @@ class AgentLoop:
         """Cancel all active tasks and subagents for the session."""
         tasks = self._active_tasks.pop(msg.session_key, [])
         cancelled = sum(1 for t in tasks if not t.done() and t.cancel())
+        comp = self._compression_tasks.get(msg.session_key)
+        if comp is not None and not comp.done() and comp.cancel():
+            cancelled += 1
         for t in tasks:
             try:
                 await t
@@ -325,6 +753,9 @@ class AgentLoop:
     def stop(self) -> None:
         """Stop the agent loop."""
         self._running = False
+        for task in list(self._compression_tasks.values()):
+            if not task.done():
+                task.cancel()
         logger.info("Agent loop stopping")
 
     async def _process_message(
@@ -342,14 +773,15 @@ class AgentLoop:
             key = f"{channel}:{chat_id}"
             session = self.sessions.get_or_create(key)
             self._set_tool_context(channel, chat_id, msg.metadata.get("message_id"))
-            history = session.get_history(max_messages=self.memory_window)
+            history = self._build_compressed_history_view(session)
             messages = self.context.build_messages(
                 history=history,
                 current_message=msg.content, channel=channel, chat_id=chat_id,
             )
-            final_content, _, all_msgs = await self._run_agent_loop(messages)
+            final_content, _, all_msgs, _, _ = await self._run_agent_loop(messages)
             self._save_turn(session, all_msgs, 1 + len(history))
             self.sessions.save(session)
+            self._schedule_background_compression(session.key)
             return OutboundMessage(channel=channel, chat_id=chat_id,
                                   content=final_content or "Background task completed.")
 
@@ -362,27 +794,27 @@ class AgentLoop:
         # Slash commands
         cmd = msg.content.strip().lower()
         if cmd == "/new":
-            lock = self._consolidation_locks.setdefault(session.key, asyncio.Lock())
-            self._consolidating.add(session.key)
             try:
-                async with lock:
-                    snapshot = session.messages[session.last_consolidated:]
-                    if snapshot:
-                        temp = Session(key=session.key)
-                        temp.messages = list(snapshot)
-                        if not await self._consolidate_memory(temp, archive_all=True):
-                            return OutboundMessage(
-                                channel=msg.channel, chat_id=msg.chat_id,
-                                content="Memory archival failed, session not cleared. Please try again.",
-                            )
+                # Before clearing the session, archive the entire current conversation into MEMORY/HISTORY.
+                if session.messages:
+                    ok, _ = await self.context.memory.consolidate_chunk(
+                        session.messages,
+                        self.provider,
+                        self.model,
+                    )
+                    if not ok:
+                        return OutboundMessage(
+                            channel=msg.channel,
+                            chat_id=msg.chat_id,
+                            content="Memory archival failed, session not cleared. Please try again.",
+                        )
             except Exception:
                 logger.exception("/new archival failed for {}", session.key)
                 return OutboundMessage(
-                    channel=msg.channel, chat_id=msg.chat_id,
+                    channel=msg.channel,
+                    chat_id=msg.chat_id,
                     content="Memory archival failed, session not cleared. Please try again.",
                 )
-            finally:
-                self._consolidating.discard(session.key)
 
             session.clear()
             self.sessions.save(session)
@@ -393,36 +825,23 @@ class AgentLoop:
             return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id,
                                   content="🐈 nanobot commands:\n/new — Start a new conversation\n/stop — Stop the current task\n/help — Show available commands")
 
-        unconsolidated = len(session.messages) - session.last_consolidated
-        if (unconsolidated >= self.memory_window and session.key not in self._consolidating):
-            self._consolidating.add(session.key)
-            lock = self._consolidation_locks.setdefault(session.key, asyncio.Lock())
-
-            async def _consolidate_and_unlock():
-                try:
-                    async with lock:
-                        await self._consolidate_memory(session)
-                finally:
-                    self._consolidating.discard(session.key)
-                    _task = asyncio.current_task()
-                    if _task is not None:
-                        self._consolidation_tasks.discard(_task)
-
-            _task = asyncio.create_task(_consolidate_and_unlock())
-            self._consolidation_tasks.add(_task)
-
         self._set_tool_context(msg.channel, msg.chat_id, msg.metadata.get("message_id"))
         if message_tool := self.tools.get("message"):
             if isinstance(message_tool, MessageTool):
                 message_tool.start_turn()
 
-        history = session.get_history(max_messages=self.memory_window)
+        # Normal conversation: use the compressed history view (compression runs after the turn ends).
+        history = self._build_compressed_history_view(session)
         initial_messages = self.context.build_messages(
             history=history,
             current_message=msg.content,
             media=msg.media if msg.media else None,
             channel=msg.channel, chat_id=msg.chat_id,
         )
+        # Add [CRON JOB] identifier for cron sessions (session_key starts with "cron:")
+        if session_key and session_key.startswith("cron:"):
+            if initial_messages and initial_messages[0].get("role") == "system":
+                initial_messages[0]["content"] = f"[CRON JOB] {initial_messages[0]['content']}"
 
         async def _bus_progress(content: str, *, tool_hint: bool = False) -> None:
             meta = dict(msg.metadata or {})
@@ -432,7 +851,7 @@ class AgentLoop:
                 channel=msg.channel, chat_id=msg.chat_id, content=content, metadata=meta,
             ))
 
-        final_content, _, all_msgs = await self._run_agent_loop(
+        final_content, _, all_msgs, _, _ = await self._run_agent_loop(
             initial_messages, on_progress=on_progress or _bus_progress,
         )
 
@@ -441,6 +860,7 @@ class AgentLoop:
 
         self._save_turn(session, all_msgs, 1 + len(history))
         self.sessions.save(session)
+        self._schedule_background_compression(session.key)
 
         if (mt := self.tools.get("message")) and isinstance(mt, MessageTool) and mt._sent_in_turn:
             return None
@@ -487,13 +907,6 @@ class AgentLoop:
             session.messages.append(entry)
         session.updated_at = datetime.now()
 
-    async def _consolidate_memory(self, session, archive_all: bool = False) -> bool:
-        """Delegate to MemoryStore.consolidate(). Returns True on success."""
-        return await MemoryStore(self.workspace).consolidate(
-            session, self.provider, self.model,
-            archive_all=archive_all, memory_window=self.memory_window,
-        )
-
     async def process_direct(
         self,
         content: str,
diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py
index 21fe77d..c8896c8 100644
--- a/nanobot/agent/memory.py
+++ b/nanobot/agent/memory.py
@@ -66,36 +66,25 @@ class MemoryStore:
         long_term = self.read_long_term()
         return f"## Long-term Memory\n{long_term}" if long_term else ""
 
-    async def consolidate(
+    async def consolidate_chunk(
         self,
-        session: Session,
+        messages: list[dict],
         provider: LLMProvider,
         model: str,
-        *,
-        archive_all: bool = False,
-        memory_window: int = 50,
-    ) -> bool:
-        """Consolidate old messages into MEMORY.md + HISTORY.md via LLM tool call.
+    ) -> tuple[bool, str | None]:
+        """Consolidate a chunk of messages into MEMORY.md + HISTORY.md via LLM tool call.
 
-        Returns True on success (including no-op), False on failure.
+        Returns (success, None).
+
+        - success: True on success (including no-op), False on failure.
+        - The second return value is reserved for future use (e.g. RAG-style summaries) and is
+          always None in the current implementation.
         """
-        if archive_all:
-            old_messages = session.messages
-            keep_count = 0
-            logger.info("Memory consolidation (archive_all): {} messages", len(session.messages))
-        else:
-            keep_count = memory_window // 2
-            if len(session.messages) <= keep_count:
-                return True
-            if len(session.messages) - session.last_consolidated <= 0:
-                return True
-            old_messages = session.messages[session.last_consolidated:-keep_count]
-            if not old_messages:
-                return True
-            logger.info("Memory consolidation: {} to consolidate, {} keep", len(old_messages), keep_count)
+        if not messages:
+            return True, None
 
         lines = []
-        for m in old_messages:
+        for m in messages:
             if not m.get("content"):
                 continue
             tools = f" [tools: {', '.join(m['tools_used'])}]" if m.get("tools_used") else ""
@@ -113,7 +102,19 @@ class MemoryStore:
         try:
             response = await provider.chat(
                 messages=[
-                    {"role": "system", "content": "You are a memory consolidation agent. Call the save_memory tool with your consolidation of the conversation."},
+                    {
+                        "role": "system",
+                        "content": (
+                            "You are a memory consolidation agent.\n"
+                            "Your job is to:\n"
+                            "1) Append a concise but grep-friendly entry to HISTORY.md summarizing key events, decisions and topics.\n"
+                            "   - Write 1 paragraph of 2–5 sentences that starts with [YYYY-MM-DD HH:MM].\n"
+                            "   - Include concrete names, IDs and numbers so it is easy to search with grep.\n"
+                            "2) Update long-term MEMORY.md with stable facts and user preferences as markdown, including all existing facts plus new ones.\n"
+                            "3) Optionally return a short context_summary (1–3 sentences) that will replace the raw messages in future dialogue history.\n\n"
+                            "Always call the save_memory tool with history_entry, memory_update and (optionally) context_summary."
+                        ),
+                    },
                     {"role": "user", "content": prompt},
                 ],
                 tools=_SAVE_MEMORY_TOOL,
@@ -122,7 +123,7 @@ class MemoryStore:
 
             if not response.has_tool_calls:
                 logger.warning("Memory consolidation: LLM did not call save_memory, skipping")
-                return False
+                return False, None
 
             args = response.tool_calls[0].arguments
             # Some providers return arguments as a JSON string instead of dict
@@ -134,10 +135,10 @@ class MemoryStore:
                     args = args[0]
                 else:
                     logger.warning("Memory consolidation: unexpected arguments as empty or non-dict list")
-                    return False
+                    return False, None
             if not isinstance(args, dict):
                 logger.warning("Memory consolidation: unexpected arguments type {}", type(args).__name__)
-                return False
+                return False, None
 
             if entry := args.get("history_entry"):
                 if not isinstance(entry, str):
@@ -149,9 +150,8 @@ class MemoryStore:
                 if update != current_memory:
                     self.write_long_term(update)
 
-            session.last_consolidated = 0 if archive_all else len(session.messages) - keep_count
-            logger.info("Memory consolidation done: {} messages, last_consolidated={}", len(session.messages), session.last_consolidated)
-            return True
+            logger.info("Memory consolidation done for {} messages", len(messages))
+            return True, None
         except Exception:
             logger.exception("Memory consolidation failed")
-            return False
+            return False, None
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index 803cb61..1ebde20 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -189,11 +189,22 @@ class SlackConfig(Base):
 
 
 class QQConfig(Base):
-    """QQ channel configuration using botpy SDK."""
+    """QQ channel configuration.
+    
+    Supports two implementations:
+    1. Official botpy SDK: requires app_id and secret
+    2. OneBot protocol: requires api_url (and optionally ws_reverse_url, bot_qq, access_token)
+    """
 
     enabled: bool = False
+    # Official botpy SDK fields
     app_id: str = ""  # 机器人 ID (AppID) from q.qq.com
     secret: str = ""  # 机器人密钥 (AppSecret) from q.qq.com
+    # OneBot protocol fields
+    api_url: str = ""  # OneBot HTTP API URL (e.g. "http://localhost:5700")
+    ws_reverse_url: str = ""  # OneBot WebSocket reverse URL (e.g. "ws://localhost:8080/ws/reverse")
+    bot_qq: int | None = None  # Bot's QQ number (for filtering self messages)
+    access_token: str = ""  # Optional access token for OneBot API
     allow_from: list[str] = Field(
         default_factory=list
     )  # Allowed user openids (empty = public access)
@@ -226,10 +237,18 @@ class AgentDefaults(Base):
     provider: str = (
         "auto"  # Provider name (e.g. "anthropic", "openrouter") or "auto" for auto-detection
     )
-    max_tokens: int = 8192
+    # 原生上下文最大窗口（通常对应模型的 max_input_tokens / max_context_tokens）
+    # 默认按照主流大模型（如 GPT-4o、Claude 3.x 等）的 128k 上下文给一个宽松上限，实际应根据所选模型文档手动调整。
+    max_tokens_input: int = 128_000
+    # 默认单次回复的最大输出 token 上限（调用时可按需要再做截断或比例分配）
+    # 8192 足以覆盖大多数实际对话/工具使用场景，同样可按需手动调整。
+    max_tokens_output: int = 8192
+    # 会话历史压缩触发比例：当估算的输入 token 使用量 >= maxTokensInput * compressionStartRatio 时开始压缩。
+    compression_start_ratio: float = 0.7
+    # 会话历史压缩目标比例：每轮压缩后尽量把估算的输入 token 使用量压到 maxTokensInput * compressionTargetRatio 附近。
+    compression_target_ratio: float = 0.4
     temperature: float = 0.1
     max_tool_iterations: int = 40
-    memory_window: int = 100
     reasoning_effort: str | None = None  # low / medium / high — enables LLM thinking mode
 
 
diff --git a/nanobot/session/manager.py b/nanobot/session/manager.py
index f0a6484..1cb8a51 100644
--- a/nanobot/session/manager.py
+++ b/nanobot/session/manager.py
@@ -9,7 +9,6 @@ from typing import Any
 
 from loguru import logger
 
-from nanobot.config.paths import get_legacy_sessions_dir
 from nanobot.utils.helpers import ensure_dir, safe_filename
 
 
@@ -30,7 +29,6 @@ class Session:
     created_at: datetime = field(default_factory=datetime.now)
     updated_at: datetime = field(default_factory=datetime.now)
     metadata: dict[str, Any] = field(default_factory=dict)
-    last_consolidated: int = 0  # Number of messages already consolidated to files
 
     def add_message(self, role: str, content: str, **kwargs: Any) -> None:
         """Add a message to the session."""
@@ -44,9 +42,13 @@ class Session:
         self.updated_at = datetime.now()
 
     def get_history(self, max_messages: int = 500) -> list[dict[str, Any]]:
-        """Return unconsolidated messages for LLM input, aligned to a user turn."""
-        unconsolidated = self.messages[self.last_consolidated:]
-        sliced = unconsolidated[-max_messages:]
+        """
+        Return messages for LLM input, aligned to a user turn.
+
+        - max_messages > 0 时只保留最近 max_messages 条；
+        - max_messages <= 0 时不做条数截断，返回全部消息。
+        """
+        sliced = self.messages if max_messages <= 0 else self.messages[-max_messages:]
 
         # Drop leading non-user messages to avoid orphaned tool_result blocks
         for i, m in enumerate(sliced):
@@ -66,7 +68,7 @@ class Session:
     def clear(self) -> None:
         """Clear all messages and reset session to initial state."""
         self.messages = []
-        self.last_consolidated = 0
+        self.metadata = {}
         self.updated_at = datetime.now()
 
 
@@ -80,7 +82,7 @@ class SessionManager:
     def __init__(self, workspace: Path):
         self.workspace = workspace
         self.sessions_dir = ensure_dir(self.workspace / "sessions")
-        self.legacy_sessions_dir = get_legacy_sessions_dir()
+        self.legacy_sessions_dir = Path.home() / ".nanobot" / "sessions"
         self._cache: dict[str, Session] = {}
 
     def _get_session_path(self, key: str) -> Path:
@@ -132,7 +134,6 @@ class SessionManager:
             messages = []
             metadata = {}
             created_at = None
-            last_consolidated = 0
 
             with open(path, encoding="utf-8") as f:
                 for line in f:
@@ -145,7 +146,6 @@ class SessionManager:
                     if data.get("_type") == "metadata":
                         metadata = data.get("metadata", {})
                         created_at = datetime.fromisoformat(data["created_at"]) if data.get("created_at") else None
-                        last_consolidated = data.get("last_consolidated", 0)
                     else:
                         messages.append(data)
 
@@ -154,7 +154,6 @@ class SessionManager:
                 messages=messages,
                 created_at=created_at or datetime.now(),
                 metadata=metadata,
-                last_consolidated=last_consolidated
             )
         except Exception as e:
             logger.warning("Failed to load session {}: {}", key, e)
@@ -171,7 +170,6 @@ class SessionManager:
                 "created_at": session.created_at.isoformat(),
                 "updated_at": session.updated_at.isoformat(),
                 "metadata": session.metadata,
-                "last_consolidated": session.last_consolidated
             }
             f.write(json.dumps(metadata_line, ensure_ascii=False) + "\n")
             for msg in session.messages:

From 2dcb4de422ddec8c0f114dc6b0fdce06b9388b8f Mon Sep 17 00:00:00 2001
From: VITOHJL <hejl2023@shanghaitech.edu.cn>
Date: Sun, 8 Mar 2026 15:04:38 +0800
Subject: [PATCH 06/53] fix(commands): update AgentLoop calls to use
 token-based compression parameters

---
 nanobot/cli/commands.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index 2c8d6d3..cf29cc5 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -330,8 +330,10 @@ def gateway(
         temperature=config.agents.defaults.temperature,
         max_tokens=config.agents.defaults.max_tokens,
         max_iterations=config.agents.defaults.max_tool_iterations,
-        memory_window=config.agents.defaults.memory_window,
         reasoning_effort=config.agents.defaults.reasoning_effort,
+        max_tokens_input=config.agents.defaults.max_tokens_input,
+        compression_start_ratio=config.agents.defaults.compression_start_ratio,
+        compression_target_ratio=config.agents.defaults.compression_target_ratio,
         brave_api_key=config.tools.web.search.api_key or None,
         web_proxy=config.tools.web.proxy or None,
         exec_config=config.tools.exec,
@@ -515,8 +517,10 @@ def agent(
         temperature=config.agents.defaults.temperature,
         max_tokens=config.agents.defaults.max_tokens,
         max_iterations=config.agents.defaults.max_tool_iterations,
-        memory_window=config.agents.defaults.memory_window,
         reasoning_effort=config.agents.defaults.reasoning_effort,
+        max_tokens_input=config.agents.defaults.max_tokens_input,
+        compression_start_ratio=config.agents.defaults.compression_start_ratio,
+        compression_target_ratio=config.agents.defaults.compression_target_ratio,
         brave_api_key=config.tools.web.search.api_key or None,
         web_proxy=config.tools.web.proxy or None,
         exec_config=config.tools.exec,

From 2706d3c317be7325795e9dac74d07512e57112f4 Mon Sep 17 00:00:00 2001
From: VITOHJL <hejl2023@shanghaitech.edu.cn>
Date: Sun, 8 Mar 2026 15:20:34 +0800
Subject: [PATCH 07/53] fix(commands): use max_tokens_output instead of
 max_tokens from AgentDefaults

---
 nanobot/cli/commands.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index cf29cc5..18c9d56 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -328,7 +328,7 @@ def gateway(
         workspace=config.workspace_path,
         model=config.agents.defaults.model,
         temperature=config.agents.defaults.temperature,
-        max_tokens=config.agents.defaults.max_tokens,
+        max_tokens=config.agents.defaults.max_tokens_output,
         max_iterations=config.agents.defaults.max_tool_iterations,
         reasoning_effort=config.agents.defaults.reasoning_effort,
         max_tokens_input=config.agents.defaults.max_tokens_input,
@@ -515,7 +515,7 @@ def agent(
         workspace=config.workspace_path,
         model=config.agents.defaults.model,
         temperature=config.agents.defaults.temperature,
-        max_tokens=config.agents.defaults.max_tokens,
+        max_tokens=config.agents.defaults.max_tokens_output,
         max_iterations=config.agents.defaults.max_tool_iterations,
         reasoning_effort=config.agents.defaults.reasoning_effort,
         max_tokens_input=config.agents.defaults.max_tokens_input,

From a984e0df3752f6a8883a0e9b6d8efee4abd7f9dd Mon Sep 17 00:00:00 2001
From: VITOHJL <hejl2023@shanghaitech.edu.cn>
Date: Sun, 8 Mar 2026 15:23:55 +0800
Subject: [PATCH 08/53] feat(loop): add history message count logging in
 compression

---
 nanobot/agent/loop.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index 696e2a7..5d316ea 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -362,6 +362,7 @@ class AgentLoop:
         if len(chunk) < 2:
             return
 
+        before_msg_count = len(session.messages)
         logger.info(
             "Compression chunk {}: msgs {}-{} (count={}, est~{}, need~{})",
             session.key,
@@ -383,12 +384,13 @@ class AgentLoop:
         self._set_compressed_until(session, end_idx)
         self.sessions.save(session)
 
+        after_msg_count = len(session.messages)
         after_tokens, after_source = self._estimate_session_prompt_tokens(session)
         after_ratio = after_tokens / budget if budget else 0.0
         reduced = max(0, current_tokens - after_tokens)
         reduced_ratio = (reduced / current_tokens) if current_tokens > 0 else 0.0
         logger.info(
-            "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%})",
+            "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%}), history: {} -> {}",
             session.key,
             after_tokens,
             budget,
@@ -396,6 +398,8 @@ class AgentLoop:
             after_source,
             reduced,
             reduced_ratio,
+            before_msg_count,
+            after_msg_count,
         )
 
     def _schedule_background_compression(self, session_key: str) -> None:

From 1b16d48390b3fded3438f4fdbc3f0ae0a0379878 Mon Sep 17 00:00:00 2001
From: VITOHJL <hejl2023@shanghaitech.edu.cn>
Date: Sun, 8 Mar 2026 15:26:49 +0800
Subject: [PATCH 09/53] fix(loop): update _cumulative_tokens in _save_turn and
 preserve it in compression methods

---
 nanobot/agent/loop.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index 5d316ea..5e01b79 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -211,14 +211,14 @@ class AgentLoop:
         session.metadata["_compressed_until"] = compressed_until
         # 兼容旧版本：一旦迁移出连续边界，就可以清理旧字段
         session.metadata.pop("_compressed_ranges", None)
-        session.metadata.pop("_cumulative_tokens", None)
+        # 注意：不要删除 _cumulative_tokens，压缩逻辑需要它来跟踪累积 token 计数
         return compressed_until
 
     def _set_compressed_until(self, session: Session, idx: int) -> None:
         """Persist a contiguous compressed boundary."""
         session.metadata["_compressed_until"] = max(0, min(int(idx), len(session.messages)))
         session.metadata.pop("_compressed_ranges", None)
-        session.metadata.pop("_cumulative_tokens", None)
+        # 注意：不要删除 _cumulative_tokens，压缩逻辑需要它来跟踪累积 token 计数
 
     @staticmethod
     def _estimate_message_tokens(message: dict[str, Any]) -> int:
@@ -362,7 +362,6 @@ class AgentLoop:
         if len(chunk) < 2:
             return
 
-        before_msg_count = len(session.messages)
         logger.info(
             "Compression chunk {}: msgs {}-{} (count={}, est~{}, need~{})",
             session.key,
@@ -384,13 +383,12 @@ class AgentLoop:
         self._set_compressed_until(session, end_idx)
         self.sessions.save(session)
 
-        after_msg_count = len(session.messages)
         after_tokens, after_source = self._estimate_session_prompt_tokens(session)
         after_ratio = after_tokens / budget if budget else 0.0
         reduced = max(0, current_tokens - after_tokens)
         reduced_ratio = (reduced / current_tokens) if current_tokens > 0 else 0.0
         logger.info(
-            "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%}), history: {} -> {}",
+            "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%})",
             session.key,
             after_tokens,
             budget,
@@ -398,8 +396,6 @@ class AgentLoop:
             after_source,
             reduced,
             reduced_ratio,
-            before_msg_count,
-            after_msg_count,
         )
 
     def _schedule_background_compression(self, session_key: str) -> None:
@@ -855,14 +851,14 @@ class AgentLoop:
                 channel=msg.channel, chat_id=msg.chat_id, content=content, metadata=meta,
             ))
 
-        final_content, _, all_msgs, _, _ = await self._run_agent_loop(
+        final_content, _, all_msgs, total_tokens_this_turn, token_source = await self._run_agent_loop(
             initial_messages, on_progress=on_progress or _bus_progress,
         )
 
         if final_content is None:
             final_content = "I've completed processing but have no response to give."
 
-        self._save_turn(session, all_msgs, 1 + len(history))
+        self._save_turn(session, all_msgs, 1 + len(history), total_tokens_this_turn)
         self.sessions.save(session)
         self._schedule_background_compression(session.key)
 
@@ -876,7 +872,7 @@ class AgentLoop:
             metadata=msg.metadata or {},
         )
 
-    def _save_turn(self, session: Session, messages: list[dict], skip: int) -> None:
+    def _save_turn(self, session: Session, messages: list[dict], skip: int, total_tokens_this_turn: int = 0) -> None:
         """Save new-turn messages into session, truncating large tool results."""
         from datetime import datetime
         for m in messages[skip:]:
@@ -910,6 +906,14 @@ class AgentLoop:
             entry.setdefault("timestamp", datetime.now().isoformat())
             session.messages.append(entry)
         session.updated_at = datetime.now()
+        
+        # Update cumulative token count for compression tracking
+        if total_tokens_this_turn > 0:
+            current_cumulative = session.metadata.get("_cumulative_tokens", 0)
+            if isinstance(current_cumulative, (int, float)):
+                session.metadata["_cumulative_tokens"] = int(current_cumulative) + total_tokens_this_turn
+            else:
+                session.metadata["_cumulative_tokens"] = total_tokens_this_turn
 
     async def process_direct(
         self,

From 274edc5451c1d0f79eda80c76127f497ec6923e9 Mon Sep 17 00:00:00 2001
From: VITOHJL <hejl2023@shanghaitech.edu.cn>
Date: Sun, 8 Mar 2026 17:25:59 +0800
Subject: [PATCH 10/53] fix(compression): prefer provider prompt token usage

---
 nanobot/agent/loop.py | 43 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index 5e01b79..4f6a051 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -124,6 +124,8 @@ class AgentLoop:
         self._mcp_connecting = False
         self._active_tasks: dict[str, list[asyncio.Task]] = {}  # session_key -> tasks
         self._compression_tasks: dict[str, asyncio.Task] = {}  # session_key -> task
+        self._last_turn_prompt_tokens: int = 0
+        self._last_turn_prompt_source: str = "none"
         self._processing_lock = asyncio.Lock()
         self._register_default_tools()
 
@@ -324,7 +326,15 @@ class AgentLoop:
         if target_threshold >= start_threshold:
             target_threshold = max(0, start_threshold - 1)
 
-        current_tokens, token_source = self._estimate_session_prompt_tokens(session)
+        # Prefer provider usage prompt tokens from the turn-ending call.
+        # If unavailable, fall back to estimator chain.
+        raw_prompt_tokens = session.metadata.get("_last_prompt_tokens")
+        if isinstance(raw_prompt_tokens, (int, float)) and raw_prompt_tokens > 0:
+            current_tokens = int(raw_prompt_tokens)
+            token_source = str(session.metadata.get("_last_prompt_source") or "usage_prompt")
+        else:
+            current_tokens, token_source = self._estimate_session_prompt_tokens(session)
+
         current_ratio = current_tokens / budget if budget else 0.0
         if current_tokens <= 0:
             logger.debug("Compression skip {}: token estimate unavailable", session.key)
@@ -569,6 +579,8 @@ class AgentLoop:
         tools_used: list[str] = []
         total_tokens_this_turn = 0
         token_source = "none"
+        self._last_turn_prompt_tokens = 0
+        self._last_turn_prompt_source = "none"
 
         while iteration < self.max_iterations:
             iteration += 1
@@ -594,19 +606,35 @@ class AgentLoop:
             if isinstance(t_tokens, (int, float)) and t_tokens > 0:
                 total_tokens_this_turn = int(t_tokens)
                 token_source = "provider_total"
+                if isinstance(p_tokens, (int, float)) and p_tokens > 0:
+                    self._last_turn_prompt_tokens = int(p_tokens)
+                    self._last_turn_prompt_source = "usage_prompt"
+                elif isinstance(c_tokens, (int, float)):
+                    prompt_derived = int(t_tokens) - int(c_tokens)
+                    if prompt_derived > 0:
+                        self._last_turn_prompt_tokens = prompt_derived
+                        self._last_turn_prompt_source = "usage_total_minus_completion"
             elif isinstance(p_tokens, (int, float)) and isinstance(c_tokens, (int, float)):
                 # If we have both prompt and completion tokens, sum them
                 total_tokens_this_turn = int(p_tokens) + int(c_tokens)
                 token_source = "provider_sum"
+                if p_tokens > 0:
+                    self._last_turn_prompt_tokens = int(p_tokens)
+                    self._last_turn_prompt_source = "usage_prompt"
             elif isinstance(p_tokens, (int, float)) and p_tokens > 0:
                 # Fallback: use prompt tokens only (completion might be 0 for tool calls)
                 total_tokens_this_turn = int(p_tokens)
                 token_source = "provider_prompt"
+                self._last_turn_prompt_tokens = int(p_tokens)
+                self._last_turn_prompt_source = "usage_prompt"
             else:
                 # Estimate with unified chain (provider counter -> tiktoken), plus completion tiktoken.
                 estimated_prompt, prompt_source = self._estimate_prompt_tokens_chain(messages, tool_defs)
                 estimated_completion = self._estimate_completion_tokens(response.content or "")
                 total_tokens_this_turn = estimated_prompt + estimated_completion
+                if estimated_prompt > 0:
+                    self._last_turn_prompt_tokens = int(estimated_prompt)
+                    self._last_turn_prompt_source = str(prompt_source or "tiktoken")
                 if total_tokens_this_turn > 0:
                     token_source = (
                         "tiktoken"
@@ -779,6 +807,12 @@ class AgentLoop:
                 current_message=msg.content, channel=channel, chat_id=chat_id,
             )
             final_content, _, all_msgs, _, _ = await self._run_agent_loop(messages)
+            if self._last_turn_prompt_tokens > 0:
+                session.metadata["_last_prompt_tokens"] = self._last_turn_prompt_tokens
+                session.metadata["_last_prompt_source"] = self._last_turn_prompt_source
+            else:
+                session.metadata.pop("_last_prompt_tokens", None)
+                session.metadata.pop("_last_prompt_source", None)
             self._save_turn(session, all_msgs, 1 + len(history))
             self.sessions.save(session)
             self._schedule_background_compression(session.key)
@@ -858,6 +892,13 @@ class AgentLoop:
         if final_content is None:
             final_content = "I've completed processing but have no response to give."
 
+        if self._last_turn_prompt_tokens > 0:
+            session.metadata["_last_prompt_tokens"] = self._last_turn_prompt_tokens
+            session.metadata["_last_prompt_source"] = self._last_turn_prompt_source
+        else:
+            session.metadata.pop("_last_prompt_tokens", None)
+            session.metadata.pop("_last_prompt_source", None)
+
         self._save_turn(session, all_msgs, 1 + len(history), total_tokens_this_turn)
         self.sessions.save(session)
         self._schedule_background_compression(session.key)

From 4044b85d4bfa9104b633f3cb408894f0459a0164 Mon Sep 17 00:00:00 2001
From: chengyongru <2755839590@qq.com>
Date: Mon, 9 Mar 2026 01:32:10 +0800
Subject: [PATCH 11/53] fix: ensure feishu audio file has .opus extension for
 Groq Whisper compatibility

---
 nanobot/channels/feishu.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/nanobot/channels/feishu.py b/nanobot/channels/feishu.py
index a637025..0409c32 100644
--- a/nanobot/channels/feishu.py
+++ b/nanobot/channels/feishu.py
@@ -753,8 +753,9 @@ class FeishuChannel(BaseChannel):
                     None, self._download_file_sync, message_id, file_key, msg_type
                 )
                 if not filename:
-                    ext = {"audio": ".opus", "media": ".mp4"}.get(msg_type, "")
-                    filename = f"{file_key[:16]}{ext}"
+                    filename = file_key[:16]
+                if msg_type == "audio" and not filename.endswith(".opus"):
+                    filename = f"{filename}.opus"
 
         if data and filename:
             file_path = media_dir / filename

From 85c56d7410ab4eed78ec70d75489cf453afcfbb3 Mon Sep 17 00:00:00 2001
From: Renato Machado <groudas@gmail.com>
Date: Mon, 9 Mar 2026 01:37:35 +0000
Subject: [PATCH 12/53] feat: add "restart" command

---
 nanobot/agent/loop.py        | 11 +++++++++++
 nanobot/channels/telegram.py |  2 ++
 2 files changed, 13 insertions(+)

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index ca9a06e..5311921 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -5,6 +5,8 @@ from __future__ import annotations
 import asyncio
 import json
 import re
+import os
+import sys
 import weakref
 from contextlib import AsyncExitStack
 from pathlib import Path
@@ -392,6 +394,15 @@ class AgentLoop:
         if cmd == "/help":
             return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id,
                                   content="🐈 nanobot commands:\n/new — Start a new conversation\n/stop — Stop the current task\n/help — Show available commands")
+        if cmd == "/restart":
+            await self.bus.publish_outbound(OutboundMessage(
+                channel=msg.channel, chat_id=msg.chat_id, content="🔄 Restarting..."
+            ))
+            async def _r():
+                await asyncio.sleep(1)
+                os.execv(sys.executable, [sys.executable] + sys.argv)
+            asyncio.create_task(_r())
+            return None
 
         unconsolidated = len(session.messages) - session.last_consolidated
         if (unconsolidated >= self.memory_window and session.key not in self._consolidating):
diff --git a/nanobot/channels/telegram.py b/nanobot/channels/telegram.py
index ecb1440..f37ab1d 100644
--- a/nanobot/channels/telegram.py
+++ b/nanobot/channels/telegram.py
@@ -162,6 +162,7 @@ class TelegramChannel(BaseChannel):
         BotCommand("new", "Start a new conversation"),
         BotCommand("stop", "Stop the current task"),
         BotCommand("help", "Show available commands"),
+        BotCommand("restart", "Restart the bot"),
     ]
 
     def __init__(
@@ -223,6 +224,7 @@ class TelegramChannel(BaseChannel):
         self._app.add_handler(CommandHandler("start", self._on_start))
         self._app.add_handler(CommandHandler("new", self._forward_command))
         self._app.add_handler(CommandHandler("stop", self._forward_command))
+        self._app.add_handler(CommandHandler("restart", self._forward_command))
         self._app.add_handler(CommandHandler("help", self._on_help))
 
         # Add message handler for text, photos, voice, documents

From a660a25504b48170579a57496378e2fd843a556f Mon Sep 17 00:00:00 2001
From: chengyongru <2755839590@qq.com>
Date: Mon, 9 Mar 2026 22:00:45 +0800
Subject: [PATCH 13/53] feat(wecom): add wecom channel [websocket]

Support text and audio messages (WeCom supports audio messages by default).
---
 nanobot/channels/manager.py |  14 +-
 nanobot/channels/wecom.py   | 352 ++++++++++++++++++++++++++++++++++++
 nanobot/config/schema.py    |   9 +
 pyproject.toml              |   1 +
 4 files changed, 375 insertions(+), 1 deletion(-)
 create mode 100644 nanobot/channels/wecom.py

diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py
index 51539dd..369795a 100644
--- a/nanobot/channels/manager.py
+++ b/nanobot/channels/manager.py
@@ -7,7 +7,6 @@ from typing import Any
 
 from loguru import logger
 
-from nanobot.bus.events import OutboundMessage
 from nanobot.bus.queue import MessageBus
 from nanobot.channels.base import BaseChannel
 from nanobot.config.schema import Config
@@ -150,6 +149,19 @@ class ChannelManager:
             except ImportError as e:
                 logger.warning("Matrix channel not available: {}", e)
 
+        # WeCom channel
+        if self.config.channels.wecom.enabled:
+            try:
+                from nanobot.channels.wecom import WecomChannel
+                self.channels["wecom"] = WecomChannel(
+                    self.config.channels.wecom,
+                    self.bus,
+                    groq_api_key=self.config.providers.groq.api_key,
+                )
+                logger.info("WeCom channel enabled")
+            except ImportError as e:
+                logger.warning("WeCom channel not available: {}", e)
+
         self._validate_allow_from()
 
     def _validate_allow_from(self) -> None:
diff --git a/nanobot/channels/wecom.py b/nanobot/channels/wecom.py
new file mode 100644
index 0000000..dc97311
--- /dev/null
+++ b/nanobot/channels/wecom.py
@@ -0,0 +1,352 @@
+"""WeCom (Enterprise WeChat) channel implementation using wecom_aibot_sdk."""
+
+import asyncio
+import importlib.util
+from collections import OrderedDict
+from typing import Any
+
+from loguru import logger
+
+from nanobot.bus.events import OutboundMessage
+from nanobot.bus.queue import MessageBus
+from nanobot.channels.base import BaseChannel
+from nanobot.config.paths import get_media_dir
+from nanobot.config.schema import WecomConfig
+
+WECOM_AVAILABLE = importlib.util.find_spec("wecom_aibot_sdk") is not None
+
+# Message type display mapping
+MSG_TYPE_MAP = {
+    "image": "[image]",
+    "voice": "[voice]",
+    "file": "[file]",
+    "mixed": "[mixed content]",
+}
+
+
+class WecomChannel(BaseChannel):
+    """
+    WeCom (Enterprise WeChat) channel using WebSocket long connection.
+
+    Uses WebSocket to receive events - no public IP or webhook required.
+
+    Requires:
+    - Bot ID and Secret from WeCom AI Bot platform
+    """
+
+    name = "wecom"
+
+    def __init__(self, config: WecomConfig, bus: MessageBus, groq_api_key: str = ""):
+        super().__init__(config, bus)
+        self.config: WecomConfig = config
+        self.groq_api_key = groq_api_key
+        self._client: Any = None
+        self._processed_message_ids: OrderedDict[str, None] = OrderedDict()
+        self._loop: asyncio.AbstractEventLoop | None = None
+        self._generate_req_id = None
+        # Store frame headers for each chat to enable replies
+        self._chat_frames: dict[str, Any] = {}
+
+    async def start(self) -> None:
+        """Start the WeCom bot with WebSocket long connection."""
+        if not WECOM_AVAILABLE:
+            logger.error("WeCom SDK not installed. Run: pip install wecom-aibot-sdk-python")
+            return
+
+        if not self.config.bot_id or not self.config.secret:
+            logger.error("WeCom bot_id and secret not configured")
+            return
+
+        from wecom_aibot_sdk import WSClient, generate_req_id
+
+        self._running = True
+        self._loop = asyncio.get_running_loop()
+        self._generate_req_id = generate_req_id
+
+        # Create WebSocket client
+        self._client = WSClient({
+            "bot_id": self.config.bot_id,
+            "secret": self.config.secret,
+            "reconnect_interval": 1000,
+            "max_reconnect_attempts": -1,  # Infinite reconnect
+            "heartbeat_interval": 30000,
+        })
+
+        # Register event handlers
+        self._client.on("connected", self._on_connected)
+        self._client.on("authenticated", self._on_authenticated)
+        self._client.on("disconnected", self._on_disconnected)
+        self._client.on("error", self._on_error)
+        self._client.on("message.text", self._on_text_message)
+        self._client.on("message.image", self._on_image_message)
+        self._client.on("message.voice", self._on_voice_message)
+        self._client.on("message.file", self._on_file_message)
+        self._client.on("message.mixed", self._on_mixed_message)
+        self._client.on("event.enter_chat", self._on_enter_chat)
+
+        logger.info("WeCom bot starting with WebSocket long connection")
+        logger.info("No public IP required - using WebSocket to receive events")
+
+        # Connect
+        await self._client.connect_async()
+
+        # Keep running until stopped
+        while self._running:
+            await asyncio.sleep(1)
+
+    async def stop(self) -> None:
+        """Stop the WeCom bot."""
+        self._running = False
+        if self._client:
+            self._client.disconnect()
+        logger.info("WeCom bot stopped")
+
+    async def _on_connected(self, frame: Any) -> None:
+        """Handle WebSocket connected event."""
+        logger.info("WeCom WebSocket connected")
+
+    async def _on_authenticated(self, frame: Any) -> None:
+        """Handle authentication success event."""
+        logger.info("WeCom authenticated successfully")
+
+    async def _on_disconnected(self, frame: Any) -> None:
+        """Handle WebSocket disconnected event."""
+        reason = frame.body if hasattr(frame, 'body') else str(frame)
+        logger.warning("WeCom WebSocket disconnected: {}", reason)
+
+    async def _on_error(self, frame: Any) -> None:
+        """Handle error event."""
+        logger.error("WeCom error: {}", frame)
+
+    async def _on_text_message(self, frame: Any) -> None:
+        """Handle text message."""
+        await self._process_message(frame, "text")
+
+    async def _on_image_message(self, frame: Any) -> None:
+        """Handle image message."""
+        await self._process_message(frame, "image")
+
+    async def _on_voice_message(self, frame: Any) -> None:
+        """Handle voice message."""
+        await self._process_message(frame, "voice")
+
+    async def _on_file_message(self, frame: Any) -> None:
+        """Handle file message."""
+        await self._process_message(frame, "file")
+
+    async def _on_mixed_message(self, frame: Any) -> None:
+        """Handle mixed content message."""
+        await self._process_message(frame, "mixed")
+
+    async def _on_enter_chat(self, frame: Any) -> None:
+        """Handle enter_chat event (user opens chat with bot)."""
+        try:
+            # Extract body from WsFrame dataclass or dict
+            if hasattr(frame, 'body'):
+                body = frame.body or {}
+            elif isinstance(frame, dict):
+                body = frame.get("body", frame)
+            else:
+                body = {}
+
+            chat_id = body.get("chatid", "") if isinstance(body, dict) else ""
+
+            if chat_id and self.config.welcome_message:
+                await self._client.reply_welcome(frame, {
+                    "msgtype": "text",
+                    "text": {"content": self.config.welcome_message},
+                })
+        except Exception as e:
+            logger.error("Error handling enter_chat: {}", e)
+
+    async def _process_message(self, frame: Any, msg_type: str) -> None:
+        """Process incoming message and forward to bus."""
+        try:
+            # Extract body from WsFrame dataclass or dict
+            if hasattr(frame, 'body'):
+                body = frame.body or {}
+            elif isinstance(frame, dict):
+                body = frame.get("body", frame)
+            else:
+                body = {}
+
+            # Ensure body is a dict
+            if not isinstance(body, dict):
+                logger.warning("Invalid body type: {}", type(body))
+                return
+
+            # Extract message info
+            msg_id = body.get("msgid", "")
+            if not msg_id:
+                msg_id = f"{body.get('chatid', '')}_{body.get('sendertime', '')}"
+
+            # Deduplication check
+            if msg_id in self._processed_message_ids:
+                return
+            self._processed_message_ids[msg_id] = None
+
+            # Trim cache
+            while len(self._processed_message_ids) > 1000:
+                self._processed_message_ids.popitem(last=False)
+
+            # Extract sender info from "from" field (SDK format)
+            from_info = body.get("from", {})
+            sender_id = from_info.get("userid", "unknown") if isinstance(from_info, dict) else "unknown"
+
+            # For single chat, chatid is the sender's userid
+            # For group chat, chatid is provided in body
+            chat_type = body.get("chattype", "single")
+            chat_id = body.get("chatid", sender_id)
+
+            content_parts = []
+
+            if msg_type == "text":
+                text = body.get("text", {}).get("content", "")
+                if text:
+                    content_parts.append(text)
+
+            elif msg_type == "image":
+                image_info = body.get("image", {})
+                file_url = image_info.get("url", "")
+                aes_key = image_info.get("aeskey", "")
+
+                if file_url and aes_key:
+                    file_path = await self._download_and_save_media(file_url, aes_key, "image")
+                    if file_path:
+                        import os
+                        filename = os.path.basename(file_path)
+                        content_parts.append(f"[image: {filename}]\n[Image: source: {file_path}]")
+                    else:
+                        content_parts.append("[image: download failed]")
+                else:
+                    content_parts.append("[image: download failed]")
+
+            elif msg_type == "voice":
+                voice_info = body.get("voice", {})
+                # Voice message already contains transcribed content from WeCom
+                voice_content = voice_info.get("content", "")
+                if voice_content:
+                    content_parts.append(f"[voice] {voice_content}")
+                else:
+                    content_parts.append("[voice]")
+
+            elif msg_type == "file":
+                file_info = body.get("file", {})
+                file_url = file_info.get("url", "")
+                aes_key = file_info.get("aeskey", "")
+                file_name = file_info.get("name", "unknown")
+
+                if file_url and aes_key:
+                    file_path = await self._download_and_save_media(file_url, aes_key, "file", file_name)
+                    if file_path:
+                        content_parts.append(f"[file: {file_name}]\n[File: source: {file_path}]")
+                    else:
+                        content_parts.append(f"[file: {file_name}: download failed]")
+                else:
+                    content_parts.append(f"[file: {file_name}: download failed]")
+
+            elif msg_type == "mixed":
+                # Mixed content contains multiple message items
+                msg_items = body.get("mixed", {}).get("item", [])
+                for item in msg_items:
+                    item_type = item.get("type", "")
+                    if item_type == "text":
+                        text = item.get("text", {}).get("content", "")
+                        if text:
+                            content_parts.append(text)
+                    else:
+                        content_parts.append(MSG_TYPE_MAP.get(item_type, f"[{item_type}]"))
+
+            else:
+                content_parts.append(MSG_TYPE_MAP.get(msg_type, f"[{msg_type}]"))
+
+            content = "\n".join(content_parts) if content_parts else ""
+
+            if not content:
+                return
+
+            # Store frame for this chat to enable replies
+            self._chat_frames[chat_id] = frame
+
+            # Forward to message bus
+            # Note: media paths are included in content for broader model compatibility
+            await self._handle_message(
+                sender_id=sender_id,
+                chat_id=chat_id,
+                content=content,
+                media=None,
+                metadata={
+                    "message_id": msg_id,
+                    "msg_type": msg_type,
+                    "chat_type": chat_type,
+                }
+            )
+
+        except Exception as e:
+            logger.error("Error processing WeCom message: {}", e)
+
+    async def _download_and_save_media(
+        self,
+        file_url: str,
+        aes_key: str,
+        media_type: str,
+        filename: str | None = None,
+    ) -> str | None:
+        """
+        Download and decrypt media from WeCom.
+
+        Returns:
+            file_path or None if download failed
+        """
+        try:
+            data, fname = await self._client.download_file(file_url, aes_key)
+
+            if not data:
+                logger.warning("Failed to download media from WeCom")
+                return None
+
+            media_dir = get_media_dir("wecom")
+            if not filename:
+                filename = fname or f"{media_type}_{hash(file_url) % 100000}"
+
+            file_path = media_dir / filename
+            file_path.write_bytes(data)
+            logger.debug("Downloaded {} to {}", media_type, file_path)
+            return str(file_path)
+
+        except Exception as e:
+            logger.error("Error downloading media: {}", e)
+            return None
+
+    async def send(self, msg: OutboundMessage) -> None:
+        """Send a message through WeCom."""
+        if not self._client:
+            logger.warning("WeCom client not initialized")
+            return
+
+        try:
+            content = msg.content.strip()
+            if not content:
+                return
+
+            # Get the stored frame for this chat
+            frame = self._chat_frames.get(msg.chat_id)
+            if not frame:
+                logger.warning("No frame found for chat {}, cannot reply", msg.chat_id)
+                return
+
+            # Use streaming reply for better UX
+            stream_id = self._generate_req_id("stream")
+
+            # Send as streaming message with finish=True
+            await self._client.reply_stream(
+                frame,
+                stream_id,
+                content,
+                finish=True,
+            )
+
+            logger.debug("WeCom message sent to {}", msg.chat_id)
+
+        except Exception as e:
+            logger.error("Error sending WeCom message: {}", e)
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index 803cb61..63eae48 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -199,7 +199,15 @@ class QQConfig(Base):
     )  # Allowed user openids (empty = public access)
 
 
+class WecomConfig(Base):
+    """WeCom (Enterprise WeChat) AI Bot channel configuration."""
 
+    enabled: bool = False
+    bot_id: str = ""  # Bot ID from WeCom AI Bot platform
+    secret: str = ""  # Bot Secret from WeCom AI Bot platform
+    allow_from: list[str] = Field(default_factory=list)  # Allowed user IDs
+    welcome_message: str = ""  # Welcome message for enter_chat event
+    react_emoji: str = "eyes"  # Emoji for message reactions
 
 class ChannelsConfig(Base):
     """Configuration for chat channels."""
@@ -216,6 +224,7 @@ class ChannelsConfig(Base):
     slack: SlackConfig = Field(default_factory=SlackConfig)
     qq: QQConfig = Field(default_factory=QQConfig)
     matrix: MatrixConfig = Field(default_factory=MatrixConfig)
+    wecom: WecomConfig = Field(default_factory=WecomConfig)
 
 
 class AgentDefaults(Base):
diff --git a/pyproject.toml b/pyproject.toml
index 62cf616..fac53ce 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,6 +44,7 @@ dependencies = [
     "json-repair>=0.57.0,<1.0.0",
     "chardet>=3.0.2,<6.0.0",
     "openai>=2.8.0",
+    "wecom-aibot-sdk-python>=0.1.2",
 ]
 
 [project.optional-dependencies]

From 620d7896c710748053257695d25c3391aa637dc5 Mon Sep 17 00:00:00 2001
From: ailuntz <ailuntz@icloud.com>
Date: Tue, 10 Mar 2026 00:14:34 +0800
Subject: [PATCH 14/53] fix(slack): define thread usage when sending messages

---
 nanobot/channels/slack.py   |  2 +-
 tests/test_slack_channel.py | 88 +++++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_slack_channel.py

diff --git a/nanobot/channels/slack.py b/nanobot/channels/slack.py
index a4e7324..e36c4c9 100644
--- a/nanobot/channels/slack.py
+++ b/nanobot/channels/slack.py
@@ -82,6 +82,7 @@ class SlackChannel(BaseChannel):
             thread_ts = slack_meta.get("thread_ts")
             channel_type = slack_meta.get("channel_type")
             # Only reply in thread for channel/group messages; DMs don't use threads
+            use_thread = bool(thread_ts and channel_type != "im")
             thread_ts_param = thread_ts if use_thread else None
 
             # Slack rejects empty text payloads. Keep media-only messages media-only,
@@ -278,4 +279,3 @@ class SlackChannel(BaseChannel):
             if parts:
                 rows.append(" · ".join(parts))
         return "\n".join(rows)
-
diff --git a/tests/test_slack_channel.py b/tests/test_slack_channel.py
new file mode 100644
index 0000000..18b96ef
--- /dev/null
+++ b/tests/test_slack_channel.py
@@ -0,0 +1,88 @@
+from __future__ import annotations
+
+import pytest
+
+from nanobot.bus.events import OutboundMessage
+from nanobot.bus.queue import MessageBus
+from nanobot.channels.slack import SlackChannel
+from nanobot.config.schema import SlackConfig
+
+
+class _FakeAsyncWebClient:
+    def __init__(self) -> None:
+        self.chat_post_calls: list[dict[str, object | None]] = []
+        self.file_upload_calls: list[dict[str, object | None]] = []
+
+    async def chat_postMessage(
+        self,
+        *,
+        channel: str,
+        text: str,
+        thread_ts: str | None = None,
+    ) -> None:
+        self.chat_post_calls.append(
+            {
+                "channel": channel,
+                "text": text,
+                "thread_ts": thread_ts,
+            }
+        )
+
+    async def files_upload_v2(
+        self,
+        *,
+        channel: str,
+        file: str,
+        thread_ts: str | None = None,
+    ) -> None:
+        self.file_upload_calls.append(
+            {
+                "channel": channel,
+                "file": file,
+                "thread_ts": thread_ts,
+            }
+        )
+
+
+@pytest.mark.asyncio
+async def test_send_uses_thread_for_channel_messages() -> None:
+    channel = SlackChannel(SlackConfig(enabled=True), MessageBus())
+    fake_web = _FakeAsyncWebClient()
+    channel._web_client = fake_web
+
+    await channel.send(
+        OutboundMessage(
+            channel="slack",
+            chat_id="C123",
+            content="hello",
+            media=["/tmp/demo.txt"],
+            metadata={"slack": {"thread_ts": "1700000000.000100", "channel_type": "channel"}},
+        )
+    )
+
+    assert len(fake_web.chat_post_calls) == 1
+    assert fake_web.chat_post_calls[0]["thread_ts"] == "1700000000.000100"
+    assert len(fake_web.file_upload_calls) == 1
+    assert fake_web.file_upload_calls[0]["thread_ts"] == "1700000000.000100"
+
+
+@pytest.mark.asyncio
+async def test_send_omits_thread_for_dm_messages() -> None:
+    channel = SlackChannel(SlackConfig(enabled=True), MessageBus())
+    fake_web = _FakeAsyncWebClient()
+    channel._web_client = fake_web
+
+    await channel.send(
+        OutboundMessage(
+            channel="slack",
+            chat_id="D123",
+            content="hello",
+            media=["/tmp/demo.txt"],
+            metadata={"slack": {"thread_ts": "1700000000.000100", "channel_type": "im"}},
+        )
+    )
+
+    assert len(fake_web.chat_post_calls) == 1
+    assert fake_web.chat_post_calls[0]["thread_ts"] is None
+    assert len(fake_web.file_upload_calls) == 1
+    assert fake_web.file_upload_calls[0]["thread_ts"] is None

From 9c88e40a616190aca65ce3d3149f4529865ca5d8 Mon Sep 17 00:00:00 2001
From: ailuntz <ailuntz@icloud.com>
Date: Tue, 10 Mar 2026 00:32:42 +0800
Subject: [PATCH 15/53] fix(cli): respect gateway port from config when --port
 omitted

---
 nanobot/cli/commands.py |  5 +++--
 tests/test_commands.py  | 44 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index 2c8d6d3..a5906d2 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -290,7 +290,7 @@ def _load_runtime_config(config: str | None = None, workspace: str | None = None
 
 @app.command()
 def gateway(
-    port: int = typer.Option(18790, "--port", "-p", help="Gateway port"),
+    port: int | None = typer.Option(None, "--port", "-p", help="Gateway port"),
     workspace: str | None = typer.Option(None, "--workspace", "-w", help="Workspace directory"),
     verbose: bool = typer.Option(False, "--verbose", "-v", help="Verbose output"),
     config: str | None = typer.Option(None, "--config", "-c", help="Path to config file"),
@@ -310,8 +310,9 @@ def gateway(
         logging.basicConfig(level=logging.DEBUG)
 
     config = _load_runtime_config(config, workspace)
+    selected_port = port if port is not None else config.gateway.port
 
-    console.print(f"{__logo__} Starting nanobot gateway on port {port}...")
+    console.print(f"{__logo__} Starting nanobot gateway on port {selected_port}...")
     sync_workspace_templates(config.workspace_path)
     bus = MessageBus()
     provider = _make_provider(config)
diff --git a/tests/test_commands.py b/tests/test_commands.py
index 19c1998..9479dad 100644
--- a/tests/test_commands.py
+++ b/tests/test_commands.py
@@ -328,6 +328,50 @@ def test_gateway_workspace_option_overrides_config(monkeypatch, tmp_path: Path)
     assert config.workspace_path == override
 
 
+def test_gateway_uses_port_from_config_when_cli_port_is_omitted(monkeypatch, tmp_path: Path) -> None:
+    config_file = tmp_path / "instance" / "config.json"
+    config_file.parent.mkdir(parents=True)
+    config_file.write_text("{}")
+
+    config = Config()
+    config.gateway.port = 18791
+
+    monkeypatch.setattr("nanobot.config.loader.set_config_path", lambda _path: None)
+    monkeypatch.setattr("nanobot.config.loader.load_config", lambda _path=None: config)
+    monkeypatch.setattr("nanobot.cli.commands.sync_workspace_templates", lambda _path: None)
+    monkeypatch.setattr(
+        "nanobot.cli.commands._make_provider",
+        lambda _config: (_ for _ in ()).throw(_StopGateway("stop")),
+    )
+
+    result = runner.invoke(app, ["gateway", "--config", str(config_file)])
+
+    assert isinstance(result.exception, _StopGateway)
+    assert "Starting nanobot gateway on port 18791" in result.stdout
+
+
+def test_gateway_cli_port_overrides_config_port(monkeypatch, tmp_path: Path) -> None:
+    config_file = tmp_path / "instance" / "config.json"
+    config_file.parent.mkdir(parents=True)
+    config_file.write_text("{}")
+
+    config = Config()
+    config.gateway.port = 18791
+
+    monkeypatch.setattr("nanobot.config.loader.set_config_path", lambda _path: None)
+    monkeypatch.setattr("nanobot.config.loader.load_config", lambda _path=None: config)
+    monkeypatch.setattr("nanobot.cli.commands.sync_workspace_templates", lambda _path: None)
+    monkeypatch.setattr(
+        "nanobot.cli.commands._make_provider",
+        lambda _config: (_ for _ in ()).throw(_StopGateway("stop")),
+    )
+
+    result = runner.invoke(app, ["gateway", "--config", str(config_file), "--port", "18801"])
+
+    assert isinstance(result.exception, _StopGateway)
+    assert "Starting nanobot gateway on port 18801" in result.stdout
+
+
 def test_gateway_uses_config_directory_for_cron_store(monkeypatch, tmp_path: Path) -> None:
     config_file = tmp_path / "instance" / "config.json"
     config_file.parent.mkdir(parents=True)

From 45c0eebae5a700cfa5da28c2ff31208f34180509 Mon Sep 17 00:00:00 2001
From: chengyongru <2755839590@qq.com>
Date: Tue, 10 Mar 2026 00:53:23 +0800
Subject: [PATCH 16/53] docs(wecom): add wecom configuration guide in readme

---
 README.md | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/README.md b/README.md
index d3401ea..3d5fb63 100644
--- a/README.md
+++ b/README.md
@@ -207,6 +207,7 @@ Connect nanobot to your favorite chat platform.
 | **Slack** | Bot token + App-Level token |
 | **Email** | IMAP/SMTP credentials |
 | **QQ** | App ID + App Secret |
+| **WeCom** | Bot ID + Bot Secret |
 
 <details>
 <summary><b>Telegram</b> (Recommended)</summary>
@@ -676,6 +677,44 @@ nanobot gateway
 
 </details>
 
+<details>
+<summary><b>WeCom (企业微信)</b></summary>
+
+Uses **WebSocket** long connection — no public IP required.
+
+**1. Create a WeCom bot**
+
+In the WeCom client's workspace, click "Intelligent Robot", create a robot, and choose API mode.
+Select the "long connection" mode, then copy the Bot ID and Secret.
+
+**2. Configure**
+
+```json
+{
+  "channels": {
+    "wecom": {
+      "enabled": true,
+      "botId": "your_bot_id",
+      "secret": "your_secret",
+      "allowFrom": [
+        "your_id"
+      ]
+    }
+  }
+}
+```
+
+**3. Run**
+
+```bash
+nanobot gateway
+```
+
+> [!TIP]
+> WeCom uses WebSocket to receive messages — no webhook or public IP needed!
+
+</details>
+
 ## 🌐 Agent Social Network
 
 🐈 nanobot is capable of linking to the agent social network (agent community). **Just send one message and your nanobot joins automatically!**

From 28330940d0b2cefbfe740957ee8f51ed9349c24e Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Mon, 9 Mar 2026 17:18:10 +0000
Subject: [PATCH 17/53] fix(slack): skip thread_ts for direct messages

---
 nanobot/channels/slack.py   | 5 ++---
 tests/test_slack_channel.py | 2 ++
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/nanobot/channels/slack.py b/nanobot/channels/slack.py
index e36c4c9..0384d8d 100644
--- a/nanobot/channels/slack.py
+++ b/nanobot/channels/slack.py
@@ -81,9 +81,8 @@ class SlackChannel(BaseChannel):
             slack_meta = msg.metadata.get("slack", {}) if msg.metadata else {}
             thread_ts = slack_meta.get("thread_ts")
             channel_type = slack_meta.get("channel_type")
-            # Only reply in thread for channel/group messages; DMs don't use threads
-            use_thread = bool(thread_ts and channel_type != "im")
-            thread_ts_param = thread_ts if use_thread else None
+            # Slack DMs don't use threads; channel/group replies may keep thread_ts.
+            thread_ts_param = thread_ts if thread_ts and channel_type != "im" else None
 
             # Slack rejects empty text payloads. Keep media-only messages media-only,
             # but send a single blank message when the bot has no text or files to send.
diff --git a/tests/test_slack_channel.py b/tests/test_slack_channel.py
index 18b96ef..891f86a 100644
--- a/tests/test_slack_channel.py
+++ b/tests/test_slack_channel.py
@@ -61,6 +61,7 @@ async def test_send_uses_thread_for_channel_messages() -> None:
     )
 
     assert len(fake_web.chat_post_calls) == 1
+    assert fake_web.chat_post_calls[0]["text"] == "hello\n"
     assert fake_web.chat_post_calls[0]["thread_ts"] == "1700000000.000100"
     assert len(fake_web.file_upload_calls) == 1
     assert fake_web.file_upload_calls[0]["thread_ts"] == "1700000000.000100"
@@ -83,6 +84,7 @@ async def test_send_omits_thread_for_dm_messages() -> None:
     )
 
     assert len(fake_web.chat_post_calls) == 1
+    assert fake_web.chat_post_calls[0]["text"] == "hello\n"
     assert fake_web.chat_post_calls[0]["thread_ts"] is None
     assert len(fake_web.file_upload_calls) == 1
     assert fake_web.file_upload_calls[0]["thread_ts"] is None

From 1284c7217ea2c59a5a9e2786c5f550e9fb5ace1b Mon Sep 17 00:00:00 2001
From: Protocol Zero <257158451+Protocol-zero-0@users.noreply.github.com>
Date: Mon, 9 Mar 2026 20:12:11 +0000
Subject: [PATCH 18/53] fix(cli): let gateway use config port by default

Respect config.gateway.port when --port is omitted, while keeping CLI flags as the highest-precedence override.
---
 nanobot/cli/commands.py |  3 ++-
 tests/test_commands.py  | 44 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index 2c8d6d3..37f08b2 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -290,7 +290,7 @@ def _load_runtime_config(config: str | None = None, workspace: str | None = None
 
 @app.command()
 def gateway(
-    port: int = typer.Option(18790, "--port", "-p", help="Gateway port"),
+    port: int | None = typer.Option(None, "--port", "-p", help="Gateway port"),
     workspace: str | None = typer.Option(None, "--workspace", "-w", help="Workspace directory"),
     verbose: bool = typer.Option(False, "--verbose", "-v", help="Verbose output"),
     config: str | None = typer.Option(None, "--config", "-c", help="Path to config file"),
@@ -310,6 +310,7 @@ def gateway(
         logging.basicConfig(level=logging.DEBUG)
 
     config = _load_runtime_config(config, workspace)
+    port = port if port is not None else config.gateway.port
 
     console.print(f"{__logo__} Starting nanobot gateway on port {port}...")
     sync_workspace_templates(config.workspace_path)
diff --git a/tests/test_commands.py b/tests/test_commands.py
index 19c1998..5d38942 100644
--- a/tests/test_commands.py
+++ b/tests/test_commands.py
@@ -356,3 +356,47 @@ def test_gateway_uses_config_directory_for_cron_store(monkeypatch, tmp_path: Pat
 
     assert isinstance(result.exception, _StopGateway)
     assert seen["cron_store"] == config_file.parent / "cron" / "jobs.json"
+
+
+def test_gateway_uses_configured_port_when_cli_flag_is_missing(monkeypatch, tmp_path: Path) -> None:
+    config_file = tmp_path / "instance" / "config.json"
+    config_file.parent.mkdir(parents=True)
+    config_file.write_text("{}")
+
+    config = Config()
+    config.gateway.port = 18791
+
+    monkeypatch.setattr("nanobot.config.loader.set_config_path", lambda _path: None)
+    monkeypatch.setattr("nanobot.config.loader.load_config", lambda _path=None: config)
+    monkeypatch.setattr("nanobot.cli.commands.sync_workspace_templates", lambda _path: None)
+    monkeypatch.setattr(
+        "nanobot.cli.commands._make_provider",
+        lambda _config: (_ for _ in ()).throw(_StopGateway("stop")),
+    )
+
+    result = runner.invoke(app, ["gateway", "--config", str(config_file)])
+
+    assert isinstance(result.exception, _StopGateway)
+    assert "port 18791" in result.stdout
+
+
+def test_gateway_cli_port_overrides_configured_port(monkeypatch, tmp_path: Path) -> None:
+    config_file = tmp_path / "instance" / "config.json"
+    config_file.parent.mkdir(parents=True)
+    config_file.write_text("{}")
+
+    config = Config()
+    config.gateway.port = 18791
+
+    monkeypatch.setattr("nanobot.config.loader.set_config_path", lambda _path: None)
+    monkeypatch.setattr("nanobot.config.loader.load_config", lambda _path=None: config)
+    monkeypatch.setattr("nanobot.cli.commands.sync_workspace_templates", lambda _path: None)
+    monkeypatch.setattr(
+        "nanobot.cli.commands._make_provider",
+        lambda _config: (_ for _ in ()).throw(_StopGateway("stop")),
+    )
+
+    result = runner.invoke(app, ["gateway", "--config", str(config_file), "--port", "18792"])
+
+    assert isinstance(result.exception, _StopGateway)
+    assert "port 18792" in result.stdout

From 4f9857f85f1f8aeddceb019bc0062d3ba7cab032 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Tue, 10 Mar 2026 04:34:15 +0000
Subject: [PATCH 19/53] feat(telegram): add configurable group mention policy

---
 nanobot/channels/telegram.py   |  86 ++++++++++++++----
 nanobot/config/schema.py       |   2 +-
 tests/test_telegram_channel.py | 156 ++++++++++++++++++++++++++++++++-
 3 files changed, 226 insertions(+), 18 deletions(-)

diff --git a/nanobot/channels/telegram.py b/nanobot/channels/telegram.py
index 0821b7d..5b294cc 100644
--- a/nanobot/channels/telegram.py
+++ b/nanobot/channels/telegram.py
@@ -179,6 +179,8 @@ class TelegramChannel(BaseChannel):
         self._media_group_buffers: dict[str, dict] = {}
         self._media_group_tasks: dict[str, asyncio.Task] = {}
         self._message_threads: dict[tuple[str, int], int] = {}
+        self._bot_user_id: int | None = None
+        self._bot_username: str | None = None
 
     def is_allowed(self, sender_id: str) -> bool:
         """Preserve Telegram's legacy id|username allowlist matching."""
@@ -242,6 +244,8 @@ class TelegramChannel(BaseChannel):
 
         # Get bot info and register command menu
         bot_info = await self._app.bot.get_me()
+        self._bot_user_id = getattr(bot_info, "id", None)
+        self._bot_username = getattr(bot_info, "username", None)
         logger.info("Telegram bot @{} connected", bot_info.username)
 
         try:
@@ -462,6 +466,70 @@ class TelegramChannel(BaseChannel):
             "is_forum": bool(getattr(message.chat, "is_forum", False)),
         }
 
+    async def _ensure_bot_identity(self) -> tuple[int | None, str | None]:
+        """Load bot identity once and reuse it for mention/reply checks."""
+        if self._bot_user_id is not None or self._bot_username is not None:
+            return self._bot_user_id, self._bot_username
+        if not self._app:
+            return None, None
+        bot_info = await self._app.bot.get_me()
+        self._bot_user_id = getattr(bot_info, "id", None)
+        self._bot_username = getattr(bot_info, "username", None)
+        return self._bot_user_id, self._bot_username
+
+    @staticmethod
+    def _has_mention_entity(
+        text: str,
+        entities,
+        bot_username: str,
+        bot_id: int | None,
+    ) -> bool:
+        """Check Telegram mention entities against the bot username."""
+        handle = f"@{bot_username}".lower()
+        for entity in entities or []:
+            entity_type = getattr(entity, "type", None)
+            if entity_type == "text_mention":
+                user = getattr(entity, "user", None)
+                if user is not None and bot_id is not None and getattr(user, "id", None) == bot_id:
+                    return True
+                continue
+            if entity_type != "mention":
+                continue
+            offset = getattr(entity, "offset", None)
+            length = getattr(entity, "length", None)
+            if offset is None or length is None:
+                continue
+            if text[offset : offset + length].lower() == handle:
+                return True
+        return handle in text.lower()
+
+    async def _is_group_message_for_bot(self, message) -> bool:
+        """Allow group messages when policy is open, @mentioned, or replying to the bot."""
+        if message.chat.type == "private" or self.config.group_policy == "open":
+            return True
+
+        bot_id, bot_username = await self._ensure_bot_identity()
+        if bot_username:
+            text = message.text or ""
+            caption = message.caption or ""
+            if self._has_mention_entity(
+                text,
+                getattr(message, "entities", None),
+                bot_username,
+                bot_id,
+            ):
+                return True
+            if self._has_mention_entity(
+                caption,
+                getattr(message, "caption_entities", None),
+                bot_username,
+                bot_id,
+            ):
+                return True
+
+        reply_user = getattr(getattr(message, "reply_to_message", None), "from_user", None)
+        return bool(bot_id and reply_user and reply_user.id == bot_id)
+
     def _remember_thread_context(self, message) -> None:
         """Cache topic thread id by chat/message id for follow-up replies."""
         message_thread_id = getattr(message, "message_thread_id", None)
@@ -501,22 +569,8 @@ class TelegramChannel(BaseChannel):
         # Store chat_id for replies
         self._chat_ids[sender_id] = chat_id
 
-        # Enforce group_policy: in group chats with "mention" policy,
-        # only respond when the bot is @mentioned or the message is a reply to the bot.
-        is_group = message.chat.type != "private"
-        if is_group and getattr(self.config, "group_policy", "open") == "mention":
-            bot_username = (await self._app.bot.get_me()).username if self._app else None
-            mentioned = False
-            # Check if bot is @mentioned in text
-            if bot_username and message.text:
-                mentioned = f"@{bot_username}" in message.text
-            # Check if the message is a reply to the bot
-            if not mentioned and message.reply_to_message and message.reply_to_message.from_user:
-                bot_id = (await self._app.bot.get_me()).id if self._app else None
-                if bot_id and message.reply_to_message.from_user.id == bot_id:
-                    mentioned = True
-            if not mentioned:
-                return
+        if not await self._is_group_message_for_bot(message):
+            return
 
         # Build content from text and/or media
         content_parts = []
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index 3c5e315..8cfcad6 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -33,7 +33,7 @@ class TelegramConfig(Base):
         None  # HTTP/SOCKS5 proxy URL, e.g. "http://127.0.0.1:7890" or "socks5://127.0.0.1:1080"
     )
     reply_to_message: bool = False  # If true, bot replies quote the original message
-    group_policy: Literal["open", "mention"] = "open"  # "open" responds to all, "mention" only when @mentioned or replied to
+    group_policy: Literal["open", "mention"] = "mention"  # "mention" responds when @mentioned or replied to, "open" responds to all
 
 
 class FeishuConfig(Base):
diff --git a/tests/test_telegram_channel.py b/tests/test_telegram_channel.py
index 88c3f54..678512d 100644
--- a/tests/test_telegram_channel.py
+++ b/tests/test_telegram_channel.py
@@ -27,9 +27,11 @@ class _FakeUpdater:
 class _FakeBot:
     def __init__(self) -> None:
         self.sent_messages: list[dict] = []
+        self.get_me_calls = 0
 
     async def get_me(self):
-        return SimpleNamespace(username="nanobot_test")
+        self.get_me_calls += 1
+        return SimpleNamespace(id=999, username="nanobot_test")
 
     async def set_my_commands(self, commands) -> None:
         self.commands = commands
@@ -37,6 +39,9 @@ class _FakeBot:
     async def send_message(self, **kwargs) -> None:
         self.sent_messages.append(kwargs)
 
+    async def send_chat_action(self, **kwargs) -> None:
+        pass
+
 
 class _FakeApp:
     def __init__(self, on_start_polling) -> None:
@@ -87,6 +92,35 @@ class _FakeBuilder:
         return self.app
 
 
+def _make_telegram_update(
+    *,
+    chat_type: str = "group",
+    text: str | None = None,
+    caption: str | None = None,
+    entities=None,
+    caption_entities=None,
+    reply_to_message=None,
+):
+    user = SimpleNamespace(id=12345, username="alice", first_name="Alice")
+    message = SimpleNamespace(
+        chat=SimpleNamespace(type=chat_type, is_forum=False),
+        chat_id=-100123,
+        text=text,
+        caption=caption,
+        entities=entities or [],
+        caption_entities=caption_entities or [],
+        reply_to_message=reply_to_message,
+        photo=None,
+        voice=None,
+        audio=None,
+        document=None,
+        media_group_id=None,
+        message_thread_id=None,
+        message_id=1,
+    )
+    return SimpleNamespace(message=message, effective_user=user)
+
+
 @pytest.mark.asyncio
 async def test_start_uses_request_proxy_without_builder_proxy(monkeypatch) -> None:
     config = TelegramConfig(
@@ -131,6 +165,10 @@ def test_get_extension_falls_back_to_original_filename() -> None:
     assert channel._get_extension("file", None, "archive.tar.gz") == ".tar.gz"
 
 
+def test_telegram_group_policy_defaults_to_mention() -> None:
+    assert TelegramConfig().group_policy == "mention"
+
+
 def test_is_allowed_accepts_legacy_telegram_id_username_formats() -> None:
     channel = TelegramChannel(TelegramConfig(allow_from=["12345", "alice", "67890|bob"]), MessageBus())
 
@@ -182,3 +220,119 @@ async def test_send_reply_infers_topic_from_message_id_cache() -> None:
 
     assert channel._app.bot.sent_messages[0]["message_thread_id"] == 42
     assert channel._app.bot.sent_messages[0]["reply_parameters"].message_id == 10
+
+
+@pytest.mark.asyncio
+async def test_group_policy_mention_ignores_unmentioned_group_message() -> None:
+    channel = TelegramChannel(
+        TelegramConfig(enabled=True, token="123:abc", allow_from=["*"], group_policy="mention"),
+        MessageBus(),
+    )
+    channel._app = _FakeApp(lambda: None)
+
+    handled = []
+
+    async def capture_handle(**kwargs) -> None:
+        handled.append(kwargs)
+
+    channel._handle_message = capture_handle
+    channel._start_typing = lambda _chat_id: None
+
+    await channel._on_message(_make_telegram_update(text="hello everyone"), None)
+
+    assert handled == []
+    assert channel._app.bot.get_me_calls == 1
+
+
+@pytest.mark.asyncio
+async def test_group_policy_mention_accepts_text_mention_and_caches_bot_identity() -> None:
+    channel = TelegramChannel(
+        TelegramConfig(enabled=True, token="123:abc", allow_from=["*"], group_policy="mention"),
+        MessageBus(),
+    )
+    channel._app = _FakeApp(lambda: None)
+
+    handled = []
+
+    async def capture_handle(**kwargs) -> None:
+        handled.append(kwargs)
+
+    channel._handle_message = capture_handle
+    channel._start_typing = lambda _chat_id: None
+
+    mention = SimpleNamespace(type="mention", offset=0, length=13)
+    await channel._on_message(_make_telegram_update(text="@nanobot_test hi", entities=[mention]), None)
+    await channel._on_message(_make_telegram_update(text="@nanobot_test again", entities=[mention]), None)
+
+    assert len(handled) == 2
+    assert channel._app.bot.get_me_calls == 1
+
+
+@pytest.mark.asyncio
+async def test_group_policy_mention_accepts_caption_mention() -> None:
+    channel = TelegramChannel(
+        TelegramConfig(enabled=True, token="123:abc", allow_from=["*"], group_policy="mention"),
+        MessageBus(),
+    )
+    channel._app = _FakeApp(lambda: None)
+
+    handled = []
+
+    async def capture_handle(**kwargs) -> None:
+        handled.append(kwargs)
+
+    channel._handle_message = capture_handle
+    channel._start_typing = lambda _chat_id: None
+
+    mention = SimpleNamespace(type="mention", offset=0, length=13)
+    await channel._on_message(
+        _make_telegram_update(caption="@nanobot_test photo", caption_entities=[mention]),
+        None,
+    )
+
+    assert len(handled) == 1
+    assert handled[0]["content"] == "@nanobot_test photo"
+
+
+@pytest.mark.asyncio
+async def test_group_policy_mention_accepts_reply_to_bot() -> None:
+    channel = TelegramChannel(
+        TelegramConfig(enabled=True, token="123:abc", allow_from=["*"], group_policy="mention"),
+        MessageBus(),
+    )
+    channel._app = _FakeApp(lambda: None)
+
+    handled = []
+
+    async def capture_handle(**kwargs) -> None:
+        handled.append(kwargs)
+
+    channel._handle_message = capture_handle
+    channel._start_typing = lambda _chat_id: None
+
+    reply = SimpleNamespace(from_user=SimpleNamespace(id=999))
+    await channel._on_message(_make_telegram_update(text="reply", reply_to_message=reply), None)
+
+    assert len(handled) == 1
+
+
+@pytest.mark.asyncio
+async def test_group_policy_open_accepts_plain_group_message() -> None:
+    channel = TelegramChannel(
+        TelegramConfig(enabled=True, token="123:abc", allow_from=["*"], group_policy="open"),
+        MessageBus(),
+    )
+    channel._app = _FakeApp(lambda: None)
+
+    handled = []
+
+    async def capture_handle(**kwargs) -> None:
+        handled.append(kwargs)
+
+    channel._handle_message = capture_handle
+    channel._start_typing = lambda _chat_id: None
+
+    await channel._on_message(_make_telegram_update(text="hello group"), None)
+
+    assert len(handled) == 1
+    assert channel._app.bot.get_me_calls == 0

From 6c70154feeeff638cfb79a6e19d263f36ea7f5f6 Mon Sep 17 00:00:00 2001
From: suger-m <gau306323@gmail.com>
Date: Tue, 10 Mar 2026 15:55:04 +0800
Subject: [PATCH 20/53] fix(exec): enforce workspace guard for home-expanded
 paths

---
 nanobot/agent/tools/shell.py  |  6 ++++--
 tests/test_tool_validation.py | 13 +++++++++++++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/nanobot/agent/tools/shell.py b/nanobot/agent/tools/shell.py
index ce19920..4726e3c 100644
--- a/nanobot/agent/tools/shell.py
+++ b/nanobot/agent/tools/shell.py
@@ -143,7 +143,8 @@ class ExecTool(Tool):
 
             for raw in self._extract_absolute_paths(cmd):
                 try:
-                    p = Path(raw.strip()).resolve()
+                    expanded = os.path.expandvars(raw.strip())
+                    p = Path(expanded).expanduser().resolve()
                 except Exception:
                     continue
                 if p.is_absolute() and cwd_path not in p.parents and p != cwd_path:
@@ -155,4 +156,5 @@ class ExecTool(Tool):
     def _extract_absolute_paths(command: str) -> list[str]:
         win_paths = re.findall(r"[A-Za-z]:\\[^\s\"'|><;]+", command)   # Windows: C:\...
         posix_paths = re.findall(r"(?:^|[\s|>])(/[^\s\"'>]+)", command) # POSIX: /absolute only
-        return win_paths + posix_paths
+        home_paths = re.findall(r"(?:^|[\s|>])(~[^\s\"'>;|<]*)", command) # POSIX/Windows home shortcut: ~
+        return win_paths + posix_paths + home_paths
diff --git a/tests/test_tool_validation.py b/tests/test_tool_validation.py
index c2b4b6a..cf648bf 100644
--- a/tests/test_tool_validation.py
+++ b/tests/test_tool_validation.py
@@ -108,6 +108,19 @@ def test_exec_extract_absolute_paths_captures_posix_absolute_paths() -> None:
     assert "/tmp/out.txt" in paths
 
 
+def test_exec_extract_absolute_paths_captures_home_paths() -> None:
+    cmd = "cat ~/.nanobot/config.json > ~/out.txt"
+    paths = ExecTool._extract_absolute_paths(cmd)
+    assert "~/.nanobot/config.json" in paths
+    assert "~/out.txt" in paths
+
+
+def test_exec_guard_blocks_home_path_outside_workspace(tmp_path) -> None:
+    tool = ExecTool(restrict_to_workspace=True)
+    error = tool._guard_command("cat ~/.nanobot/config.json", str(tmp_path))
+    assert error == "Error: Command blocked by safety guard (path outside working dir)"
+
+
 # --- cast_params tests ---
 
 

From b7ecc94c9b85aadc79e0d6598ea42ad7dbaa15f1 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Tue, 10 Mar 2026 09:16:23 +0000
Subject: [PATCH 21/53] fix(skill-creator): restore validation and align
 packaging docs

---
 nanobot/skills/skill-creator/SKILL.md         |  10 +-
 .../skill-creator/scripts/package_skill.py    |  77 ++++---
 .../skill-creator/scripts/quick_validate.py   | 213 ++++++++++++++++++
 tests/test_skill_creator_scripts.py           | 127 +++++++++++
 4 files changed, 392 insertions(+), 35 deletions(-)
 create mode 100644 nanobot/skills/skill-creator/scripts/quick_validate.py
 create mode 100644 tests/test_skill_creator_scripts.py

diff --git a/nanobot/skills/skill-creator/SKILL.md b/nanobot/skills/skill-creator/SKILL.md
index f4d6e0b..ea53abe 100644
--- a/nanobot/skills/skill-creator/SKILL.md
+++ b/nanobot/skills/skill-creator/SKILL.md
@@ -268,6 +268,8 @@ Skip this step only if the skill being developed already exists, and iteration o
 
 When creating a new skill from scratch, always run the `init_skill.py` script. The script conveniently generates a new template skill directory that automatically includes everything a skill requires, making the skill creation process much more efficient and reliable.
 
+For `nanobot`, custom skills should live under the active workspace `skills/` directory so they can be discovered automatically at runtime (for example, `<workspace>/skills/my-skill/SKILL.md`).
+
 Usage:
 
 ```bash
@@ -277,9 +279,9 @@ scripts/init_skill.py <skill-name> --path <output-directory> [--resources script
 Examples:
 
 ```bash
-scripts/init_skill.py my-skill --path skills/public
-scripts/init_skill.py my-skill --path skills/public --resources scripts,references
-scripts/init_skill.py my-skill --path skills/public --resources scripts --examples
+scripts/init_skill.py my-skill --path ./workspace/skills
+scripts/init_skill.py my-skill --path ./workspace/skills --resources scripts,references
+scripts/init_skill.py my-skill --path ./workspace/skills --resources scripts --examples
 ```
 
 The script:
@@ -326,7 +328,7 @@ Write the YAML frontmatter with `name` and `description`:
   - Include all "when to use" information here - Not in the body. The body is only loaded after triggering, so "When to Use This Skill" sections in the body are not helpful to the agent.
   - Example description for a `docx` skill: "Comprehensive document creation, editing, and analysis with support for tracked changes, comments, formatting preservation, and text extraction. Use when the agent needs to work with professional documents (.docx files) for: (1) Creating new documents, (2) Modifying or editing content, (3) Working with tracked changes, (4) Adding comments, or any other document tasks"
 
-Do not include any other fields in YAML frontmatter.
+Keep frontmatter minimal. In `nanobot`, `metadata` and `always` are also supported when needed (and `scripts/quick_validate.py` additionally accepts `license` and `allowed-tools`), but avoid adding extra fields unless they are actually required.
 
 ##### Body
 
diff --git a/nanobot/skills/skill-creator/scripts/package_skill.py b/nanobot/skills/skill-creator/scripts/package_skill.py
index aa4de89..48fcbbe 100755
--- a/nanobot/skills/skill-creator/scripts/package_skill.py
+++ b/nanobot/skills/skill-creator/scripts/package_skill.py
@@ -3,11 +3,11 @@
 Skill Packager - Creates a distributable .skill file of a skill folder
 
 Usage:
-    python utils/package_skill.py <path/to/skill-folder> [output-directory]
+    python package_skill.py <path/to/skill-folder> [output-directory]
 
 Example:
-    python utils/package_skill.py skills/public/my-skill
-    python utils/package_skill.py skills/public/my-skill ./dist
+    python package_skill.py skills/public/my-skill
+    python package_skill.py skills/public/my-skill ./dist
 """
 
 import sys
@@ -25,6 +25,14 @@ def _is_within(path: Path, root: Path) -> bool:
         return False
 
 
+def _cleanup_partial_archive(skill_filename: Path) -> None:
+    try:
+        if skill_filename.exists():
+            skill_filename.unlink()
+    except OSError:
+        pass
+
+
 def package_skill(skill_path, output_dir=None):
     """
     Package a skill folder into a .skill file.
@@ -74,49 +82,56 @@ def package_skill(skill_path, output_dir=None):
 
     EXCLUDED_DIRS = {".git", ".svn", ".hg", "__pycache__", "node_modules"}
 
+    files_to_package = []
+    resolved_archive = skill_filename.resolve()
+
+    for file_path in skill_path.rglob("*"):
+        # Fail closed on symlinks so the packaged contents are explicit and predictable.
+        if file_path.is_symlink():
+            print(f"[ERROR] Symlink not allowed in packaged skill: {file_path}")
+            _cleanup_partial_archive(skill_filename)
+            return None
+
+        rel_parts = file_path.relative_to(skill_path).parts
+        if any(part in EXCLUDED_DIRS for part in rel_parts):
+            continue
+
+        if file_path.is_file():
+            resolved_file = file_path.resolve()
+            if not _is_within(resolved_file, skill_path):
+                print(f"[ERROR] File escapes skill root: {file_path}")
+                _cleanup_partial_archive(skill_filename)
+                return None
+            # If output lives under skill_path, avoid writing archive into itself.
+            if resolved_file == resolved_archive:
+                print(f"[WARN] Skipping output archive: {file_path}")
+                continue
+            files_to_package.append(file_path)
+
     # Create the .skill file (zip format)
     try:
         with zipfile.ZipFile(skill_filename, "w", zipfile.ZIP_DEFLATED) as zipf:
-            # Walk through the skill directory
-            for file_path in skill_path.rglob("*"):
-                # Security: never follow or package symlinks.
-                if file_path.is_symlink():
-                    print(f"[WARN] Skipping symlink: {file_path}")
-                    continue
-
-                rel_parts = file_path.relative_to(skill_path).parts
-                if any(part in EXCLUDED_DIRS for part in rel_parts):
-                    continue
-
-                if file_path.is_file():
-                    resolved_file = file_path.resolve()
-                    if not _is_within(resolved_file, skill_path):
-                        print(f"[ERROR] File escapes skill root: {file_path}")
-                        return None
-                    # If output lives under skill_path, avoid writing archive into itself.
-                    if resolved_file == skill_filename.resolve():
-                        print(f"[WARN] Skipping output archive: {file_path}")
-                        continue
-
-                    # Calculate the relative path within the zip.
-                    arcname = Path(skill_name) / file_path.relative_to(skill_path)
-                    zipf.write(file_path, arcname)
-                    print(f"  Added: {arcname}")
+            for file_path in files_to_package:
+                # Calculate the relative path within the zip.
+                arcname = Path(skill_name) / file_path.relative_to(skill_path)
+                zipf.write(file_path, arcname)
+                print(f"  Added: {arcname}")
 
         print(f"\n[OK] Successfully packaged skill to: {skill_filename}")
         return skill_filename
 
     except Exception as e:
+        _cleanup_partial_archive(skill_filename)
         print(f"[ERROR] Error creating .skill file: {e}")
         return None
 
 
 def main():
     if len(sys.argv) < 2:
-        print("Usage: python utils/package_skill.py <path/to/skill-folder> [output-directory]")
+        print("Usage: python package_skill.py <path/to/skill-folder> [output-directory]")
         print("\nExample:")
-        print("  python utils/package_skill.py skills/public/my-skill")
-        print("  python utils/package_skill.py skills/public/my-skill ./dist")
+        print("  python package_skill.py skills/public/my-skill")
+        print("  python package_skill.py skills/public/my-skill ./dist")
         sys.exit(1)
 
     skill_path = sys.argv[1]
diff --git a/nanobot/skills/skill-creator/scripts/quick_validate.py b/nanobot/skills/skill-creator/scripts/quick_validate.py
new file mode 100644
index 0000000..03d246d
--- /dev/null
+++ b/nanobot/skills/skill-creator/scripts/quick_validate.py
@@ -0,0 +1,213 @@
+#!/usr/bin/env python3
+"""
+Minimal validator for nanobot skill folders.
+"""
+
+import re
+import sys
+from pathlib import Path
+from typing import Optional
+
+try:
+    import yaml
+except ModuleNotFoundError:
+    yaml = None
+
+MAX_SKILL_NAME_LENGTH = 64
+ALLOWED_FRONTMATTER_KEYS = {
+    "name",
+    "description",
+    "metadata",
+    "always",
+    "license",
+    "allowed-tools",
+}
+ALLOWED_RESOURCE_DIRS = {"scripts", "references", "assets"}
+PLACEHOLDER_MARKERS = ("[todo", "todo:")
+
+
+def _extract_frontmatter(content: str) -> Optional[str]:
+    lines = content.splitlines()
+    if not lines or lines[0].strip() != "---":
+        return None
+    for i in range(1, len(lines)):
+        if lines[i].strip() == "---":
+            return "\n".join(lines[1:i])
+    return None
+
+
+def _parse_simple_frontmatter(frontmatter_text: str) -> Optional[dict[str, str]]:
+    """Fallback parser for simple frontmatter when PyYAML is unavailable."""
+    parsed: dict[str, str] = {}
+    current_key: Optional[str] = None
+    multiline_key: Optional[str] = None
+
+    for raw_line in frontmatter_text.splitlines():
+        stripped = raw_line.strip()
+        if not stripped or stripped.startswith("#"):
+            continue
+
+        is_indented = raw_line[:1].isspace()
+        if is_indented:
+            if current_key is None:
+                return None
+            current_value = parsed[current_key]
+            parsed[current_key] = f"{current_value}\n{stripped}" if current_value else stripped
+            continue
+
+        if ":" not in stripped:
+            return None
+
+        key, value = stripped.split(":", 1)
+        key = key.strip()
+        value = value.strip()
+        if not key:
+            return None
+
+        if value in {"|", ">"}:
+            parsed[key] = ""
+            current_key = key
+            multiline_key = key
+            continue
+
+        if (value.startswith('"') and value.endswith('"')) or (
+            value.startswith("'") and value.endswith("'")
+        ):
+            value = value[1:-1]
+        parsed[key] = value
+        current_key = key
+        multiline_key = None
+
+    if multiline_key is not None and multiline_key not in parsed:
+        return None
+    return parsed
+
+
+def _load_frontmatter(frontmatter_text: str) -> tuple[Optional[dict], Optional[str]]:
+    if yaml is not None:
+        try:
+            frontmatter = yaml.safe_load(frontmatter_text)
+        except yaml.YAMLError as exc:
+            return None, f"Invalid YAML in frontmatter: {exc}"
+        if not isinstance(frontmatter, dict):
+            return None, "Frontmatter must be a YAML dictionary"
+        return frontmatter, None
+
+    frontmatter = _parse_simple_frontmatter(frontmatter_text)
+    if frontmatter is None:
+        return None, "Invalid YAML in frontmatter: unsupported syntax without PyYAML installed"
+    return frontmatter, None
+
+
+def _validate_skill_name(name: str, folder_name: str) -> Optional[str]:
+    if not re.fullmatch(r"[a-z0-9]+(?:-[a-z0-9]+)*", name):
+        return (
+            f"Name '{name}' should be hyphen-case "
+            "(lowercase letters, digits, and single hyphens only)"
+        )
+    if len(name) > MAX_SKILL_NAME_LENGTH:
+        return (
+            f"Name is too long ({len(name)} characters). "
+            f"Maximum is {MAX_SKILL_NAME_LENGTH} characters."
+        )
+    if name != folder_name:
+        return f"Skill name '{name}' must match directory name '{folder_name}'"
+    return None
+
+
+def _validate_description(description: str) -> Optional[str]:
+    trimmed = description.strip()
+    if not trimmed:
+        return "Description cannot be empty"
+    lowered = trimmed.lower()
+    if any(marker in lowered for marker in PLACEHOLDER_MARKERS):
+        return "Description still contains TODO placeholder text"
+    if "<" in trimmed or ">" in trimmed:
+        return "Description cannot contain angle brackets (< or >)"
+    if len(trimmed) > 1024:
+        return f"Description is too long ({len(trimmed)} characters). Maximum is 1024 characters."
+    return None
+
+
+def validate_skill(skill_path):
+    """Validate a skill folder structure and required frontmatter."""
+    skill_path = Path(skill_path).resolve()
+
+    if not skill_path.exists():
+        return False, f"Skill folder not found: {skill_path}"
+    if not skill_path.is_dir():
+        return False, f"Path is not a directory: {skill_path}"
+
+    skill_md = skill_path / "SKILL.md"
+    if not skill_md.exists():
+        return False, "SKILL.md not found"
+
+    try:
+        content = skill_md.read_text(encoding="utf-8")
+    except OSError as exc:
+        return False, f"Could not read SKILL.md: {exc}"
+
+    frontmatter_text = _extract_frontmatter(content)
+    if frontmatter_text is None:
+        return False, "Invalid frontmatter format"
+
+    frontmatter, error = _load_frontmatter(frontmatter_text)
+    if error:
+        return False, error
+
+    unexpected_keys = sorted(set(frontmatter.keys()) - ALLOWED_FRONTMATTER_KEYS)
+    if unexpected_keys:
+        allowed = ", ".join(sorted(ALLOWED_FRONTMATTER_KEYS))
+        unexpected = ", ".join(unexpected_keys)
+        return (
+            False,
+            f"Unexpected key(s) in SKILL.md frontmatter: {unexpected}. Allowed properties are: {allowed}",
+        )
+
+    if "name" not in frontmatter:
+        return False, "Missing 'name' in frontmatter"
+    if "description" not in frontmatter:
+        return False, "Missing 'description' in frontmatter"
+
+    name = frontmatter["name"]
+    if not isinstance(name, str):
+        return False, f"Name must be a string, got {type(name).__name__}"
+    name_error = _validate_skill_name(name.strip(), skill_path.name)
+    if name_error:
+        return False, name_error
+
+    description = frontmatter["description"]
+    if not isinstance(description, str):
+        return False, f"Description must be a string, got {type(description).__name__}"
+    description_error = _validate_description(description)
+    if description_error:
+        return False, description_error
+
+    always = frontmatter.get("always")
+    if always is not None and not isinstance(always, bool):
+        return False, f"'always' must be a boolean, got {type(always).__name__}"
+
+    for child in skill_path.iterdir():
+        if child.name == "SKILL.md":
+            continue
+        if child.is_dir() and child.name in ALLOWED_RESOURCE_DIRS:
+            continue
+        if child.is_symlink():
+            continue
+        return (
+            False,
+            f"Unexpected file or directory in skill root: {child.name}. "
+            "Only SKILL.md, scripts/, references/, and assets/ are allowed.",
+        )
+
+    return True, "Skill is valid!"
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: python quick_validate.py <skill_directory>")
+        sys.exit(1)
+
+    valid, message = validate_skill(sys.argv[1])
+    print(message)
+    sys.exit(0 if valid else 1)
diff --git a/tests/test_skill_creator_scripts.py b/tests/test_skill_creator_scripts.py
new file mode 100644
index 0000000..4207c6f
--- /dev/null
+++ b/tests/test_skill_creator_scripts.py
@@ -0,0 +1,127 @@
+import importlib
+import shutil
+import sys
+import zipfile
+from pathlib import Path
+
+
+SCRIPT_DIR = Path("nanobot/skills/skill-creator/scripts").resolve()
+if str(SCRIPT_DIR) not in sys.path:
+    sys.path.insert(0, str(SCRIPT_DIR))
+
+init_skill = importlib.import_module("init_skill")
+package_skill = importlib.import_module("package_skill")
+quick_validate = importlib.import_module("quick_validate")
+
+
+def test_init_skill_creates_expected_files(tmp_path: Path) -> None:
+    skill_dir = init_skill.init_skill(
+        "demo-skill",
+        tmp_path,
+        ["scripts", "references", "assets"],
+        include_examples=True,
+    )
+
+    assert skill_dir == tmp_path / "demo-skill"
+    assert (skill_dir / "SKILL.md").exists()
+    assert (skill_dir / "scripts" / "example.py").exists()
+    assert (skill_dir / "references" / "api_reference.md").exists()
+    assert (skill_dir / "assets" / "example_asset.txt").exists()
+
+
+def test_validate_skill_accepts_existing_skill_creator() -> None:
+    valid, message = quick_validate.validate_skill(
+        Path("nanobot/skills/skill-creator").resolve()
+    )
+
+    assert valid, message
+
+
+def test_validate_skill_rejects_placeholder_description(tmp_path: Path) -> None:
+    skill_dir = tmp_path / "placeholder-skill"
+    skill_dir.mkdir()
+    (skill_dir / "SKILL.md").write_text(
+        "---\n"
+        "name: placeholder-skill\n"
+        'description: "[TODO: fill me in]"\n'
+        "---\n"
+        "# Placeholder\n",
+        encoding="utf-8",
+    )
+
+    valid, message = quick_validate.validate_skill(skill_dir)
+
+    assert not valid
+    assert "TODO placeholder" in message
+
+
+def test_validate_skill_rejects_root_files_outside_allowed_dirs(tmp_path: Path) -> None:
+    skill_dir = tmp_path / "bad-root-skill"
+    skill_dir.mkdir()
+    (skill_dir / "SKILL.md").write_text(
+        "---\n"
+        "name: bad-root-skill\n"
+        "description: Valid description\n"
+        "---\n"
+        "# Skill\n",
+        encoding="utf-8",
+    )
+    (skill_dir / "README.md").write_text("extra\n", encoding="utf-8")
+
+    valid, message = quick_validate.validate_skill(skill_dir)
+
+    assert not valid
+    assert "Unexpected file or directory in skill root" in message
+
+
+def test_package_skill_creates_archive(tmp_path: Path) -> None:
+    skill_dir = tmp_path / "package-me"
+    skill_dir.mkdir()
+    (skill_dir / "SKILL.md").write_text(
+        "---\n"
+        "name: package-me\n"
+        "description: Package this skill.\n"
+        "---\n"
+        "# Skill\n",
+        encoding="utf-8",
+    )
+    scripts_dir = skill_dir / "scripts"
+    scripts_dir.mkdir()
+    (scripts_dir / "helper.py").write_text("print('ok')\n", encoding="utf-8")
+
+    archive_path = package_skill.package_skill(skill_dir, tmp_path / "dist")
+
+    assert archive_path == (tmp_path / "dist" / "package-me.skill")
+    assert archive_path.exists()
+    with zipfile.ZipFile(archive_path, "r") as archive:
+        names = set(archive.namelist())
+    assert "package-me/SKILL.md" in names
+    assert "package-me/scripts/helper.py" in names
+
+
+def test_package_skill_rejects_symlink(tmp_path: Path) -> None:
+    skill_dir = tmp_path / "symlink-skill"
+    skill_dir.mkdir()
+    (skill_dir / "SKILL.md").write_text(
+        "---\n"
+        "name: symlink-skill\n"
+        "description: Reject symlinks during packaging.\n"
+        "---\n"
+        "# Skill\n",
+        encoding="utf-8",
+    )
+    scripts_dir = skill_dir / "scripts"
+    scripts_dir.mkdir()
+    target = tmp_path / "outside.txt"
+    target.write_text("secret\n", encoding="utf-8")
+    link = scripts_dir / "outside.txt"
+
+    try:
+        link.symlink_to(target)
+    except (OSError, NotImplementedError):
+        return
+
+    archive_path = package_skill.package_skill(skill_dir, tmp_path / "dist")
+
+    assert archive_path is None
+    assert not (tmp_path / "dist" / "symlink-skill.skill").exists()

From b0a5435b8720a5968e683ce5aa82a8b16e614452 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Tue, 10 Mar 2026 10:10:37 +0000
Subject: [PATCH 22/53] refactor(llm): share transient retry across agent paths

---
 nanobot/agent/loop.py                    | 29 +-------
 nanobot/agent/memory.py                  |  2 +-
 nanobot/agent/subagent.py                |  2 +-
 nanobot/heartbeat/service.py             |  2 +-
 nanobot/providers/base.py                | 84 ++++++++++++++++++++++
 tests/test_heartbeat_service.py          | 47 +++++++++++-
 tests/test_memory_consolidation_types.py | 50 ++++++++++++-
 tests/test_provider_retry.py             | 92 ++++++++++++++++++++++++
 8 files changed, 274 insertions(+), 34 deletions(-)
 create mode 100644 tests/test_provider_retry.py

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index b67baae..fcbc880 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -159,33 +159,6 @@ class AgentLoop:
                 if hasattr(tool, "set_context"):
                     tool.set_context(channel, chat_id, *([message_id] if name == "message" else []))
 
-    _RETRY_DELAYS = (1, 2, 4)  # seconds — exponential backoff for transient LLM errors
-
-    async def _chat_with_retry(self, **kwargs: Any) -> Any:
-        """Call provider.chat() with retry on transient errors (429, 5xx, network)."""
-        from nanobot.providers.base import LLMResponse
-
-        last_response: LLMResponse | None = None
-        for attempt, delay in enumerate(self._RETRY_DELAYS):
-            response = await self.provider.chat(**kwargs)
-            if response.finish_reason != "error":
-                return response
-            # Check if the error looks transient (rate limit, server error, network)
-            err = (response.content or "").lower()
-            is_transient = any(kw in err for kw in (
-                "429", "rate limit", "500", "502", "503", "504",
-                "overloaded", "timeout", "connection", "server error",
-            ))
-            if not is_transient:
-                return response  # permanent error (400, 401, etc.) — don't retry
-            last_response = response
-            logger.warning("LLM transient error (attempt {}/{}), retrying in {}s: {}",
-                           attempt + 1, len(self._RETRY_DELAYS), delay, err[:120])
-            await asyncio.sleep(delay)
-        # All retries exhausted — make one final attempt
-        response = await self.provider.chat(**kwargs)
-        return response if response.finish_reason != "error" else (last_response or response)
-
     @staticmethod
     def _strip_think(text: str | None) -> str | None:
         """Remove <think>…</think> blocks that some models embed in content."""
@@ -218,7 +191,7 @@ class AgentLoop:
         while iteration < self.max_iterations:
             iteration += 1
 
-            response = await self._chat_with_retry(
+            response = await self.provider.chat_with_retry(
                 messages=messages,
                 tools=self.tools.get_definitions(),
                 model=self.model,
diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py
index 21fe77d..66efec2 100644
--- a/nanobot/agent/memory.py
+++ b/nanobot/agent/memory.py
@@ -111,7 +111,7 @@ class MemoryStore:
 {chr(10).join(lines)}"""
 
         try:
-            response = await provider.chat(
+            response = await provider.chat_with_retry(
                 messages=[
                     {"role": "system", "content": "You are a memory consolidation agent. Call the save_memory tool with your consolidation of the conversation."},
                     {"role": "user", "content": prompt},
diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py
index f2d6ee5..f9eda1f 100644
--- a/nanobot/agent/subagent.py
+++ b/nanobot/agent/subagent.py
@@ -123,7 +123,7 @@ class SubagentManager:
             while iteration < max_iterations:
                 iteration += 1
 
-                response = await self.provider.chat(
+                response = await self.provider.chat_with_retry(
                     messages=messages,
                     tools=tools.get_definitions(),
                     model=self.model,
diff --git a/nanobot/heartbeat/service.py b/nanobot/heartbeat/service.py
index e534017..831ae85 100644
--- a/nanobot/heartbeat/service.py
+++ b/nanobot/heartbeat/service.py
@@ -87,7 +87,7 @@ class HeartbeatService:
 
         Returns (action, tasks) where action is 'skip' or 'run'.
         """
-        response = await self.provider.chat(
+        response = await self.provider.chat_with_retry(
             messages=[
                 {"role": "system", "content": "You are a heartbeat agent. Call the heartbeat tool to report your decision."},
                 {"role": "user", "content": (
diff --git a/nanobot/providers/base.py b/nanobot/providers/base.py
index 0f73544..a3b6c47 100644
--- a/nanobot/providers/base.py
+++ b/nanobot/providers/base.py
@@ -1,9 +1,12 @@
 """Base LLM provider interface."""
 
+import asyncio
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
 from typing import Any
 
+from loguru import logger
+
 
 @dataclass
 class ToolCallRequest:
@@ -37,6 +40,22 @@ class LLMProvider(ABC):
     while maintaining a consistent interface.
     """
 
+    _CHAT_RETRY_DELAYS = (1, 2, 4)
+    _TRANSIENT_ERROR_MARKERS = (
+        "429",
+        "rate limit",
+        "500",
+        "502",
+        "503",
+        "504",
+        "overloaded",
+        "timeout",
+        "timed out",
+        "connection",
+        "server error",
+        "temporarily unavailable",
+    )
+
     def __init__(self, api_key: str | None = None, api_base: str | None = None):
         self.api_key = api_key
         self.api_base = api_base
@@ -126,6 +145,71 @@ class LLMProvider(ABC):
         """
         pass
 
+    @classmethod
+    def _is_transient_error(cls, content: str | None) -> bool:
+        err = (content or "").lower()
+        return any(marker in err for marker in cls._TRANSIENT_ERROR_MARKERS)
+
+    async def chat_with_retry(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]] | None = None,
+        model: str | None = None,
+        max_tokens: int = 4096,
+        temperature: float = 0.7,
+        reasoning_effort: str | None = None,
+    ) -> LLMResponse:
+        """Call chat() with retry on transient provider failures."""
+        for attempt, delay in enumerate(self._CHAT_RETRY_DELAYS, start=1):
+            try:
+                response = await self.chat(
+                    messages=messages,
+                    tools=tools,
+                    model=model,
+                    max_tokens=max_tokens,
+                    temperature=temperature,
+                    reasoning_effort=reasoning_effort,
+                )
+            except asyncio.CancelledError:
+                raise
+            except Exception as exc:
+                response = LLMResponse(
+                    content=f"Error calling LLM: {exc}",
+                    finish_reason="error",
+                )
+
+            if response.finish_reason != "error":
+                return response
+            if not self._is_transient_error(response.content):
+                return response
+
+            err = (response.content or "").lower()
+            logger.warning(
+                "LLM transient error (attempt {}/{}), retrying in {}s: {}",
+                attempt,
+                len(self._CHAT_RETRY_DELAYS),
+                delay,
+                err[:120],
+            )
+            await asyncio.sleep(delay)
+
+        try:
+            return await self.chat(
+                messages=messages,
+                tools=tools,
+                model=model,
+                max_tokens=max_tokens,
+                temperature=temperature,
+                reasoning_effort=reasoning_effort,
+            )
+        except asyncio.CancelledError:
+            raise
+        except Exception as exc:
+            return LLMResponse(
+                content=f"Error calling LLM: {exc}",
+                finish_reason="error",
+            )
+
     @abstractmethod
     def get_default_model(self) -> str:
         """Get the default model for this provider."""
diff --git a/tests/test_heartbeat_service.py b/tests/test_heartbeat_service.py
index c5478af..9ce8912 100644
--- a/tests/test_heartbeat_service.py
+++ b/tests/test_heartbeat_service.py
@@ -3,18 +3,24 @@ import asyncio
 import pytest
 
 from nanobot.heartbeat.service import HeartbeatService
-from nanobot.providers.base import LLMResponse, ToolCallRequest
+from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
 
 
-class DummyProvider:
+class DummyProvider(LLMProvider):
     def __init__(self, responses: list[LLMResponse]):
+        super().__init__()
         self._responses = list(responses)
+        self.calls = 0
 
     async def chat(self, *args, **kwargs) -> LLMResponse:
+        self.calls += 1
         if self._responses:
             return self._responses.pop(0)
         return LLMResponse(content="", tool_calls=[])
 
+    def get_default_model(self) -> str:
+        return "test-model"
+
 
 @pytest.mark.asyncio
 async def test_start_is_idempotent(tmp_path) -> None:
@@ -115,3 +121,40 @@ async def test_trigger_now_returns_none_when_decision_is_skip(tmp_path) -> None:
     )
 
     assert await service.trigger_now() is None
+
+
+@pytest.mark.asyncio
+async def test_decide_retries_transient_error_then_succeeds(tmp_path, monkeypatch) -> None:
+    provider = DummyProvider([
+        LLMResponse(content="429 rate limit", finish_reason="error"),
+        LLMResponse(
+            content="",
+            tool_calls=[
+                ToolCallRequest(
+                    id="hb_1",
+                    name="heartbeat",
+                    arguments={"action": "run", "tasks": "check open tasks"},
+                )
+            ],
+        ),
+    ])
+
+    delays: list[int] = []
+
+    async def _fake_sleep(delay: int) -> None:
+        delays.append(delay)
+
+    monkeypatch.setattr(asyncio, "sleep", _fake_sleep)
+
+    service = HeartbeatService(
+        workspace=tmp_path,
+        provider=provider,
+        model="openai/gpt-4o-mini",
+    )
+
+    action, tasks = await service._decide("heartbeat content")
+
+    assert action == "run"
+    assert tasks == "check open tasks"
+    assert provider.calls == 2
+    assert delays == [1]
diff --git a/tests/test_memory_consolidation_types.py b/tests/test_memory_consolidation_types.py
index ff15584..2605bf7 100644
--- a/tests/test_memory_consolidation_types.py
+++ b/tests/test_memory_consolidation_types.py
@@ -12,7 +12,7 @@ from unittest.mock import AsyncMock, MagicMock
 import pytest
 
 from nanobot.agent.memory import MemoryStore
-from nanobot.providers.base import LLMResponse, ToolCallRequest
+from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
 
 
 def _make_session(message_count: int = 30, memory_window: int = 50):
@@ -43,6 +43,22 @@ def _make_tool_response(history_entry, memory_update):
     )
 
 
+class ScriptedProvider(LLMProvider):
+    def __init__(self, responses: list[LLMResponse]):
+        super().__init__()
+        self._responses = list(responses)
+        self.calls = 0
+
+    async def chat(self, *args, **kwargs) -> LLMResponse:
+        self.calls += 1
+        if self._responses:
+            return self._responses.pop(0)
+        return LLMResponse(content="", tool_calls=[])
+
+    def get_default_model(self) -> str:
+        return "test-model"
+
+
 class TestMemoryConsolidationTypeHandling:
     """Test that consolidation handles various argument types correctly."""
 
@@ -57,6 +73,7 @@ class TestMemoryConsolidationTypeHandling:
                 memory_update="# Memory\nUser likes testing.",
             )
         )
+        provider.chat_with_retry = provider.chat
         session = _make_session(message_count=60)
 
         result = await store.consolidate(session, provider, "test-model", memory_window=50)
@@ -77,6 +94,7 @@ class TestMemoryConsolidationTypeHandling:
                 memory_update={"facts": ["User likes testing"], "topics": ["testing"]},
             )
         )
+        provider.chat_with_retry = provider.chat
         session = _make_session(message_count=60)
 
         result = await store.consolidate(session, provider, "test-model", memory_window=50)
@@ -112,6 +130,7 @@ class TestMemoryConsolidationTypeHandling:
             ],
         )
         provider.chat = AsyncMock(return_value=response)
+        provider.chat_with_retry = provider.chat
         session = _make_session(message_count=60)
 
         result = await store.consolidate(session, provider, "test-model", memory_window=50)
@@ -127,6 +146,7 @@ class TestMemoryConsolidationTypeHandling:
         provider.chat = AsyncMock(
             return_value=LLMResponse(content="I summarized the conversation.", tool_calls=[])
         )
+        provider.chat_with_retry = provider.chat
         session = _make_session(message_count=60)
 
         result = await store.consolidate(session, provider, "test-model", memory_window=50)
@@ -139,6 +159,7 @@ class TestMemoryConsolidationTypeHandling:
         """Consolidation should be a no-op when messages < keep_count."""
         store = MemoryStore(tmp_path)
         provider = AsyncMock()
+        provider.chat_with_retry = provider.chat
         session = _make_session(message_count=10)
 
         result = await store.consolidate(session, provider, "test-model", memory_window=50)
@@ -167,6 +188,7 @@ class TestMemoryConsolidationTypeHandling:
             ],
         )
         provider.chat = AsyncMock(return_value=response)
+        provider.chat_with_retry = provider.chat
         session = _make_session(message_count=60)
 
         result = await store.consolidate(session, provider, "test-model", memory_window=50)
@@ -192,6 +214,7 @@ class TestMemoryConsolidationTypeHandling:
             ],
         )
         provider.chat = AsyncMock(return_value=response)
+        provider.chat_with_retry = provider.chat
         session = _make_session(message_count=60)
 
         result = await store.consolidate(session, provider, "test-model", memory_window=50)
@@ -215,8 +238,33 @@ class TestMemoryConsolidationTypeHandling:
             ],
         )
         provider.chat = AsyncMock(return_value=response)
+        provider.chat_with_retry = provider.chat
         session = _make_session(message_count=60)
 
         result = await store.consolidate(session, provider, "test-model", memory_window=50)
 
         assert result is False
+
+    @pytest.mark.asyncio
+    async def test_retries_transient_error_then_succeeds(self, tmp_path: Path, monkeypatch) -> None:
+        store = MemoryStore(tmp_path)
+        provider = ScriptedProvider([
+            LLMResponse(content="503 server error", finish_reason="error"),
+            _make_tool_response(
+                history_entry="[2026-01-01] User discussed testing.",
+                memory_update="# Memory\nUser likes testing.",
+            ),
+        ])
+        session = _make_session(message_count=60)
+        delays: list[int] = []
+
+        async def _fake_sleep(delay: int) -> None:
+            delays.append(delay)
+
+        monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep)
+
+        result = await store.consolidate(session, provider, "test-model", memory_window=50)
+
+        assert result is True
+        assert provider.calls == 2
+        assert delays == [1]
diff --git a/tests/test_provider_retry.py b/tests/test_provider_retry.py
new file mode 100644
index 0000000..751ecc3
--- /dev/null
+++ b/tests/test_provider_retry.py
@@ -0,0 +1,92 @@
+import asyncio
+
+import pytest
+
+from nanobot.providers.base import LLMProvider, LLMResponse
+
+
+class ScriptedProvider(LLMProvider):
+    def __init__(self, responses):
+        super().__init__()
+        self._responses = list(responses)
+        self.calls = 0
+
+    async def chat(self, *args, **kwargs) -> LLMResponse:
+        self.calls += 1
+        response = self._responses.pop(0)
+        if isinstance(response, BaseException):
+            raise response
+        return response
+
+    def get_default_model(self) -> str:
+        return "test-model"
+
+
+@pytest.mark.asyncio
+async def test_chat_with_retry_retries_transient_error_then_succeeds(monkeypatch) -> None:
+    provider = ScriptedProvider([
+        LLMResponse(content="429 rate limit", finish_reason="error"),
+        LLMResponse(content="ok"),
+    ])
+    delays: list[int] = []
+
+    async def _fake_sleep(delay: int) -> None:
+        delays.append(delay)
+
+    monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep)
+
+    response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}])
+
+    assert response.finish_reason == "stop"
+    assert response.content == "ok"
+    assert provider.calls == 2
+    assert delays == [1]
+
+
+@pytest.mark.asyncio
+async def test_chat_with_retry_does_not_retry_non_transient_error(monkeypatch) -> None:
+    provider = ScriptedProvider([
+        LLMResponse(content="401 unauthorized", finish_reason="error"),
+    ])
+    delays: list[int] = []
+
+    async def _fake_sleep(delay: int) -> None:
+        delays.append(delay)
+
+    monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep)
+
+    response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}])
+
+    assert response.content == "401 unauthorized"
+    assert provider.calls == 1
+    assert delays == []
+
+
+@pytest.mark.asyncio
+async def test_chat_with_retry_returns_final_error_after_retries(monkeypatch) -> None:
+    provider = ScriptedProvider([
+        LLMResponse(content="429 rate limit a", finish_reason="error"),
+        LLMResponse(content="429 rate limit b", finish_reason="error"),
+        LLMResponse(content="429 rate limit c", finish_reason="error"),
+        LLMResponse(content="503 final server error", finish_reason="error"),
+    ])
+    delays: list[int] = []
+
+    async def _fake_sleep(delay: int) -> None:
+        delays.append(delay)
+
+    monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep)
+
+    response = await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}])
+
+    assert response.content == "503 final server error"
+    assert provider.calls == 4
+    assert delays == [1, 2, 4]
+
+
+@pytest.mark.asyncio
+async def test_chat_with_retry_preserves_cancelled_error() -> None:
+    provider = ScriptedProvider([asyncio.CancelledError()])
+
+    with pytest.raises(asyncio.CancelledError):
+        await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}])

From 947ed508ad876bdc227c27fd1b008b163ea830b3 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Tue, 10 Mar 2026 10:13:46 +0000
Subject: [PATCH 23/53] chore: exclude skills from core agent line count

---
 core_agent_lines.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/core_agent_lines.sh b/core_agent_lines.sh
index 3f5301a..df32394 100755
--- a/core_agent_lines.sh
+++ b/core_agent_lines.sh
@@ -15,7 +15,7 @@ root=$(cat nanobot/__init__.py nanobot/__main__.py | wc -l)
 printf "  %-16s %5s lines\n" "(root)" "$root"
 
 echo ""
-total=$(find nanobot -name "*.py" ! -path "*/channels/*" ! -path "*/cli/*" ! -path "*/providers/*" | xargs cat | wc -l)
+total=$(find nanobot -name "*.py" ! -path "*/channels/*" ! -path "*/cli/*" ! -path "*/providers/*" ! -path "*/skills/*" | xargs cat | wc -l)
 echo "  Core total:     $total lines"
 echo ""
-echo "  (excludes: channels/, cli/, providers/)"
+echo "  (excludes: channels/, cli/, providers/, skills/)"

From 808064e26bf03ad1b645b76af2181d3356d35e47 Mon Sep 17 00:00:00 2001
From: Nikolas de Hor <nikolasdehor79@gmail.com>
Date: Tue, 10 Mar 2026 13:45:05 -0300
Subject: [PATCH 24/53] fix: detect tilde paths in restrictToWorkspace shell
 guard

_extract_absolute_paths() only matched paths starting with / or drive
letters, missing ~ paths that expand to the home directory. This
allowed agents to bypass restrictToWorkspace by using commands like
cat ~/.nanobot/config.json to access files outside the workspace.

Add a tilde-path extraction regex and call expanduser() before
resolving. Also switch from the manual parent-chain check to
is_relative_to() for more robust path-containment validation.

Fixes #1817
---
 nanobot/agent/tools/shell.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/nanobot/agent/tools/shell.py b/nanobot/agent/tools/shell.py
index ce19920..b4a4044 100644
--- a/nanobot/agent/tools/shell.py
+++ b/nanobot/agent/tools/shell.py
@@ -143,10 +143,10 @@ class ExecTool(Tool):
 
             for raw in self._extract_absolute_paths(cmd):
                 try:
-                    p = Path(raw.strip()).resolve()
+                    p = Path(raw.strip()).expanduser().resolve()
                 except Exception:
                     continue
-                if p.is_absolute() and cwd_path not in p.parents and p != cwd_path:
+                if not p.is_relative_to(cwd_path):
                     return "Error: Command blocked by safety guard (path outside working dir)"
 
         return None
@@ -155,4 +155,5 @@ class ExecTool(Tool):
     def _extract_absolute_paths(command: str) -> list[str]:
         win_paths = re.findall(r"[A-Za-z]:\\[^\s\"'|><;]+", command)   # Windows: C:\...
         posix_paths = re.findall(r"(?:^|[\s|>])(/[^\s\"'>]+)", command) # POSIX: /absolute only
-        return win_paths + posix_paths
+        tilde_paths = re.findall(r"(?:^|[\s|>])(~[^\s\"'>]*)", command) # Tilde: ~/...
+        return win_paths + posix_paths + tilde_paths

From 2ffeb9295bdb4a5ef308498f60f45b2448ab48d2 Mon Sep 17 00:00:00 2001
From: lailoo <ll1042668699@gmail.com>
Date: Wed, 11 Mar 2026 00:47:09 +0800
Subject: [PATCH 25/53] fix(subagent): preserve reasoning_content in assistant
 messages

Subagent's _run_subagent() was dropping reasoning_content and
thinking_blocks when building assistant messages for the conversation
history. Providers like DeepSeek Reasoner require reasoning_content on
every assistant message when thinking mode is active, so omitting it
caused a 400 BadRequestError on the second LLM round-trip.

Align with the main AgentLoop which already preserves these fields via
ContextBuilder.add_assistant_message().

Closes #1834
---
 nanobot/agent/subagent.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py
index f9eda1f..308e67d 100644
--- a/nanobot/agent/subagent.py
+++ b/nanobot/agent/subagent.py
@@ -145,11 +145,19 @@ class SubagentManager:
                         }
                         for tc in response.tool_calls
                     ]
-                    messages.append({
+                    assistant_msg: dict[str, Any] = {
                         "role": "assistant",
                         "content": response.content or "",
                         "tool_calls": tool_call_dicts,
-                    })
+                    }
+                    # Preserve reasoning_content for providers that require it
+                    # (e.g. Deepseek Reasoner mandates this field on every
+                    # assistant message when thinking mode is active).
+                    if response.reasoning_content is not None:
+                        assistant_msg["reasoning_content"] = response.reasoning_content
+                    if response.thinking_blocks:
+                        assistant_msg["thinking_blocks"] = response.thinking_blocks
+                    messages.append(assistant_msg)
 
                     # Execute tools
                     for tool_call in response.tool_calls:

From 62ccda43b980d53c5ac7a79adf8edf43294f1fdb Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Tue, 10 Mar 2026 19:55:06 +0000
Subject: [PATCH 26/53] refactor(memory): switch consolidation to token-based
 context windows

Move consolidation policy into MemoryConsolidator, keep backward compatibility for legacy config, and compress history by token budget instead of message count.
---
 nanobot/agent/loop.py                    | 544 ++---------------------
 nanobot/agent/memory.py                  | 243 +++++++---
 nanobot/cli/commands.py                  |  26 +-
 nanobot/config/schema.py                 |  32 +-
 nanobot/session/manager.py               |  20 +-
 nanobot/utils/helpers.py                 |  85 ++++
 pyproject.toml                           |   1 +
 tests/test_commands.py                   |  33 ++
 tests/test_config_migration.py           |  88 ++++
 tests/test_consolidate_offset.py         | 297 ++-----------
 tests/test_loop_consolidation_tokens.py  | 190 ++++++++
 tests/test_memory_consolidation_types.py |  51 +--
 tests/test_message_tool_suppress.py      |  10 +-
 13 files changed, 709 insertions(+), 911 deletions(-)
 create mode 100644 tests/test_config_migration.py
 create mode 100644 tests/test_loop_consolidation_tokens.py

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index ba35a23..8605a09 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -11,18 +11,12 @@ from typing import TYPE_CHECKING, Any, Awaitable, Callable
 
 from loguru import logger
 
-try:
-    import tiktoken  # type: ignore
-except Exception:  # pragma: no cover - optional dependency
-    tiktoken = None
-
 from nanobot.agent.context import ContextBuilder
+from nanobot.agent.memory import MemoryConsolidator
 from nanobot.agent.subagent import SubagentManager
 from nanobot.agent.tools.cron import CronTool
 from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, ReadFileTool, WriteFileTool
-from nanobot.agent.tools.huggingface import HuggingFaceModelSearchTool
 from nanobot.agent.tools.message import MessageTool
-from nanobot.agent.tools.model_config import ValidateDeployJSONTool, ValidateUsageYAMLTool
 from nanobot.agent.tools.registry import ToolRegistry
 from nanobot.agent.tools.shell import ExecTool
 from nanobot.agent.tools.spawn import SpawnTool
@@ -60,11 +54,8 @@ class AgentLoop:
         max_iterations: int = 40,
         temperature: float = 0.1,
         max_tokens: int = 4096,
-        memory_window: int | None = None,  # backward-compat only (unused)
         reasoning_effort: str | None = None,
-        max_tokens_input: int = 128_000,
-        compression_start_ratio: float = 0.7,
-        compression_target_ratio: float = 0.4,
+        context_window_tokens: int = 65_536,
         brave_api_key: str | None = None,
         web_proxy: str | None = None,
         exec_config: ExecToolConfig | None = None,
@@ -82,18 +73,9 @@ class AgentLoop:
         self.model = model or provider.get_default_model()
         self.max_iterations = max_iterations
         self.temperature = temperature
-        # max_tokens: per-call output token cap (maxTokensOutput in config)
         self.max_tokens = max_tokens
-        # Keep legacy attribute for older call sites/tests; compression no longer uses it.
-        self.memory_window = memory_window
         self.reasoning_effort = reasoning_effort
-        # max_tokens_input: model native context window (maxTokensInput in config)
-        self.max_tokens_input = max_tokens_input
-        # Token-based compression watermarks (fractions of available input budget)
-        self.compression_start_ratio = compression_start_ratio
-        self.compression_target_ratio = compression_target_ratio
-        # Reserve tokens for safety margin
-        self._reserve_tokens = 1000
+        self.context_window_tokens = context_window_tokens
         self.brave_api_key = brave_api_key
         self.web_proxy = web_proxy
         self.exec_config = exec_config or ExecToolConfig()
@@ -123,382 +105,23 @@ class AgentLoop:
         self._mcp_connected = False
         self._mcp_connecting = False
         self._active_tasks: dict[str, list[asyncio.Task]] = {}  # session_key -> tasks
-        self._compression_tasks: dict[str, asyncio.Task] = {}  # session_key -> task
-        self._last_turn_prompt_tokens: int = 0
-        self._last_turn_prompt_source: str = "none"
         self._processing_lock = asyncio.Lock()
+        self.memory_consolidator = MemoryConsolidator(
+            workspace=workspace,
+            provider=provider,
+            model=self.model,
+            sessions=self.sessions,
+            context_window_tokens=context_window_tokens,
+            build_messages=self.context.build_messages,
+            get_tool_definitions=self.tools.get_definitions,
+        )
         self._register_default_tools()
 
-    @staticmethod
-    def _estimate_prompt_tokens(
-        messages: list[dict[str, Any]],
-        tools: list[dict[str, Any]] | None = None,
-    ) -> int:
-        """Estimate prompt tokens with tiktoken (fallback only)."""
-        if tiktoken is None:
-            return 0
-
-        try:
-            enc = tiktoken.get_encoding("cl100k_base")
-            parts: list[str] = []
-            for msg in messages:
-                content = msg.get("content")
-                if isinstance(content, str):
-                    parts.append(content)
-                elif isinstance(content, list):
-                    for part in content:
-                        if isinstance(part, dict) and part.get("type") == "text":
-                            txt = part.get("text", "")
-                            if txt:
-                                parts.append(txt)
-            if tools:
-                parts.append(json.dumps(tools, ensure_ascii=False))
-            return len(enc.encode("\n".join(parts)))
-        except Exception:
-            return 0
-
-    def _estimate_prompt_tokens_chain(
-        self,
-        messages: list[dict[str, Any]],
-        tools: list[dict[str, Any]] | None = None,
-    ) -> tuple[int, str]:
-        """Unified prompt-token estimation: provider counter -> tiktoken."""
-        provider_counter = getattr(self.provider, "estimate_prompt_tokens", None)
-        if callable(provider_counter):
-            try:
-                tokens, source = provider_counter(messages, tools, self.model)
-                if isinstance(tokens, (int, float)) and tokens > 0:
-                    return int(tokens), str(source or "provider_counter")
-            except Exception:
-                logger.debug("Provider token counter failed; fallback to tiktoken")
-
-        estimated = self._estimate_prompt_tokens(messages, tools)
-        if estimated > 0:
-            return int(estimated), "tiktoken"
-        return 0, "none"
-
-    @staticmethod
-    def _estimate_completion_tokens(content: str) -> int:
-        """Estimate completion tokens with tiktoken (fallback only)."""
-        if tiktoken is None:
-            return 0
-        try:
-            enc = tiktoken.get_encoding("cl100k_base")
-            return len(enc.encode(content or ""))
-        except Exception:
-            return 0
-
-    def _get_compressed_until(self, session: Session) -> int:
-        """Read/normalize compressed boundary and migrate old metadata format."""
-        raw = session.metadata.get("_compressed_until", 0)
-        try:
-            compressed_until = int(raw)
-        except (TypeError, ValueError):
-            compressed_until = 0
-
-        if compressed_until <= 0:
-            ranges = session.metadata.get("_compressed_ranges")
-            if isinstance(ranges, list):
-                inferred = 0
-                for item in ranges:
-                    if not isinstance(item, (list, tuple)) or len(item) != 2:
-                        continue
-                    try:
-                        inferred = max(inferred, int(item[1]))
-                    except (TypeError, ValueError):
-                        continue
-                compressed_until = inferred
-
-        compressed_until = max(0, min(compressed_until, len(session.messages)))
-        session.metadata["_compressed_until"] = compressed_until
-        # 兼容旧版本：一旦迁移出连续边界，就可以清理旧字段
-        session.metadata.pop("_compressed_ranges", None)
-        # 注意：不要删除 _cumulative_tokens，压缩逻辑需要它来跟踪累积 token 计数
-        return compressed_until
-
-    def _set_compressed_until(self, session: Session, idx: int) -> None:
-        """Persist a contiguous compressed boundary."""
-        session.metadata["_compressed_until"] = max(0, min(int(idx), len(session.messages)))
-        session.metadata.pop("_compressed_ranges", None)
-        # 注意：不要删除 _cumulative_tokens，压缩逻辑需要它来跟踪累积 token 计数
-
-    @staticmethod
-    def _estimate_message_tokens(message: dict[str, Any]) -> int:
-        """Rough token estimate for a single persisted message."""
-        content = message.get("content")
-        parts: list[str] = []
-        if isinstance(content, str):
-            parts.append(content)
-        elif isinstance(content, list):
-            for part in content:
-                if isinstance(part, dict) and part.get("type") == "text":
-                    txt = part.get("text", "")
-                    if txt:
-                        parts.append(txt)
-                else:
-                    parts.append(json.dumps(part, ensure_ascii=False))
-        elif content is not None:
-            parts.append(json.dumps(content, ensure_ascii=False))
-
-        for key in ("name", "tool_call_id"):
-            val = message.get(key)
-            if isinstance(val, str) and val:
-                parts.append(val)
-        if message.get("tool_calls"):
-            parts.append(json.dumps(message["tool_calls"], ensure_ascii=False))
-
-        payload = "\n".join(parts)
-        if not payload:
-            return 1
-        if tiktoken is not None:
-            try:
-                enc = tiktoken.get_encoding("cl100k_base")
-                return max(1, len(enc.encode(payload)))
-            except Exception:
-                pass
-        return max(1, len(payload) // 4)
-
-    def _pick_compression_chunk_by_tokens(
-        self,
-        session: Session,
-        reduction_tokens: int,
-        *,
-        tail_keep: int = 12,
-    ) -> tuple[int, int, int] | None:
-        """
-        Pick one contiguous old chunk so its estimated size is roughly enough
-        to reduce `reduction_tokens`.
-        """
-        messages = session.messages
-        start = self._get_compressed_until(session)
-        if len(messages) - start <= tail_keep + 2:
-            return None
-
-        end_limit = len(messages) - tail_keep
-        if end_limit - start < 2:
-            return None
-
-        target = max(1, reduction_tokens)
-        end = start
-        collected = 0
-        while end < end_limit and collected < target:
-            collected += self._estimate_message_tokens(messages[end])
-            end += 1
-
-        if end - start < 2:
-            end = min(end_limit, start + 2)
-            collected = sum(self._estimate_message_tokens(m) for m in messages[start:end])
-        if end - start < 2:
-            return None
-        return start, end, collected
-
-    def _estimate_session_prompt_tokens(self, session: Session) -> tuple[int, str]:
-        """
-        Estimate current full prompt tokens for this session view
-        (system + compressed history view + runtime/user placeholder + tools).
-        """
-        history = self._build_compressed_history_view(session)
-        channel, chat_id = (session.key.split(":", 1) if ":" in session.key else (None, None))
-        probe_messages = self.context.build_messages(
-            history=history,
-            current_message="[token-probe]",
-            channel=channel,
-            chat_id=chat_id,
-        )
-        return self._estimate_prompt_tokens_chain(probe_messages, self.tools.get_definitions())
-
-    async def _maybe_compress_history(
-        self,
-        session: Session,
-    ) -> None:
-        """
-        End-of-turn policy:
-        - Estimate current prompt usage from persisted session view.
-        - If above start ratio, perform one best-effort compression chunk.
-        """
-        if not session.messages:
-            self._set_compressed_until(session, 0)
-            return
-
-        budget = max(1, self.max_tokens_input - self.max_tokens - self._reserve_tokens)
-        start_threshold = int(budget * self.compression_start_ratio)
-        target_threshold = int(budget * self.compression_target_ratio)
-        if target_threshold >= start_threshold:
-            target_threshold = max(0, start_threshold - 1)
-
-        # Prefer provider usage prompt tokens from the turn-ending call.
-        # If unavailable, fall back to estimator chain.
-        raw_prompt_tokens = session.metadata.get("_last_prompt_tokens")
-        if isinstance(raw_prompt_tokens, (int, float)) and raw_prompt_tokens > 0:
-            current_tokens = int(raw_prompt_tokens)
-            token_source = str(session.metadata.get("_last_prompt_source") or "usage_prompt")
-        else:
-            current_tokens, token_source = self._estimate_session_prompt_tokens(session)
-
-        current_ratio = current_tokens / budget if budget else 0.0
-        if current_tokens <= 0:
-            logger.debug("Compression skip {}: token estimate unavailable", session.key)
-            return
-        if current_tokens < start_threshold:
-            logger.debug(
-                "Compression idle {}: {}/{} ({:.1%}) via {}",
-                session.key,
-                current_tokens,
-                budget,
-                current_ratio,
-                token_source,
-            )
-            return
-        logger.info(
-            "Compression trigger {}: {}/{} ({:.1%}) via {}",
-            session.key,
-            current_tokens,
-            budget,
-            current_ratio,
-            token_source,
-        )
-
-        reduction_by_target = max(0, current_tokens - target_threshold)
-        reduction_by_delta = max(1, start_threshold - target_threshold)
-        reduction_need = max(reduction_by_target, reduction_by_delta)
-
-        chunk_range = self._pick_compression_chunk_by_tokens(session, reduction_need, tail_keep=10)
-        if chunk_range is None:
-            logger.info("Compression skipped for {}: no compressible chunk", session.key)
-            return
-
-        start_idx, end_idx, estimated_chunk_tokens = chunk_range
-        chunk = session.messages[start_idx:end_idx]
-        if len(chunk) < 2:
-            return
-
-        logger.info(
-            "Compression chunk {}: msgs {}-{} (count={}, est~{}, need~{})",
-            session.key,
-            start_idx,
-            end_idx - 1,
-            len(chunk),
-            estimated_chunk_tokens,
-            reduction_need,
-        )
-        success, _ = await self.context.memory.consolidate_chunk(
-            chunk,
-            self.provider,
-            self.model,
-        )
-        if not success:
-            logger.warning("Compression aborted for {}: consolidation failed", session.key)
-            return
-
-        self._set_compressed_until(session, end_idx)
-        self.sessions.save(session)
-
-        after_tokens, after_source = self._estimate_session_prompt_tokens(session)
-        after_ratio = after_tokens / budget if budget else 0.0
-        reduced = max(0, current_tokens - after_tokens)
-        reduced_ratio = (reduced / current_tokens) if current_tokens > 0 else 0.0
-        logger.info(
-            "Compression done {}: {}/{} ({:.1%}) via {}, reduced={} ({:.1%})",
-            session.key,
-            after_tokens,
-            budget,
-            after_ratio,
-            after_source,
-            reduced,
-            reduced_ratio,
-        )
-
-    def _schedule_background_compression(self, session_key: str) -> None:
-        """Schedule best-effort background compression for a session."""
-        existing = self._compression_tasks.get(session_key)
-        if existing is not None and not existing.done():
-            return
-
-        async def _runner() -> None:
-            session = self.sessions.get_or_create(session_key)
-            try:
-                await self._maybe_compress_history(session)
-            except Exception:
-                logger.exception("Background compression failed for {}", session_key)
-
-        task = asyncio.create_task(_runner())
-        self._compression_tasks[session_key] = task
-
-        def _cleanup(t: asyncio.Task) -> None:
-            cur = self._compression_tasks.get(session_key)
-            if cur is t:
-                self._compression_tasks.pop(session_key, None)
-            try:
-                t.result()
-            except BaseException:
-                pass
-
-        task.add_done_callback(_cleanup)
-
-    async def wait_for_background_compression(self, timeout_s: float | None = None) -> None:
-        """Wait for currently scheduled compression tasks."""
-        pending = [t for t in self._compression_tasks.values() if not t.done()]
-        if not pending:
-            return
-
-        logger.info("Waiting for {} background compression task(s)", len(pending))
-        waiter = asyncio.gather(*pending, return_exceptions=True)
-        if timeout_s is None:
-            await waiter
-            return
-
-        try:
-            await asyncio.wait_for(waiter, timeout=timeout_s)
-        except asyncio.TimeoutError:
-            logger.warning(
-                "Background compression wait timed out after {}s ({} task(s) still running)",
-                timeout_s,
-                len([t for t in self._compression_tasks.values() if not t.done()]),
-            )
-
-    def _build_compressed_history_view(
-        self,
-        session: Session,
-    ) -> list[dict]:
-        """Build non-destructive history view using the compressed boundary."""
-        compressed_until = self._get_compressed_until(session)
-        if compressed_until <= 0:
-            return session.get_history(max_messages=0)
-
-        notice_msg: dict[str, Any] = {
-            "role": "assistant",
-            "content": (
-                "As your assistant, I have compressed earlier context. "
-                "If you need details, please check memory/HISTORY.md."
-            ),
-        }
-
-        tail: list[dict[str, Any]] = []
-        for msg in session.messages[compressed_until:]:
-            entry: dict[str, Any] = {"role": msg["role"], "content": msg.get("content", "")}
-            for k in ("tool_calls", "tool_call_id", "name"):
-                if k in msg:
-                    entry[k] = msg[k]
-            tail.append(entry)
-
-        # Drop leading non-user entries from tail to avoid orphan tool blocks.
-        for i, m in enumerate(tail):
-            if m.get("role") == "user":
-                tail = tail[i:]
-                break
-        else:
-            tail = []
-
-        return [notice_msg, *tail]
-
     def _register_default_tools(self) -> None:
         """Register the default set of tools."""
         allowed_dir = self.workspace if self.restrict_to_workspace else None
         for cls in (ReadFileTool, WriteFileTool, EditFileTool, ListDirTool):
             self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir))
-        self.tools.register(ValidateDeployJSONTool())
-        self.tools.register(ValidateUsageYAMLTool())
-        self.tools.register(HuggingFaceModelSearchTool())
         self.tools.register(ExecTool(
             working_dir=str(self.workspace),
             timeout=self.exec_config.timeout,
@@ -563,24 +186,12 @@ class AgentLoop:
         self,
         initial_messages: list[dict],
         on_progress: Callable[..., Awaitable[None]] | None = None,
-    ) -> tuple[str | None, list[str], list[dict], int, str]:
-        """
-        Run the agent iteration loop.
-
-        Returns:
-            (final_content, tools_used, messages, total_tokens_this_turn, token_source)
-            total_tokens_this_turn: total tokens (prompt + completion) for this turn
-            token_source: provider_total / provider_sum / provider_prompt /
-                          provider_counter+tiktoken_completion / tiktoken / none
-        """
+    ) -> tuple[str | None, list[str], list[dict]]:
+        """Run the agent iteration loop."""
         messages = initial_messages
         iteration = 0
         final_content = None
         tools_used: list[str] = []
-        total_tokens_this_turn = 0
-        token_source = "none"
-        self._last_turn_prompt_tokens = 0
-        self._last_turn_prompt_source = "none"
 
         while iteration < self.max_iterations:
             iteration += 1
@@ -596,63 +207,6 @@ class AgentLoop:
                 reasoning_effort=self.reasoning_effort,
             )
 
-            # Prefer provider usage from the turn-ending model call; fallback to tiktoken.
-            # Calculate total tokens (prompt + completion) for this turn.
-            usage = response.usage or {}
-            t_tokens = usage.get("total_tokens")
-            p_tokens = usage.get("prompt_tokens")
-            c_tokens = usage.get("completion_tokens")
-            
-            if isinstance(t_tokens, (int, float)) and t_tokens > 0:
-                total_tokens_this_turn = int(t_tokens)
-                token_source = "provider_total"
-                if isinstance(p_tokens, (int, float)) and p_tokens > 0:
-                    self._last_turn_prompt_tokens = int(p_tokens)
-                    self._last_turn_prompt_source = "usage_prompt"
-                elif isinstance(c_tokens, (int, float)):
-                    prompt_derived = int(t_tokens) - int(c_tokens)
-                    if prompt_derived > 0:
-                        self._last_turn_prompt_tokens = prompt_derived
-                        self._last_turn_prompt_source = "usage_total_minus_completion"
-            elif isinstance(p_tokens, (int, float)) and isinstance(c_tokens, (int, float)):
-                # If we have both prompt and completion tokens, sum them
-                total_tokens_this_turn = int(p_tokens) + int(c_tokens)
-                token_source = "provider_sum"
-                if p_tokens > 0:
-                    self._last_turn_prompt_tokens = int(p_tokens)
-                    self._last_turn_prompt_source = "usage_prompt"
-            elif isinstance(p_tokens, (int, float)) and p_tokens > 0:
-                # Fallback: use prompt tokens only (completion might be 0 for tool calls)
-                total_tokens_this_turn = int(p_tokens)
-                token_source = "provider_prompt"
-                self._last_turn_prompt_tokens = int(p_tokens)
-                self._last_turn_prompt_source = "usage_prompt"
-            else:
-                # Estimate with unified chain (provider counter -> tiktoken), plus completion tiktoken.
-                estimated_prompt, prompt_source = self._estimate_prompt_tokens_chain(messages, tool_defs)
-                estimated_completion = self._estimate_completion_tokens(response.content or "")
-                total_tokens_this_turn = estimated_prompt + estimated_completion
-                if estimated_prompt > 0:
-                    self._last_turn_prompt_tokens = int(estimated_prompt)
-                    self._last_turn_prompt_source = str(prompt_source or "tiktoken")
-                if total_tokens_this_turn > 0:
-                    token_source = (
-                        "tiktoken"
-                        if prompt_source == "tiktoken"
-                        else f"{prompt_source}+tiktoken_completion"
-                    )
-                if total_tokens_this_turn <= 0:
-                    total_tokens_this_turn = 0
-                    token_source = "none"
-
-            logger.debug(
-                "Turn token usage: source={}, total={}, prompt={}, completion={}",
-                token_source,
-                total_tokens_this_turn,
-                p_tokens if isinstance(p_tokens, (int, float)) else None,
-                c_tokens if isinstance(c_tokens, (int, float)) else None,
-            )
-
             if response.has_tool_calls:
                 if on_progress:
                     thought = self._strip_think(response.content)
@@ -707,7 +261,7 @@ class AgentLoop:
                 "without completing the task. You can try breaking the task into smaller steps."
             )
 
-        return final_content, tools_used, messages, total_tokens_this_turn, token_source
+        return final_content, tools_used, messages
 
     async def run(self) -> None:
         """Run the agent loop, dispatching messages as tasks to stay responsive to /stop."""
@@ -732,9 +286,6 @@ class AgentLoop:
         """Cancel all active tasks and subagents for the session."""
         tasks = self._active_tasks.pop(msg.session_key, [])
         cancelled = sum(1 for t in tasks if not t.done() and t.cancel())
-        comp = self._compression_tasks.get(msg.session_key)
-        if comp is not None and not comp.done() and comp.cancel():
-            cancelled += 1
         for t in tasks:
             try:
                 await t
@@ -781,9 +332,6 @@ class AgentLoop:
     def stop(self) -> None:
         """Stop the agent loop."""
         self._running = False
-        for task in list(self._compression_tasks.values()):
-            if not task.done():
-                task.cancel()
         logger.info("Agent loop stopping")
 
     async def _process_message(
@@ -800,22 +348,17 @@ class AgentLoop:
             logger.info("Processing system message from {}", msg.sender_id)
             key = f"{channel}:{chat_id}"
             session = self.sessions.get_or_create(key)
+            await self.memory_consolidator.maybe_consolidate_by_tokens(session)
             self._set_tool_context(channel, chat_id, msg.metadata.get("message_id"))
-            history = self._build_compressed_history_view(session)
+            history = session.get_history(max_messages=0)
             messages = self.context.build_messages(
                 history=history,
                 current_message=msg.content, channel=channel, chat_id=chat_id,
             )
-            final_content, _, all_msgs, _, _ = await self._run_agent_loop(messages)
-            if self._last_turn_prompt_tokens > 0:
-                session.metadata["_last_prompt_tokens"] = self._last_turn_prompt_tokens
-                session.metadata["_last_prompt_source"] = self._last_turn_prompt_source
-            else:
-                session.metadata.pop("_last_prompt_tokens", None)
-                session.metadata.pop("_last_prompt_source", None)
+            final_content, _, all_msgs = await self._run_agent_loop(messages)
             self._save_turn(session, all_msgs, 1 + len(history))
             self.sessions.save(session)
-            self._schedule_background_compression(session.key)
+            await self.memory_consolidator.maybe_consolidate_by_tokens(session)
             return OutboundMessage(channel=channel, chat_id=chat_id,
                                   content=final_content or "Background task completed.")
 
@@ -829,19 +372,12 @@ class AgentLoop:
         cmd = msg.content.strip().lower()
         if cmd == "/new":
             try:
-                # 在清空会话前，将当前完整对话做一次归档压缩到 MEMORY/HISTORY 中
-                if session.messages:
-                    ok, _ = await self.context.memory.consolidate_chunk(
-                        session.messages,
-                        self.provider,
-                        self.model,
+                if not await self.memory_consolidator.archive_unconsolidated(session):
+                    return OutboundMessage(
+                        channel=msg.channel,
+                        chat_id=msg.chat_id,
+                        content="Memory archival failed, session not cleared. Please try again.",
                     )
-                    if not ok:
-                        return OutboundMessage(
-                            channel=msg.channel,
-                            chat_id=msg.chat_id,
-                            content="Memory archival failed, session not cleared. Please try again.",
-                        )
             except Exception:
                 logger.exception("/new archival failed for {}", session.key)
                 return OutboundMessage(
@@ -859,23 +395,20 @@ class AgentLoop:
             return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id,
                                   content="🐈 nanobot commands:\n/new — Start a new conversation\n/stop — Stop the current task\n/help — Show available commands")
 
+        await self.memory_consolidator.maybe_consolidate_by_tokens(session)
+
         self._set_tool_context(msg.channel, msg.chat_id, msg.metadata.get("message_id"))
         if message_tool := self.tools.get("message"):
             if isinstance(message_tool, MessageTool):
                 message_tool.start_turn()
 
-        # 正常对话：使用压缩后的历史视图（压缩在回合结束后进行）
-        history = self._build_compressed_history_view(session)
+        history = session.get_history(max_messages=0)
         initial_messages = self.context.build_messages(
             history=history,
             current_message=msg.content,
             media=msg.media if msg.media else None,
             channel=msg.channel, chat_id=msg.chat_id,
         )
-        # Add [CRON JOB] identifier for cron sessions (session_key starts with "cron:")
-        if session_key and session_key.startswith("cron:"):
-            if initial_messages and initial_messages[0].get("role") == "system":
-                initial_messages[0]["content"] = f"[CRON JOB] {initial_messages[0]['content']}"
 
         async def _bus_progress(content: str, *, tool_hint: bool = False) -> None:
             meta = dict(msg.metadata or {})
@@ -885,23 +418,16 @@ class AgentLoop:
                 channel=msg.channel, chat_id=msg.chat_id, content=content, metadata=meta,
             ))
 
-        final_content, _, all_msgs, total_tokens_this_turn, token_source = await self._run_agent_loop(
+        final_content, _, all_msgs = await self._run_agent_loop(
             initial_messages, on_progress=on_progress or _bus_progress,
         )
 
         if final_content is None:
             final_content = "I've completed processing but have no response to give."
 
-        if self._last_turn_prompt_tokens > 0:
-            session.metadata["_last_prompt_tokens"] = self._last_turn_prompt_tokens
-            session.metadata["_last_prompt_source"] = self._last_turn_prompt_source
-        else:
-            session.metadata.pop("_last_prompt_tokens", None)
-            session.metadata.pop("_last_prompt_source", None)
-
-        self._save_turn(session, all_msgs, 1 + len(history), total_tokens_this_turn)
+        self._save_turn(session, all_msgs, 1 + len(history))
         self.sessions.save(session)
-        self._schedule_background_compression(session.key)
+        await self.memory_consolidator.maybe_consolidate_by_tokens(session)
 
         if (mt := self.tools.get("message")) and isinstance(mt, MessageTool) and mt._sent_in_turn:
             return None
@@ -913,7 +439,7 @@ class AgentLoop:
             metadata=msg.metadata or {},
         )
 
-    def _save_turn(self, session: Session, messages: list[dict], skip: int, total_tokens_this_turn: int = 0) -> None:
+    def _save_turn(self, session: Session, messages: list[dict], skip: int) -> None:
         """Save new-turn messages into session, truncating large tool results."""
         from datetime import datetime
         for m in messages[skip:]:
@@ -947,14 +473,6 @@ class AgentLoop:
             entry.setdefault("timestamp", datetime.now().isoformat())
             session.messages.append(entry)
         session.updated_at = datetime.now()
-        
-        # Update cumulative token count for compression tracking
-        if total_tokens_this_turn > 0:
-            current_cumulative = session.metadata.get("_cumulative_tokens", 0)
-            if isinstance(current_cumulative, (int, float)):
-                session.metadata["_cumulative_tokens"] = int(current_cumulative) + total_tokens_this_turn
-            else:
-                session.metadata["_cumulative_tokens"] = total_tokens_this_turn
 
     async def process_direct(
         self,
diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py
index e29788a..cd5f54f 100644
--- a/nanobot/agent/memory.py
+++ b/nanobot/agent/memory.py
@@ -2,17 +2,19 @@
 
 from __future__ import annotations
 
+import asyncio
 import json
+import weakref
 from pathlib import Path
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any, Callable
 
 from loguru import logger
 
-from nanobot.utils.helpers import ensure_dir
+from nanobot.utils.helpers import ensure_dir, estimate_message_tokens, estimate_prompt_tokens_chain
 
 if TYPE_CHECKING:
     from nanobot.providers.base import LLMProvider
-    from nanobot.session.manager import Session
+    from nanobot.session.manager import Session, SessionManager
 
 
 _SAVE_MEMORY_TOOL = [
@@ -26,7 +28,7 @@ _SAVE_MEMORY_TOOL = [
                 "properties": {
                     "history_entry": {
                         "type": "string",
-                        "description": "A paragraph (2-5 sentences) summarizing key events/decisions/topics. "
+                        "description": "A paragraph summarizing key events/decisions/topics. "
                         "Start with [YYYY-MM-DD HH:MM]. Include detail useful for grep search.",
                     },
                     "memory_update": {
@@ -42,6 +44,20 @@ _SAVE_MEMORY_TOOL = [
 ]
 
 
+def _ensure_text(value: Any) -> str:
+    """Normalize tool-call payload values to text for file storage."""
+    return value if isinstance(value, str) else json.dumps(value, ensure_ascii=False)
+
+
+def _normalize_save_memory_args(args: Any) -> dict[str, Any] | None:
+    """Normalize provider tool-call arguments to the expected dict shape."""
+    if isinstance(args, str):
+        args = json.loads(args)
+    if isinstance(args, list):
+        return args[0] if args and isinstance(args[0], dict) else None
+    return args if isinstance(args, dict) else None
+
+
 class MemoryStore:
     """Two-layer memory: MEMORY.md (long-term facts) + HISTORY.md (grep-searchable log)."""
 
@@ -66,29 +82,27 @@ class MemoryStore:
         long_term = self.read_long_term()
         return f"## Long-term Memory\n{long_term}" if long_term else ""
 
-    async def consolidate_chunk(
+    @staticmethod
+    def _format_messages(messages: list[dict]) -> str:
+        lines = []
+        for message in messages:
+            if not message.get("content"):
+                continue
+            tools = f" [tools: {', '.join(message['tools_used'])}]" if message.get("tools_used") else ""
+            lines.append(
+                f"[{message.get('timestamp', '?')[:16]}] {message['role'].upper()}{tools}: {message['content']}"
+            )
+        return "\n".join(lines)
+
+    async def consolidate(
         self,
         messages: list[dict],
         provider: LLMProvider,
         model: str,
-    ) -> tuple[bool, str | None]:
-        """Consolidate a chunk of messages into MEMORY.md + HISTORY.md via LLM tool call.
-
-        Returns (success, None).
-
-        - success: True on success (including no-op), False on failure.
-        - The second return value is reserved for future use (e.g. RAG-style summaries) and is
-          always None in the current implementation.
-        """
+    ) -> bool:
+        """Consolidate the provided message chunk into MEMORY.md + HISTORY.md."""
         if not messages:
-            return True, None
-
-        lines = []
-        for m in messages:
-            if not m.get("content"):
-                continue
-            tools = f" [tools: {', '.join(m['tools_used'])}]" if m.get("tools_used") else ""
-            lines.append(f"[{m.get('timestamp', '?')[:16]}] {m['role'].upper()}{tools}: {m['content']}")
+            return True
 
         current_memory = self.read_long_term()
         prompt = f"""Process this conversation and call the save_memory tool with your consolidation.
@@ -97,24 +111,12 @@ class MemoryStore:
 {current_memory or "(empty)"}
 
 ## Conversation to Process
-{chr(10).join(lines)}"""
+{self._format_messages(messages)}"""
 
         try:
             response = await provider.chat_with_retry(
                 messages=[
-                    {
-                        "role": "system",
-                        "content": (
-                            "You are a memory consolidation agent.\n"
-                            "Your job is to:\n"
-                            "1) Append a concise but grep-friendly entry to HISTORY.md summarizing key events, decisions and topics.\n"
-                            "   - Write 1 paragraph of 2–5 sentences that starts with [YYYY-MM-DD HH:MM].\n"
-                            "   - Include concrete names, IDs and numbers so it is easy to search with grep.\n"
-                            "2) Update long-term MEMORY.md with stable facts and user preferences as markdown, including all existing facts plus new ones.\n"
-                            "3) Optionally return a short context_summary (1–3 sentences) that will replace the raw messages in future dialogue history.\n\n"
-                            "Always call the save_memory tool with history_entry, memory_update and (optionally) context_summary."
-                        ),
-                    },
+                    {"role": "system", "content": "You are a memory consolidation agent. Call the save_memory tool with your consolidation of the conversation."},
                     {"role": "user", "content": prompt},
                 ],
                 tools=_SAVE_MEMORY_TOOL,
@@ -123,35 +125,160 @@ class MemoryStore:
 
             if not response.has_tool_calls:
                 logger.warning("Memory consolidation: LLM did not call save_memory, skipping")
-                return False, None
+                return False
 
-            args = response.tool_calls[0].arguments
-            # Some providers return arguments as a JSON string instead of dict
-            if isinstance(args, str):
-                args = json.loads(args)
-            # Some providers return arguments as a list (handle edge case)
-            if isinstance(args, list):
-                if args and isinstance(args[0], dict):
-                    args = args[0]
-                else:
-                    logger.warning("Memory consolidation: unexpected arguments as empty or non-dict list")
-                    return False, None
-            if not isinstance(args, dict):
-                logger.warning("Memory consolidation: unexpected arguments type {}", type(args).__name__)
-                return False, None
+            args = _normalize_save_memory_args(response.tool_calls[0].arguments)
+            if args is None:
+                logger.warning("Memory consolidation: unexpected save_memory arguments")
+                return False
 
             if entry := args.get("history_entry"):
-                if not isinstance(entry, str):
-                    entry = json.dumps(entry, ensure_ascii=False)
-                self.append_history(entry)
+                self.append_history(_ensure_text(entry))
             if update := args.get("memory_update"):
-                if not isinstance(update, str):
-                    update = json.dumps(update, ensure_ascii=False)
+                update = _ensure_text(update)
                 if update != current_memory:
                     self.write_long_term(update)
 
             logger.info("Memory consolidation done for {} messages", len(messages))
-            return True, None
+            return True
         except Exception:
             logger.exception("Memory consolidation failed")
-            return False, None
+            return False
+
+
+class MemoryConsolidator:
+    """Owns consolidation policy, locking, and session offset updates."""
+
+    _MAX_CONSOLIDATION_ROUNDS = 5
+
+    def __init__(
+        self,
+        workspace: Path,
+        provider: LLMProvider,
+        model: str,
+        sessions: SessionManager,
+        context_window_tokens: int,
+        build_messages: Callable[..., list[dict[str, Any]]],
+        get_tool_definitions: Callable[[], list[dict[str, Any]]],
+    ):
+        self.store = MemoryStore(workspace)
+        self.provider = provider
+        self.model = model
+        self.sessions = sessions
+        self.context_window_tokens = context_window_tokens
+        self._build_messages = build_messages
+        self._get_tool_definitions = get_tool_definitions
+        self._locks: weakref.WeakValueDictionary[str, asyncio.Lock] = weakref.WeakValueDictionary()
+
+    def get_lock(self, session_key: str) -> asyncio.Lock:
+        """Return the shared consolidation lock for one session."""
+        return self._locks.setdefault(session_key, asyncio.Lock())
+
+    async def consolidate_messages(self, messages: list[dict[str, object]]) -> bool:
+        """Archive a selected message chunk into persistent memory."""
+        return await self.store.consolidate(messages, self.provider, self.model)
+
+    def pick_consolidation_boundary(
+        self,
+        session: Session,
+        tokens_to_remove: int,
+    ) -> tuple[int, int] | None:
+        """Pick a user-turn boundary that removes enough old prompt tokens."""
+        start = session.last_consolidated
+        if start >= len(session.messages) or tokens_to_remove <= 0:
+            return None
+
+        removed_tokens = 0
+        last_boundary: tuple[int, int] | None = None
+        for idx in range(start, len(session.messages)):
+            message = session.messages[idx]
+            if idx > start and message.get("role") == "user":
+                last_boundary = (idx, removed_tokens)
+                if removed_tokens >= tokens_to_remove:
+                    return last_boundary
+            removed_tokens += estimate_message_tokens(message)
+
+        return last_boundary
+
+    def estimate_session_prompt_tokens(self, session: Session) -> tuple[int, str]:
+        """Estimate current prompt size for the normal session history view."""
+        history = session.get_history(max_messages=0)
+        channel, chat_id = (session.key.split(":", 1) if ":" in session.key else (None, None))
+        probe_messages = self._build_messages(
+            history=history,
+            current_message="[token-probe]",
+            channel=channel,
+            chat_id=chat_id,
+        )
+        return estimate_prompt_tokens_chain(
+            self.provider,
+            self.model,
+            probe_messages,
+            self._get_tool_definitions(),
+        )
+
+    async def archive_unconsolidated(self, session: Session) -> bool:
+        """Archive the full unconsolidated tail for /new-style session rollover."""
+        lock = self.get_lock(session.key)
+        async with lock:
+            snapshot = session.messages[session.last_consolidated:]
+            if not snapshot:
+                return True
+            return await self.consolidate_messages(snapshot)
+
+    async def maybe_consolidate_by_tokens(self, session: Session) -> None:
+        """Loop: archive old messages until prompt fits within half the context window."""
+        if not session.messages or self.context_window_tokens <= 0:
+            return
+
+        lock = self.get_lock(session.key)
+        async with lock:
+            target = self.context_window_tokens // 2
+            estimated, source = self.estimate_session_prompt_tokens(session)
+            if estimated <= 0:
+                return
+            if estimated < self.context_window_tokens:
+                logger.debug(
+                    "Token consolidation idle {}: {}/{} via {}",
+                    session.key,
+                    estimated,
+                    self.context_window_tokens,
+                    source,
+                )
+                return
+
+            for round_num in range(self._MAX_CONSOLIDATION_ROUNDS):
+                if estimated <= target:
+                    return
+
+                boundary = self.pick_consolidation_boundary(session, max(1, estimated - target))
+                if boundary is None:
+                    logger.debug(
+                        "Token consolidation: no safe boundary for {} (round {})",
+                        session.key,
+                        round_num,
+                    )
+                    return
+
+                end_idx = boundary[0]
+                chunk = session.messages[session.last_consolidated:end_idx]
+                if not chunk:
+                    return
+
+                logger.info(
+                    "Token consolidation round {} for {}: {}/{} via {}, chunk={} msgs",
+                    round_num,
+                    session.key,
+                    estimated,
+                    self.context_window_tokens,
+                    source,
+                    len(chunk),
+                )
+                if not await self.consolidate_messages(chunk):
+                    return
+                session.last_consolidated = end_idx
+                self.sessions.save(session)
+
+                estimated, source = self.estimate_session_prompt_tokens(session)
+                if estimated <= 0:
+                    return
diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index 36e2a53..cf69450 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -191,6 +191,8 @@ def onboard():
         save_config(Config())
         console.print(f"[green]✓[/green] Created config at {config_path}")
 
+    console.print("[dim]Config template now uses `maxTokens` + `contextWindowTokens`; `memoryWindow` is no longer a runtime setting.[/dim]")
+
     # Create workspace
     workspace = get_workspace_path()
 
@@ -283,6 +285,16 @@ def _load_runtime_config(config: str | None = None, workspace: str | None = None
     return loaded
 
 
+def _print_deprecated_memory_window_notice(config: Config) -> None:
+    """Warn when running with old memoryWindow-only config."""
+    if config.agents.defaults.should_warn_deprecated_memory_window:
+        console.print(
+            "[yellow]Hint:[/yellow] Detected deprecated `memoryWindow` without "
+            "`contextWindowTokens`. `memoryWindow` is ignored; run "
+            "[cyan]nanobot onboard[/cyan] to refresh your config template."
+        )
+
+
 # ============================================================================
 # Gateway / Server
 # ============================================================================
@@ -310,6 +322,7 @@ def gateway(
         logging.basicConfig(level=logging.DEBUG)
 
     config = _load_runtime_config(config, workspace)
+    _print_deprecated_memory_window_notice(config)
     port = port if port is not None else config.gateway.port
 
     console.print(f"{__logo__} Starting nanobot gateway on port {port}...")
@@ -329,12 +342,10 @@ def gateway(
         workspace=config.workspace_path,
         model=config.agents.defaults.model,
         temperature=config.agents.defaults.temperature,
-        max_tokens=config.agents.defaults.max_tokens_output,
+        max_tokens=config.agents.defaults.max_tokens,
         max_iterations=config.agents.defaults.max_tool_iterations,
         reasoning_effort=config.agents.defaults.reasoning_effort,
-        max_tokens_input=config.agents.defaults.max_tokens_input,
-        compression_start_ratio=config.agents.defaults.compression_start_ratio,
-        compression_target_ratio=config.agents.defaults.compression_target_ratio,
+        context_window_tokens=config.agents.defaults.context_window_tokens,
         brave_api_key=config.tools.web.search.api_key or None,
         web_proxy=config.tools.web.proxy or None,
         exec_config=config.tools.exec,
@@ -496,6 +507,7 @@ def agent(
     from nanobot.cron.service import CronService
 
     config = _load_runtime_config(config, workspace)
+    _print_deprecated_memory_window_notice(config)
     sync_workspace_templates(config.workspace_path)
 
     bus = MessageBus()
@@ -516,12 +528,10 @@ def agent(
         workspace=config.workspace_path,
         model=config.agents.defaults.model,
         temperature=config.agents.defaults.temperature,
-        max_tokens=config.agents.defaults.max_tokens_output,
+        max_tokens=config.agents.defaults.max_tokens,
         max_iterations=config.agents.defaults.max_tool_iterations,
         reasoning_effort=config.agents.defaults.reasoning_effort,
-        max_tokens_input=config.agents.defaults.max_tokens_input,
-        compression_start_ratio=config.agents.defaults.compression_start_ratio,
-        compression_target_ratio=config.agents.defaults.compression_target_ratio,
+        context_window_tokens=config.agents.defaults.context_window_tokens,
         brave_api_key=config.tools.web.search.api_key or None,
         web_proxy=config.tools.web.proxy or None,
         exec_config=config.tools.exec,
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index 0e41d12..a2de239 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -190,22 +190,11 @@ class SlackConfig(Base):
 
 
 class QQConfig(Base):
-    """QQ channel configuration.
-    
-    Supports two implementations:
-    1. Official botpy SDK: requires app_id and secret
-    2. OneBot protocol: requires api_url (and optionally ws_reverse_url, bot_qq, access_token)
-    """
+    """QQ channel configuration using botpy SDK."""
 
     enabled: bool = False
-    # Official botpy SDK fields
     app_id: str = ""  # 机器人 ID (AppID) from q.qq.com
     secret: str = ""  # 机器人密钥 (AppSecret) from q.qq.com
-    # OneBot protocol fields
-    api_url: str = ""  # OneBot HTTP API URL (e.g. "http://localhost:5700")
-    ws_reverse_url: str = ""  # OneBot WebSocket reverse URL (e.g. "ws://localhost:8080/ws/reverse")
-    bot_qq: int | None = None  # Bot's QQ number (for filtering self messages)
-    access_token: str = ""  # Optional access token for OneBot API
     allow_from: list[str] = Field(
         default_factory=list
     )  # Allowed user openids (empty = public access)
@@ -238,20 +227,19 @@ class AgentDefaults(Base):
     provider: str = (
         "auto"  # Provider name (e.g. "anthropic", "openrouter") or "auto" for auto-detection
     )
-    # 原生上下文最大窗口（通常对应模型的 max_input_tokens / max_context_tokens）
-    # 默认按照主流大模型（如 GPT-4o、Claude 3.x 等）的 128k 上下文给一个宽松上限，实际应根据所选模型文档手动调整。
-    max_tokens_input: int = 128_000
-    # 默认单次回复的最大输出 token 上限（调用时可按需要再做截断或比例分配）
-    # 8192 足以覆盖大多数实际对话/工具使用场景，同样可按需手动调整。
-    max_tokens_output: int = 8192
-    # 会话历史压缩触发比例：当估算的输入 token 使用量 >= maxTokensInput * compressionStartRatio 时开始压缩。
-    compression_start_ratio: float = 0.7
-    # 会话历史压缩目标比例：每轮压缩后尽量把估算的输入 token 使用量压到 maxTokensInput * compressionTargetRatio 附近。
-    compression_target_ratio: float = 0.4
+    max_tokens: int = 8192
+    context_window_tokens: int = 65_536
     temperature: float = 0.1
     max_tool_iterations: int = 40
+    # Deprecated compatibility field: accepted from old configs but ignored at runtime.
+    memory_window: int | None = Field(default=None, exclude=True)
     reasoning_effort: str | None = None  # low / medium / high — enables LLM thinking mode
 
+    @property
+    def should_warn_deprecated_memory_window(self) -> bool:
+        """Return True when old memoryWindow is present without contextWindowTokens."""
+        return self.memory_window is not None and "context_window_tokens" not in self.model_fields_set
+
 
 class AgentsConfig(Base):
     """Agent configuration."""
diff --git a/nanobot/session/manager.py b/nanobot/session/manager.py
index 1cb8a51..f0a6484 100644
--- a/nanobot/session/manager.py
+++ b/nanobot/session/manager.py
@@ -9,6 +9,7 @@ from typing import Any
 
 from loguru import logger
 
+from nanobot.config.paths import get_legacy_sessions_dir
 from nanobot.utils.helpers import ensure_dir, safe_filename
 
 
@@ -29,6 +30,7 @@ class Session:
     created_at: datetime = field(default_factory=datetime.now)
     updated_at: datetime = field(default_factory=datetime.now)
     metadata: dict[str, Any] = field(default_factory=dict)
+    last_consolidated: int = 0  # Number of messages already consolidated to files
 
     def add_message(self, role: str, content: str, **kwargs: Any) -> None:
         """Add a message to the session."""
@@ -42,13 +44,9 @@ class Session:
         self.updated_at = datetime.now()
 
     def get_history(self, max_messages: int = 500) -> list[dict[str, Any]]:
-        """
-        Return messages for LLM input, aligned to a user turn.
-
-        - max_messages > 0 时只保留最近 max_messages 条；
-        - max_messages <= 0 时不做条数截断，返回全部消息。
-        """
-        sliced = self.messages if max_messages <= 0 else self.messages[-max_messages:]
+        """Return unconsolidated messages for LLM input, aligned to a user turn."""
+        unconsolidated = self.messages[self.last_consolidated:]
+        sliced = unconsolidated[-max_messages:]
 
         # Drop leading non-user messages to avoid orphaned tool_result blocks
         for i, m in enumerate(sliced):
@@ -68,7 +66,7 @@ class Session:
     def clear(self) -> None:
         """Clear all messages and reset session to initial state."""
         self.messages = []
-        self.metadata = {}
+        self.last_consolidated = 0
         self.updated_at = datetime.now()
 
 
@@ -82,7 +80,7 @@ class SessionManager:
     def __init__(self, workspace: Path):
         self.workspace = workspace
         self.sessions_dir = ensure_dir(self.workspace / "sessions")
-        self.legacy_sessions_dir = Path.home() / ".nanobot" / "sessions"
+        self.legacy_sessions_dir = get_legacy_sessions_dir()
         self._cache: dict[str, Session] = {}
 
     def _get_session_path(self, key: str) -> Path:
@@ -134,6 +132,7 @@ class SessionManager:
             messages = []
             metadata = {}
             created_at = None
+            last_consolidated = 0
 
             with open(path, encoding="utf-8") as f:
                 for line in f:
@@ -146,6 +145,7 @@ class SessionManager:
                     if data.get("_type") == "metadata":
                         metadata = data.get("metadata", {})
                         created_at = datetime.fromisoformat(data["created_at"]) if data.get("created_at") else None
+                        last_consolidated = data.get("last_consolidated", 0)
                     else:
                         messages.append(data)
 
@@ -154,6 +154,7 @@ class SessionManager:
                 messages=messages,
                 created_at=created_at or datetime.now(),
                 metadata=metadata,
+                last_consolidated=last_consolidated
             )
         except Exception as e:
             logger.warning("Failed to load session {}: {}", key, e)
@@ -170,6 +171,7 @@ class SessionManager:
                 "created_at": session.created_at.isoformat(),
                 "updated_at": session.updated_at.isoformat(),
                 "metadata": session.metadata,
+                "last_consolidated": session.last_consolidated
             }
             f.write(json.dumps(metadata_line, ensure_ascii=False) + "\n")
             for msg in session.messages:
diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py
index 57c60dc..9242ba6 100644
--- a/nanobot/utils/helpers.py
+++ b/nanobot/utils/helpers.py
@@ -1,8 +1,12 @@
 """Utility functions for nanobot."""
 
+import json
 import re
 from datetime import datetime
 from pathlib import Path
+from typing import Any
+
+import tiktoken
 
 
 def detect_image_mime(data: bytes) -> str | None:
@@ -68,6 +72,87 @@ def split_message(content: str, max_len: int = 2000) -> list[str]:
     return chunks
 
 
+def estimate_prompt_tokens(
+    messages: list[dict[str, Any]],
+    tools: list[dict[str, Any]] | None = None,
+) -> int:
+    """Estimate prompt tokens with tiktoken."""
+    try:
+        enc = tiktoken.get_encoding("cl100k_base")
+        parts: list[str] = []
+        for msg in messages:
+            content = msg.get("content")
+            if isinstance(content, str):
+                parts.append(content)
+            elif isinstance(content, list):
+                for part in content:
+                    if isinstance(part, dict) and part.get("type") == "text":
+                        txt = part.get("text", "")
+                        if txt:
+                            parts.append(txt)
+        if tools:
+            parts.append(json.dumps(tools, ensure_ascii=False))
+        return len(enc.encode("\n".join(parts)))
+    except Exception:
+        return 0
+
+
+def estimate_message_tokens(message: dict[str, Any]) -> int:
+    """Estimate prompt tokens contributed by one persisted message."""
+    content = message.get("content")
+    parts: list[str] = []
+    if isinstance(content, str):
+        parts.append(content)
+    elif isinstance(content, list):
+        for part in content:
+            if isinstance(part, dict) and part.get("type") == "text":
+                text = part.get("text", "")
+                if text:
+                    parts.append(text)
+            else:
+                parts.append(json.dumps(part, ensure_ascii=False))
+    elif content is not None:
+        parts.append(json.dumps(content, ensure_ascii=False))
+
+    for key in ("name", "tool_call_id"):
+        value = message.get(key)
+        if isinstance(value, str) and value:
+            parts.append(value)
+    if message.get("tool_calls"):
+        parts.append(json.dumps(message["tool_calls"], ensure_ascii=False))
+
+    payload = "\n".join(parts)
+    if not payload:
+        return 1
+    try:
+        enc = tiktoken.get_encoding("cl100k_base")
+        return max(1, len(enc.encode(payload)))
+    except Exception:
+        return max(1, len(payload) // 4)
+
+
+def estimate_prompt_tokens_chain(
+    provider: Any,
+    model: str | None,
+    messages: list[dict[str, Any]],
+    tools: list[dict[str, Any]] | None = None,
+) -> tuple[int, str]:
+    """Estimate prompt tokens via provider counter first, then tiktoken fallback."""
+    provider_counter = getattr(provider, "estimate_prompt_tokens", None)
+    if callable(provider_counter):
+        try:
+            tokens, source = provider_counter(messages, tools, model)
+            if isinstance(tokens, (int, float)) and tokens > 0:
+                return int(tokens), str(source or "provider_counter")
+        except Exception:
+            pass
+
+    estimated = estimate_prompt_tokens(messages, tools)
+    if estimated > 0:
+        return int(estimated), "tiktoken"
+    return 0, "none"
+
+
 def sync_workspace_templates(workspace: Path, silent: bool = False) -> list[str]:
     """Sync bundled templates to workspace. Only creates missing files."""
     from importlib.resources import files as pkg_files
diff --git a/pyproject.toml b/pyproject.toml
index 62cf616..0344348 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,6 +44,7 @@ dependencies = [
     "json-repair>=0.57.0,<1.0.0",
     "chardet>=3.0.2,<6.0.0",
     "openai>=2.8.0",
+    "tiktoken>=0.12.0,<1.0.0",
 ]
 
 [project.optional-dependencies]
diff --git a/tests/test_commands.py b/tests/test_commands.py
index 5e3760a..1375a3a 100644
--- a/tests/test_commands.py
+++ b/tests/test_commands.py
@@ -267,6 +267,16 @@ def test_agent_workspace_override_wins_over_config_workspace(mock_agent_runtime,
     assert mock_agent_runtime["agent_loop_cls"].call_args.kwargs["workspace"] == workspace_path
 
 
+def test_agent_warns_about_deprecated_memory_window(mock_agent_runtime):
+    mock_agent_runtime["config"].agents.defaults.memory_window = 100
+
+    result = runner.invoke(app, ["agent", "-m", "hello"])
+
+    assert result.exit_code == 0
+    assert "memoryWindow" in result.stdout
+    assert "contextWindowTokens" in result.stdout
+
+
 def test_gateway_uses_workspace_from_config_by_default(monkeypatch, tmp_path: Path) -> None:
     config_file = tmp_path / "instance" / "config.json"
     config_file.parent.mkdir(parents=True)
@@ -327,6 +337,29 @@ def test_gateway_workspace_option_overrides_config(monkeypatch, tmp_path: Path)
     assert seen["workspace"] == override
     assert config.workspace_path == override
 
+
+def test_gateway_warns_about_deprecated_memory_window(monkeypatch, tmp_path: Path) -> None:
+    config_file = tmp_path / "instance" / "config.json"
+    config_file.parent.mkdir(parents=True)
+    config_file.write_text("{}")
+
+    config = Config()
+    config.agents.defaults.memory_window = 100
+
+    monkeypatch.setattr("nanobot.config.loader.set_config_path", lambda _path: None)
+    monkeypatch.setattr("nanobot.config.loader.load_config", lambda _path=None: config)
+    monkeypatch.setattr("nanobot.cli.commands.sync_workspace_templates", lambda _path: None)
+    monkeypatch.setattr(
+        "nanobot.cli.commands._make_provider",
+        lambda _config: (_ for _ in ()).throw(_StopGateway("stop")),
+    )
+
+    result = runner.invoke(app, ["gateway", "--config", str(config_file)])
+
+    assert isinstance(result.exception, _StopGateway)
+    assert "memoryWindow" in result.stdout
+    assert "contextWindowTokens" in result.stdout
+
 def test_gateway_uses_config_directory_for_cron_store(monkeypatch, tmp_path: Path) -> None:
     config_file = tmp_path / "instance" / "config.json"
     config_file.parent.mkdir(parents=True)
diff --git a/tests/test_config_migration.py b/tests/test_config_migration.py
new file mode 100644
index 0000000..62e601e
--- /dev/null
+++ b/tests/test_config_migration.py
@@ -0,0 +1,88 @@
+import json
+
+from typer.testing import CliRunner
+
+from nanobot.cli.commands import app
+from nanobot.config.loader import load_config, save_config
+
+runner = CliRunner()
+
+
+def test_load_config_keeps_max_tokens_and_warns_on_legacy_memory_window(tmp_path) -> None:
+    config_path = tmp_path / "config.json"
+    config_path.write_text(
+        json.dumps(
+            {
+                "agents": {
+                    "defaults": {
+                        "maxTokens": 1234,
+                        "memoryWindow": 42,
+                    }
+                }
+            }
+        ),
+        encoding="utf-8",
+    )
+
+    config = load_config(config_path)
+
+    assert config.agents.defaults.max_tokens == 1234
+    assert config.agents.defaults.context_window_tokens == 65_536
+    assert config.agents.defaults.should_warn_deprecated_memory_window is True
+
+
+def test_save_config_writes_context_window_tokens_but_not_memory_window(tmp_path) -> None:
+    config_path = tmp_path / "config.json"
+    config_path.write_text(
+        json.dumps(
+            {
+                "agents": {
+                    "defaults": {
+                        "maxTokens": 2222,
+                        "memoryWindow": 30,
+                    }
+                }
+            }
+        ),
+        encoding="utf-8",
+    )
+
+    config = load_config(config_path)
+    save_config(config, config_path)
+    saved = json.loads(config_path.read_text(encoding="utf-8"))
+    defaults = saved["agents"]["defaults"]
+
+    assert defaults["maxTokens"] == 2222
+    assert defaults["contextWindowTokens"] == 65_536
+    assert "memoryWindow" not in defaults
+
+
+def test_onboard_refresh_rewrites_legacy_config_template(tmp_path, monkeypatch) -> None:
+    config_path = tmp_path / "config.json"
+    workspace = tmp_path / "workspace"
+    config_path.write_text(
+        json.dumps(
+            {
+                "agents": {
+                    "defaults": {
+                        "maxTokens": 3333,
+                        "memoryWindow": 50,
+                    }
+                }
+            }
+        ),
+        encoding="utf-8",
+    )
+
+    monkeypatch.setattr("nanobot.config.loader.get_config_path", lambda: config_path)
+    monkeypatch.setattr("nanobot.cli.commands.get_workspace_path", lambda: workspace)
+
+    result = runner.invoke(app, ["onboard"], input="n\n")
+
+    assert result.exit_code == 0
+    assert "contextWindowTokens" in result.stdout
+    saved = json.loads(config_path.read_text(encoding="utf-8"))
+    defaults = saved["agents"]["defaults"]
+    assert defaults["maxTokens"] == 3333
+    assert defaults["contextWindowTokens"] == 65_536
+    assert "memoryWindow" not in defaults
diff --git a/tests/test_consolidate_offset.py b/tests/test_consolidate_offset.py
index a3213dd..7d12338 100644
--- a/tests/test_consolidate_offset.py
+++ b/tests/test_consolidate_offset.py
@@ -480,226 +480,35 @@ class TestEmptyAndBoundarySessions:
         assert_messages_content(old_messages, 10, 34)
 
 
-class TestConsolidationDeduplicationGuard:
-    """Test that consolidation tasks are deduplicated and serialized."""
+class TestNewCommandArchival:
+    """Test /new archival behavior with the simplified consolidation flow."""
 
-    @pytest.mark.asyncio
-    async def test_consolidation_guard_prevents_duplicate_tasks(self, tmp_path: Path) -> None:
-        """Concurrent messages above memory_window spawn only one consolidation task."""
+    @staticmethod
+    def _make_loop(tmp_path: Path):
         from nanobot.agent.loop import AgentLoop
-        from nanobot.bus.events import InboundMessage
         from nanobot.bus.queue import MessageBus
         from nanobot.providers.base import LLMResponse
 
         bus = MessageBus()
         provider = MagicMock()
         provider.get_default_model.return_value = "test-model"
+        provider.estimate_prompt_tokens.return_value = (10_000, "test")
         loop = AgentLoop(
-            bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10
+            bus=bus,
+            provider=provider,
+            workspace=tmp_path,
+            model="test-model",
+            context_window_tokens=1,
         )
-
-        loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
+        loop.provider.chat_with_retry = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
         loop.tools.get_definitions = MagicMock(return_value=[])
-
-        session = loop.sessions.get_or_create("cli:test")
-        for i in range(15):
-            session.add_message("user", f"msg{i}")
-            session.add_message("assistant", f"resp{i}")
-        loop.sessions.save(session)
-
-        consolidation_calls = 0
-
-        async def _fake_consolidate(_session, archive_all: bool = False) -> None:
-            nonlocal consolidation_calls
-            consolidation_calls += 1
-            await asyncio.sleep(0.05)
-
-        loop._consolidate_memory = _fake_consolidate  # type: ignore[method-assign]
-
-        msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello")
-        await loop._process_message(msg)
-        await loop._process_message(msg)
-        await asyncio.sleep(0.1)
-
-        assert consolidation_calls == 1, (
-            f"Expected exactly 1 consolidation, got {consolidation_calls}"
-        )
-
-    @pytest.mark.asyncio
-    async def test_new_command_guard_prevents_concurrent_consolidation(
-        self, tmp_path: Path
-    ) -> None:
-        """/new command does not run consolidation concurrently with in-flight consolidation."""
-        from nanobot.agent.loop import AgentLoop
-        from nanobot.bus.events import InboundMessage
-        from nanobot.bus.queue import MessageBus
-        from nanobot.providers.base import LLMResponse
-
-        bus = MessageBus()
-        provider = MagicMock()
-        provider.get_default_model.return_value = "test-model"
-        loop = AgentLoop(
-            bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10
-        )
-
-        loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
-        loop.tools.get_definitions = MagicMock(return_value=[])
-
-        session = loop.sessions.get_or_create("cli:test")
-        for i in range(15):
-            session.add_message("user", f"msg{i}")
-            session.add_message("assistant", f"resp{i}")
-        loop.sessions.save(session)
-
-        consolidation_calls = 0
-        active = 0
-        max_active = 0
-
-        async def _fake_consolidate(_session, archive_all: bool = False) -> None:
-            nonlocal consolidation_calls, active, max_active
-            consolidation_calls += 1
-            active += 1
-            max_active = max(max_active, active)
-            await asyncio.sleep(0.05)
-            active -= 1
-
-        loop._consolidate_memory = _fake_consolidate  # type: ignore[method-assign]
-
-        msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello")
-        await loop._process_message(msg)
-
-        new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
-        await loop._process_message(new_msg)
-        await asyncio.sleep(0.1)
-
-        assert consolidation_calls == 2, (
-            f"Expected normal + /new consolidations, got {consolidation_calls}"
-        )
-        assert max_active == 1, (
-            f"Expected serialized consolidation, observed concurrency={max_active}"
-        )
-
-    @pytest.mark.asyncio
-    async def test_consolidation_tasks_are_referenced(self, tmp_path: Path) -> None:
-        """create_task results are tracked in _consolidation_tasks while in flight."""
-        from nanobot.agent.loop import AgentLoop
-        from nanobot.bus.events import InboundMessage
-        from nanobot.bus.queue import MessageBus
-        from nanobot.providers.base import LLMResponse
-
-        bus = MessageBus()
-        provider = MagicMock()
-        provider.get_default_model.return_value = "test-model"
-        loop = AgentLoop(
-            bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10
-        )
-
-        loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
-        loop.tools.get_definitions = MagicMock(return_value=[])
-
-        session = loop.sessions.get_or_create("cli:test")
-        for i in range(15):
-            session.add_message("user", f"msg{i}")
-            session.add_message("assistant", f"resp{i}")
-        loop.sessions.save(session)
-
-        started = asyncio.Event()
-
-        async def _slow_consolidate(_session, archive_all: bool = False) -> None:
-            started.set()
-            await asyncio.sleep(0.1)
-
-        loop._consolidate_memory = _slow_consolidate  # type: ignore[method-assign]
-
-        msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello")
-        await loop._process_message(msg)
-
-        await started.wait()
-        assert len(loop._consolidation_tasks) == 1, "Task must be referenced while in-flight"
-
-        await asyncio.sleep(0.15)
-        assert len(loop._consolidation_tasks) == 0, (
-            "Task reference must be removed after completion"
-        )
-
-    @pytest.mark.asyncio
-    async def test_new_waits_for_inflight_consolidation_and_preserves_messages(
-        self, tmp_path: Path
-    ) -> None:
-        """/new waits for in-flight consolidation and archives before clear."""
-        from nanobot.agent.loop import AgentLoop
-        from nanobot.bus.events import InboundMessage
-        from nanobot.bus.queue import MessageBus
-        from nanobot.providers.base import LLMResponse
-
-        bus = MessageBus()
-        provider = MagicMock()
-        provider.get_default_model.return_value = "test-model"
-        loop = AgentLoop(
-            bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10
-        )
-
-        loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
-        loop.tools.get_definitions = MagicMock(return_value=[])
-
-        session = loop.sessions.get_or_create("cli:test")
-        for i in range(15):
-            session.add_message("user", f"msg{i}")
-            session.add_message("assistant", f"resp{i}")
-        loop.sessions.save(session)
-
-        started = asyncio.Event()
-        release = asyncio.Event()
-        archived_count = 0
-
-        async def _fake_consolidate(sess, archive_all: bool = False) -> bool:
-            nonlocal archived_count
-            if archive_all:
-                archived_count = len(sess.messages)
-                return True
-            started.set()
-            await release.wait()
-            return True
-
-        loop._consolidate_memory = _fake_consolidate  # type: ignore[method-assign]
-
-        msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello")
-        await loop._process_message(msg)
-        await started.wait()
-
-        new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
-        pending_new = asyncio.create_task(loop._process_message(new_msg))
-
-        await asyncio.sleep(0.02)
-        assert not pending_new.done(), "/new should wait while consolidation is in-flight"
-
-        release.set()
-        response = await pending_new
-        assert response is not None
-        assert "new session started" in response.content.lower()
-        assert archived_count > 0, "Expected /new archival to process a non-empty snapshot"
-
-        session_after = loop.sessions.get_or_create("cli:test")
-        assert session_after.messages == [], "Session should be cleared after successful archival"
+        return loop
 
     @pytest.mark.asyncio
     async def test_new_does_not_clear_session_when_archive_fails(self, tmp_path: Path) -> None:
-        """/new must keep session data if archive step reports failure."""
-        from nanobot.agent.loop import AgentLoop
         from nanobot.bus.events import InboundMessage
-        from nanobot.bus.queue import MessageBus
-        from nanobot.providers.base import LLMResponse
-
-        bus = MessageBus()
-        provider = MagicMock()
-        provider.get_default_model.return_value = "test-model"
-        loop = AgentLoop(
-            bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10
-        )
-
-        loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
-        loop.tools.get_definitions = MagicMock(return_value=[])
 
+        loop = self._make_loop(tmp_path)
         session = loop.sessions.get_or_create("cli:test")
         for i in range(5):
             session.add_message("user", f"msg{i}")
@@ -707,111 +516,61 @@ class TestConsolidationDeduplicationGuard:
         loop.sessions.save(session)
         before_count = len(session.messages)
 
-        async def _failing_consolidate(sess, archive_all: bool = False) -> bool:
-            if archive_all:
-                return False
-            return True
+        async def _failing_consolidate(_messages) -> bool:
+            return False
 
-        loop._consolidate_memory = _failing_consolidate  # type: ignore[method-assign]
+        loop.memory_consolidator.consolidate_messages = _failing_consolidate  # type: ignore[method-assign]
 
         new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
         response = await loop._process_message(new_msg)
 
         assert response is not None
         assert "failed" in response.content.lower()
-        session_after = loop.sessions.get_or_create("cli:test")
-        assert len(session_after.messages) == before_count, (
-            "Session must remain intact when /new archival fails"
-        )
+        assert len(loop.sessions.get_or_create("cli:test").messages) == before_count
 
     @pytest.mark.asyncio
-    async def test_new_archives_only_unconsolidated_messages_after_inflight_task(
-        self, tmp_path: Path
-    ) -> None:
-        """/new should archive only messages not yet consolidated by prior task."""
-        from nanobot.agent.loop import AgentLoop
+    async def test_new_archives_only_unconsolidated_messages(self, tmp_path: Path) -> None:
         from nanobot.bus.events import InboundMessage
-        from nanobot.bus.queue import MessageBus
-        from nanobot.providers.base import LLMResponse
-
-        bus = MessageBus()
-        provider = MagicMock()
-        provider.get_default_model.return_value = "test-model"
-        loop = AgentLoop(
-            bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10
-        )
-
-        loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
-        loop.tools.get_definitions = MagicMock(return_value=[])
 
+        loop = self._make_loop(tmp_path)
         session = loop.sessions.get_or_create("cli:test")
         for i in range(15):
             session.add_message("user", f"msg{i}")
             session.add_message("assistant", f"resp{i}")
+        session.last_consolidated = len(session.messages) - 3
         loop.sessions.save(session)
 
-        started = asyncio.Event()
-        release = asyncio.Event()
         archived_count = -1
 
-        async def _fake_consolidate(sess, archive_all: bool = False) -> bool:
+        async def _fake_consolidate(messages) -> bool:
             nonlocal archived_count
-            if archive_all:
-                archived_count = len(sess.messages)
-                return True
-
-            started.set()
-            await release.wait()
-            sess.last_consolidated = len(sess.messages) - 3
+            archived_count = len(messages)
             return True
 
-        loop._consolidate_memory = _fake_consolidate  # type: ignore[method-assign]
-
-        msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="hello")
-        await loop._process_message(msg)
-        await started.wait()
+        loop.memory_consolidator.consolidate_messages = _fake_consolidate  # type: ignore[method-assign]
 
         new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
-        pending_new = asyncio.create_task(loop._process_message(new_msg))
-        await asyncio.sleep(0.02)
-        assert not pending_new.done()
-
-        release.set()
-        response = await pending_new
+        response = await loop._process_message(new_msg)
 
         assert response is not None
         assert "new session started" in response.content.lower()
-        assert archived_count == 3, (
-            f"Expected only unconsolidated tail to archive, got {archived_count}"
-        )
+        assert archived_count == 3
 
     @pytest.mark.asyncio
     async def test_new_clears_session_and_responds(self, tmp_path: Path) -> None:
-        """/new clears session and returns confirmation."""
-        from nanobot.agent.loop import AgentLoop
         from nanobot.bus.events import InboundMessage
-        from nanobot.bus.queue import MessageBus
-        from nanobot.providers.base import LLMResponse
-
-        bus = MessageBus()
-        provider = MagicMock()
-        provider.get_default_model.return_value = "test-model"
-        loop = AgentLoop(
-            bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10
-        )
-        loop.provider.chat = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
-        loop.tools.get_definitions = MagicMock(return_value=[])
 
+        loop = self._make_loop(tmp_path)
         session = loop.sessions.get_or_create("cli:test")
         for i in range(3):
             session.add_message("user", f"msg{i}")
             session.add_message("assistant", f"resp{i}")
         loop.sessions.save(session)
 
-        async def _ok_consolidate(sess, archive_all: bool = False) -> bool:
+        async def _ok_consolidate(_messages) -> bool:
             return True
 
-        loop._consolidate_memory = _ok_consolidate  # type: ignore[method-assign]
+        loop.memory_consolidator.consolidate_messages = _ok_consolidate  # type: ignore[method-assign]
 
         new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
         response = await loop._process_message(new_msg)
diff --git a/tests/test_loop_consolidation_tokens.py b/tests/test_loop_consolidation_tokens.py
new file mode 100644
index 0000000..b0f3dda
--- /dev/null
+++ b/tests/test_loop_consolidation_tokens.py
@@ -0,0 +1,190 @@
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from nanobot.agent.loop import AgentLoop
+import nanobot.agent.memory as memory_module
+from nanobot.bus.queue import MessageBus
+from nanobot.providers.base import LLMResponse
+
+
+def _make_loop(tmp_path, *, estimated_tokens: int, context_window_tokens: int) -> AgentLoop:
+    provider = MagicMock()
+    provider.get_default_model.return_value = "test-model"
+    provider.estimate_prompt_tokens.return_value = (estimated_tokens, "test-counter")
+    provider.chat_with_retry = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
+
+    loop = AgentLoop(
+        bus=MessageBus(),
+        provider=provider,
+        workspace=tmp_path,
+        model="test-model",
+        context_window_tokens=context_window_tokens,
+    )
+    loop.tools.get_definitions = MagicMock(return_value=[])
+    return loop
+
+
+@pytest.mark.asyncio
+async def test_prompt_below_threshold_does_not_consolidate(tmp_path) -> None:
+    loop = _make_loop(tmp_path, estimated_tokens=100, context_window_tokens=200)
+    loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True)  # type: ignore[method-assign]
+
+    await loop.process_direct("hello", session_key="cli:test")
+
+    loop.memory_consolidator.consolidate_messages.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_prompt_above_threshold_triggers_consolidation(tmp_path, monkeypatch) -> None:
+    loop = _make_loop(tmp_path, estimated_tokens=1000, context_window_tokens=200)
+    loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True)  # type: ignore[method-assign]
+    session = loop.sessions.get_or_create("cli:test")
+    session.messages = [
+        {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"},
+        {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"},
+        {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"},
+    ]
+    loop.sessions.save(session)
+    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _message: 500)
+
+    await loop.process_direct("hello", session_key="cli:test")
+
+    assert loop.memory_consolidator.consolidate_messages.await_count >= 1
+
+
+@pytest.mark.asyncio
+async def test_prompt_above_threshold_archives_until_next_user_boundary(tmp_path, monkeypatch) -> None:
+    loop = _make_loop(tmp_path, estimated_tokens=1000, context_window_tokens=200)
+    loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True)  # type: ignore[method-assign]
+
+    session = loop.sessions.get_or_create("cli:test")
+    session.messages = [
+        {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"},
+        {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"},
+        {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"},
+        {"role": "assistant", "content": "a2", "timestamp": "2026-01-01T00:00:03"},
+        {"role": "user", "content": "u3", "timestamp": "2026-01-01T00:00:04"},
+    ]
+    loop.sessions.save(session)
+
+    token_map = {"u1": 120, "a1": 120, "u2": 120, "a2": 120, "u3": 120}
+    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda message: token_map[message["content"]])
+
+    await loop.memory_consolidator.maybe_consolidate_by_tokens(session)
+
+    archived_chunk = loop.memory_consolidator.consolidate_messages.await_args.args[0]
+    assert [message["content"] for message in archived_chunk] == ["u1", "a1", "u2", "a2"]
+    assert session.last_consolidated == 4
+
+
+@pytest.mark.asyncio
+async def test_consolidation_loops_until_target_met(tmp_path, monkeypatch) -> None:
+    """Verify maybe_consolidate_by_tokens keeps looping until under threshold."""
+    loop = _make_loop(tmp_path, estimated_tokens=0, context_window_tokens=200)
+    loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True)  # type: ignore[method-assign]
+
+    session = loop.sessions.get_or_create("cli:test")
+    session.messages = [
+        {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"},
+        {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"},
+        {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"},
+        {"role": "assistant", "content": "a2", "timestamp": "2026-01-01T00:00:03"},
+        {"role": "user", "content": "u3", "timestamp": "2026-01-01T00:00:04"},
+        {"role": "assistant", "content": "a3", "timestamp": "2026-01-01T00:00:05"},
+        {"role": "user", "content": "u4", "timestamp": "2026-01-01T00:00:06"},
+    ]
+    loop.sessions.save(session)
+
+    call_count = [0]
+    def mock_estimate(_session):
+        call_count[0] += 1
+        if call_count[0] == 1:
+            return (500, "test")
+        if call_count[0] == 2:
+            return (300, "test")
+        return (80, "test")
+
+    loop.memory_consolidator.estimate_session_prompt_tokens = mock_estimate  # type: ignore[method-assign]
+    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _m: 100)
+
+    await loop.memory_consolidator.maybe_consolidate_by_tokens(session)
+
+    assert loop.memory_consolidator.consolidate_messages.await_count == 2
+    assert session.last_consolidated == 6
+
+
+@pytest.mark.asyncio
+async def test_consolidation_continues_below_trigger_until_half_target(tmp_path, monkeypatch) -> None:
+    """Once triggered, consolidation should continue until it drops below half threshold."""
+    loop = _make_loop(tmp_path, estimated_tokens=0, context_window_tokens=200)
+    loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True)  # type: ignore[method-assign]
+
+    session = loop.sessions.get_or_create("cli:test")
+    session.messages = [
+        {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"},
+        {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"},
+        {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"},
+        {"role": "assistant", "content": "a2", "timestamp": "2026-01-01T00:00:03"},
+        {"role": "user", "content": "u3", "timestamp": "2026-01-01T00:00:04"},
+        {"role": "assistant", "content": "a3", "timestamp": "2026-01-01T00:00:05"},
+        {"role": "user", "content": "u4", "timestamp": "2026-01-01T00:00:06"},
+    ]
+    loop.sessions.save(session)
+
+    call_count = [0]
+
+    def mock_estimate(_session):
+        call_count[0] += 1
+        if call_count[0] == 1:
+            return (500, "test")
+        if call_count[0] == 2:
+            return (150, "test")
+        return (80, "test")
+
+    loop.memory_consolidator.estimate_session_prompt_tokens = mock_estimate  # type: ignore[method-assign]
+    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _m: 100)
+
+    await loop.memory_consolidator.maybe_consolidate_by_tokens(session)
+
+    assert loop.memory_consolidator.consolidate_messages.await_count == 2
+    assert session.last_consolidated == 6
+
+
+@pytest.mark.asyncio
+async def test_preflight_consolidation_before_llm_call(tmp_path, monkeypatch) -> None:
+    """Verify preflight consolidation runs before the LLM call in process_direct."""
+    order: list[str] = []
+
+    loop = _make_loop(tmp_path, estimated_tokens=0, context_window_tokens=200)
+
+    async def track_consolidate(messages):
+        order.append("consolidate")
+        return True
+    loop.memory_consolidator.consolidate_messages = track_consolidate  # type: ignore[method-assign]
+
+    async def track_llm(*args, **kwargs):
+        order.append("llm")
+        return LLMResponse(content="ok", tool_calls=[])
+    loop.provider.chat_with_retry = track_llm
+
+    session = loop.sessions.get_or_create("cli:test")
+    session.messages = [
+        {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"},
+        {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"},
+        {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"},
+    ]
+    loop.sessions.save(session)
+    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _m: 500)
+
+    call_count = [0]
+    def mock_estimate(_session):
+        call_count[0] += 1
+        return (1000 if call_count[0] <= 1 else 80, "test")
+    loop.memory_consolidator.estimate_session_prompt_tokens = mock_estimate  # type: ignore[method-assign]
+
+    await loop.process_direct("hello", session_key="cli:test")
+
+    assert "consolidate" in order
+    assert "llm" in order
+    assert order.index("consolidate") < order.index("llm")
diff --git a/tests/test_memory_consolidation_types.py b/tests/test_memory_consolidation_types.py
index 2605bf7..0263f01 100644
--- a/tests/test_memory_consolidation_types.py
+++ b/tests/test_memory_consolidation_types.py
@@ -7,7 +7,7 @@ tool call response, it should serialize them to JSON instead of raising TypeErro
 
 import json
 from pathlib import Path
-from unittest.mock import AsyncMock, MagicMock
+from unittest.mock import AsyncMock
 
 import pytest
 
@@ -15,15 +15,12 @@ from nanobot.agent.memory import MemoryStore
 from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
 
 
-def _make_session(message_count: int = 30, memory_window: int = 50):
-    """Create a mock session with messages."""
-    session = MagicMock()
-    session.messages = [
+def _make_messages(message_count: int = 30):
+    """Create a list of mock messages."""
+    return [
         {"role": "user", "content": f"msg{i}", "timestamp": "2026-01-01 00:00"}
         for i in range(message_count)
     ]
-    session.last_consolidated = 0
-    return session
 
 
 def _make_tool_response(history_entry, memory_update):
@@ -74,9 +71,9 @@ class TestMemoryConsolidationTypeHandling:
             )
         )
         provider.chat_with_retry = provider.chat
-        session = _make_session(message_count=60)
+        messages = _make_messages(message_count=60)
 
-        result = await store.consolidate(session, provider, "test-model", memory_window=50)
+        result = await store.consolidate(messages, provider, "test-model")
 
         assert result is True
         assert store.history_file.exists()
@@ -95,9 +92,9 @@ class TestMemoryConsolidationTypeHandling:
             )
         )
         provider.chat_with_retry = provider.chat
-        session = _make_session(message_count=60)
+        messages = _make_messages(message_count=60)
 
-        result = await store.consolidate(session, provider, "test-model", memory_window=50)
+        result = await store.consolidate(messages, provider, "test-model")
 
         assert result is True
         assert store.history_file.exists()
@@ -131,9 +128,9 @@ class TestMemoryConsolidationTypeHandling:
         )
         provider.chat = AsyncMock(return_value=response)
         provider.chat_with_retry = provider.chat
-        session = _make_session(message_count=60)
+        messages = _make_messages(message_count=60)
 
-        result = await store.consolidate(session, provider, "test-model", memory_window=50)
+        result = await store.consolidate(messages, provider, "test-model")
 
         assert result is True
         assert "User discussed testing." in store.history_file.read_text()
@@ -147,22 +144,22 @@ class TestMemoryConsolidationTypeHandling:
             return_value=LLMResponse(content="I summarized the conversation.", tool_calls=[])
         )
         provider.chat_with_retry = provider.chat
-        session = _make_session(message_count=60)
+        messages = _make_messages(message_count=60)
 
-        result = await store.consolidate(session, provider, "test-model", memory_window=50)
+        result = await store.consolidate(messages, provider, "test-model")
 
         assert result is False
         assert not store.history_file.exists()
 
     @pytest.mark.asyncio
-    async def test_skips_when_few_messages(self, tmp_path: Path) -> None:
-        """Consolidation should be a no-op when messages < keep_count."""
+    async def test_skips_when_message_chunk_is_empty(self, tmp_path: Path) -> None:
+        """Consolidation should be a no-op when the selected chunk is empty."""
         store = MemoryStore(tmp_path)
         provider = AsyncMock()
         provider.chat_with_retry = provider.chat
-        session = _make_session(message_count=10)
+        messages: list[dict] = []
 
-        result = await store.consolidate(session, provider, "test-model", memory_window=50)
+        result = await store.consolidate(messages, provider, "test-model")
 
         assert result is True
         provider.chat.assert_not_called()
@@ -189,9 +186,9 @@ class TestMemoryConsolidationTypeHandling:
         )
         provider.chat = AsyncMock(return_value=response)
         provider.chat_with_retry = provider.chat
-        session = _make_session(message_count=60)
+        messages = _make_messages(message_count=60)
 
-        result = await store.consolidate(session, provider, "test-model", memory_window=50)
+        result = await store.consolidate(messages, provider, "test-model")
 
         assert result is True
         assert "User discussed testing." in store.history_file.read_text()
@@ -215,9 +212,9 @@ class TestMemoryConsolidationTypeHandling:
         )
         provider.chat = AsyncMock(return_value=response)
         provider.chat_with_retry = provider.chat
-        session = _make_session(message_count=60)
+        messages = _make_messages(message_count=60)
 
-        result = await store.consolidate(session, provider, "test-model", memory_window=50)
+        result = await store.consolidate(messages, provider, "test-model")
 
         assert result is False
 
@@ -239,9 +236,9 @@ class TestMemoryConsolidationTypeHandling:
         )
         provider.chat = AsyncMock(return_value=response)
         provider.chat_with_retry = provider.chat
-        session = _make_session(message_count=60)
+        messages = _make_messages(message_count=60)
 
-        result = await store.consolidate(session, provider, "test-model", memory_window=50)
+        result = await store.consolidate(messages, provider, "test-model")
 
         assert result is False
 
@@ -255,7 +252,7 @@ class TestMemoryConsolidationTypeHandling:
                 memory_update="# Memory\nUser likes testing.",
             ),
         ])
-        session = _make_session(message_count=60)
+        messages = _make_messages(message_count=60)
         delays: list[int] = []
 
         async def _fake_sleep(delay: int) -> None:
@@ -263,7 +260,7 @@ class TestMemoryConsolidationTypeHandling:
 
         monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep)
 
-        result = await store.consolidate(session, provider, "test-model", memory_window=50)
+        result = await store.consolidate(messages, provider, "test-model")
 
         assert result is True
         assert provider.calls == 2
diff --git a/tests/test_message_tool_suppress.py b/tests/test_message_tool_suppress.py
index 63b0fd1..1091de4 100644
--- a/tests/test_message_tool_suppress.py
+++ b/tests/test_message_tool_suppress.py
@@ -16,7 +16,7 @@ def _make_loop(tmp_path: Path) -> AgentLoop:
     bus = MessageBus()
     provider = MagicMock()
     provider.get_default_model.return_value = "test-model"
-    return AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model", memory_window=10)
+    return AgentLoop(bus=bus, provider=provider, workspace=tmp_path, model="test-model")
 
 
 class TestMessageToolSuppressLogic:
@@ -33,7 +33,7 @@ class TestMessageToolSuppressLogic:
             LLMResponse(content="", tool_calls=[tool_call]),
             LLMResponse(content="Done", tool_calls=[]),
         ])
-        loop.provider.chat = AsyncMock(side_effect=lambda *a, **kw: next(calls))
+        loop.provider.chat_with_retry = AsyncMock(side_effect=lambda *a, **kw: next(calls))
         loop.tools.get_definitions = MagicMock(return_value=[])
 
         sent: list[OutboundMessage] = []
@@ -58,7 +58,7 @@ class TestMessageToolSuppressLogic:
             LLMResponse(content="", tool_calls=[tool_call]),
             LLMResponse(content="I've sent the email.", tool_calls=[]),
         ])
-        loop.provider.chat = AsyncMock(side_effect=lambda *a, **kw: next(calls))
+        loop.provider.chat_with_retry = AsyncMock(side_effect=lambda *a, **kw: next(calls))
         loop.tools.get_definitions = MagicMock(return_value=[])
 
         sent: list[OutboundMessage] = []
@@ -77,7 +77,7 @@ class TestMessageToolSuppressLogic:
     @pytest.mark.asyncio
     async def test_not_suppress_when_no_message_tool_used(self, tmp_path: Path) -> None:
         loop = _make_loop(tmp_path)
-        loop.provider.chat = AsyncMock(return_value=LLMResponse(content="Hello!", tool_calls=[]))
+        loop.provider.chat_with_retry = AsyncMock(return_value=LLMResponse(content="Hello!", tool_calls=[]))
         loop.tools.get_definitions = MagicMock(return_value=[])
 
         msg = InboundMessage(channel="feishu", sender_id="user1", chat_id="chat123", content="Hi")
@@ -98,7 +98,7 @@ class TestMessageToolSuppressLogic:
             ),
             LLMResponse(content="Done", tool_calls=[]),
         ])
-        loop.provider.chat = AsyncMock(side_effect=lambda *a, **kw: next(calls))
+        loop.provider.chat_with_retry = AsyncMock(side_effect=lambda *a, **kw: next(calls))
         loop.tools.get_definitions = MagicMock(return_value=[])
         loop.tools.execute = AsyncMock(return_value="ok")
 

From a44ee115d1188a62012d3d7cc38077ff5013f4ee Mon Sep 17 00:00:00 2001
From: greyishsong <greyishsong@qq.com>
Date: Wed, 11 Mar 2026 09:02:28 +0800
Subject: [PATCH 27/53] fix: bump litellm version to 1.82.1 for Moonshot
 provider support

See issue #1628.
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 62cf616..7127354 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,7 @@ classifiers = [
 
 dependencies = [
     "typer>=0.20.0,<1.0.0",
-    "litellm>=1.81.5,<2.0.0",
+    "litellm>=1.82.1,<2.0.0",
     "pydantic>=2.12.0,<3.0.0",
     "pydantic-settings>=2.12.0,<3.0.0",
     "websockets>=16.0,<17.0",

From d1df53aaf783d44394d3d335948b5eaf31af803f Mon Sep 17 00:00:00 2001
From: YinAnPing <yapex@192.168.5.4>
Date: Wed, 11 Mar 2026 09:30:33 +0800
Subject: [PATCH 28/53] fix: exclude hidden files when syncing workspace
 templates

Skip files whose names start with '.', such as the binary ._AGENTS.md
AppleDouble files that macOS creates to hold extended attributes, to
prevent UnicodeDecodeError during template synchronization.
---
 nanobot/utils/helpers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 mode change 100644 => 100755 nanobot/utils/helpers.py

diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py
old mode 100644
new mode 100755
index 57c60dc..a387b79
--- a/nanobot/utils/helpers.py
+++ b/nanobot/utils/helpers.py
@@ -88,7 +88,7 @@ def sync_workspace_templates(workspace: Path, silent: bool = False) -> list[str]
         added.append(str(dest.relative_to(workspace)))
 
     for item in tpl.iterdir():
-        if item.name.endswith(".md"):
+        if item.name.endswith(".md") and not item.name.startswith("."):
             _write(item, workspace / item.name)
     _write(tpl / "memory" / "MEMORY.md", workspace / "memory" / "MEMORY.md")
     _write(None, workspace / "memory" / "HISTORY.md")

From 35d811c99790b71ef34c5908b23168eeb526ca6b Mon Sep 17 00:00:00 2001
From: dingyanyi2019 <dingyanyi2019@outlook.com>
Date: Wed, 11 Mar 2026 10:19:43 +0800
Subject: [PATCH 29/53] feat: support retrieving DingTalk voice recognition
 text

---
 nanobot/channels/dingtalk.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/nanobot/channels/dingtalk.py b/nanobot/channels/dingtalk.py
index 3c301a9..cdcba57 100644
--- a/nanobot/channels/dingtalk.py
+++ b/nanobot/channels/dingtalk.py
@@ -57,6 +57,8 @@ class NanobotDingTalkHandler(CallbackHandler):
             content = ""
             if chatbot_msg.text:
                 content = chatbot_msg.text.content.strip()
+            elif chatbot_msg.extensions.get("content", {}).get("recognition"):
+                content = chatbot_msg.extensions["content"]["recognition"].strip()
             if not content:
                 content = message.data.get("text", {}).get("content", "").strip()
 

From 91f17cad00b14b7a550f154791be3fc8eb12b746 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 03:40:33 +0000
Subject: [PATCH 30/53] feat(dingtalk): support voice recognition text fallback

Read DingTalk recognition text when text.content is empty, and add a handler-level regression test for voice transcript delivery.
---
 tests/test_dingtalk_channel.py | 47 +++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/tests/test_dingtalk_channel.py b/tests/test_dingtalk_channel.py
index 7595a33..6051014 100644
--- a/tests/test_dingtalk_channel.py
+++ b/tests/test_dingtalk_channel.py
@@ -1,9 +1,11 @@
+import asyncio
 from types import SimpleNamespace
 
 import pytest
 
 from nanobot.bus.queue import MessageBus
-from nanobot.channels.dingtalk import DingTalkChannel
+import nanobot.channels.dingtalk as dingtalk_module
+from nanobot.channels.dingtalk import DingTalkChannel, NanobotDingTalkHandler
 from nanobot.config.schema import DingTalkConfig
 
 
@@ -64,3 +66,46 @@ async def test_group_send_uses_group_messages_api() -> None:
     assert call["url"] == "https://api.dingtalk.com/v1.0/robot/groupMessages/send"
     assert call["json"]["openConversationId"] == "conv123"
     assert call["json"]["msgKey"] == "sampleMarkdown"
+
+
+@pytest.mark.asyncio
+async def test_handler_uses_voice_recognition_text_when_text_is_empty(monkeypatch) -> None:
+    bus = MessageBus()
+    channel = DingTalkChannel(
+        DingTalkConfig(client_id="app", client_secret="secret", allow_from=["user1"]),
+        bus,
+    )
+    handler = NanobotDingTalkHandler(channel)
+
+    class _FakeChatbotMessage:
+        text = None
+        extensions = {"content": {"recognition": "voice transcript"}}
+        sender_staff_id = "user1"
+        sender_id = "fallback-user"
+        sender_nick = "Alice"
+        message_type = "audio"
+
+        @staticmethod
+        def from_dict(_data):
+            return _FakeChatbotMessage()
+
+    monkeypatch.setattr(dingtalk_module, "ChatbotMessage", _FakeChatbotMessage)
+    monkeypatch.setattr(dingtalk_module, "AckMessage", SimpleNamespace(STATUS_OK="OK"))
+
+    status, body = await handler.process(
+        SimpleNamespace(
+            data={
+                "conversationType": "2",
+                "conversationId": "conv123",
+                "text": {"content": ""},
+            }
+        )
+    )
+
+    await asyncio.gather(*list(channel._background_tasks))
+    msg = await bus.consume_inbound()
+
+    assert (status, body) == ("OK", "OK")
+    assert msg.content == "voice transcript"
+    assert msg.sender_id == "user1"
+    assert msg.chat_id == "group:conv123"

From ddccf25bb1be8529d453d2344eea21bd593021c2 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 03:47:24 +0000
Subject: [PATCH 31/53] fix(subagent): preserve reasoning fields across tool
 turns

Share assistant message construction between the main agent and subagents, and add a regression test to keep reasoning_content and thinking_blocks in follow-up tool rounds.
---
 nanobot/agent/context.py  | 16 +++++++--------
 nanobot/agent/subagent.py | 21 +++++++------------
 nanobot/utils/helpers.py  | 17 ++++++++++++++++
 tests/test_task_cancel.py | 43 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 74 insertions(+), 23 deletions(-)

diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py
index 2c648eb..e47fcb8 100644
--- a/nanobot/agent/context.py
+++ b/nanobot/agent/context.py
@@ -10,7 +10,7 @@ from typing import Any
 
 from nanobot.agent.memory import MemoryStore
 from nanobot.agent.skills import SkillsLoader
-from nanobot.utils.helpers import detect_image_mime
+from nanobot.utils.helpers import build_assistant_message, detect_image_mime
 
 
 class ContextBuilder:
@@ -182,12 +182,10 @@ Reply directly with text for conversations. Only use the 'message' tool to send
         thinking_blocks: list[dict] | None = None,
     ) -> list[dict[str, Any]]:
         """Add an assistant message to the message list."""
-        msg: dict[str, Any] = {"role": "assistant", "content": content}
-        if tool_calls:
-            msg["tool_calls"] = tool_calls
-        if reasoning_content is not None:
-            msg["reasoning_content"] = reasoning_content
-        if thinking_blocks:
-            msg["thinking_blocks"] = thinking_blocks
-        messages.append(msg)
+        messages.append(build_assistant_message(
+            content,
+            tool_calls=tool_calls,
+            reasoning_content=reasoning_content,
+            thinking_blocks=thinking_blocks,
+        ))
         return messages
diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py
index 308e67d..eff0b4f 100644
--- a/nanobot/agent/subagent.py
+++ b/nanobot/agent/subagent.py
@@ -16,6 +16,7 @@ from nanobot.bus.events import InboundMessage
 from nanobot.bus.queue import MessageBus
 from nanobot.config.schema import ExecToolConfig
 from nanobot.providers.base import LLMProvider
+from nanobot.utils.helpers import build_assistant_message
 
 
 class SubagentManager:
@@ -133,7 +134,6 @@ class SubagentManager:
                 )
 
                 if response.has_tool_calls:
-                    # Add assistant message with tool calls
                     tool_call_dicts = [
                         {
                             "id": tc.id,
@@ -145,19 +145,12 @@ class SubagentManager:
                         }
                         for tc in response.tool_calls
                     ]
-                    assistant_msg: dict[str, Any] = {
-                        "role": "assistant",
-                        "content": response.content or "",
-                        "tool_calls": tool_call_dicts,
-                    }
-                    # Preserve reasoning_content for providers that require it
-                    # (e.g. Deepseek Reasoner mandates this field on every
-                    # assistant message when thinking mode is active).
-                    if response.reasoning_content is not None:
-                        assistant_msg["reasoning_content"] = response.reasoning_content
-                    if response.thinking_blocks:
-                        assistant_msg["thinking_blocks"] = response.thinking_blocks
-                    messages.append(assistant_msg)
+                    messages.append(build_assistant_message(
+                        response.content or "",
+                        tool_calls=tool_call_dicts,
+                        reasoning_content=response.reasoning_content,
+                        thinking_blocks=response.thinking_blocks,
+                    ))
 
                     # Execute tools
                     for tool_call in response.tool_calls:
diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py
index 9242ba6..6d2c670 100644
--- a/nanobot/utils/helpers.py
+++ b/nanobot/utils/helpers.py
@@ -72,6 +72,23 @@ def split_message(content: str, max_len: int = 2000) -> list[str]:
     return chunks
 
 
+def build_assistant_message(
+    content: str | None,
+    tool_calls: list[dict[str, Any]] | None = None,
+    reasoning_content: str | None = None,
+    thinking_blocks: list[dict] | None = None,
+) -> dict[str, Any]:
+    """Build a provider-safe assistant message with optional reasoning fields."""
+    msg: dict[str, Any] = {"role": "assistant", "content": content}
+    if tool_calls:
+        msg["tool_calls"] = tool_calls
+    if reasoning_content is not None:
+        msg["reasoning_content"] = reasoning_content
+    if thinking_blocks:
+        msg["thinking_blocks"] = thinking_blocks
+    return msg
+
+
 def estimate_prompt_tokens(
     messages: list[dict[str, Any]],
     tools: list[dict[str, Any]] | None = None,
diff --git a/tests/test_task_cancel.py b/tests/test_task_cancel.py
index 27a2d73..62ab2cc 100644
--- a/tests/test_task_cancel.py
+++ b/tests/test_task_cancel.py
@@ -165,3 +165,46 @@ class TestSubagentCancellation:
         provider.get_default_model.return_value = "test-model"
         mgr = SubagentManager(provider=provider, workspace=MagicMock(), bus=bus)
         assert await mgr.cancel_by_session("nonexistent") == 0
+
+    @pytest.mark.asyncio
+    async def test_subagent_preserves_reasoning_fields_in_tool_turn(self, monkeypatch, tmp_path):
+        from nanobot.agent.subagent import SubagentManager
+        from nanobot.bus.queue import MessageBus
+        from nanobot.providers.base import LLMResponse, ToolCallRequest
+
+        bus = MessageBus()
+        provider = MagicMock()
+        provider.get_default_model.return_value = "test-model"
+
+        captured_second_call: list[dict] = []
+
+        call_count = {"n": 0}
+
+        async def scripted_chat_with_retry(*, messages, **kwargs):
+            call_count["n"] += 1
+            if call_count["n"] == 1:
+                return LLMResponse(
+                    content="thinking",
+                    tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={})],
+                    reasoning_content="hidden reasoning",
+                    thinking_blocks=[{"type": "thinking", "thinking": "step"}],
+                )
+            captured_second_call[:] = messages
+            return LLMResponse(content="done", tool_calls=[])
+        provider.chat_with_retry = scripted_chat_with_retry
+        mgr = SubagentManager(provider=provider, workspace=tmp_path, bus=bus)
+
+        async def fake_execute(self, name, arguments):
+            return "tool result"
+
+        monkeypatch.setattr("nanobot.agent.tools.registry.ToolRegistry.execute", fake_execute)
+
+        await mgr._run_subagent("sub-1", "do task", "label", {"channel": "test", "chat_id": "c1"})
+
+        assistant_messages = [
+            msg for msg in captured_second_call
+            if msg.get("role") == "assistant" and msg.get("tool_calls")
+        ]
+        assert len(assistant_messages) == 1
+        assert assistant_messages[0]["reasoning_content"] == "hidden reasoning"
+        assert assistant_messages[0]["thinking_blocks"] == [{"type": "thinking", "thinking": "step"}]

From 76c6063141f84d8bde3f3a95896c36e4e673c5c7 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 03:50:54 +0000
Subject: [PATCH 32/53] chore: normalize helpers.py file mode

---
 nanobot/utils/helpers.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100755 => 100644 nanobot/utils/helpers.py

diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py
old mode 100755
new mode 100644

From dee4f27dce4a8837eea4b97b882314c50a2b74e3 Mon Sep 17 00:00:00 2001
From: "Jerome Sonnet (letzdoo)" <jerome.sonnet@letzdoo.com>
Date: Wed, 11 Mar 2026 07:43:28 +0400
Subject: [PATCH 33/53] feat: add Ollama as a local LLM provider

Add native Ollama support so local models (e.g. nemotron-3-nano) can be
used without an API key. This adds a ProviderSpec with the ollama_chat
LiteLLM prefix and a ProvidersConfig field, and skips API key validation
for local providers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 nanobot/cli/commands.py       |  2 +-
 nanobot/config/schema.py      |  5 +++--
 nanobot/providers/registry.py | 17 +++++++++++++++++
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index cf69450..8387b28 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -252,7 +252,7 @@ def _make_provider(config: Config):
     from nanobot.providers.litellm_provider import LiteLLMProvider
     from nanobot.providers.registry import find_by_name
     spec = find_by_name(provider_name)
-    if not model.startswith("bedrock/") and not (p and p.api_key) and not (spec and spec.is_oauth):
+    if not model.startswith("bedrock/") and not (p and p.api_key) and not (spec and (spec.is_oauth or spec.is_local)):
         console.print("[red]Error: No API key configured.[/red]")
         console.print("Set one in ~/.nanobot/config.json under providers section")
         raise typer.Exit(1)
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index a2de239..9b5821b 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -272,6 +272,7 @@ class ProvidersConfig(Base):
     moonshot: ProviderConfig = Field(default_factory=ProviderConfig)
     minimax: ProviderConfig = Field(default_factory=ProviderConfig)
     aihubmix: ProviderConfig = Field(default_factory=ProviderConfig)  # AiHubMix API gateway
+    ollama: ProviderConfig = Field(default_factory=ProviderConfig)  # Ollama local models
     siliconflow: ProviderConfig = Field(default_factory=ProviderConfig)  # SiliconFlow (硅基流动)
     volcengine: ProviderConfig = Field(default_factory=ProviderConfig)  # VolcEngine (火山引擎)
     openai_codex: ProviderConfig = Field(default_factory=ProviderConfig)  # OpenAI Codex (OAuth)
@@ -375,14 +376,14 @@ class Config(BaseSettings):
         for spec in PROVIDERS:
             p = getattr(self.providers, spec.name, None)
             if p and model_prefix and normalized_prefix == spec.name:
-                if spec.is_oauth or p.api_key:
+                if spec.is_oauth or spec.is_local or p.api_key:
                     return p, spec.name
 
         # Match by keyword (order follows PROVIDERS registry)
         for spec in PROVIDERS:
             p = getattr(self.providers, spec.name, None)
             if p and any(_kw_matches(kw) for kw in spec.keywords):
-                if spec.is_oauth or p.api_key:
+                if spec.is_oauth or spec.is_local or p.api_key:
                     return p, spec.name
 
         # Fallback: gateways first, then others (follows registry order)
diff --git a/nanobot/providers/registry.py b/nanobot/providers/registry.py
index 3ba1a0e..c4bcfe2 100644
--- a/nanobot/providers/registry.py
+++ b/nanobot/providers/registry.py
@@ -360,6 +360,23 @@ PROVIDERS: tuple[ProviderSpec, ...] = (
         strip_model_prefix=False,
         model_overrides=(),
     ),
+    # === Ollama (local, OpenAI-compatible) ===================================
+    ProviderSpec(
+        name="ollama",
+        keywords=("ollama", "nemotron"),
+        env_key="OLLAMA_API_KEY",
+        display_name="Ollama",
+        litellm_prefix="ollama_chat",  # model → ollama_chat/model
+        skip_prefixes=("ollama/", "ollama_chat/"),
+        env_extras=(),
+        is_gateway=False,
+        is_local=True,
+        detect_by_key_prefix="",
+        detect_by_base_keyword="11434",
+        default_api_base="http://localhost:11434",
+        strip_model_prefix=False,
+        model_overrides=(),
+    ),
     # === Auxiliary (not a primary LLM provider) ============================
     # Groq: mainly used for Whisper voice transcription, also usable for LLM.
     # Needs "groq/" prefix for LiteLLM routing. Placed last — it rarely wins fallback.

From c7e2622ee1cb313ca3f7a4a31779813cc3ebc27b Mon Sep 17 00:00:00 2001
From: ethanclaw <ethanbot@163.com>
Date: Wed, 11 Mar 2026 12:25:28 +0800
Subject: [PATCH 34/53] fix(subagent): pass reasoning_content and
 thinking_blocks in subagent messages

Fix issue #1834: Spawn/subagent tool fails with DeepSeek Reasoner
due to a missing reasoning_content field when using thinking mode.

The subagent was not including reasoning_content and thinking_blocks
in assistant messages with tool calls, causing the DeepSeek API to
reject subsequent requests.

- Add reasoning_content to assistant message when subagent makes tool calls
- Add thinking_blocks to assistant message for Anthropic extended thinking
- Add tests to verify both fields are properly passed

Fixes #1834
---
 nanobot/agent/subagent.py        |   2 +
 tests/test_subagent_reasoning.py | 144 +++++++++++++++++++++++++++++++
 2 files changed, 146 insertions(+)
 create mode 100644 tests/test_subagent_reasoning.py

diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py
index f9eda1f..6163a52 100644
--- a/nanobot/agent/subagent.py
+++ b/nanobot/agent/subagent.py
@@ -149,6 +149,8 @@ class SubagentManager:
                         "role": "assistant",
                         "content": response.content or "",
                         "tool_calls": tool_call_dicts,
+                        "reasoning_content": response.reasoning_content,
+                        "thinking_blocks": response.thinking_blocks,
                     })
 
                     # Execute tools
diff --git a/tests/test_subagent_reasoning.py b/tests/test_subagent_reasoning.py
new file mode 100644
index 0000000..5e70506
--- /dev/null
+++ b/tests/test_subagent_reasoning.py
@@ -0,0 +1,144 @@
+"""Tests for subagent reasoning_content and thinking_blocks handling."""
+
+from __future__ import annotations
+
+import asyncio
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+
+class TestSubagentReasoningContent:
+    """Test that subagent properly handles reasoning_content and thinking_blocks."""
+
+    @pytest.mark.asyncio
+    async def test_subagent_message_includes_reasoning_content(self):
+        """Verify reasoning_content is included in assistant messages with tool calls.
+
+        This is the fix for issue #1834: Spawn/subagent tool fails with
+        Deepseek Reasoner due to missing reasoning_content field.
+        """
+        from nanobot.agent.subagent import SubagentManager
+        from nanobot.bus.queue import MessageBus
+        from nanobot.providers.base import LLMResponse, ToolCallRequest
+
+        bus = MessageBus()
+        provider = MagicMock()
+        provider.get_default_model.return_value = "deepseek-reasoner"
+
+        # Create a real Path object for workspace
+        workspace = Path("/tmp/test_workspace")
+        workspace.mkdir(parents=True, exist_ok=True)
+
+        # Capture messages that are sent to the provider
+        captured_messages = []
+
+        async def mock_chat(*args, **kwargs):
+            captured_messages.append(kwargs.get("messages", []))
+            # Return response with tool calls and reasoning_content
+            tool_call = ToolCallRequest(
+                id="test-1",
+                name="read_file",
+                arguments={"path": "/test.txt"},
+            )
+            return LLMResponse(
+                content="",
+                tool_calls=[tool_call],
+                reasoning_content="I need to read this file first",
+            )
+
+        provider.chat_with_retry = AsyncMock(side_effect=mock_chat)
+
+        mgr = SubagentManager(provider=provider, workspace=workspace, bus=bus)
+
+        # Mock the tools registry
+        with patch("nanobot.agent.subagent.ToolRegistry") as MockToolRegistry:
+            mock_registry = MagicMock()
+            mock_registry.get_definitions.return_value = []
+            mock_registry.execute = AsyncMock(return_value="file content")
+            MockToolRegistry.return_value = mock_registry
+
+            result = await mgr.spawn(
+                task="Read a file",
+                label="test",
+                origin_channel="cli",
+                origin_chat_id="direct",
+                session_key="cli:direct",
+            )
+
+            # Wait for the task to complete
+            await asyncio.sleep(0.5)
+
+        # Check the captured messages
+        assert len(captured_messages) >= 1
+        # Find the assistant message with tool_calls
+        found = False
+        for msg_list in captured_messages:
+            for msg in msg_list:
+                if msg.get("role") == "assistant" and msg.get("tool_calls"):
+                    assert "reasoning_content" in msg, "reasoning_content should be in assistant message with tool_calls"
+                    assert msg["reasoning_content"] == "I need to read this file first"
+                    found = True
+        assert found, "Should have found an assistant message with tool_calls"
+
+    @pytest.mark.asyncio
+    async def test_subagent_message_includes_thinking_blocks(self):
+        """Verify thinking_blocks is included in assistant messages with tool calls."""
+        from nanobot.agent.subagent import SubagentManager
+        from nanobot.bus.queue import MessageBus
+        from nanobot.providers.base import LLMResponse, ToolCallRequest
+
+        bus = MessageBus()
+        provider = MagicMock()
+        provider.get_default_model.return_value = "claude-sonnet"
+
+        workspace = Path("/tmp/test_workspace2")
+        workspace.mkdir(parents=True, exist_ok=True)
+
+        captured_messages = []
+
+        async def mock_chat(*args, **kwargs):
+            captured_messages.append(kwargs.get("messages", []))
+            tool_call = ToolCallRequest(
+                id="test-2",
+                name="read_file",
+                arguments={"path": "/test.txt"},
+            )
+            return LLMResponse(
+                content="",
+                tool_calls=[tool_call],
+                thinking_blocks=[
+                    {"signature": "sig1", "thought": "thinking step 1"},
+                    {"signature": "sig2", "thought": "thinking step 2"},
+                ],
+            )
+
+        provider.chat_with_retry = AsyncMock(side_effect=mock_chat)
+
+        mgr = SubagentManager(provider=provider, workspace=workspace, bus=bus)
+
+        with patch("nanobot.agent.subagent.ToolRegistry") as MockToolRegistry:
+            mock_registry = MagicMock()
+            mock_registry.get_definitions.return_value = []
+            mock_registry.execute = AsyncMock(return_value="file content")
+            MockToolRegistry.return_value = mock_registry
+
+            result = await mgr.spawn(
+                task="Read a file",
+                label="test",
+                origin_channel="cli",
+                origin_chat_id="direct",
+            )
+
+            await asyncio.sleep(0.5)
+
+        # Check the captured messages
+        found = False
+        for msg_list in captured_messages:
+            for msg in msg_list:
+                if msg.get("role") == "assistant" and msg.get("tool_calls"):
+                    assert "thinking_blocks" in msg, "thinking_blocks should be in assistant message with tool_calls"
+                    assert len(msg["thinking_blocks"]) == 2
+                    found = True
+        assert found, "Should have found an assistant message with tool_calls"

From 12104c8d46c0b688e0db21617b23d54f012970ba Mon Sep 17 00:00:00 2001
From: ethanclaw <ethanbot@163.com>
Date: Wed, 11 Mar 2026 14:22:33 +0800
Subject: [PATCH 35/53] fix(memory): pass temperature, max_tokens and
 reasoning_effort to memory consolidation

Fix issue #1823: Memory consolidation does not inherit agent temperature
and maxTokens configuration.

The agent's configured generation parameters were not being passed through
to the memory consolidation call, causing it to fall back to default values.
This resulted in the consolidation response being truncated before the
save_memory tool call was emitted.

- Pass temperature, max_tokens, reasoning_effort from AgentLoop to
  MemoryConsolidator and then to MemoryStore.consolidate()
- Forward these parameters to the provider.chat_with_retry() call

Fixes #1823
---
 nanobot/agent/loop.py   |  3 +++
 nanobot/agent/memory.py | 21 ++++++++++++++++++++-
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index 8605a09..edf1e8e 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -114,6 +114,9 @@ class AgentLoop:
             context_window_tokens=context_window_tokens,
             build_messages=self.context.build_messages,
             get_tool_definitions=self.tools.get_definitions,
+            temperature=self.temperature,
+            max_tokens=self.max_tokens,
+            reasoning_effort=self.reasoning_effort,
         )
         self._register_default_tools()
 
diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py
index cd5f54f..d79887b 100644
--- a/nanobot/agent/memory.py
+++ b/nanobot/agent/memory.py
@@ -99,6 +99,9 @@ class MemoryStore:
         messages: list[dict],
         provider: LLMProvider,
         model: str,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        reasoning_effort: str | None = None,
     ) -> bool:
         """Consolidate the provided message chunk into MEMORY.md + HISTORY.md."""
         if not messages:
@@ -121,6 +124,9 @@ class MemoryStore:
                 ],
                 tools=_SAVE_MEMORY_TOOL,
                 model=model,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                reasoning_effort=reasoning_effort,
             )
 
             if not response.has_tool_calls:
@@ -160,6 +166,9 @@ class MemoryConsolidator:
         context_window_tokens: int,
         build_messages: Callable[..., list[dict[str, Any]]],
         get_tool_definitions: Callable[[], list[dict[str, Any]]],
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        reasoning_effort: str | None = None,
     ):
         self.store = MemoryStore(workspace)
         self.provider = provider
@@ -168,6 +177,9 @@ class MemoryConsolidator:
         self.context_window_tokens = context_window_tokens
         self._build_messages = build_messages
         self._get_tool_definitions = get_tool_definitions
+        self._temperature = temperature
+        self._max_tokens = max_tokens
+        self._reasoning_effort = reasoning_effort
         self._locks: weakref.WeakValueDictionary[str, asyncio.Lock] = weakref.WeakValueDictionary()
 
     def get_lock(self, session_key: str) -> asyncio.Lock:
@@ -176,7 +188,14 @@ class MemoryConsolidator:
 
     async def consolidate_messages(self, messages: list[dict[str, object]]) -> bool:
         """Archive a selected message chunk into persistent memory."""
-        return await self.store.consolidate(messages, self.provider, self.model)
+        return await self.store.consolidate(
+            messages,
+            self.provider,
+            self.model,
+            temperature=self._temperature,
+            max_tokens=self._max_tokens,
+            reasoning_effort=self._reasoning_effort,
+        )
 
     def pick_consolidation_boundary(
         self,

From ed82f95f0ca23605d896ff1785dd93dbb4ab70c4 Mon Sep 17 00:00:00 2001
From: WhalerO <arthurclancy@hotmail.com>
Date: Wed, 11 Mar 2026 09:56:18 +0800
Subject: [PATCH 36/53] fix: preserve provider-specific tool call metadata for
 Gemini

---
 nanobot/agent/loop.py                  | 25 ++++++++----
 nanobot/agent/subagent.py              | 25 ++++++++----
 nanobot/providers/base.py              |  2 +
 nanobot/providers/litellm_provider.py  |  7 ++++
 tests/test_gemini_thought_signature.py | 54 ++++++++++++++++++++++++++
 5 files changed, 97 insertions(+), 16 deletions(-)
 create mode 100644 tests/test_gemini_thought_signature.py

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index fcbc880..147327d 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -208,14 +208,7 @@ class AgentLoop:
                     await on_progress(self._tool_hint(response.tool_calls), tool_hint=True)
 
                 tool_call_dicts = [
-                    {
-                        "id": tc.id,
-                        "type": "function",
-                        "function": {
-                            "name": tc.name,
-                            "arguments": json.dumps(tc.arguments, ensure_ascii=False)
-                        }
-                    }
+                    self._build_tool_call_message(tc)
                     for tc in response.tool_calls
                 ]
                 messages = self.context.add_assistant_message(
@@ -256,6 +249,22 @@ class AgentLoop:
 
         return final_content, tools_used, messages
 
+    @staticmethod
+    def _build_tool_call_message(tc: Any) -> dict[str, Any]:
+        tool_call = {
+                        "id": tc.id,
+                        "type": "function",
+                        "function": {
+                            "name": tc.name,
+                            "arguments": json.dumps(tc.arguments, ensure_ascii=False)
+                        }
+                    }
+        if getattr(tc, "provider_specific_fields", None):
+            tool_call["provider_specific_fields"] = tc.provider_specific_fields
+        if getattr(tc, "function_provider_specific_fields", None):
+            tool_call["function"]["provider_specific_fields"] = tc.function_provider_specific_fields
+        return tool_call
+
     async def run(self) -> None:
         """Run the agent loop, dispatching messages as tasks to stay responsive to /stop."""
         self._running = True
diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py
index f9eda1f..5f98272 100644
--- a/nanobot/agent/subagent.py
+++ b/nanobot/agent/subagent.py
@@ -135,14 +135,7 @@ class SubagentManager:
                 if response.has_tool_calls:
                     # Add assistant message with tool calls
                     tool_call_dicts = [
-                        {
-                            "id": tc.id,
-                            "type": "function",
-                            "function": {
-                                "name": tc.name,
-                                "arguments": json.dumps(tc.arguments, ensure_ascii=False),
-                            },
-                        }
+                        self._build_tool_call_message(tc)
                         for tc in response.tool_calls
                     ]
                     messages.append({
@@ -230,6 +223,22 @@ Stay focused on the assigned task. Your final response will be reported back to
             parts.append(f"## Skills\n\nRead SKILL.md with read_file to use a skill.\n\n{skills_summary}")
 
         return "\n\n".join(parts)
+
+    @staticmethod
+    def _build_tool_call_message(tc: Any) -> dict[str, Any]:
+        tool_call = {
+            "id": tc.id,
+            "type": "function",
+            "function": {
+                "name": tc.name,
+                "arguments": json.dumps(tc.arguments, ensure_ascii=False),
+            },
+        }
+        if getattr(tc, "provider_specific_fields", None):
+            tool_call["provider_specific_fields"] = tc.provider_specific_fields
+        if getattr(tc, "function_provider_specific_fields", None):
+            tool_call["function"]["provider_specific_fields"] = tc.function_provider_specific_fields
+        return tool_call
     
     async def cancel_by_session(self, session_key: str) -> int:
         """Cancel all subagents for the given session. Returns count cancelled."""
diff --git a/nanobot/providers/base.py b/nanobot/providers/base.py
index a3b6c47..b41ce28 100644
--- a/nanobot/providers/base.py
+++ b/nanobot/providers/base.py
@@ -14,6 +14,8 @@ class ToolCallRequest:
     id: str
     name: str
     arguments: dict[str, Any]
+    provider_specific_fields: dict[str, Any] | None = None
+    function_provider_specific_fields: dict[str, Any] | None = None
 
 
 @dataclass
diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py
index cb67635..af91c2f 100644
--- a/nanobot/providers/litellm_provider.py
+++ b/nanobot/providers/litellm_provider.py
@@ -309,10 +309,17 @@ class LiteLLMProvider(LLMProvider):
             if isinstance(args, str):
                 args = json_repair.loads(args)
 
+            provider_specific_fields = getattr(tc, "provider_specific_fields", None) or None
+            function_provider_specific_fields = (
+                getattr(tc.function, "provider_specific_fields", None) or None
+            )
+
             tool_calls.append(ToolCallRequest(
                 id=_short_tool_id(),
                 name=tc.function.name,
                 arguments=args,
+                provider_specific_fields=provider_specific_fields,
+                function_provider_specific_fields=function_provider_specific_fields,
             ))
 
         usage = {}
diff --git a/tests/test_gemini_thought_signature.py b/tests/test_gemini_thought_signature.py
new file mode 100644
index 0000000..db57c7f
--- /dev/null
+++ b/tests/test_gemini_thought_signature.py
@@ -0,0 +1,54 @@
+from types import SimpleNamespace
+
+from nanobot.agent.loop import AgentLoop
+from nanobot.providers.base import ToolCallRequest
+from nanobot.providers.litellm_provider import LiteLLMProvider
+
+
+def test_litellm_parse_response_preserves_tool_call_provider_fields() -> None:
+    provider = LiteLLMProvider(default_model="gemini/gemini-3-flash")
+
+    response = SimpleNamespace(
+        choices=[
+            SimpleNamespace(
+                finish_reason="tool_calls",
+                message=SimpleNamespace(
+                    content=None,
+                    tool_calls=[
+                        SimpleNamespace(
+                            id="call_123",
+                            function=SimpleNamespace(
+                                name="read_file",
+                                arguments='{"path":"todo.md"}',
+                                provider_specific_fields={"inner": "value"},
+                            ),
+                            provider_specific_fields={"thought_signature": "signed-token"},
+                        )
+                    ],
+                ),
+            )
+        ],
+        usage=None,
+    )
+
+    parsed = provider._parse_response(response)
+
+    assert len(parsed.tool_calls) == 1
+    assert parsed.tool_calls[0].provider_specific_fields == {"thought_signature": "signed-token"}
+    assert parsed.tool_calls[0].function_provider_specific_fields == {"inner": "value"}
+
+
+def test_agent_loop_replays_tool_call_provider_fields() -> None:
+    tool_call = ToolCallRequest(
+        id="abc123xyz",
+        name="read_file",
+        arguments={"path": "todo.md"},
+        provider_specific_fields={"thought_signature": "signed-token"},
+        function_provider_specific_fields={"inner": "value"},
+    )
+
+    message = AgentLoop._build_tool_call_message(tool_call)
+
+    assert message["provider_specific_fields"] == {"thought_signature": "signed-token"}
+    assert message["function"]["provider_specific_fields"] == {"inner": "value"}
+    assert message["function"]["arguments"] == '{"path": "todo.md"}'

From 6ef7ab53d089f9b9d25651e37ab0d8c4a3c607a1 Mon Sep 17 00:00:00 2001
From: WhalerO <arthurclancy@hotmail.com>
Date: Wed, 11 Mar 2026 15:01:18 +0800
Subject: [PATCH 37/53] refactor: centralize tool call serialization in
 ToolCallRequest

---
 nanobot/agent/loop.py                  | 18 +-----------------
 nanobot/agent/subagent.py              | 18 +-----------------
 nanobot/providers/base.py              | 17 +++++++++++++++++
 tests/test_gemini_thought_signature.py |  5 ++---
 4 files changed, 21 insertions(+), 37 deletions(-)

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index 147327d..8949844 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -208,7 +208,7 @@ class AgentLoop:
                     await on_progress(self._tool_hint(response.tool_calls), tool_hint=True)
 
                 tool_call_dicts = [
-                    self._build_tool_call_message(tc)
+                    tc.to_openai_tool_call()
                     for tc in response.tool_calls
                 ]
                 messages = self.context.add_assistant_message(
@@ -249,22 +249,6 @@ class AgentLoop:
 
         return final_content, tools_used, messages
 
-    @staticmethod
-    def _build_tool_call_message(tc: Any) -> dict[str, Any]:
-        tool_call = {
-                        "id": tc.id,
-                        "type": "function",
-                        "function": {
-                            "name": tc.name,
-                            "arguments": json.dumps(tc.arguments, ensure_ascii=False)
-                        }
-                    }
-        if getattr(tc, "provider_specific_fields", None):
-            tool_call["provider_specific_fields"] = tc.provider_specific_fields
-        if getattr(tc, "function_provider_specific_fields", None):
-            tool_call["function"]["provider_specific_fields"] = tc.function_provider_specific_fields
-        return tool_call
-
     async def run(self) -> None:
         """Run the agent loop, dispatching messages as tasks to stay responsive to /stop."""
         self._running = True
diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py
index 5f98272..0049f9a 100644
--- a/nanobot/agent/subagent.py
+++ b/nanobot/agent/subagent.py
@@ -135,7 +135,7 @@ class SubagentManager:
                 if response.has_tool_calls:
                     # Add assistant message with tool calls
                     tool_call_dicts = [
-                        self._build_tool_call_message(tc)
+                        tc.to_openai_tool_call()
                         for tc in response.tool_calls
                     ]
                     messages.append({
@@ -224,22 +224,6 @@ Stay focused on the assigned task. Your final response will be reported back to
 
         return "\n\n".join(parts)
 
-    @staticmethod
-    def _build_tool_call_message(tc: Any) -> dict[str, Any]:
-        tool_call = {
-            "id": tc.id,
-            "type": "function",
-            "function": {
-                "name": tc.name,
-                "arguments": json.dumps(tc.arguments, ensure_ascii=False),
-            },
-        }
-        if getattr(tc, "provider_specific_fields", None):
-            tool_call["provider_specific_fields"] = tc.provider_specific_fields
-        if getattr(tc, "function_provider_specific_fields", None):
-            tool_call["function"]["provider_specific_fields"] = tc.function_provider_specific_fields
-        return tool_call
-    
     async def cancel_by_session(self, session_key: str) -> int:
         """Cancel all subagents for the given session. Returns count cancelled."""
         tasks = [self._running_tasks[tid] for tid in self._session_tasks.get(session_key, [])
diff --git a/nanobot/providers/base.py b/nanobot/providers/base.py
index b41ce28..391f903 100644
--- a/nanobot/providers/base.py
+++ b/nanobot/providers/base.py
@@ -1,6 +1,7 @@
 """Base LLM provider interface."""
 
 import asyncio
+import json
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
 from typing import Any
@@ -17,6 +18,22 @@ class ToolCallRequest:
     provider_specific_fields: dict[str, Any] | None = None
     function_provider_specific_fields: dict[str, Any] | None = None
 
+    def to_openai_tool_call(self) -> dict[str, Any]:
+        """Serialize to an OpenAI-style tool_call payload."""
+        tool_call = {
+            "id": self.id,
+            "type": "function",
+            "function": {
+                "name": self.name,
+                "arguments": json.dumps(self.arguments, ensure_ascii=False),
+            },
+        }
+        if self.provider_specific_fields:
+            tool_call["provider_specific_fields"] = self.provider_specific_fields
+        if self.function_provider_specific_fields:
+            tool_call["function"]["provider_specific_fields"] = self.function_provider_specific_fields
+        return tool_call
+
 
 @dataclass
 class LLMResponse:
diff --git a/tests/test_gemini_thought_signature.py b/tests/test_gemini_thought_signature.py
index db57c7f..bc4132c 100644
--- a/tests/test_gemini_thought_signature.py
+++ b/tests/test_gemini_thought_signature.py
@@ -1,6 +1,5 @@
 from types import SimpleNamespace
 
-from nanobot.agent.loop import AgentLoop
 from nanobot.providers.base import ToolCallRequest
 from nanobot.providers.litellm_provider import LiteLLMProvider
 
@@ -38,7 +37,7 @@ def test_litellm_parse_response_preserves_tool_call_provider_fields() -> None:
     assert parsed.tool_calls[0].function_provider_specific_fields == {"inner": "value"}
 
 
-def test_agent_loop_replays_tool_call_provider_fields() -> None:
+def test_tool_call_request_serializes_provider_fields() -> None:
     tool_call = ToolCallRequest(
         id="abc123xyz",
         name="read_file",
@@ -47,7 +46,7 @@ def test_agent_loop_replays_tool_call_provider_fields() -> None:
         function_provider_specific_fields={"inner": "value"},
     )
 
-    message = AgentLoop._build_tool_call_message(tool_call)
+    message = tool_call.to_openai_tool_call()
 
     assert message["provider_specific_fields"] == {"thought_signature": "signed-token"}
     assert message["function"]["provider_specific_fields"] == {"inner": "value"}

From d0b4f0d70d025ba3ffa0a9127b280d8325bb698f Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 07:57:12 +0000
Subject: [PATCH 38/53] feat(wecom): add WeCom channel with SDK pinned to
 GitHub tag v0.1.2

---
 README.md                   | 25 ++++++++++++++-----------
 nanobot/channels/manager.py |  1 -
 nanobot/channels/wecom.py   |  8 ++++----
 nanobot/config/schema.py    |  2 +-
 pyproject.toml              |  4 +++-
 5 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index 5be0ce5..6e8211e 100644
--- a/README.md
+++ b/README.md
@@ -208,7 +208,7 @@ Connect nanobot to your favorite chat platform.
 | **Slack** | Bot token + App-Level token |
 | **Email** | IMAP/SMTP credentials |
 | **QQ** | App ID + App Secret |
-| **Wecom** | Bot ID + App Secret |
+| **Wecom** | Bot ID + Bot Secret |
 
 <details>
 <summary><b>Telegram</b> (Recommended)</summary>
@@ -683,12 +683,17 @@ nanobot gateway
 
 Uses **WebSocket** long connection — no public IP required.
 
-**1. Create a wecom bot**
+**1. Install the optional dependency**
 
-In the client's workspace, click on "Intelligent Robot" to create a robot and choose API mode for creation.
-Select to create in "long connection" mode, and obtain Bot ID and Secret.
+```bash
+pip install nanobot-ai[wecom]
+```
 
-**2. Configure**
+**2. Create a WeCom AI Bot**
+
+Go to the WeCom admin console → Intelligent Robot → Create Robot → select **API mode** with **long connection**. Copy the Bot ID and Secret.
+
+**3. Configure**
 
 ```json
 {
@@ -696,23 +701,21 @@ Select to create in "long connection" mode, and obtain Bot ID and Secret.
     "wecom": {
       "enabled": true,
       "botId": "your_bot_id",
-      "secret": "your_secret",
-      "allowFrom": [
-        "your_id"
-      ]
+      "secret": "your_bot_secret",
+      "allowFrom": ["your_id"]
     }
   }
 }
 ```
 
-**3. Run**
+**4. Run**
 
 ```bash
 nanobot gateway
 ```
 
 > [!TIP]
-> wecom uses WebSocket to receive messages — no webhook or public IP needed!
+> WeCom uses WebSocket to receive messages — no webhook or public IP needed!
 
 </details>
 
diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py
index 369795a..2c5cd3f 100644
--- a/nanobot/channels/manager.py
+++ b/nanobot/channels/manager.py
@@ -156,7 +156,6 @@ class ChannelManager:
                 self.channels["wecom"] = WecomChannel(
                     self.config.channels.wecom,
                     self.bus,
-                    groq_api_key=self.config.providers.groq.api_key,
                 )
                 logger.info("WeCom channel enabled")
             except ImportError as e:
diff --git a/nanobot/channels/wecom.py b/nanobot/channels/wecom.py
index dc97311..1c44451 100644
--- a/nanobot/channels/wecom.py
+++ b/nanobot/channels/wecom.py
@@ -2,6 +2,7 @@
 
 import asyncio
 import importlib.util
+import os
 from collections import OrderedDict
 from typing import Any
 
@@ -36,10 +37,9 @@ class WecomChannel(BaseChannel):
 
     name = "wecom"
 
-    def __init__(self, config: WecomConfig, bus: MessageBus, groq_api_key: str = ""):
+    def __init__(self, config: WecomConfig, bus: MessageBus):
         super().__init__(config, bus)
         self.config: WecomConfig = config
-        self.groq_api_key = groq_api_key
         self._client: Any = None
         self._processed_message_ids: OrderedDict[str, None] = OrderedDict()
         self._loop: asyncio.AbstractEventLoop | None = None
@@ -50,7 +50,7 @@ class WecomChannel(BaseChannel):
     async def start(self) -> None:
         """Start the WeCom bot with WebSocket long connection."""
         if not WECOM_AVAILABLE:
-            logger.error("WeCom SDK not installed. Run: pip install wecom-aibot-sdk-python")
+            logger.error("WeCom SDK not installed. Run: pip install nanobot-ai[wecom]")
             return
 
         if not self.config.bot_id or not self.config.secret:
@@ -213,7 +213,6 @@ class WecomChannel(BaseChannel):
                 if file_url and aes_key:
                     file_path = await self._download_and_save_media(file_url, aes_key, "image")
                     if file_path:
-                        import os
                         filename = os.path.basename(file_path)
                         content_parts.append(f"[image: {filename}]\n[Image: source: {file_path}]")
                     else:
@@ -308,6 +307,7 @@ class WecomChannel(BaseChannel):
             media_dir = get_media_dir("wecom")
             if not filename:
                 filename = fname or f"{media_type}_{hash(file_url) % 100000}"
+            filename = os.path.basename(filename)
 
             file_path = media_dir / filename
             file_path.write_bytes(data)
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index b772d18..bb0d286 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -208,7 +208,7 @@ class WecomConfig(Base):
     secret: str = ""  # Bot Secret from WeCom AI Bot platform
     allow_from: list[str] = Field(default_factory=list)  # Allowed user IDs
     welcome_message: str = ""  # Welcome message for enter_chat event
-    react_emoji: str = "eyes"  # Emoji for message reactions
+
 
 class ChannelsConfig(Base):
     """Configuration for chat channels."""
diff --git a/pyproject.toml b/pyproject.toml
index 0582be6..9868513 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,11 +44,13 @@ dependencies = [
     "json-repair>=0.57.0,<1.0.0",
     "chardet>=3.0.2,<6.0.0",
     "openai>=2.8.0",
-    "wecom-aibot-sdk-python>=0.1.2",
     "tiktoken>=0.12.0,<1.0.0",
 ]
 
 [project.optional-dependencies]
+wecom = [
+    "wecom-aibot-sdk-python @ git+https://github.com/chengyongru/wecom_aibot_sdk.git@v0.1.2",
+]
 matrix = [
     "matrix-nio[e2e]>=0.25.2",
     "mistune>=3.0.0,<4.0.0",

From 7ceddcded643432f0f4b78aa22de7ad107b61f3a Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 08:04:14 +0000
Subject: [PATCH 39/53] fix(wecom): await async disconnect, add SDK attribution
 in README

---
 README.md                 | 7 +++----
 nanobot/channels/wecom.py | 2 +-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 6e8211e..2a49214 100644
--- a/README.md
+++ b/README.md
@@ -681,7 +681,9 @@ nanobot gateway
 <details>
 <summary><b>Wecom (企业微信)</b></summary>
 
-Uses **WebSocket** long connection — no public IP required.
+> This channel uses [wecom-aibot-sdk-python](https://github.com/chengyongru/wecom_aibot_sdk), a community Python port of the official [@wecom/aibot-node-sdk](https://www.npmjs.com/package/@wecom/aibot-node-sdk).
+>
+> Uses **WebSocket** long connection — no public IP required.
 
 **1. Install the optional dependency**
 
@@ -714,9 +716,6 @@ Go to the WeCom admin console → Intelligent Robot → Create Robot → select
 nanobot gateway
 ```
 
-> [!TIP]
-> WeCom uses WebSocket to receive messages — no webhook or public IP needed!
-
 </details>
 
 ## 🌐 Agent Social Network
diff --git a/nanobot/channels/wecom.py b/nanobot/channels/wecom.py
index 1c44451..72be9e2 100644
--- a/nanobot/channels/wecom.py
+++ b/nanobot/channels/wecom.py
@@ -98,7 +98,7 @@ class WecomChannel(BaseChannel):
         """Stop the WeCom bot."""
         self._running = False
         if self._client:
-            self._client.disconnect()
+            await self._client.disconnect()
         logger.info("WeCom bot stopped")
 
     async def _on_connected(self, frame: Any) -> None:

From 486df1ddbd8db4fb248115851254b8fbb03c09f0 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 08:10:38 +0000
Subject: [PATCH 40/53] docs: update table of contents in README

---
 README.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/README.md b/README.md
index 2a49214..ed4e8e7 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,25 @@
 
 📏 Real-time line count: run `bash core_agent_lines.sh` to verify anytime.
 
+## Table of Contents
+
+- [News](#-news)
+- [Key Features](#key-features-of-nanobot)
+- [Architecture](#️-architecture)
+- [Features](#-features)
+- [Install](#-install)
+- [Quick Start](#-quick-start)
+- [Chat Apps](#-chat-apps)
+- [Agent Social Network](#-agent-social-network)
+- [Configuration](#️-configuration)
+- [Multiple Instances](#-multiple-instances)
+- [CLI Reference](#-cli-reference)
+- [Docker](#-docker)
+- [Linux Service](#-linux-service)
+- [Project Structure](#-project-structure)
+- [Contribute & Roadmap](#-contribute--roadmap)
+- [Star History](#-star-history)
+
 ## 📢 News
 
 - **2026-03-08** 🚀 Released **v0.1.4.post4** — a reliability-packed release with safer defaults, better multi-instance support, sturdier MCP, and major channel and provider improvements. Please see [release notes](https://github.com/HKUDS/nanobot/releases/tag/v0.1.4.post4) for details.

From ec87946c04ccf4d453ffea02febcb747139c415c Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 08:11:28 +0000
Subject: [PATCH 41/53] docs: update table of contents position

---
 README.md | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index ed4e8e7..f0e1a6b 100644
--- a/README.md
+++ b/README.md
@@ -18,25 +18,6 @@
 
 📏 Real-time line count: run `bash core_agent_lines.sh` to verify anytime.
 
-## Table of Contents
-
-- [News](#-news)
-- [Key Features](#key-features-of-nanobot)
-- [Architecture](#️-architecture)
-- [Features](#-features)
-- [Install](#-install)
-- [Quick Start](#-quick-start)
-- [Chat Apps](#-chat-apps)
-- [Agent Social Network](#-agent-social-network)
-- [Configuration](#️-configuration)
-- [Multiple Instances](#-multiple-instances)
-- [CLI Reference](#-cli-reference)
-- [Docker](#-docker)
-- [Linux Service](#-linux-service)
-- [Project Structure](#-project-structure)
-- [Contribute & Roadmap](#-contribute--roadmap)
-- [Star History](#-star-history)
-
 ## 📢 News
 
 - **2026-03-08** 🚀 Released **v0.1.4.post4** — a reliability-packed release with safer defaults, better multi-instance support, sturdier MCP, and major channel and provider improvements. Please see [release notes](https://github.com/HKUDS/nanobot/releases/tag/v0.1.4.post4) for details.
@@ -97,6 +78,25 @@
   <img src="nanobot_arch.png" alt="nanobot architecture" width="800">
 </p>
 
+## Table of Contents
+
+- [News](#-news)
+- [Key Features](#key-features-of-nanobot)
+- [Architecture](#️-architecture)
+- [Features](#-features)
+- [Install](#-install)
+- [Quick Start](#-quick-start)
+- [Chat Apps](#-chat-apps)
+- [Agent Social Network](#-agent-social-network)
+- [Configuration](#️-configuration)
+- [Multiple Instances](#-multiple-instances)
+- [CLI Reference](#-cli-reference)
+- [Docker](#-docker)
+- [Linux Service](#-linux-service)
+- [Project Structure](#-project-structure)
+- [Contribute & Roadmap](#-contribute--roadmap)
+- [Star History](#-star-history)
+
 ## ✨ Features
 
 <table align="center">

From 4478838424496b6c233c5402d7fa205f33c683e6 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 08:42:12 +0000
Subject: [PATCH 42/53] fix(pr-1863): complete Ollama provider routing and
 README docs

---
 README.md                | 32 ++++++++++++++++++++++++++++++++
 nano.2091796.save        |  2 ++
 nano.2095802.save        |  2 ++
 nanobot/config/schema.py | 13 +++++++++++--
 tests/test_commands.py   | 29 +++++++++++++++++++++++++++++
 5 files changed, 76 insertions(+), 2 deletions(-)
 create mode 100644 nano.2091796.save
 create mode 100644 nano.2095802.save

diff --git a/README.md b/README.md
index f0e1a6b..8dba2d7 100644
--- a/README.md
+++ b/README.md
@@ -778,6 +778,7 @@ Config file: `~/.nanobot/config.json`
 | `dashscope` | LLM (Qwen) | [dashscope.console.aliyun.com](https://dashscope.console.aliyun.com) |
 | `moonshot` | LLM (Moonshot/Kimi) | [platform.moonshot.cn](https://platform.moonshot.cn) |
 | `zhipu` | LLM (Zhipu GLM) | [open.bigmodel.cn](https://open.bigmodel.cn) |
+| `ollama` | LLM (local, Ollama) | — |
 | `vllm` | LLM (local, any OpenAI-compatible server) | — |
 | `openai_codex` | LLM (Codex, OAuth) | `nanobot provider login openai-codex` |
 | `github_copilot` | LLM (GitHub Copilot, OAuth) | `nanobot provider login github-copilot` |
@@ -843,6 +844,37 @@ Connects directly to any OpenAI-compatible endpoint — LM Studio, llama.cpp, To
 
 </details>
 
+<details>
+<summary><b>Ollama (local)</b></summary>
+
+Run a local model with Ollama, then point nanobot at it in your config:
+
+**1. Start Ollama** (example):
+```bash
+ollama run llama3.2
+```
+
+**2. Add to config** (partial — merge into `~/.nanobot/config.json`):
+```json
+{
+  "providers": {
+    "ollama": {
+      "apiBase": "http://localhost:11434"
+    }
+  },
+  "agents": {
+    "defaults": {
+      "provider": "ollama",
+      "model": "llama3.2"
+    }
+  }
+}
+```
+
+> `provider: "auto"` also works when `providers.ollama.apiBase` is configured, but setting `"provider": "ollama"` is the clearest option.
+
+</details>
+
 <details>
 <summary><b>vLLM (local / OpenAI-compatible)</b></summary>
 
diff --git a/nano.2091796.save b/nano.2091796.save
new file mode 100644
index 0000000..6953168
--- /dev/null
+++ b/nano.2091796.save
@@ -0,0 +1,2 @@
+da activate base
+
diff --git a/nano.2095802.save b/nano.2095802.save
new file mode 100644
index 0000000..6953168
--- /dev/null
+++ b/nano.2095802.save
@@ -0,0 +1,2 @@
+da activate base
+
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index d2ef713..1b26dd7 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -395,6 +395,15 @@ class Config(BaseSettings):
                 if spec.is_oauth or spec.is_local or p.api_key:
                     return p, spec.name
 
+        # Fallback: configured local providers can route models without
+        # provider-specific keywords (for example plain "llama3.2" on Ollama).
+        for spec in PROVIDERS:
+            if not spec.is_local:
+                continue
+            p = getattr(self.providers, spec.name, None)
+            if p and p.api_base:
+                return p, spec.name
+
         # Fallback: gateways first, then others (follows registry order)
         # OAuth providers are NOT valid fallbacks — they require explicit model selection
         for spec in PROVIDERS:
@@ -421,7 +430,7 @@ class Config(BaseSettings):
         return p.api_key if p else None
 
     def get_api_base(self, model: str | None = None) -> str | None:
-        """Get API base URL for the given model. Applies default URLs for known gateways."""
+        """Get API base URL for the given model. Applies default URLs for gateway/local providers."""
         from nanobot.providers.registry import find_by_name
 
         p, name = self._match_provider(model)
@@ -432,7 +441,7 @@ class Config(BaseSettings):
         # to avoid polluting the global litellm.api_base.
         if name:
             spec = find_by_name(name)
-            if spec and spec.is_gateway and spec.default_api_base:
+            if spec and (spec.is_gateway or spec.is_local) and spec.default_api_base:
                 return spec.default_api_base
         return None
 
diff --git a/tests/test_commands.py b/tests/test_commands.py
index 1375a3a..583ef6f 100644
--- a/tests/test_commands.py
+++ b/tests/test_commands.py
@@ -114,6 +114,35 @@ def test_config_matches_openai_codex_with_hyphen_prefix():
     assert config.get_provider_name() == "openai_codex"
 
 
+def test_config_matches_explicit_ollama_prefix_without_api_key():
+    config = Config()
+    config.agents.defaults.model = "ollama/llama3.2"
+
+    assert config.get_provider_name() == "ollama"
+    assert config.get_api_base() == "http://localhost:11434"
+
+
+def test_config_explicit_ollama_provider_uses_default_localhost_api_base():
+    config = Config()
+    config.agents.defaults.provider = "ollama"
+    config.agents.defaults.model = "llama3.2"
+
+    assert config.get_provider_name() == "ollama"
+    assert config.get_api_base() == "http://localhost:11434"
+
+
+def test_config_auto_detects_ollama_from_local_api_base():
+    config = Config.model_validate(
+        {
+            "agents": {"defaults": {"provider": "auto", "model": "llama3.2"}},
+            "providers": {"ollama": {"apiBase": "http://localhost:11434"}},
+        }
+    )
+
+    assert config.get_provider_name() == "ollama"
+    assert config.get_api_base() == "http://localhost:11434"
+
+
 def test_find_by_model_prefers_explicit_prefix_over_generic_codex_keyword():
     spec = find_by_model("github-copilot/gpt-5.3-codex")
 

From 89eff6f573d52af025ae9cb7e9db6ea8a0ad698f Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 08:44:38 +0000
Subject: [PATCH 43/53] chore: remove stray nano backup files

---
 .gitignore        | 1 +
 nano.2091796.save | 2 --
 nano.2095802.save | 2 --
 3 files changed, 1 insertion(+), 4 deletions(-)
 delete mode 100644 nano.2091796.save
 delete mode 100644 nano.2095802.save

diff --git a/.gitignore b/.gitignore
index 374875a..c50cab8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,4 +20,5 @@ __pycache__/
 poetry.lock
 .pytest_cache/
 botpy.log
+nano.*.save
 
diff --git a/nano.2091796.save b/nano.2091796.save
deleted file mode 100644
index 6953168..0000000
--- a/nano.2091796.save
+++ /dev/null
@@ -1,2 +0,0 @@
-da activate base
-
diff --git a/nano.2095802.save b/nano.2095802.save
deleted file mode 100644
index 6953168..0000000
--- a/nano.2095802.save
+++ /dev/null
@@ -1,2 +0,0 @@
-da activate base
-

From c72c2ce7e2b84fda1fd5933fc28d90137f936d03 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 09:47:04 +0000
Subject: [PATCH 44/53] refactor: move generation settings to provider level,
 eliminate parameter passthrough

---
 nanobot/agent/loop.py                    |  15 ---
 nanobot/agent/memory.py                  |  22 +---
 nanobot/agent/subagent.py                |   9 --
 nanobot/cli/commands.py                  |  57 +++++----
 nanobot/providers/base.py                |  38 +++++-
 tests/test_memory_consolidation_types.py |  23 ++++
 tests/test_provider_retry.py             |  35 +++++-
 tests/test_subagent_reasoning.py         | 144 -----------------------
 8 files changed, 120 insertions(+), 223 deletions(-)
 delete mode 100644 tests/test_subagent_reasoning.py

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index edf1e8e..b1bfd2f 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -52,9 +52,6 @@ class AgentLoop:
         workspace: Path,
         model: str | None = None,
         max_iterations: int = 40,
-        temperature: float = 0.1,
-        max_tokens: int = 4096,
-        reasoning_effort: str | None = None,
         context_window_tokens: int = 65_536,
         brave_api_key: str | None = None,
         web_proxy: str | None = None,
@@ -72,9 +69,6 @@ class AgentLoop:
         self.workspace = workspace
         self.model = model or provider.get_default_model()
         self.max_iterations = max_iterations
-        self.temperature = temperature
-        self.max_tokens = max_tokens
-        self.reasoning_effort = reasoning_effort
         self.context_window_tokens = context_window_tokens
         self.brave_api_key = brave_api_key
         self.web_proxy = web_proxy
@@ -90,9 +84,6 @@ class AgentLoop:
             workspace=workspace,
             bus=bus,
             model=self.model,
-            temperature=self.temperature,
-            max_tokens=self.max_tokens,
-            reasoning_effort=reasoning_effort,
             brave_api_key=brave_api_key,
             web_proxy=web_proxy,
             exec_config=self.exec_config,
@@ -114,9 +105,6 @@ class AgentLoop:
             context_window_tokens=context_window_tokens,
             build_messages=self.context.build_messages,
             get_tool_definitions=self.tools.get_definitions,
-            temperature=self.temperature,
-            max_tokens=self.max_tokens,
-            reasoning_effort=self.reasoning_effort,
         )
         self._register_default_tools()
 
@@ -205,9 +193,6 @@ class AgentLoop:
                 messages=messages,
                 tools=tool_defs,
                 model=self.model,
-                temperature=self.temperature,
-                max_tokens=self.max_tokens,
-                reasoning_effort=self.reasoning_effort,
             )
 
             if response.has_tool_calls:
diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py
index d79887b..59ba40e 100644
--- a/nanobot/agent/memory.py
+++ b/nanobot/agent/memory.py
@@ -57,7 +57,6 @@ def _normalize_save_memory_args(args: Any) -> dict[str, Any] | None:
         return args[0] if args and isinstance(args[0], dict) else None
     return args if isinstance(args, dict) else None
 
-
 class MemoryStore:
     """Two-layer memory: MEMORY.md (long-term facts) + HISTORY.md (grep-searchable log)."""
 
@@ -99,9 +98,6 @@ class MemoryStore:
         messages: list[dict],
         provider: LLMProvider,
         model: str,
-        temperature: float | None = None,
-        max_tokens: int | None = None,
-        reasoning_effort: str | None = None,
     ) -> bool:
         """Consolidate the provided message chunk into MEMORY.md + HISTORY.md."""
         if not messages:
@@ -124,9 +120,6 @@ class MemoryStore:
                 ],
                 tools=_SAVE_MEMORY_TOOL,
                 model=model,
-                temperature=temperature,
-                max_tokens=max_tokens,
-                reasoning_effort=reasoning_effort,
             )
 
             if not response.has_tool_calls:
@@ -166,9 +159,6 @@ class MemoryConsolidator:
         context_window_tokens: int,
         build_messages: Callable[..., list[dict[str, Any]]],
         get_tool_definitions: Callable[[], list[dict[str, Any]]],
-        temperature: float | None = None,
-        max_tokens: int | None = None,
-        reasoning_effort: str | None = None,
     ):
         self.store = MemoryStore(workspace)
         self.provider = provider
@@ -177,9 +167,6 @@ class MemoryConsolidator:
         self.context_window_tokens = context_window_tokens
         self._build_messages = build_messages
         self._get_tool_definitions = get_tool_definitions
-        self._temperature = temperature
-        self._max_tokens = max_tokens
-        self._reasoning_effort = reasoning_effort
         self._locks: weakref.WeakValueDictionary[str, asyncio.Lock] = weakref.WeakValueDictionary()
 
     def get_lock(self, session_key: str) -> asyncio.Lock:
@@ -188,14 +175,7 @@ class MemoryConsolidator:
 
     async def consolidate_messages(self, messages: list[dict[str, object]]) -> bool:
         """Archive a selected message chunk into persistent memory."""
-        return await self.store.consolidate(
-            messages,
-            self.provider,
-            self.model,
-            temperature=self._temperature,
-            max_tokens=self._max_tokens,
-            reasoning_effort=self._reasoning_effort,
-        )
+        return await self.store.consolidate(messages, self.provider, self.model)
 
     def pick_consolidation_boundary(
         self,
diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py
index eff0b4f..21b8b32 100644
--- a/nanobot/agent/subagent.py
+++ b/nanobot/agent/subagent.py
@@ -28,9 +28,6 @@ class SubagentManager:
         workspace: Path,
         bus: MessageBus,
         model: str | None = None,
-        temperature: float = 0.7,
-        max_tokens: int = 4096,
-        reasoning_effort: str | None = None,
         brave_api_key: str | None = None,
         web_proxy: str | None = None,
         exec_config: "ExecToolConfig | None" = None,
@@ -41,9 +38,6 @@ class SubagentManager:
         self.workspace = workspace
         self.bus = bus
         self.model = model or provider.get_default_model()
-        self.temperature = temperature
-        self.max_tokens = max_tokens
-        self.reasoning_effort = reasoning_effort
         self.brave_api_key = brave_api_key
         self.web_proxy = web_proxy
         self.exec_config = exec_config or ExecToolConfig()
@@ -128,9 +122,6 @@ class SubagentManager:
                     messages=messages,
                     tools=tools.get_definitions(),
                     model=self.model,
-                    temperature=self.temperature,
-                    max_tokens=self.max_tokens,
-                    reasoning_effort=self.reasoning_effort,
                 )
 
                 if response.has_tool_calls:
diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index 8387b28..f5ac859 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -215,6 +215,7 @@ def onboard():
 
 def _make_provider(config: Config):
     """Create the appropriate LLM provider from config."""
+    from nanobot.providers.base import GenerationSettings
     from nanobot.providers.openai_codex_provider import OpenAICodexProvider
     from nanobot.providers.azure_openai_provider import AzureOpenAIProvider
 
@@ -224,46 +225,50 @@ def _make_provider(config: Config):
 
     # OpenAI Codex (OAuth)
     if provider_name == "openai_codex" or model.startswith("openai-codex/"):
-        return OpenAICodexProvider(default_model=model)
-
+        provider = OpenAICodexProvider(default_model=model)
     # Custom: direct OpenAI-compatible endpoint, bypasses LiteLLM
-    from nanobot.providers.custom_provider import CustomProvider
-    if provider_name == "custom":
-        return CustomProvider(
+    elif provider_name == "custom":
+        from nanobot.providers.custom_provider import CustomProvider
+        provider = CustomProvider(
             api_key=p.api_key if p else "no-key",
             api_base=config.get_api_base(model) or "http://localhost:8000/v1",
             default_model=model,
         )
-
     # Azure OpenAI: direct Azure OpenAI endpoint with deployment name
-    if provider_name == "azure_openai":
+    elif provider_name == "azure_openai":
         if not p or not p.api_key or not p.api_base:
             console.print("[red]Error: Azure OpenAI requires api_key and api_base.[/red]")
             console.print("Set them in ~/.nanobot/config.json under providers.azure_openai section")
             console.print("Use the model field to specify the deployment name.")
             raise typer.Exit(1)
-        
-        return AzureOpenAIProvider(
+        provider = AzureOpenAIProvider(
             api_key=p.api_key,
             api_base=p.api_base,
             default_model=model,
         )
+    else:
+        from nanobot.providers.litellm_provider import LiteLLMProvider
+        from nanobot.providers.registry import find_by_name
+        spec = find_by_name(provider_name)
+        if not model.startswith("bedrock/") and not (p and p.api_key) and not (spec and (spec.is_oauth or spec.is_local)):
+            console.print("[red]Error: No API key configured.[/red]")
+            console.print("Set one in ~/.nanobot/config.json under providers section")
+            raise typer.Exit(1)
+        provider = LiteLLMProvider(
+            api_key=p.api_key if p else None,
+            api_base=config.get_api_base(model),
+            default_model=model,
+            extra_headers=p.extra_headers if p else None,
+            provider_name=provider_name,
+        )
 
-    from nanobot.providers.litellm_provider import LiteLLMProvider
-    from nanobot.providers.registry import find_by_name
-    spec = find_by_name(provider_name)
-    if not model.startswith("bedrock/") and not (p and p.api_key) and not (spec and (spec.is_oauth or spec.is_local)):
-        console.print("[red]Error: No API key configured.[/red]")
-        console.print("Set one in ~/.nanobot/config.json under providers section")
-        raise typer.Exit(1)
-
-    return LiteLLMProvider(
-        api_key=p.api_key if p else None,
-        api_base=config.get_api_base(model),
-        default_model=model,
-        extra_headers=p.extra_headers if p else None,
-        provider_name=provider_name,
+    defaults = config.agents.defaults
+    provider.generation = GenerationSettings(
+        temperature=defaults.temperature,
+        max_tokens=defaults.max_tokens,
+        reasoning_effort=defaults.reasoning_effort,
     )
+    return provider
 
 
 def _load_runtime_config(config: str | None = None, workspace: str | None = None) -> Config:
@@ -341,10 +346,7 @@ def gateway(
         provider=provider,
         workspace=config.workspace_path,
         model=config.agents.defaults.model,
-        temperature=config.agents.defaults.temperature,
-        max_tokens=config.agents.defaults.max_tokens,
         max_iterations=config.agents.defaults.max_tool_iterations,
-        reasoning_effort=config.agents.defaults.reasoning_effort,
         context_window_tokens=config.agents.defaults.context_window_tokens,
         brave_api_key=config.tools.web.search.api_key or None,
         web_proxy=config.tools.web.proxy or None,
@@ -527,10 +529,7 @@ def agent(
         provider=provider,
         workspace=config.workspace_path,
         model=config.agents.defaults.model,
-        temperature=config.agents.defaults.temperature,
-        max_tokens=config.agents.defaults.max_tokens,
         max_iterations=config.agents.defaults.max_tool_iterations,
-        reasoning_effort=config.agents.defaults.reasoning_effort,
         context_window_tokens=config.agents.defaults.context_window_tokens,
         brave_api_key=config.tools.web.search.api_key or None,
         web_proxy=config.tools.web.proxy or None,
diff --git a/nanobot/providers/base.py b/nanobot/providers/base.py
index a3b6c47..d4ea60d 100644
--- a/nanobot/providers/base.py
+++ b/nanobot/providers/base.py
@@ -32,6 +32,21 @@ class LLMResponse:
         return len(self.tool_calls) > 0
 
 
+@dataclass(frozen=True)
+class GenerationSettings:
+    """Default generation parameters for LLM calls.
+
+    Stored on the provider so every call site inherits the same defaults
+    without having to pass temperature / max_tokens / reasoning_effort
+    through every layer.  Individual call sites can still override by
+    passing explicit keyword arguments to chat() / chat_with_retry().
+    """
+
+    temperature: float = 0.7
+    max_tokens: int = 4096
+    reasoning_effort: str | None = None
+
+
 class LLMProvider(ABC):
     """
     Abstract base class for LLM providers.
@@ -56,9 +71,12 @@ class LLMProvider(ABC):
         "temporarily unavailable",
     )
 
+    _SENTINEL = object()
+
     def __init__(self, api_key: str | None = None, api_base: str | None = None):
         self.api_key = api_key
         self.api_base = api_base
+        self.generation: GenerationSettings = GenerationSettings()
 
     @staticmethod
     def _sanitize_empty_content(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
@@ -155,11 +173,23 @@ class LLMProvider(ABC):
         messages: list[dict[str, Any]],
         tools: list[dict[str, Any]] | None = None,
         model: str | None = None,
-        max_tokens: int = 4096,
-        temperature: float = 0.7,
-        reasoning_effort: str | None = None,
+        max_tokens: object = _SENTINEL,
+        temperature: object = _SENTINEL,
+        reasoning_effort: object = _SENTINEL,
     ) -> LLMResponse:
-        """Call chat() with retry on transient provider failures."""
+        """Call chat() with retry on transient provider failures.
+
+        Parameters default to ``self.generation`` when not explicitly passed,
+        so callers no longer need to thread temperature / max_tokens /
+        reasoning_effort through every layer.
+        """
+        if max_tokens is self._SENTINEL:
+            max_tokens = self.generation.max_tokens
+        if temperature is self._SENTINEL:
+            temperature = self.generation.temperature
+        if reasoning_effort is self._SENTINEL:
+            reasoning_effort = self.generation.reasoning_effort
+
         for attempt, delay in enumerate(self._CHAT_RETRY_DELAYS, start=1):
             try:
                 response = await self.chat(
diff --git a/tests/test_memory_consolidation_types.py b/tests/test_memory_consolidation_types.py
index 0263f01..69be858 100644
--- a/tests/test_memory_consolidation_types.py
+++ b/tests/test_memory_consolidation_types.py
@@ -265,3 +265,26 @@ class TestMemoryConsolidationTypeHandling:
         assert result is True
         assert provider.calls == 2
         assert delays == [1]
+
+    @pytest.mark.asyncio
+    async def test_consolidation_delegates_to_provider_defaults(self, tmp_path: Path) -> None:
+        """Consolidation no longer passes generation params — the provider owns them."""
+        store = MemoryStore(tmp_path)
+        provider = AsyncMock()
+        provider.chat_with_retry = AsyncMock(
+            return_value=_make_tool_response(
+                history_entry="[2026-01-01] User discussed testing.",
+                memory_update="# Memory\nUser likes testing.",
+            )
+        )
+        messages = _make_messages(message_count=60)
+
+        result = await store.consolidate(messages, provider, "test-model")
+
+        assert result is True
+        provider.chat_with_retry.assert_awaited_once()
+        _, kwargs = provider.chat_with_retry.await_args
+        assert kwargs["model"] == "test-model"
+        assert "temperature" not in kwargs
+        assert "max_tokens" not in kwargs
+        assert "reasoning_effort" not in kwargs
diff --git a/tests/test_provider_retry.py b/tests/test_provider_retry.py
index 751ecc3..2420399 100644
--- a/tests/test_provider_retry.py
+++ b/tests/test_provider_retry.py
@@ -2,7 +2,7 @@ import asyncio
 
 import pytest
 
-from nanobot.providers.base import LLMProvider, LLMResponse
+from nanobot.providers.base import GenerationSettings, LLMProvider, LLMResponse
 
 
 class ScriptedProvider(LLMProvider):
@@ -10,9 +10,11 @@ class ScriptedProvider(LLMProvider):
         super().__init__()
         self._responses = list(responses)
         self.calls = 0
+        self.last_kwargs: dict = {}
 
     async def chat(self, *args, **kwargs) -> LLMResponse:
         self.calls += 1
+        self.last_kwargs = kwargs
         response = self._responses.pop(0)
         if isinstance(response, BaseException):
             raise response
@@ -90,3 +92,34 @@ async def test_chat_with_retry_preserves_cancelled_error() -> None:
 
     with pytest.raises(asyncio.CancelledError):
         await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}])
+
+
+@pytest.mark.asyncio
+async def test_chat_with_retry_uses_provider_generation_defaults() -> None:
+    """When callers omit generation params, provider.generation defaults are used."""
+    provider = ScriptedProvider([LLMResponse(content="ok")])
+    provider.generation = GenerationSettings(temperature=0.2, max_tokens=321, reasoning_effort="high")
+
+    await provider.chat_with_retry(messages=[{"role": "user", "content": "hello"}])
+
+    assert provider.last_kwargs["temperature"] == 0.2
+    assert provider.last_kwargs["max_tokens"] == 321
+    assert provider.last_kwargs["reasoning_effort"] == "high"
+
+
+@pytest.mark.asyncio
+async def test_chat_with_retry_explicit_override_beats_defaults() -> None:
+    """Explicit kwargs should override provider.generation defaults."""
+    provider = ScriptedProvider([LLMResponse(content="ok")])
+    provider.generation = GenerationSettings(temperature=0.2, max_tokens=321, reasoning_effort="high")
+
+    await provider.chat_with_retry(
+        messages=[{"role": "user", "content": "hello"}],
+        temperature=0.9,
+        max_tokens=9999,
+        reasoning_effort="low",
+    )
+
+    assert provider.last_kwargs["temperature"] == 0.9
+    assert provider.last_kwargs["max_tokens"] == 9999
+    assert provider.last_kwargs["reasoning_effort"] == "low"
diff --git a/tests/test_subagent_reasoning.py b/tests/test_subagent_reasoning.py
deleted file mode 100644
index 5e70506..0000000
--- a/tests/test_subagent_reasoning.py
+++ /dev/null
@@ -1,144 +0,0 @@
-"""Tests for subagent reasoning_content and thinking_blocks handling."""
-
-from __future__ import annotations
-
-import asyncio
-from pathlib import Path
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-
-
-class TestSubagentReasoningContent:
-    """Test that subagent properly handles reasoning_content and thinking_blocks."""
-
-    @pytest.mark.asyncio
-    async def test_subagent_message_includes_reasoning_content(self):
-        """Verify reasoning_content is included in assistant messages with tool calls.
-
-        This is the fix for issue #1834: Spawn/subagent tool fails with
-        Deepseek Reasoner due to missing reasoning_content field.
-        """
-        from nanobot.agent.subagent import SubagentManager
-        from nanobot.bus.queue import MessageBus
-        from nanobot.providers.base import LLMResponse, ToolCallRequest
-
-        bus = MessageBus()
-        provider = MagicMock()
-        provider.get_default_model.return_value = "deepseek-reasoner"
-
-        # Create a real Path object for workspace
-        workspace = Path("/tmp/test_workspace")
-        workspace.mkdir(parents=True, exist_ok=True)
-
-        # Capture messages that are sent to the provider
-        captured_messages = []
-
-        async def mock_chat(*args, **kwargs):
-            captured_messages.append(kwargs.get("messages", []))
-            # Return response with tool calls and reasoning_content
-            tool_call = ToolCallRequest(
-                id="test-1",
-                name="read_file",
-                arguments={"path": "/test.txt"},
-            )
-            return LLMResponse(
-                content="",
-                tool_calls=[tool_call],
-                reasoning_content="I need to read this file first",
-            )
-
-        provider.chat_with_retry = AsyncMock(side_effect=mock_chat)
-
-        mgr = SubagentManager(provider=provider, workspace=workspace, bus=bus)
-
-        # Mock the tools registry
-        with patch("nanobot.agent.subagent.ToolRegistry") as MockToolRegistry:
-            mock_registry = MagicMock()
-            mock_registry.get_definitions.return_value = []
-            mock_registry.execute = AsyncMock(return_value="file content")
-            MockToolRegistry.return_value = mock_registry
-
-            result = await mgr.spawn(
-                task="Read a file",
-                label="test",
-                origin_channel="cli",
-                origin_chat_id="direct",
-                session_key="cli:direct",
-            )
-
-            # Wait for the task to complete
-            await asyncio.sleep(0.5)
-
-        # Check the captured messages
-        assert len(captured_messages) >= 1
-        # Find the assistant message with tool_calls
-        found = False
-        for msg_list in captured_messages:
-            for msg in msg_list:
-                if msg.get("role") == "assistant" and msg.get("tool_calls"):
-                    assert "reasoning_content" in msg, "reasoning_content should be in assistant message with tool_calls"
-                    assert msg["reasoning_content"] == "I need to read this file first"
-                    found = True
-        assert found, "Should have found an assistant message with tool_calls"
-
-    @pytest.mark.asyncio
-    async def test_subagent_message_includes_thinking_blocks(self):
-        """Verify thinking_blocks is included in assistant messages with tool calls."""
-        from nanobot.agent.subagent import SubagentManager
-        from nanobot.bus.queue import MessageBus
-        from nanobot.providers.base import LLMResponse, ToolCallRequest
-
-        bus = MessageBus()
-        provider = MagicMock()
-        provider.get_default_model.return_value = "claude-sonnet"
-
-        workspace = Path("/tmp/test_workspace2")
-        workspace.mkdir(parents=True, exist_ok=True)
-
-        captured_messages = []
-
-        async def mock_chat(*args, **kwargs):
-            captured_messages.append(kwargs.get("messages", []))
-            tool_call = ToolCallRequest(
-                id="test-2",
-                name="read_file",
-                arguments={"path": "/test.txt"},
-            )
-            return LLMResponse(
-                content="",
-                tool_calls=[tool_call],
-                thinking_blocks=[
-                    {"signature": "sig1", "thought": "thinking step 1"},
-                    {"signature": "sig2", "thought": "thinking step 2"},
-                ],
-            )
-
-        provider.chat_with_retry = AsyncMock(side_effect=mock_chat)
-
-        mgr = SubagentManager(provider=provider, workspace=workspace, bus=bus)
-
-        with patch("nanobot.agent.subagent.ToolRegistry") as MockToolRegistry:
-            mock_registry = MagicMock()
-            mock_registry.get_definitions.return_value = []
-            mock_registry.execute = AsyncMock(return_value="file content")
-            MockToolRegistry.return_value = mock_registry
-
-            result = await mgr.spawn(
-                task="Read a file",
-                label="test",
-                origin_channel="cli",
-                origin_chat_id="direct",
-            )
-
-            await asyncio.sleep(0.5)
-
-        # Check the captured messages
-        found = False
-        for msg_list in captured_messages:
-            for msg in msg_list:
-                if msg.get("role") == "assistant" and msg.get("tool_calls"):
-                    assert "thinking_blocks" in msg, "thinking_blocks should be in assistant message with tool_calls"
-                    assert len(msg["thinking_blocks"]) == 2
-                    found = True
-        assert found, "Should have found an assistant message with tool_calls"

From 2c5226550d0083ceb41cf4042925682753e2adb5 Mon Sep 17 00:00:00 2001
From: for13to1 <for13to1@outlook.com>
Date: Wed, 11 Mar 2026 20:35:04 +0800
Subject: [PATCH 45/53] feat: allow direct references in hatch metadata for
 wecom dep

---
 pyproject.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 9868513..a52c0c9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -72,6 +72,9 @@ nanobot = "nanobot.cli.commands:app"
 requires = ["hatchling"]
 build-backend = "hatchling.build"
 
+[tool.hatch.metadata]
+allow-direct-references = true
+
 [tool.hatch.build.targets.wheel]
 packages = ["nanobot"]
 

From 254cfd48babf74cca4bbe7baedda7b540b897cbb Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 14:23:19 +0000
Subject: [PATCH 46/53] refactor: auto-discover channels via pkgutil, eliminate
 hardcoded registry

---
 nanobot/channels/base.py     |  18 +++++
 nanobot/channels/dingtalk.py |   1 +
 nanobot/channels/discord.py  |   1 +
 nanobot/channels/email.py    |   1 +
 nanobot/channels/feishu.py   |  18 ++---
 nanobot/channels/manager.py  | 140 ++++-------------------------------
 nanobot/channels/matrix.py   |  18 +++--
 nanobot/channels/mochat.py   |   1 +
 nanobot/channels/qq.py       |   1 +
 nanobot/channels/registry.py |  35 +++++++++
 nanobot/channels/slack.py    |   1 +
 nanobot/channels/telegram.py |  16 +---
 nanobot/channels/wecom.py    |   1 +
 nanobot/channels/whatsapp.py |   1 +
 nanobot/cli/commands.py      |  91 ++++-------------------
 15 files changed, 111 insertions(+), 233 deletions(-)
 create mode 100644 nanobot/channels/registry.py

diff --git a/nanobot/channels/base.py b/nanobot/channels/base.py
index dc53ba4..74c540a 100644
--- a/nanobot/channels/base.py
+++ b/nanobot/channels/base.py
@@ -1,6 +1,9 @@
 """Base channel interface for chat platforms."""
 
+from __future__ import annotations
+
 from abc import ABC, abstractmethod
+from pathlib import Path
 from typing import Any
 
 from loguru import logger
@@ -18,6 +21,8 @@ class BaseChannel(ABC):
     """
 
     name: str = "base"
+    display_name: str = "Base"
+    transcription_api_key: str = ""
 
     def __init__(self, config: Any, bus: MessageBus):
         """
@@ -31,6 +36,19 @@ class BaseChannel(ABC):
         self.bus = bus
         self._running = False
 
+    async def transcribe_audio(self, file_path: str | Path) -> str:
+        """Transcribe an audio file via Groq Whisper. Returns empty string if no API key is set or on failure."""
+        if not self.transcription_api_key:
+            return ""
+        try:
+            from nanobot.providers.transcription import GroqTranscriptionProvider
+
+            provider = GroqTranscriptionProvider(api_key=self.transcription_api_key)
+            return await provider.transcribe(file_path)
+        except Exception as e:
+            logger.warning("{}: audio transcription failed: {}", self.name, e)
+            return ""
+
     @abstractmethod
     async def start(self) -> None:
         """
diff --git a/nanobot/channels/dingtalk.py b/nanobot/channels/dingtalk.py
index cdcba57..4626d95 100644
--- a/nanobot/channels/dingtalk.py
+++ b/nanobot/channels/dingtalk.py
@@ -114,6 +114,7 @@ class DingTalkChannel(BaseChannel):
     """
 
     name = "dingtalk"
+    display_name = "DingTalk"
     _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp"}
     _AUDIO_EXTS = {".amr", ".mp3", ".wav", ".ogg", ".m4a", ".aac"}
     _VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm"}
diff --git a/nanobot/channels/discord.py b/nanobot/channels/discord.py
index 2ee4f77..afa20c9 100644
--- a/nanobot/channels/discord.py
+++ b/nanobot/channels/discord.py
@@ -25,6 +25,7 @@ class DiscordChannel(BaseChannel):
     """Discord channel using Gateway websocket."""
 
     name = "discord"
+    display_name = "Discord"
 
     def __init__(self, config: DiscordConfig, bus: MessageBus):
         super().__init__(config, bus)
diff --git a/nanobot/channels/email.py b/nanobot/channels/email.py
index 16771fb..46c2103 100644
--- a/nanobot/channels/email.py
+++ b/nanobot/channels/email.py
@@ -35,6 +35,7 @@ class EmailChannel(BaseChannel):
     """
 
     name = "email"
+    display_name = "Email"
     _IMAP_MONTHS = (
         "Jan",
         "Feb",
diff --git a/nanobot/channels/feishu.py b/nanobot/channels/feishu.py
index 0409c32..160b9b4 100644
--- a/nanobot/channels/feishu.py
+++ b/nanobot/channels/feishu.py
@@ -244,11 +244,11 @@ class FeishuChannel(BaseChannel):
     """
 
     name = "feishu"
+    display_name = "Feishu"
 
-    def __init__(self, config: FeishuConfig, bus: MessageBus, groq_api_key: str = ""):
+    def __init__(self, config: FeishuConfig, bus: MessageBus):
         super().__init__(config, bus)
         self.config: FeishuConfig = config
-        self.groq_api_key = groq_api_key
         self._client: Any = None
         self._ws_client: Any = None
         self._ws_thread: threading.Thread | None = None
@@ -928,16 +928,10 @@ class FeishuChannel(BaseChannel):
                 if file_path:
                     media_paths.append(file_path)
 
-                # Transcribe audio using Groq Whisper
-                if msg_type == "audio" and file_path and self.groq_api_key:
-                    try:
-                        from nanobot.providers.transcription import GroqTranscriptionProvider
-                        transcriber = GroqTranscriptionProvider(api_key=self.groq_api_key)
-                        transcription = await transcriber.transcribe(file_path)
-                        if transcription:
-                            content_text = f"[transcription: {transcription}]"
-                    except Exception as e:
-                        logger.warning("Failed to transcribe audio: {}", e)
+                if msg_type == "audio" and file_path:
+                    transcription = await self.transcribe_audio(file_path)
+                    if transcription:
+                        content_text = f"[transcription: {transcription}]"
 
                 content_parts.append(content_text)
 
diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py
index 2c5cd3f..8288ad0 100644
--- a/nanobot/channels/manager.py
+++ b/nanobot/channels/manager.py
@@ -31,135 +31,23 @@ class ChannelManager:
         self._init_channels()
 
     def _init_channels(self) -> None:
-        """Initialize channels based on config."""
+        """Initialize channels discovered via pkgutil scan."""
+        from nanobot.channels.registry import discover_channel_names, load_channel_class
 
-        # Telegram channel
-        if self.config.channels.telegram.enabled:
+        groq_key = self.config.providers.groq.api_key
+
+        for modname in discover_channel_names():
+            section = getattr(self.config.channels, modname, None)
+            if not section or not getattr(section, "enabled", False):
+                continue
             try:
-                from nanobot.channels.telegram import TelegramChannel
-                self.channels["telegram"] = TelegramChannel(
-                    self.config.channels.telegram,
-                    self.bus,
-                    groq_api_key=self.config.providers.groq.api_key,
-                )
-                logger.info("Telegram channel enabled")
+                cls = load_channel_class(modname)
+                channel = cls(section, self.bus)
+                channel.transcription_api_key = groq_key
+                self.channels[modname] = channel
+                logger.info("{} channel enabled", cls.display_name)
             except ImportError as e:
-                logger.warning("Telegram channel not available: {}", e)
-
-        # WhatsApp channel
-        if self.config.channels.whatsapp.enabled:
-            try:
-                from nanobot.channels.whatsapp import WhatsAppChannel
-                self.channels["whatsapp"] = WhatsAppChannel(
-                    self.config.channels.whatsapp, self.bus
-                )
-                logger.info("WhatsApp channel enabled")
-            except ImportError as e:
-                logger.warning("WhatsApp channel not available: {}", e)
-
-        # Discord channel
-        if self.config.channels.discord.enabled:
-            try:
-                from nanobot.channels.discord import DiscordChannel
-                self.channels["discord"] = DiscordChannel(
-                    self.config.channels.discord, self.bus
-                )
-                logger.info("Discord channel enabled")
-            except ImportError as e:
-                logger.warning("Discord channel not available: {}", e)
-
-        # Feishu channel
-        if self.config.channels.feishu.enabled:
-            try:
-                from nanobot.channels.feishu import FeishuChannel
-                self.channels["feishu"] = FeishuChannel(
-                    self.config.channels.feishu, self.bus,
-                    groq_api_key=self.config.providers.groq.api_key,
-                )
-                logger.info("Feishu channel enabled")
-            except ImportError as e:
-                logger.warning("Feishu channel not available: {}", e)
-
-        # Mochat channel
-        if self.config.channels.mochat.enabled:
-            try:
-                from nanobot.channels.mochat import MochatChannel
-
-                self.channels["mochat"] = MochatChannel(
-                    self.config.channels.mochat, self.bus
-                )
-                logger.info("Mochat channel enabled")
-            except ImportError as e:
-                logger.warning("Mochat channel not available: {}", e)
-
-        # DingTalk channel
-        if self.config.channels.dingtalk.enabled:
-            try:
-                from nanobot.channels.dingtalk import DingTalkChannel
-                self.channels["dingtalk"] = DingTalkChannel(
-                    self.config.channels.dingtalk, self.bus
-                )
-                logger.info("DingTalk channel enabled")
-            except ImportError as e:
-                logger.warning("DingTalk channel not available: {}", e)
-
-        # Email channel
-        if self.config.channels.email.enabled:
-            try:
-                from nanobot.channels.email import EmailChannel
-                self.channels["email"] = EmailChannel(
-                    self.config.channels.email, self.bus
-                )
-                logger.info("Email channel enabled")
-            except ImportError as e:
-                logger.warning("Email channel not available: {}", e)
-
-        # Slack channel
-        if self.config.channels.slack.enabled:
-            try:
-                from nanobot.channels.slack import SlackChannel
-                self.channels["slack"] = SlackChannel(
-                    self.config.channels.slack, self.bus
-                )
-                logger.info("Slack channel enabled")
-            except ImportError as e:
-                logger.warning("Slack channel not available: {}", e)
-
-        # QQ channel
-        if self.config.channels.qq.enabled:
-            try:
-                from nanobot.channels.qq import QQChannel
-                self.channels["qq"] = QQChannel(
-                    self.config.channels.qq,
-                    self.bus,
-                )
-                logger.info("QQ channel enabled")
-            except ImportError as e:
-                logger.warning("QQ channel not available: {}", e)
-
-        # Matrix channel
-        if self.config.channels.matrix.enabled:
-            try:
-                from nanobot.channels.matrix import MatrixChannel
-                self.channels["matrix"] = MatrixChannel(
-                    self.config.channels.matrix,
-                    self.bus,
-                )
-                logger.info("Matrix channel enabled")
-            except ImportError as e:
-                logger.warning("Matrix channel not available: {}", e)
-
-        # WeCom channel
-        if self.config.channels.wecom.enabled:
-            try:
-                from nanobot.channels.wecom import WecomChannel
-                self.channels["wecom"] = WecomChannel(
-                    self.config.channels.wecom,
-                    self.bus,
-                )
-                logger.info("WeCom channel enabled")
-            except ImportError as e:
-                logger.warning("WeCom channel not available: {}", e)
+                logger.warning("{} channel not available: {}", modname, e)
 
         self._validate_allow_from()
 
diff --git a/nanobot/channels/matrix.py b/nanobot/channels/matrix.py
index 63cb0ca..0d7a908 100644
--- a/nanobot/channels/matrix.py
+++ b/nanobot/channels/matrix.py
@@ -37,6 +37,7 @@ except ImportError as e:
     ) from e
 
 from nanobot.bus.events import OutboundMessage
+from nanobot.bus.queue import MessageBus
 from nanobot.channels.base import BaseChannel
 from nanobot.config.paths import get_data_dir, get_media_dir
 from nanobot.utils.helpers import safe_filename
@@ -146,15 +147,15 @@ class MatrixChannel(BaseChannel):
     """Matrix (Element) channel using long-polling sync."""
 
     name = "matrix"
+    display_name = "Matrix"
 
-    def __init__(self, config: Any, bus, *, restrict_to_workspace: bool = False,
-                 workspace: Path | None = None):
+    def __init__(self, config: Any, bus: MessageBus):
         super().__init__(config, bus)
         self.client: AsyncClient | None = None
         self._sync_task: asyncio.Task | None = None
         self._typing_tasks: dict[str, asyncio.Task] = {}
-        self._restrict_to_workspace = restrict_to_workspace
-        self._workspace = workspace.expanduser().resolve() if workspace else None
+        self._restrict_to_workspace = False
+        self._workspace: Path | None = None
         self._server_upload_limit_bytes: int | None = None
         self._server_upload_limit_checked = False
 
@@ -677,7 +678,14 @@ class MatrixChannel(BaseChannel):
         parts: list[str] = []
         if isinstance(body := getattr(event, "body", None), str) and body.strip():
             parts.append(body.strip())
-        if marker:
+
+        if attachment and attachment.get("type") == "audio":
+            transcription = await self.transcribe_audio(attachment["path"])
+            if transcription:
+                parts.append(f"[transcription: {transcription}]")
+            else:
+                parts.append(marker)
+        elif marker:
             parts.append(marker)
 
         await self._start_typing_keepalive(room.room_id)
diff --git a/nanobot/channels/mochat.py b/nanobot/channels/mochat.py
index 09e31c3..52e246f 100644
--- a/nanobot/channels/mochat.py
+++ b/nanobot/channels/mochat.py
@@ -216,6 +216,7 @@ class MochatChannel(BaseChannel):
     """Mochat channel using socket.io with fallback polling workers."""
 
     name = "mochat"
+    display_name = "Mochat"
 
     def __init__(self, config: MochatConfig, bus: MessageBus):
         super().__init__(config, bus)
diff --git a/nanobot/channels/qq.py b/nanobot/channels/qq.py
index 5ac06e3..792cc12 100644
--- a/nanobot/channels/qq.py
+++ b/nanobot/channels/qq.py
@@ -54,6 +54,7 @@ class QQChannel(BaseChannel):
     """QQ channel using botpy SDK with WebSocket connection."""
 
     name = "qq"
+    display_name = "QQ"
 
     def __init__(self, config: QQConfig, bus: MessageBus):
         super().__init__(config, bus)
diff --git a/nanobot/channels/registry.py b/nanobot/channels/registry.py
new file mode 100644
index 0000000..eb30ff7
--- /dev/null
+++ b/nanobot/channels/registry.py
@@ -0,0 +1,35 @@
+"""Auto-discovery for channel modules — no hardcoded registry."""
+
+from __future__ import annotations
+
+import importlib
+import pkgutil
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from nanobot.channels.base import BaseChannel
+
+_INTERNAL = frozenset({"base", "manager", "registry"})
+
+
+def discover_channel_names() -> list[str]:
+    """Return all channel module names by scanning the package (zero imports)."""
+    import nanobot.channels as pkg
+
+    return [
+        name
+        for _, name, ispkg in pkgutil.iter_modules(pkg.__path__)
+        if name not in _INTERNAL and not ispkg
+    ]
+
+
+def load_channel_class(module_name: str) -> type[BaseChannel]:
+    """Import *module_name* and return the first BaseChannel subclass found."""
+    from nanobot.channels.base import BaseChannel as _Base
+
+    mod = importlib.import_module(f"nanobot.channels.{module_name}")
+    for attr in dir(mod):
+        obj = getattr(mod, attr)
+        if isinstance(obj, type) and issubclass(obj, _Base) and obj is not _Base:
+            return obj
+    raise ImportError(f"No BaseChannel subclass in nanobot.channels.{module_name}")
diff --git a/nanobot/channels/slack.py b/nanobot/channels/slack.py
index 0384d8d..5819212 100644
--- a/nanobot/channels/slack.py
+++ b/nanobot/channels/slack.py
@@ -21,6 +21,7 @@ class SlackChannel(BaseChannel):
     """Slack channel using Socket Mode."""
 
     name = "slack"
+    display_name = "Slack"
 
     def __init__(self, config: SlackConfig, bus: MessageBus):
         super().__init__(config, bus)
diff --git a/nanobot/channels/telegram.py b/nanobot/channels/telegram.py
index 5b294cc..9f93843 100644
--- a/nanobot/channels/telegram.py
+++ b/nanobot/channels/telegram.py
@@ -155,6 +155,7 @@ class TelegramChannel(BaseChannel):
     """
 
     name = "telegram"
+    display_name = "Telegram"
 
     # Commands registered with Telegram's command menu
     BOT_COMMANDS = [
@@ -164,15 +165,9 @@ class TelegramChannel(BaseChannel):
         BotCommand("help", "Show available commands"),
     ]
 
-    def __init__(
-        self,
-        config: TelegramConfig,
-        bus: MessageBus,
-        groq_api_key: str = "",
-    ):
+    def __init__(self, config: TelegramConfig, bus: MessageBus):
         super().__init__(config, bus)
         self.config: TelegramConfig = config
-        self.groq_api_key = groq_api_key
         self._app: Application | None = None
         self._chat_ids: dict[str, int] = {}  # Map sender_id to chat_id for replies
         self._typing_tasks: dict[str, asyncio.Task] = {}  # chat_id -> typing loop task
@@ -615,11 +610,8 @@ class TelegramChannel(BaseChannel):
 
                 media_paths.append(str(file_path))
 
-                # Handle voice transcription
-                if media_type == "voice" or media_type == "audio":
-                    from nanobot.providers.transcription import GroqTranscriptionProvider
-                    transcriber = GroqTranscriptionProvider(api_key=self.groq_api_key)
-                    transcription = await transcriber.transcribe(file_path)
+                if media_type in ("voice", "audio"):
+                    transcription = await self.transcribe_audio(file_path)
                     if transcription:
                         logger.info("Transcribed {}: {}...", media_type, transcription[:50])
                         content_parts.append(f"[transcription: {transcription}]")
diff --git a/nanobot/channels/wecom.py b/nanobot/channels/wecom.py
index 72be9e2..e0f4ae0 100644
--- a/nanobot/channels/wecom.py
+++ b/nanobot/channels/wecom.py
@@ -36,6 +36,7 @@ class WecomChannel(BaseChannel):
     """
 
     name = "wecom"
+    display_name = "WeCom"
 
     def __init__(self, config: WecomConfig, bus: MessageBus):
         super().__init__(config, bus)
diff --git a/nanobot/channels/whatsapp.py b/nanobot/channels/whatsapp.py
index 1307716..7fffb80 100644
--- a/nanobot/channels/whatsapp.py
+++ b/nanobot/channels/whatsapp.py
@@ -22,6 +22,7 @@ class WhatsAppChannel(BaseChannel):
     """
 
     name = "whatsapp"
+    display_name = "WhatsApp"
 
     def __init__(self, config: WhatsAppConfig, bus: MessageBus):
         super().__init__(config, bus)
diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index f5ac859..dd5e60c 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -683,6 +683,7 @@ app.add_typer(channels_app, name="channels")
 @channels_app.command("status")
 def channels_status():
     """Show channel status."""
+    from nanobot.channels.registry import discover_channel_names, load_channel_class
     from nanobot.config.loader import load_config
 
     config = load_config()
@@ -690,85 +691,19 @@ def channels_status():
     table = Table(title="Channel Status")
     table.add_column("Channel", style="cyan")
     table.add_column("Enabled", style="green")
-    table.add_column("Configuration", style="yellow")
 
-    # WhatsApp
-    wa = config.channels.whatsapp
-    table.add_row(
-        "WhatsApp",
-        "✓" if wa.enabled else "✗",
-        wa.bridge_url
-    )
-
-    dc = config.channels.discord
-    table.add_row(
-        "Discord",
-        "✓" if dc.enabled else "✗",
-        dc.gateway_url
-    )
-
-    # Feishu
-    fs = config.channels.feishu
-    fs_config = f"app_id: {fs.app_id[:10]}..." if fs.app_id else "[dim]not configured[/dim]"
-    table.add_row(
-        "Feishu",
-        "✓" if fs.enabled else "✗",
-        fs_config
-    )
-
-    # Mochat
-    mc = config.channels.mochat
-    mc_base = mc.base_url or "[dim]not configured[/dim]"
-    table.add_row(
-        "Mochat",
-        "✓" if mc.enabled else "✗",
-        mc_base
-    )
-
-    # Telegram
-    tg = config.channels.telegram
-    tg_config = f"token: {tg.token[:10]}..." if tg.token else "[dim]not configured[/dim]"
-    table.add_row(
-        "Telegram",
-        "✓" if tg.enabled else "✗",
-        tg_config
-    )
-
-    # Slack
-    slack = config.channels.slack
-    slack_config = "socket" if slack.app_token and slack.bot_token else "[dim]not configured[/dim]"
-    table.add_row(
-        "Slack",
-        "✓" if slack.enabled else "✗",
-        slack_config
-    )
-
-    # DingTalk
-    dt = config.channels.dingtalk
-    dt_config = f"client_id: {dt.client_id[:10]}..." if dt.client_id else "[dim]not configured[/dim]"
-    table.add_row(
-        "DingTalk",
-        "✓" if dt.enabled else "✗",
-        dt_config
-    )
-
-    # QQ
-    qq = config.channels.qq
-    qq_config = f"app_id: {qq.app_id[:10]}..." if qq.app_id else "[dim]not configured[/dim]"
-    table.add_row(
-        "QQ",
-        "✓" if qq.enabled else "✗",
-        qq_config
-    )
-
-    # Email
-    em = config.channels.email
-    em_config = em.imap_host if em.imap_host else "[dim]not configured[/dim]"
-    table.add_row(
-        "Email",
-        "✓" if em.enabled else "✗",
-        em_config
-    )
+    for modname in sorted(discover_channel_names()):
+        section = getattr(config.channels, modname, None)
+        enabled = section and getattr(section, "enabled", False)
+        try:
+            cls = load_channel_class(modname)
+            display = cls.display_name
+        except ImportError:
+            display = modname.title()
+        table.add_row(
+            display,
+            "[green]\u2713[/green]" if enabled else "[dim]\u2717[/dim]",
+        )
 
     console.print(table)
 

From 9d0db072a38123d6433156bd0da321ef213ab064 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 15:43:04 +0000
Subject: [PATCH 47/53] fix: guard quoted home paths in shell tool

---
 nanobot/agent/tools/shell.py  |  4 ++--
 tests/test_tool_validation.py | 13 +++++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/nanobot/agent/tools/shell.py b/nanobot/agent/tools/shell.py
index 4726e3c..b650930 100644
--- a/nanobot/agent/tools/shell.py
+++ b/nanobot/agent/tools/shell.py
@@ -155,6 +155,6 @@ class ExecTool(Tool):
     @staticmethod
     def _extract_absolute_paths(command: str) -> list[str]:
         win_paths = re.findall(r"[A-Za-z]:\\[^\s\"'|><;]+", command)   # Windows: C:\...
-        posix_paths = re.findall(r"(?:^|[\s|>])(/[^\s\"'>]+)", command) # POSIX: /absolute only
-        home_paths = re.findall(r"(?:^|[\s|>])(~[^\s\"'>;|<]*)", command) # POSIX/Windows home shortcut: ~
+        posix_paths = re.findall(r"(?:^|[\s|>'\"])(/[^\s\"'>;|<]+)", command) # POSIX: /absolute only
+        home_paths = re.findall(r"(?:^|[\s|>'\"])(~[^\s\"'>;|<]*)", command) # POSIX/Windows home shortcut: ~
         return win_paths + posix_paths + home_paths
diff --git a/tests/test_tool_validation.py b/tests/test_tool_validation.py
index cf648bf..e67acbf 100644
--- a/tests/test_tool_validation.py
+++ b/tests/test_tool_validation.py
@@ -115,12 +115,25 @@ def test_exec_extract_absolute_paths_captures_home_paths() -> None:
     assert "~/out.txt" in paths
 
 
+def test_exec_extract_absolute_paths_captures_quoted_paths() -> None:
+    cmd = 'cat "/tmp/data.txt" "~/.nanobot/config.json"'
+    paths = ExecTool._extract_absolute_paths(cmd)
+    assert "/tmp/data.txt" in paths
+    assert "~/.nanobot/config.json" in paths
+
+
 def test_exec_guard_blocks_home_path_outside_workspace(tmp_path) -> None:
     tool = ExecTool(restrict_to_workspace=True)
     error = tool._guard_command("cat ~/.nanobot/config.json", str(tmp_path))
     assert error == "Error: Command blocked by safety guard (path outside working dir)"
 
 
+def test_exec_guard_blocks_quoted_home_path_outside_workspace(tmp_path) -> None:
+    tool = ExecTool(restrict_to_workspace=True)
+    error = tool._guard_command('cat "~/.nanobot/config.json"', str(tmp_path))
+    assert error == "Error: Command blocked by safety guard (path outside working dir)"
+
+
 # --- cast_params tests ---
 
 

From 0d94211a9340c4ecde50601029af608045806601 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Wed, 11 Mar 2026 16:20:11 +0000
Subject: [PATCH 48/53] enhance: improve filesystem & shell tools with
 pagination, fallback matching, and smarter output

---
 nanobot/agent/tools/filesystem.py | 299 +++++++++++++++++++++---------
 nanobot/agent/tools/shell.py      |  69 ++++---
 tests/test_filesystem_tools.py    | 251 +++++++++++++++++++++++++
 tests/test_tool_validation.py     |  41 ++++
 4 files changed, 549 insertions(+), 111 deletions(-)
 create mode 100644 tests/test_filesystem_tools.py

diff --git a/nanobot/agent/tools/filesystem.py b/nanobot/agent/tools/filesystem.py
index 7b0b867..02c8331 100644
--- a/nanobot/agent/tools/filesystem.py
+++ b/nanobot/agent/tools/filesystem.py
@@ -1,4 +1,4 @@
-"""File system tools: read, write, edit."""
+"""File system tools: read, write, edit, list."""
 
 import difflib
 from pathlib import Path
@@ -23,62 +23,108 @@ def _resolve_path(
     return resolved
 
 
-class ReadFileTool(Tool):
-    """Tool to read file contents."""
-
-    _MAX_CHARS = 128_000  # ~128 KB — prevents OOM from reading huge files into LLM context
+class _FsTool(Tool):
+    """Shared base for filesystem tools — common init and path resolution."""
 
     def __init__(self, workspace: Path | None = None, allowed_dir: Path | None = None):
         self._workspace = workspace
         self._allowed_dir = allowed_dir
 
+    def _resolve(self, path: str) -> Path:
+        return _resolve_path(path, self._workspace, self._allowed_dir)
+
+
+# ---------------------------------------------------------------------------
+# read_file
+# ---------------------------------------------------------------------------
+
+class ReadFileTool(_FsTool):
+    """Read file contents with optional line-based pagination."""
+
+    _MAX_CHARS = 128_000
+    _DEFAULT_LIMIT = 2000
+
     @property
     def name(self) -> str:
         return "read_file"
 
     @property
     def description(self) -> str:
-        return "Read the contents of a file at the given path."
+        return (
+            "Read the contents of a file. Returns numbered lines. "
+            "Use offset and limit to paginate through large files."
+        )
 
     @property
     def parameters(self) -> dict[str, Any]:
         return {
             "type": "object",
-            "properties": {"path": {"type": "string", "description": "The file path to read"}},
+            "properties": {
+                "path": {"type": "string", "description": "The file path to read"},
+                "offset": {
+                    "type": "integer",
+                    "description": "Line number to start reading from (1-indexed, default 1)",
+                    "minimum": 1,
+                },
+                "limit": {
+                    "type": "integer",
+                    "description": "Maximum number of lines to read (default 2000)",
+                    "minimum": 1,
+                },
+            },
             "required": ["path"],
         }
 
-    async def execute(self, path: str, **kwargs: Any) -> str:
+    async def execute(self, path: str, offset: int = 1, limit: int | None = None, **kwargs: Any) -> str:
         try:
-            file_path = _resolve_path(path, self._workspace, self._allowed_dir)
-            if not file_path.exists():
+            fp = self._resolve(path)
+            if not fp.exists():
                 return f"Error: File not found: {path}"
-            if not file_path.is_file():
+            if not fp.is_file():
                 return f"Error: Not a file: {path}"
 
-            size = file_path.stat().st_size
-            if size > self._MAX_CHARS * 4:  # rough upper bound (UTF-8 chars ≤ 4 bytes)
-                return (
-                    f"Error: File too large ({size:,} bytes). "
-                    f"Use exec tool with head/tail/grep to read portions."
-                )
+            all_lines = fp.read_text(encoding="utf-8").splitlines()
+            total = len(all_lines)
 
-            content = file_path.read_text(encoding="utf-8")
-            if len(content) > self._MAX_CHARS:
-                return content[: self._MAX_CHARS] + f"\n\n... (truncated — file is {len(content):,} chars, limit {self._MAX_CHARS:,})"
-            return content
+            if offset < 1:
+                offset = 1
+            if total == 0:
+                return f"(Empty file: {path})"
+            if offset > total:
+                return f"Error: offset {offset} is beyond end of file ({total} lines)"
+
+            start = offset - 1
+            end = min(start + (limit or self._DEFAULT_LIMIT), total)
+            numbered = [f"{start + i + 1}| {line}" for i, line in enumerate(all_lines[start:end])]
+            result = "\n".join(numbered)
+
+            if len(result) > self._MAX_CHARS:
+                trimmed, chars = [], 0
+                for line in numbered:
+                    chars += len(line) + 1
+                    if chars > self._MAX_CHARS:
+                        break
+                    trimmed.append(line)
+                end = start + len(trimmed)
+                result = "\n".join(trimmed)
+
+            if end < total:
+                result += f"\n\n(Showing lines {offset}-{end} of {total}. Use offset={end + 1} to continue.)"
+            else:
+                result += f"\n\n(End of file — {total} lines total)"
+            return result
         except PermissionError as e:
             return f"Error: {e}"
         except Exception as e:
-            return f"Error reading file: {str(e)}"
+            return f"Error reading file: {e}"
 
 
-class WriteFileTool(Tool):
-    """Tool to write content to a file."""
+# ---------------------------------------------------------------------------
+# write_file
+# ---------------------------------------------------------------------------
 
-    def __init__(self, workspace: Path | None = None, allowed_dir: Path | None = None):
-        self._workspace = workspace
-        self._allowed_dir = allowed_dir
+class WriteFileTool(_FsTool):
+    """Write content to a file."""
 
     @property
     def name(self) -> str:
@@ -101,22 +147,48 @@ class WriteFileTool(Tool):
 
     async def execute(self, path: str, content: str, **kwargs: Any) -> str:
         try:
-            file_path = _resolve_path(path, self._workspace, self._allowed_dir)
-            file_path.parent.mkdir(parents=True, exist_ok=True)
-            file_path.write_text(content, encoding="utf-8")
-            return f"Successfully wrote {len(content)} bytes to {file_path}"
+            fp = self._resolve(path)
+            fp.parent.mkdir(parents=True, exist_ok=True)
+            fp.write_text(content, encoding="utf-8")
+            return f"Successfully wrote {len(content)} bytes to {fp}"
         except PermissionError as e:
             return f"Error: {e}"
         except Exception as e:
-            return f"Error writing file: {str(e)}"
+            return f"Error writing file: {e}"
 
 
-class EditFileTool(Tool):
-    """Tool to edit a file by replacing text."""
+# ---------------------------------------------------------------------------
+# edit_file
+# ---------------------------------------------------------------------------
 
-    def __init__(self, workspace: Path | None = None, allowed_dir: Path | None = None):
-        self._workspace = workspace
-        self._allowed_dir = allowed_dir
+def _find_match(content: str, old_text: str) -> tuple[str | None, int]:
+    """Locate old_text in content: exact first, then line-trimmed sliding window.
+
+    Both inputs should use LF line endings (caller normalises CRLF).
+    Returns (matched_fragment, count) or (None, 0).
+    """
+    if old_text in content:
+        return old_text, content.count(old_text)
+
+    old_lines = old_text.splitlines()
+    if not old_lines:
+        return None, 0
+    stripped_old = [l.strip() for l in old_lines]
+    content_lines = content.splitlines()
+
+    candidates = []
+    for i in range(len(content_lines) - len(stripped_old) + 1):
+        window = content_lines[i : i + len(stripped_old)]
+        if [l.strip() for l in window] == stripped_old:
+            candidates.append("\n".join(window))
+
+    if candidates:
+        return candidates[0], len(candidates)
+    return None, 0
+
+
+class EditFileTool(_FsTool):
+    """Edit a file by replacing text with fallback matching."""
 
     @property
     def name(self) -> str:
@@ -124,7 +196,11 @@ class EditFileTool(Tool):
 
     @property
     def description(self) -> str:
-        return "Edit a file by replacing old_text with new_text. The old_text must exist exactly in the file."
+        return (
+            "Edit a file by replacing old_text with new_text. "
+            "Supports minor whitespace/line-ending differences. "
+            "Set replace_all=true to replace every occurrence."
+        )
 
     @property
     def parameters(self) -> dict[str, Any]:
@@ -132,40 +208,52 @@ class EditFileTool(Tool):
             "type": "object",
             "properties": {
                 "path": {"type": "string", "description": "The file path to edit"},
-                "old_text": {"type": "string", "description": "The exact text to find and replace"},
+                "old_text": {"type": "string", "description": "The text to find and replace"},
                 "new_text": {"type": "string", "description": "The text to replace with"},
+                "replace_all": {
+                    "type": "boolean",
+                    "description": "Replace all occurrences (default false)",
+                },
             },
             "required": ["path", "old_text", "new_text"],
         }
 
-    async def execute(self, path: str, old_text: str, new_text: str, **kwargs: Any) -> str:
+    async def execute(
+        self, path: str, old_text: str, new_text: str,
+        replace_all: bool = False, **kwargs: Any,
+    ) -> str:
         try:
-            file_path = _resolve_path(path, self._workspace, self._allowed_dir)
-            if not file_path.exists():
+            fp = self._resolve(path)
+            if not fp.exists():
                 return f"Error: File not found: {path}"
 
-            content = file_path.read_text(encoding="utf-8")
+            raw = fp.read_bytes()
+            uses_crlf = b"\r\n" in raw
+            content = raw.decode("utf-8").replace("\r\n", "\n")
+            match, count = _find_match(content, old_text.replace("\r\n", "\n"))
 
-            if old_text not in content:
-                return self._not_found_message(old_text, content, path)
+            if match is None:
+                return self._not_found_msg(old_text, content, path)
+            if count > 1 and not replace_all:
+                return (
+                    f"Warning: old_text appears {count} times. "
+                    "Provide more context to make it unique, or set replace_all=true."
+                )
 
-            # Count occurrences
-            count = content.count(old_text)
-            if count > 1:
-                return f"Warning: old_text appears {count} times. Please provide more context to make it unique."
+            norm_new = new_text.replace("\r\n", "\n")
+            new_content = content.replace(match, norm_new) if replace_all else content.replace(match, norm_new, 1)
+            if uses_crlf:
+                new_content = new_content.replace("\n", "\r\n")
 
-            new_content = content.replace(old_text, new_text, 1)
-            file_path.write_text(new_content, encoding="utf-8")
-
-            return f"Successfully edited {file_path}"
+            fp.write_bytes(new_content.encode("utf-8"))
+            return f"Successfully edited {fp}"
         except PermissionError as e:
             return f"Error: {e}"
         except Exception as e:
-            return f"Error editing file: {str(e)}"
+            return f"Error editing file: {e}"
 
     @staticmethod
-    def _not_found_message(old_text: str, content: str, path: str) -> str:
-        """Build a helpful error when old_text is not found."""
+    def _not_found_msg(old_text: str, content: str, path: str) -> str:
         lines = content.splitlines(keepends=True)
         old_lines = old_text.splitlines(keepends=True)
         window = len(old_lines)
@@ -177,27 +265,29 @@ class EditFileTool(Tool):
                 best_ratio, best_start = ratio, i
 
         if best_ratio > 0.5:
-            diff = "\n".join(
-                difflib.unified_diff(
-                    old_lines,
-                    lines[best_start : best_start + window],
-                    fromfile="old_text (provided)",
-                    tofile=f"{path} (actual, line {best_start + 1})",
-                    lineterm="",
-                )
-            )
+            diff = "\n".join(difflib.unified_diff(
+                old_lines, lines[best_start : best_start + window],
+                fromfile="old_text (provided)",
+                tofile=f"{path} (actual, line {best_start + 1})",
+                lineterm="",
+            ))
             return f"Error: old_text not found in {path}.\nBest match ({best_ratio:.0%} similar) at line {best_start + 1}:\n{diff}"
-        return (
-            f"Error: old_text not found in {path}. No similar text found. Verify the file content."
-        )
+        return f"Error: old_text not found in {path}. No similar text found. Verify the file content."
 
 
-class ListDirTool(Tool):
-    """Tool to list directory contents."""
+# ---------------------------------------------------------------------------
+# list_dir
+# ---------------------------------------------------------------------------
 
-    def __init__(self, workspace: Path | None = None, allowed_dir: Path | None = None):
-        self._workspace = workspace
-        self._allowed_dir = allowed_dir
+class ListDirTool(_FsTool):
+    """List directory contents with optional recursion."""
+
+    _DEFAULT_MAX = 200
+    _IGNORE_DIRS = {
+        ".git", "node_modules", "__pycache__", ".venv", "venv",
+        "dist", "build", ".tox", ".mypy_cache", ".pytest_cache",
+        ".ruff_cache", ".coverage", "htmlcov",
+    }
 
     @property
     def name(self) -> str:
@@ -205,34 +295,71 @@ class ListDirTool(Tool):
 
     @property
     def description(self) -> str:
-        return "List the contents of a directory."
+        return (
+            "List the contents of a directory. "
+            "Set recursive=true to explore nested structure. "
+            "Common noise directories (.git, node_modules, __pycache__, etc.) are auto-ignored."
+        )
 
     @property
     def parameters(self) -> dict[str, Any]:
         return {
             "type": "object",
-            "properties": {"path": {"type": "string", "description": "The directory path to list"}},
+            "properties": {
+                "path": {"type": "string", "description": "The directory path to list"},
+                "recursive": {
+                    "type": "boolean",
+                    "description": "Recursively list all files (default false)",
+                },
+                "max_entries": {
+                    "type": "integer",
+                    "description": "Maximum entries to return (default 200)",
+                    "minimum": 1,
+                },
+            },
             "required": ["path"],
         }
 
-    async def execute(self, path: str, **kwargs: Any) -> str:
+    async def execute(
+        self, path: str, recursive: bool = False,
+        max_entries: int | None = None, **kwargs: Any,
+    ) -> str:
         try:
-            dir_path = _resolve_path(path, self._workspace, self._allowed_dir)
-            if not dir_path.exists():
+            dp = self._resolve(path)
+            if not dp.exists():
                 return f"Error: Directory not found: {path}"
-            if not dir_path.is_dir():
+            if not dp.is_dir():
                 return f"Error: Not a directory: {path}"
 
-            items = []
-            for item in sorted(dir_path.iterdir()):
-                prefix = "📁 " if item.is_dir() else "📄 "
-                items.append(f"{prefix}{item.name}")
+            cap = max_entries or self._DEFAULT_MAX
+            items: list[str] = []
+            total = 0
 
-            if not items:
+            if recursive:
+                for item in sorted(dp.rglob("*")):
+                    rel = item.relative_to(dp)  # test names below dp only, never dp's own ancestors
+                    if any(p in self._IGNORE_DIRS for p in rel.parts):
+                        continue
+                    total += 1
+                    if len(items) < cap:
+                        items.append(f"{rel}/" if item.is_dir() else str(rel))
+            else:
+                for item in sorted(dp.iterdir()):
+                    if item.name in self._IGNORE_DIRS:
+                        continue
+                    total += 1
+                    if len(items) < cap:
+                        pfx = "📁 " if item.is_dir() else "📄 "
+                        items.append(f"{pfx}{item.name}")
+
+            if not items and total == 0:
                 return f"Directory {path} is empty"
 
-            return "\n".join(items)
+            result = "\n".join(items)
+            if total > cap:
+                result += f"\n\n(truncated, showing first {cap} of {total} entries)"
+            return result
         except PermissionError as e:
             return f"Error: {e}"
         except Exception as e:
-            return f"Error listing directory: {str(e)}"
+            return f"Error listing directory: {e}"
diff --git a/nanobot/agent/tools/shell.py b/nanobot/agent/tools/shell.py
index b650930..bf1b082 100644
--- a/nanobot/agent/tools/shell.py
+++ b/nanobot/agent/tools/shell.py
@@ -42,6 +42,9 @@ class ExecTool(Tool):
     def name(self) -> str:
         return "exec"
 
+    _MAX_TIMEOUT = 600
+    _MAX_OUTPUT = 10_000
+
     @property
     def description(self) -> str:
         return "Execute a shell command and return its output. Use with caution."
@@ -53,22 +56,36 @@ class ExecTool(Tool):
             "properties": {
                 "command": {
                     "type": "string",
-                    "description": "The shell command to execute"
+                    "description": "The shell command to execute",
                 },
                 "working_dir": {
                     "type": "string",
-                    "description": "Optional working directory for the command"
-                }
+                    "description": "Optional working directory for the command",
+                },
+                "timeout": {
+                    "type": "integer",
+                    "description": (
+                        "Timeout in seconds. Increase for long-running commands "
+                        "like compilation or installation (default 60, max 600)."
+                    ),
+                    "minimum": 1,
+                    "maximum": 600,
+                },
             },
-            "required": ["command"]
+            "required": ["command"],
         }
-    
-    async def execute(self, command: str, working_dir: str | None = None, **kwargs: Any) -> str:
+
+    async def execute(
+        self, command: str, working_dir: str | None = None,
+        timeout: int | None = None, **kwargs: Any,
+    ) -> str:
         cwd = working_dir or self.working_dir or os.getcwd()
         guard_error = self._guard_command(command, cwd)
         if guard_error:
             return guard_error
-        
+
+        effective_timeout = min(timeout or self.timeout, self._MAX_TIMEOUT)
+
         env = os.environ.copy()
         if self.path_append:
             env["PATH"] = env.get("PATH", "") + os.pathsep + self.path_append
@@ -81,44 +98,46 @@ class ExecTool(Tool):
                 cwd=cwd,
                 env=env,
             )
-            
+
             try:
                 stdout, stderr = await asyncio.wait_for(
                     process.communicate(),
-                    timeout=self.timeout
+                    timeout=effective_timeout,
                 )
             except asyncio.TimeoutError:
                 process.kill()
-                # Wait for the process to fully terminate so pipes are
-                # drained and file descriptors are released.
                 try:
                     await asyncio.wait_for(process.wait(), timeout=5.0)
                 except asyncio.TimeoutError:
                     pass
-                return f"Error: Command timed out after {self.timeout} seconds"
-            
+                return f"Error: Command timed out after {effective_timeout} seconds"
+
             output_parts = []
-            
+
             if stdout:
                 output_parts.append(stdout.decode("utf-8", errors="replace"))
-            
+
             if stderr:
                 stderr_text = stderr.decode("utf-8", errors="replace")
                 if stderr_text.strip():
                     output_parts.append(f"STDERR:\n{stderr_text}")
-            
-            if process.returncode != 0:
-                output_parts.append(f"\nExit code: {process.returncode}")
-            
+
+            output_parts.append(f"\nExit code: {process.returncode}")
+
             result = "\n".join(output_parts) if output_parts else "(no output)"
-            
-            # Truncate very long output
-            max_len = 10000
+
+            # Head + tail truncation to preserve both start and end of output
+            max_len = self._MAX_OUTPUT
             if len(result) > max_len:
-                result = result[:max_len] + f"\n... (truncated, {len(result) - max_len} more chars)"
-            
+                half = max_len // 2
+                result = (
+                    result[:half]
+                    + f"\n\n... ({len(result) - max_len:,} chars truncated) ...\n\n"
+                    + result[-half:]
+                )
+
             return result
-            
+
         except Exception as e:
             return f"Error executing command: {str(e)}"
 
diff --git a/tests/test_filesystem_tools.py b/tests/test_filesystem_tools.py
new file mode 100644
index 0000000..db8f256
--- /dev/null
+++ b/tests/test_filesystem_tools.py
@@ -0,0 +1,251 @@
+"""Tests for enhanced filesystem tools: ReadFileTool, EditFileTool, ListDirTool."""
+
+import pytest
+
+from nanobot.agent.tools.filesystem import (
+    EditFileTool,
+    ListDirTool,
+    ReadFileTool,
+    _find_match,
+)
+
+
+# ---------------------------------------------------------------------------
+# ReadFileTool
+# ---------------------------------------------------------------------------
+
+class TestReadFileTool:
+
+    @pytest.fixture()
+    def tool(self, tmp_path):
+        return ReadFileTool(workspace=tmp_path)
+
+    @pytest.fixture()
+    def sample_file(self, tmp_path):
+        f = tmp_path / "sample.txt"
+        f.write_text("\n".join(f"line {i}" for i in range(1, 21)), encoding="utf-8")
+        return f
+
+    @pytest.mark.asyncio
+    async def test_basic_read_has_line_numbers(self, tool, sample_file):
+        result = await tool.execute(path=str(sample_file))
+        assert "1| line 1" in result
+        assert "20| line 20" in result
+
+    @pytest.mark.asyncio
+    async def test_offset_and_limit(self, tool, sample_file):
+        result = await tool.execute(path=str(sample_file), offset=5, limit=3)
+        assert "5| line 5" in result
+        assert "7| line 7" in result
+        assert "8| line 8" not in result
+        assert "Use offset=8 to continue" in result
+
+    @pytest.mark.asyncio
+    async def test_offset_beyond_end(self, tool, sample_file):
+        result = await tool.execute(path=str(sample_file), offset=999)
+        assert "Error" in result
+        assert "beyond end" in result
+
+    @pytest.mark.asyncio
+    async def test_end_of_file_marker(self, tool, sample_file):
+        result = await tool.execute(path=str(sample_file), offset=1, limit=9999)
+        assert "End of file" in result
+
+    @pytest.mark.asyncio
+    async def test_empty_file(self, tool, tmp_path):
+        f = tmp_path / "empty.txt"
+        f.write_text("", encoding="utf-8")
+        result = await tool.execute(path=str(f))
+        assert "Empty file" in result
+
+    @pytest.mark.asyncio
+    async def test_file_not_found(self, tool, tmp_path):
+        result = await tool.execute(path=str(tmp_path / "nope.txt"))
+        assert "Error" in result
+        assert "not found" in result
+
+    @pytest.mark.asyncio
+    async def test_char_budget_trims(self, tool, tmp_path):
+        """When the selected slice exceeds _MAX_CHARS the output is trimmed."""
+        f = tmp_path / "big.txt"
+        # Each line is ~110 chars, 2000 lines ≈ 220 KB > 128 KB limit
+        f.write_text("\n".join("x" * 110 for _ in range(2000)), encoding="utf-8")
+        result = await tool.execute(path=str(f))
+        assert len(result) <= ReadFileTool._MAX_CHARS + 500  # small margin for footer
+        assert "Use offset=" in result
+
+
+# ---------------------------------------------------------------------------
+# _find_match  (unit tests for the helper)
+# ---------------------------------------------------------------------------
+
+class TestFindMatch:
+
+    def test_exact_match(self):
+        match, count = _find_match("hello world", "world")
+        assert match == "world"
+        assert count == 1
+
+    def test_exact_no_match(self):
+        match, count = _find_match("hello world", "xyz")
+        assert match is None
+        assert count == 0
+
+    def test_crlf_normalisation(self):
+        # Caller normalises CRLF before calling _find_match, so test with
+        # pre-normalised content to verify exact match still works.
+        content = "line1\nline2\nline3"
+        old_text = "line1\nline2\nline3"
+        match, count = _find_match(content, old_text)
+        assert match is not None
+        assert count == 1
+
+    def test_line_trim_fallback(self):
+        content = "    def foo():\n        pass\n"
+        old_text = "def foo():\n    pass"
+        match, count = _find_match(content, old_text)
+        assert match is not None
+        assert count == 1
+        # The returned match should be the *original* indented text
+        assert "    def foo():" in match
+
+    def test_line_trim_multiple_candidates(self):
+        content = "  a\n  b\n  a\n  b\n"
+        old_text = "a\nb"
+        match, count = _find_match(content, old_text)
+        assert count == 2
+
+    def test_empty_old_text(self):
+        match, count = _find_match("hello", "")
+        # Empty string is always "in" any string via exact match
+        assert match == ""
+
+
+# ---------------------------------------------------------------------------
+# EditFileTool
+# ---------------------------------------------------------------------------
+
+class TestEditFileTool:
+
+    @pytest.fixture()
+    def tool(self, tmp_path):
+        return EditFileTool(workspace=tmp_path)
+
+    @pytest.mark.asyncio
+    async def test_exact_match(self, tool, tmp_path):
+        f = tmp_path / "a.py"
+        f.write_text("hello world", encoding="utf-8")
+        result = await tool.execute(path=str(f), old_text="world", new_text="earth")
+        assert "Successfully" in result
+        assert f.read_text() == "hello earth"
+
+    @pytest.mark.asyncio
+    async def test_crlf_normalisation(self, tool, tmp_path):
+        f = tmp_path / "crlf.py"
+        f.write_bytes(b"line1\r\nline2\r\nline3")
+        result = await tool.execute(
+            path=str(f), old_text="line1\nline2", new_text="LINE1\nLINE2",
+        )
+        assert "Successfully" in result
+        raw = f.read_bytes()
+        # Every line ending must stay CRLF: the edited region and the
+        # untouched tail alike, with no bare LF left behind.
+        assert raw == b"LINE1\r\nLINE2\r\nline3"
+
+    @pytest.mark.asyncio
+    async def test_trim_fallback(self, tool, tmp_path):
+        f = tmp_path / "indent.py"
+        f.write_text("    def foo():\n        pass\n", encoding="utf-8")
+        result = await tool.execute(
+            path=str(f), old_text="def foo():\n    pass", new_text="def bar():\n    return 1",
+        )
+        assert "Successfully" in result
+        assert "bar" in f.read_text()
+
+    @pytest.mark.asyncio
+    async def test_ambiguous_match(self, tool, tmp_path):
+        f = tmp_path / "dup.py"
+        f.write_text("aaa\nbbb\naaa\nbbb\n", encoding="utf-8")
+        result = await tool.execute(path=str(f), old_text="aaa\nbbb", new_text="xxx")
+        assert "appears" in result.lower() or "Warning" in result
+
+    @pytest.mark.asyncio
+    async def test_replace_all(self, tool, tmp_path):
+        f = tmp_path / "multi.py"
+        f.write_text("foo bar foo bar foo", encoding="utf-8")
+        result = await tool.execute(
+            path=str(f), old_text="foo", new_text="baz", replace_all=True,
+        )
+        assert "Successfully" in result
+        assert f.read_text() == "baz bar baz bar baz"
+
+    @pytest.mark.asyncio
+    async def test_not_found(self, tool, tmp_path):
+        f = tmp_path / "nf.py"
+        f.write_text("hello", encoding="utf-8")
+        result = await tool.execute(path=str(f), old_text="xyz", new_text="abc")
+        assert "Error" in result
+        assert "not found" in result
+
+
+# ---------------------------------------------------------------------------
+# ListDirTool
+# ---------------------------------------------------------------------------
+
+class TestListDirTool:
+
+    @pytest.fixture()
+    def tool(self, tmp_path):
+        return ListDirTool(workspace=tmp_path)
+
+    @pytest.fixture()
+    def populated_dir(self, tmp_path):
+        (tmp_path / "src").mkdir()
+        (tmp_path / "src" / "main.py").write_text("pass")
+        (tmp_path / "src" / "utils.py").write_text("pass")
+        (tmp_path / "README.md").write_text("hi")
+        (tmp_path / ".git").mkdir()
+        (tmp_path / ".git" / "config").write_text("x")
+        (tmp_path / "node_modules").mkdir()
+        (tmp_path / "node_modules" / "pkg").mkdir()
+        return tmp_path
+
+    @pytest.mark.asyncio
+    async def test_basic_list(self, tool, populated_dir):
+        result = await tool.execute(path=str(populated_dir))
+        assert "README.md" in result
+        assert "src" in result
+        # .git and node_modules should be ignored
+        assert ".git" not in result
+        assert "node_modules" not in result
+
+    @pytest.mark.asyncio
+    async def test_recursive(self, tool, populated_dir):
+        result = await tool.execute(path=str(populated_dir), recursive=True)
+        assert "src/main.py" in result
+        assert "src/utils.py" in result
+        assert "README.md" in result
+        # Ignored dirs should not appear
+        assert ".git" not in result
+        assert "node_modules" not in result
+
+    @pytest.mark.asyncio
+    async def test_max_entries_truncation(self, tool, tmp_path):
+        for i in range(10):
+            (tmp_path / f"file_{i}.txt").write_text("x")
+        result = await tool.execute(path=str(tmp_path), max_entries=3)
+        assert "truncated" in result
+        assert "3 of 10" in result
+
+    @pytest.mark.asyncio
+    async def test_empty_dir(self, tool, tmp_path):
+        d = tmp_path / "empty"
+        d.mkdir()
+        result = await tool.execute(path=str(d))
+        assert "empty" in result.lower()
+
+    @pytest.mark.asyncio
+    async def test_not_found(self, tool, tmp_path):
+        result = await tool.execute(path=str(tmp_path / "nope"))
+        assert "Error" in result
+        assert "not found" in result
diff --git a/tests/test_tool_validation.py b/tests/test_tool_validation.py
index e67acbf..095c041 100644
--- a/tests/test_tool_validation.py
+++ b/tests/test_tool_validation.py
@@ -363,3 +363,44 @@ def test_cast_params_single_value_not_auto_wrapped_to_array() -> None:
     assert result["items"] == 5  # Not wrapped to [5]
     result = tool.cast_params({"items": "text"})
     assert result["items"] == "text"  # Not wrapped to ["text"]
+
+
+# --- ExecTool enhancement tests ---
+
+
+async def test_exec_always_returns_exit_code() -> None:
+    """Exit code should appear in output even on success (exit 0)."""
+    tool = ExecTool()
+    result = await tool.execute(command="echo hello")
+    assert "Exit code: 0" in result
+    assert "hello" in result
+
+
+async def test_exec_head_tail_truncation() -> None:
+    """Long output should preserve both head and tail."""
+    tool = ExecTool()
+    # Generate output that exceeds _MAX_OUTPUT
+    big = "A" * 6000 + "\n" + "B" * 6000
+    result = await tool.execute(command=f"echo '{big}'")
+    assert "chars truncated" in result
+    # Head portion should start with As
+    assert result.startswith("A")
+    # Tail portion should end with the exit code which comes after Bs
+    assert "Exit code:" in result
+
+
+async def test_exec_timeout_parameter() -> None:
+    """LLM-supplied timeout should override the constructor default."""
+    tool = ExecTool(timeout=60)
+    # A very short timeout should cause the command to be killed
+    result = await tool.execute(command="sleep 10", timeout=1)
+    assert "timed out" in result
+    assert "1 seconds" in result
+
+
+async def test_exec_timeout_capped_at_max() -> None:
+    """Timeout values above _MAX_TIMEOUT should be clamped."""
+    tool = ExecTool()
+    # Should not raise — just clamp to 600
+    result = await tool.execute(command="echo ok", timeout=9999)
+    assert "Exit code: 0" in result

From 64ab6309d5e309976314e166f9c277d956c5a460 Mon Sep 17 00:00:00 2001
From: chengyongru <2755839590@qq.com>
Date: Thu, 12 Mar 2026 00:38:28 +0800
Subject: [PATCH 49/53] fix: wecom-aibot-sdk-python should use pypi version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index a52c0c9..58831c9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -49,7 +49,7 @@ dependencies = [
 
 [project.optional-dependencies]
 wecom = [
-    "wecom-aibot-sdk-python @ git+https://github.com/chengyongru/wecom_aibot_sdk.git@v0.1.2",
+    "wecom-aibot-sdk-python>=0.1.2",
 ]
 matrix = [
     "matrix-nio[e2e]>=0.25.2",

From 35260ca1574520dd946f55ed11ae2abfce59260d Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Thu, 12 Mar 2026 02:50:28 +0000
Subject: [PATCH 50/53] fix: raise persisted tool result limit to 16k

---
 nanobot/agent/loop.py        |  2 +-
 tests/test_loop_save_turn.py | 16 +++++++++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index b80c5d0..ac8700c 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -43,7 +43,7 @@ class AgentLoop:
     5. Sends responses back
     """
 
-    _TOOL_RESULT_MAX_CHARS = 500
+    _TOOL_RESULT_MAX_CHARS = 16_000
 
     def __init__(
         self,
diff --git a/tests/test_loop_save_turn.py b/tests/test_loop_save_turn.py
index aec6d1a..25ba88b 100644
--- a/tests/test_loop_save_turn.py
+++ b/tests/test_loop_save_turn.py
@@ -5,7 +5,7 @@ from nanobot.session.manager import Session
 
 def _mk_loop() -> AgentLoop:
     loop = AgentLoop.__new__(AgentLoop)
-    loop._TOOL_RESULT_MAX_CHARS = 500
+    loop._TOOL_RESULT_MAX_CHARS = AgentLoop._TOOL_RESULT_MAX_CHARS
     return loop
 
 
@@ -39,3 +39,17 @@ def test_save_turn_keeps_image_placeholder_after_runtime_strip() -> None:
         skip=0,
     )
     assert session.messages[0]["content"] == [{"type": "text", "text": "[image]"}]
+
+
+def test_save_turn_keeps_tool_results_under_16k() -> None:
+    loop = _mk_loop()
+    session = Session(key="test:tool-result")
+    content = "x" * 12_000
+
+    loop._save_turn(
+        session,
+        [{"role": "tool", "tool_call_id": "call_1", "name": "read_file", "content": content}],
+        skip=0,
+    )
+
+    assert session.messages[0]["content"] == content

From 0a0017ff457f66ee91c2d27edfab7725e0751156 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Thu, 12 Mar 2026 03:08:53 +0000
Subject: [PATCH 51/53] fix: raise tool result history limit to 16k and force
 save_memory in consolidation

---
 nanobot/agent/memory.py                    | 1 +
 nanobot/providers/azure_openai_provider.py | 7 +++++--
 nanobot/providers/base.py                  | 5 +++++
 nanobot/providers/custom_provider.py       | 5 +++--
 nanobot/providers/litellm_provider.py      | 3 ++-
 nanobot/providers/openai_codex_provider.py | 3 ++-
 6 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py
index 59ba40e..802dd04 100644
--- a/nanobot/agent/memory.py
+++ b/nanobot/agent/memory.py
@@ -120,6 +120,7 @@ class MemoryStore:
                 ],
                 tools=_SAVE_MEMORY_TOOL,
                 model=model,
+                tool_choice="required",
             )
 
             if not response.has_tool_calls:
diff --git a/nanobot/providers/azure_openai_provider.py b/nanobot/providers/azure_openai_provider.py
index bd79b00..05fbac4 100644
--- a/nanobot/providers/azure_openai_provider.py
+++ b/nanobot/providers/azure_openai_provider.py
@@ -88,6 +88,7 @@ class AzureOpenAIProvider(LLMProvider):
         max_tokens: int = 4096,
         temperature: float = 0.7,
         reasoning_effort: str | None = None,
+        tool_choice: str | dict[str, Any] | None = None,
     ) -> dict[str, Any]:
         """Prepare the request payload with Azure OpenAI 2024-10-21 compliance."""
         payload: dict[str, Any] = {
@@ -106,7 +107,7 @@ class AzureOpenAIProvider(LLMProvider):
 
         if tools:
             payload["tools"] = tools
-            payload["tool_choice"] = "auto"
+            payload["tool_choice"] = tool_choice or "auto"
 
         return payload
 
@@ -118,6 +119,7 @@ class AzureOpenAIProvider(LLMProvider):
         max_tokens: int = 4096,
         temperature: float = 0.7,
         reasoning_effort: str | None = None,
+        tool_choice: str | dict[str, Any] | None = None,
     ) -> LLMResponse:
         """
         Send a chat completion request to Azure OpenAI.
@@ -137,7 +139,8 @@ class AzureOpenAIProvider(LLMProvider):
         url = self._build_chat_url(deployment_name)
         headers = self._build_headers()
         payload = self._prepare_request_payload(
-            deployment_name, messages, tools, max_tokens, temperature, reasoning_effort
+            deployment_name, messages, tools, max_tokens, temperature, reasoning_effort,
+            tool_choice=tool_choice,
         )
 
         try:
diff --git a/nanobot/providers/base.py b/nanobot/providers/base.py
index 15a10ff..114a948 100644
--- a/nanobot/providers/base.py
+++ b/nanobot/providers/base.py
@@ -166,6 +166,7 @@ class LLMProvider(ABC):
         max_tokens: int = 4096,
         temperature: float = 0.7,
         reasoning_effort: str | None = None,
+        tool_choice: str | dict[str, Any] | None = None,
     ) -> LLMResponse:
         """
         Send a chat completion request.
@@ -176,6 +177,7 @@ class LLMProvider(ABC):
             model: Model identifier (provider-specific).
             max_tokens: Maximum tokens in response.
             temperature: Sampling temperature.
+            tool_choice: Tool selection strategy ("auto", "required", or specific tool dict).
         
         Returns:
             LLMResponse with content and/or tool calls.
@@ -195,6 +197,7 @@ class LLMProvider(ABC):
         max_tokens: object = _SENTINEL,
         temperature: object = _SENTINEL,
         reasoning_effort: object = _SENTINEL,
+        tool_choice: str | dict[str, Any] | None = None,
     ) -> LLMResponse:
         """Call chat() with retry on transient provider failures.
 
@@ -218,6 +221,7 @@ class LLMProvider(ABC):
                     max_tokens=max_tokens,
                     temperature=temperature,
                     reasoning_effort=reasoning_effort,
+                    tool_choice=tool_choice,
                 )
             except asyncio.CancelledError:
                 raise
@@ -250,6 +254,7 @@ class LLMProvider(ABC):
                 max_tokens=max_tokens,
                 temperature=temperature,
                 reasoning_effort=reasoning_effort,
+                tool_choice=tool_choice,
             )
         except asyncio.CancelledError:
             raise
diff --git a/nanobot/providers/custom_provider.py b/nanobot/providers/custom_provider.py
index 66df734..f16c69b 100644
--- a/nanobot/providers/custom_provider.py
+++ b/nanobot/providers/custom_provider.py
@@ -25,7 +25,8 @@ class CustomProvider(LLMProvider):
 
     async def chat(self, messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None,
                    model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7,
-                   reasoning_effort: str | None = None) -> LLMResponse:
+                   reasoning_effort: str | None = None,
+                   tool_choice: str | dict[str, Any] | None = None) -> LLMResponse:
         kwargs: dict[str, Any] = {
             "model": model or self.default_model,
             "messages": self._sanitize_empty_content(messages),
@@ -35,7 +36,7 @@ class CustomProvider(LLMProvider):
         if reasoning_effort:
             kwargs["reasoning_effort"] = reasoning_effort
         if tools:
-            kwargs.update(tools=tools, tool_choice="auto")
+            kwargs.update(tools=tools, tool_choice=tool_choice or "auto")
         try:
             return self._parse(await self._client.chat.completions.create(**kwargs))
         except Exception as e:
diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py
index af91c2f..b4508a4 100644
--- a/nanobot/providers/litellm_provider.py
+++ b/nanobot/providers/litellm_provider.py
@@ -214,6 +214,7 @@ class LiteLLMProvider(LLMProvider):
         max_tokens: int = 4096,
         temperature: float = 0.7,
         reasoning_effort: str | None = None,
+        tool_choice: str | dict[str, Any] | None = None,
     ) -> LLMResponse:
         """
         Send a chat completion request via LiteLLM.
@@ -267,7 +268,7 @@ class LiteLLMProvider(LLMProvider):
         
         if tools:
             kwargs["tools"] = tools
-            kwargs["tool_choice"] = "auto"
+            kwargs["tool_choice"] = tool_choice or "auto"
 
         try:
             response = await acompletion(**kwargs)
diff --git a/nanobot/providers/openai_codex_provider.py b/nanobot/providers/openai_codex_provider.py
index d04e210..c8f2155 100644
--- a/nanobot/providers/openai_codex_provider.py
+++ b/nanobot/providers/openai_codex_provider.py
@@ -32,6 +32,7 @@ class OpenAICodexProvider(LLMProvider):
         max_tokens: int = 4096,
         temperature: float = 0.7,
         reasoning_effort: str | None = None,
+        tool_choice: str | dict[str, Any] | None = None,
     ) -> LLMResponse:
         model = model or self.default_model
         system_prompt, input_items = _convert_messages(messages)
@@ -48,7 +49,7 @@ class OpenAICodexProvider(LLMProvider):
             "text": {"verbosity": "medium"},
             "include": ["reasoning.encrypted_content"],
             "prompt_cache_key": _prompt_cache_key(messages),
-            "tool_choice": "auto",
+            "tool_choice": tool_choice or "auto",
             "parallel_tool_calls": True,
         }
 

From 64aeeceed02aadb19e51f82d71674024baec4b95 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Thu, 12 Mar 2026 04:33:51 +0000
Subject: [PATCH 52/53] Add /restart command: restart the bot process from any
 channel

---
 nanobot/agent/loop.py         | 43 +++++++++++++-------
 tests/test_restart_command.py | 76 +++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 15 deletions(-)
 create mode 100644 tests/test_restart_command.py

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index 597f852..5fe0ee0 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -4,8 +4,8 @@ from __future__ import annotations
 
 import asyncio
 import json
-import re
 import os
+import re
 import sys
 from contextlib import AsyncExitStack
 from pathlib import Path
@@ -258,8 +258,11 @@ class AgentLoop:
             except asyncio.TimeoutError:
                 continue
 
-            if msg.content.strip().lower() == "/stop":
+            cmd = msg.content.strip().lower()
+            if cmd == "/stop":
                 await self._handle_stop(msg)
+            elif cmd == "/restart":
+                await self._handle_restart(msg)
             else:
                 task = asyncio.create_task(self._dispatch(msg))
                 self._active_tasks.setdefault(msg.session_key, []).append(task)
@@ -276,11 +279,23 @@ class AgentLoop:
                 pass
         sub_cancelled = await self.subagents.cancel_by_session(msg.session_key)
         total = cancelled + sub_cancelled
-        content = f"⏹ Stopped {total} task(s)." if total else "No active task to stop."
+        content = f"Stopped {total} task(s)." if total else "No active task to stop."
         await self.bus.publish_outbound(OutboundMessage(
             channel=msg.channel, chat_id=msg.chat_id, content=content,
         ))
 
+    async def _handle_restart(self, msg: InboundMessage) -> None:
+        """Reply "Restarting..." to the originating chat, then re-exec the process via os.execv."""
+        await self.bus.publish_outbound(OutboundMessage(
+            channel=msg.channel, chat_id=msg.chat_id, content="Restarting...",
+        ))
+
+        async def _do_restart():
+            await asyncio.sleep(1)  # presumably lets the reply go out before exec -- confirm
+            os.execv(sys.executable, [sys.executable] + sys.argv)
+        # Hold a strong reference: the event loop keeps only weak references to tasks.
+        self._restart_task = asyncio.create_task(_do_restart())
+
     async def _dispatch(self, msg: InboundMessage) -> None:
         """Process a message under the global lock."""
         async with self._processing_lock:
@@ -375,18 +390,16 @@ class AgentLoop:
             return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id,
                                   content="New session started.")
         if cmd == "/help":
-            return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id,
-                                  content="🐈 nanobot commands:\n/new — Start a new conversation\n/stop — Stop the current task\n/help — Show available commands")
-        if cmd == "/restart":
-            await self.bus.publish_outbound(OutboundMessage(
-                channel=msg.channel, chat_id=msg.chat_id, content="🔄 Restarting..."
-            ))
-            async def _r():
-                await asyncio.sleep(1)
-                os.execv(sys.executable, [sys.executable] + sys.argv)
-            asyncio.create_task(_r())
-            return None
-
+            lines = [
+                "🐈 nanobot commands:",
+                "/new — Start a new conversation",
+                "/stop — Stop the current task",
+                "/restart — Restart the bot",
+                "/help — Show available commands",
+            ]
+            return OutboundMessage(
+                channel=msg.channel, chat_id=msg.chat_id, content="\n".join(lines),
+            )
         await self.memory_consolidator.maybe_consolidate_by_tokens(session)
 
         self._set_tool_context(msg.channel, msg.chat_id, msg.metadata.get("message_id"))
diff --git a/tests/test_restart_command.py b/tests/test_restart_command.py
new file mode 100644
index 0000000..c495347
--- /dev/null
+++ b/tests/test_restart_command.py
@@ -0,0 +1,76 @@
+"""Tests for /restart slash command."""
+
+from __future__ import annotations
+
+import asyncio
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from nanobot.bus.events import InboundMessage
+
+
+def _make_loop():
+    """Create a minimal AgentLoop with mocked dependencies."""
+    from nanobot.agent.loop import AgentLoop
+    from nanobot.bus.queue import MessageBus
+    # The bus is a real MessageBus so tests can publish/consume; provider and workspace are mocks.
+    bus = MessageBus()
+    provider = MagicMock()
+    provider.get_default_model.return_value = "test-model"
+    workspace = MagicMock()
+    workspace.__truediv__ = MagicMock(return_value=MagicMock())  # `workspace / x` yields a mock
+    # Collaborator classes are patched only for the duration of AgentLoop construction.
+    with patch("nanobot.agent.loop.ContextBuilder"), \
+         patch("nanobot.agent.loop.SessionManager"), \
+         patch("nanobot.agent.loop.SubagentManager"):
+        loop = AgentLoop(bus=bus, provider=provider, workspace=workspace)
+    return loop, bus
+
+
+class TestRestartCommand:
+    # Covers _handle_restart directly, /restart interception in run(), and the /help listing.
+    @pytest.mark.asyncio
+    async def test_restart_sends_message_and_calls_execv(self):
+        loop, bus = _make_loop()
+        msg = InboundMessage(channel="cli", sender_id="user", chat_id="direct", content="/restart")
+
+        with patch("nanobot.agent.loop.os.execv") as mock_execv:  # stub so the test process is not replaced
+            await loop._handle_restart(msg)
+            out = await asyncio.wait_for(bus.consume_outbound(), timeout=1.0)
+            assert "Restarting" in out.content
+
+            await asyncio.sleep(1.5)  # outlast the handler's 1 s delay before execv is invoked
+            mock_execv.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_restart_intercepted_in_run_loop(self):
+        """Verify /restart is handled at the run-loop level, not inside _dispatch."""
+        loop, bus = _make_loop()
+        msg = InboundMessage(channel="telegram", sender_id="u1", chat_id="c1", content="/restart")
+
+        with patch.object(loop, "_handle_restart") as mock_handle:  # replace handler; no restart is attempted
+            mock_handle.return_value = None
+            await bus.publish_inbound(msg)
+
+            loop._running = True
+            run_task = asyncio.create_task(loop.run())
+            await asyncio.sleep(0.1)  # give run() time to pick up the queued message
+            loop._running = False
+            run_task.cancel()
+            try:
+                await run_task
+            except asyncio.CancelledError:
+                pass
+
+            mock_handle.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_help_includes_restart(self):
+        loop, bus = _make_loop()
+        msg = InboundMessage(channel="telegram", sender_id="u1", chat_id="c1", content="/help")
+
+        response = await loop._process_message(msg)
+
+        assert response is not None
+        assert "/restart" in response.content

From 95c741db6293f49ad41343432b1e9649aa4d1ef8 Mon Sep 17 00:00:00 2001
From: Re-bin <xubinrencs@gmail.com>
Date: Thu, 12 Mar 2026 04:35:34 +0000
Subject: [PATCH 53/53] docs: update nanobot key features

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8dba2d7..e887828 100644
--- a/README.md
+++ b/README.md
@@ -64,7 +64,7 @@
 
 ## Key Features of nanobot:
 
-🪶 **Ultra-Lightweight**: Just ~4,000 lines of core agent code — 99% smaller than Clawdbot.
+🪶 **Ultra-Lightweight**: A super lightweight implementation of OpenClaw — 99% smaller, significantly faster.
 
 🔬 **Research-Ready**: Clean, readable code that's easy to understand, modify, and extend for research.