feat(config): hot reload agent runtime settings
Some checks failed
Test Suite / test (3.11) (push) Failing after 1m7s
Test Suite / test (3.12) (push) Failing after 1m3s
Test Suite / test (3.13) (push) Failing after 1m14s

This commit is contained in:
Hua
2026-03-19 14:01:18 +08:00
parent cfcfb35f81
commit fd52973751
7 changed files with 432 additions and 10 deletions

View File

@@ -32,6 +32,7 @@ Do not commit real API keys, tokens, chat logs, or workspace data. Keep local se
- If a slash command should appear in Telegram's native command menu, also update `nanobot/channels/telegram.py`.
- `/skill` currently supports `search`, `install`, `uninstall`, `list`, and `update`. Keep subcommand dispatch in `nanobot/agent/loop.py`.
- `/mcp` supports the default `list` behavior (and explicit `/mcp list`) to show configured MCP servers and registered MCP tools.
- Agent runtime config should be hot-reloaded from the active `config.json` for safe in-process fields such as `tools.mcpServers`, `tools.web.*`, `tools.exec.*`, `tools.restrictToWorkspace`, `agents.defaults.model`, `agents.defaults.maxToolIterations`, `agents.defaults.contextWindowTokens`, `agents.defaults.maxTokens`, `agents.defaults.temperature`, `agents.defaults.reasoningEffort`, `channels.sendProgress`, and `channels.sendToolHints`. Channel connection settings and provider credentials still require a restart.
- `/skill` shells out to `npx clawhub@latest`; it requires Node.js/`npx` at runtime.
- `/skill uninstall` runs in a non-interactive context, so keep passing `--yes` when shelling out to ClawHub.
- Treat empty `/skill search` output as a user-visible "no results" case rather than a silent success. Surface npm/registry failures directly to the user.

View File

@@ -1187,6 +1187,7 @@ Use `toolTimeout` to override the default 30s per-call timeout for slow servers:
```
MCP tools are automatically discovered and registered on startup. The LLM can use them alongside built-in tools — no extra configuration needed.
nanobot hot-reloads agent runtime config from the active `config.json` on the next message, including `tools.mcpServers`, `tools.web.*`, `tools.exec.*`, `tools.restrictToWorkspace`, `agents.defaults.model`, `agents.defaults.maxToolIterations`, `agents.defaults.contextWindowTokens`, `agents.defaults.maxTokens`, `agents.defaults.temperature`, `agents.defaults.reasoningEffort`, `channels.sendProgress`, and `channels.sendToolHints`. Channel connection settings and provider credentials still require a restart.

View File

@@ -77,6 +77,7 @@ class AgentLoop:
bus: MessageBus,
provider: LLMProvider,
workspace: Path,
config_path: Path | None = None,
model: str | None = None,
max_iterations: int = 40,
context_window_tokens: int = 65_536,
@@ -97,6 +98,7 @@ class AgentLoop:
self.channels_config = channels_config
self.provider = provider
self.workspace = workspace
self.config_path = config_path
self.model = model or provider.get_default_model()
self.max_iterations = max_iterations
self.context_window_tokens = context_window_tokens
@@ -128,6 +130,9 @@ class AgentLoop:
self._running = False
self._mcp_servers = mcp_servers or {}
self._runtime_config_mtime_ns = (
config_path.stat().st_mtime_ns if config_path and config_path.exists() else None
)
self._mcp_stack: AsyncExitStack | None = None
self._mcp_connected = False
self._mcp_connecting = False
@@ -212,6 +217,142 @@ class AgentLoop:
return {name: sorted(tools) for name, tools in grouped.items()}
def _remove_registered_mcp_tools(self) -> None:
"""Remove all dynamically registered MCP tools from the registry."""
for tool_name in list(self.tools.tool_names):
if tool_name.startswith("mcp_"):
self.tools.unregister(tool_name)
@staticmethod
def _dump_mcp_servers(servers: dict) -> dict:
"""Normalize MCP server config for value-based comparisons."""
dumped = {}
for name, cfg in servers.items():
dumped[name] = cfg.model_dump() if hasattr(cfg, "model_dump") else cfg
return dumped
async def _reset_mcp_connections(self) -> None:
"""Drop MCP tool registrations and close active MCP connections."""
self._remove_registered_mcp_tools()
if self._mcp_stack:
try:
await self._mcp_stack.aclose()
except (RuntimeError, BaseExceptionGroup):
pass
self._mcp_stack = None
self._mcp_connected = False
self._mcp_connecting = False
def _apply_runtime_tool_config(self) -> None:
"""Apply runtime-configurable settings to already-registered tools."""
allowed_dir = self.workspace if self.restrict_to_workspace else None
extra_read = [BUILTIN_SKILLS_DIR] if allowed_dir else None
if read_tool := self.tools.get("read_file"):
read_tool._workspace = self.workspace
read_tool._allowed_dir = allowed_dir
read_tool._extra_allowed_dirs = extra_read
for name in ("write_file", "edit_file", "list_dir"):
if tool := self.tools.get(name):
tool._workspace = self.workspace
tool._allowed_dir = allowed_dir
tool._extra_allowed_dirs = None
if exec_tool := self.tools.get("exec"):
exec_tool.timeout = self.exec_config.timeout
exec_tool.working_dir = str(self.workspace)
exec_tool.restrict_to_workspace = self.restrict_to_workspace
exec_tool.path_append = self.exec_config.path_append
if web_search_tool := self.tools.get("web_search"):
web_search_tool._init_provider = self.web_search_provider
web_search_tool._init_api_key = self.brave_api_key
web_search_tool._init_base_url = self.web_search_base_url
web_search_tool.max_results = self.web_search_max_results
web_search_tool.proxy = self.web_proxy
if web_fetch_tool := self.tools.get("web_fetch"):
web_fetch_tool.proxy = self.web_proxy
def _apply_runtime_config(self, config) -> bool:
    """Apply hot-reloadable settings from a freshly loaded config object.

    Updates model/generation defaults, tool settings, channel behavior
    flags, and subagent runtime settings in place. Returns ``True`` when
    the configured MCP server set differs (value-wise) from the previous
    one, so the caller knows MCP connections must be reset.
    """
    from nanobot.providers.base import GenerationSettings

    agent_defaults = config.agents.defaults
    tools_cfg = config.tools
    web_cfg = tools_cfg.web
    search_cfg = web_cfg.search

    # Agent-level settings.
    self.model = agent_defaults.model
    self.max_iterations = agent_defaults.max_tool_iterations
    self.context_window_tokens = agent_defaults.context_window_tokens

    # Tool-level settings (empty strings normalize to None).
    self.exec_config = tools_cfg.exec
    self.restrict_to_workspace = tools_cfg.restrict_to_workspace
    self.brave_api_key = search_cfg.api_key or None
    self.web_proxy = web_cfg.proxy or None
    self.web_search_provider = search_cfg.provider
    self.web_search_base_url = search_cfg.base_url or None
    self.web_search_max_results = search_cfg.max_results

    # Channel behavior flags (e.g. sendProgress / sendToolHints).
    self.channels_config = config.channels

    # Provider generation settings, plus its default model when supported.
    self.provider.generation = GenerationSettings(
        temperature=agent_defaults.temperature,
        max_tokens=agent_defaults.max_tokens,
        reasoning_effort=agent_defaults.reasoning_effort,
    )
    if hasattr(self.provider, "default_model"):
        self.provider.default_model = self.model

    # Propagate to collaborators that cache these values.
    self.memory_consolidator.model = self.model
    self.memory_consolidator.context_window_tokens = self.context_window_tokens
    self.subagents.apply_runtime_config(
        model=self.model,
        brave_api_key=self.brave_api_key,
        web_proxy=self.web_proxy,
        web_search_provider=self.web_search_provider,
        web_search_base_url=self.web_search_base_url,
        web_search_max_results=self.web_search_max_results,
        exec_config=self.exec_config,
        restrict_to_workspace=self.restrict_to_workspace,
    )
    self._apply_runtime_tool_config()

    # Detect MCP server changes by normalized value, not object identity.
    new_servers = config.tools.mcp_servers
    mcp_changed = self._dump_mcp_servers(new_servers) != self._dump_mcp_servers(self._mcp_servers)
    self._mcp_servers = new_servers
    return mcp_changed
async def _reload_runtime_config_if_needed(self, *, force: bool = False) -> None:
"""Reload hot-reloadable config from the active config file when it changes."""
if self.config_path is None:
return
try:
mtime_ns = self.config_path.stat().st_mtime_ns
except FileNotFoundError:
mtime_ns = None
if not force and mtime_ns == self._runtime_config_mtime_ns:
return
self._runtime_config_mtime_ns = mtime_ns
from nanobot.config.loader import load_config
if mtime_ns is None:
await self._reset_mcp_connections()
self._mcp_servers = {}
return
reloaded = load_config(self.config_path)
if self._apply_runtime_config(reloaded):
await self._reset_mcp_connections()
async def _reload_mcp_servers_if_needed(self, *, force: bool = False) -> None:
    """Backward-compatible wrapper for runtime config reloads.

    Older call sites reload only the MCP server set; runtime reloading now
    covers all hot-reloadable settings, so this simply delegates to
    `_reload_runtime_config_if_needed` with the same `force` flag.
    """
    await self._reload_runtime_config_if_needed(force=force)
@staticmethod
def _decode_subprocess_output(data: bytes) -> str:
"""Decode subprocess output conservatively for CLI surfacing."""
@@ -396,6 +537,8 @@ class AgentLoop:
content=self._mcp_usage(language),
)
await self._reload_mcp_servers_if_needed()
if not self._mcp_servers:
return OutboundMessage(
channel=msg.channel,
@@ -456,6 +599,7 @@ class AgentLoop:
async def _connect_mcp(self) -> None:
"""Connect to configured MCP servers (one-time, lazy)."""
await self._reload_mcp_servers_if_needed()
if self._mcp_connected or self._mcp_connecting or not self._mcp_servers:
return
self._mcp_connecting = True
@@ -791,12 +935,7 @@ class AgentLoop:
if self._background_tasks:
await asyncio.gather(*self._background_tasks, return_exceptions=True)
self._background_tasks.clear()
if self._mcp_stack:
try:
await self._mcp_stack.aclose()
except (RuntimeError, BaseExceptionGroup):
pass # MCP SDK cancel scope cleanup is noisy but harmless
self._mcp_stack = None
await self._reset_mcp_connections()
def _schedule_background(self, coro) -> None:
"""Schedule a coroutine as a tracked background task (drained on shutdown)."""
@@ -816,6 +955,8 @@ class AgentLoop:
on_progress: Callable[[str], Awaitable[None]] | None = None,
) -> OutboundMessage | None:
"""Process a single inbound message and return the response."""
await self._reload_runtime_config_if_needed()
# System messages: parse origin from chat_id ("channel:chat_id")
if msg.channel == "system":
channel, chat_id = (msg.chat_id.split(":", 1) if ":" in msg.chat_id
@@ -825,6 +966,7 @@ class AgentLoop:
session = self.sessions.get_or_create(key)
persona = self._get_session_persona(session)
language = self._get_session_language(session)
await self._connect_mcp()
await self.memory_consolidator.maybe_consolidate_by_tokens(session)
self._set_tool_context(channel, chat_id, msg.metadata.get("message_id"))
history = session.get_history(max_messages=0)
@@ -879,6 +1021,7 @@ class AgentLoop:
return OutboundMessage(
channel=msg.channel, chat_id=msg.chat_id, content="\n".join(help_lines(language)),
)
await self._connect_mcp()
await self.memory_consolidator.maybe_consolidate_by_tokens(session)
self._set_tool_context(msg.channel, msg.chat_id, msg.metadata.get("message_id"))

View File

@@ -52,6 +52,28 @@ class SubagentManager:
self._running_tasks: dict[str, asyncio.Task[None]] = {}
self._session_tasks: dict[str, set[str]] = {} # session_key -> {task_id, ...}
def apply_runtime_config(
    self,
    *,
    model: str,
    brave_api_key: str | None,
    web_proxy: str | None,
    web_search_provider: str,
    web_search_base_url: str | None,
    web_search_max_results: int,
    exec_config: ExecToolConfig,
    restrict_to_workspace: bool,
) -> None:
    """Update runtime-configurable settings for future subagent tasks.

    Called after a config hot reload so subsequently spawned subagents use
    the latest model, web-search, exec, and sandboxing settings.
    """
    # One parallel assignment keeps the field list and the value list
    # visibly in sync.
    (
        self.model,
        self.brave_api_key,
        self.web_proxy,
        self.web_search_provider,
        self.web_search_base_url,
        self.web_search_max_results,
        self.exec_config,
        self.restrict_to_workspace,
    ) = (
        model,
        brave_api_key,
        web_proxy,
        web_search_provider,
        web_search_base_url,
        web_search_max_results,
        exec_config,
        restrict_to_workspace,
    )
async def spawn(
self,
task: str,
@@ -209,7 +231,7 @@ Summarize this naturally for the user. Keep it brief (1-2 sentences). Do not men
await self.bus.publish_inbound(msg)
logger.debug("Subagent [{}] announced result to {}:{}", task_id, origin['channel'], origin['chat_id'])
def _build_subagent_prompt(self) -> str:
"""Build a focused system prompt for the subagent."""
from nanobot.agent.context import ContextBuilder

View File

@@ -118,7 +118,7 @@ class WebSearchTool(Tool):
return (
"Error: Brave Search API key not configured. Set it in "
"~/.nanobot/config.json under tools.web.search.apiKey "
"(or export BRAVE_API_KEY), then restart the gateway."
"(or export BRAVE_API_KEY), then retry your message."
)
try:

View File

@@ -491,6 +491,7 @@ def gateway(
from nanobot.agent.loop import AgentLoop
from nanobot.bus.queue import MessageBus
from nanobot.channels.manager import ChannelManager
from nanobot.config.loader import get_config_path
from nanobot.config.paths import get_cron_dir
from nanobot.cron.service import CronService
from nanobot.cron.types import CronJob
@@ -520,6 +521,7 @@ def gateway(
bus=bus,
provider=provider,
workspace=config.workspace_path,
config_path=get_config_path(),
model=config.agents.defaults.model,
max_iterations=config.agents.defaults.max_tool_iterations,
context_window_tokens=config.agents.defaults.context_window_tokens,
@@ -683,6 +685,7 @@ def agent(
from nanobot.agent.loop import AgentLoop
from nanobot.bus.queue import MessageBus
from nanobot.config.loader import get_config_path
from nanobot.config.paths import get_cron_dir
from nanobot.cron.service import CronService
@@ -706,6 +709,7 @@ def agent(
bus=bus,
provider=provider,
workspace=config.workspace_path,
config_path=get_config_path(),
model=config.agents.defaults.model,
max_iterations=config.agents.defaults.max_tool_iterations,
context_window_tokens=config.agents.defaults.context_window_tokens,

View File

@@ -2,7 +2,9 @@
from __future__ import annotations
import json
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
@@ -30,7 +32,7 @@ class _FakeTool:
return ""
def _make_loop(workspace: Path, *, mcp_servers: dict | None = None):
def _make_loop(workspace: Path, *, mcp_servers: dict | None = None, config_path: Path | None = None):
"""Create an AgentLoop with a real workspace and lightweight mocks."""
from nanobot.agent.loop import AgentLoop
from nanobot.bus.queue import MessageBus
@@ -40,7 +42,13 @@ def _make_loop(workspace: Path, *, mcp_servers: dict | None = None):
provider.get_default_model.return_value = "test-model"
with patch("nanobot.agent.loop.SubagentManager"):
loop = AgentLoop(bus=bus, provider=provider, workspace=workspace, mcp_servers=mcp_servers)
loop = AgentLoop(
bus=bus,
provider=provider,
workspace=workspace,
config_path=config_path,
mcp_servers=mcp_servers,
)
return loop
@@ -87,3 +95,246 @@ async def test_help_includes_mcp_command(tmp_path: Path) -> None:
assert response is not None
assert "/mcp [list]" in response.content
@pytest.mark.asyncio
async def test_mcp_command_hot_reloads_servers_from_config(tmp_path: Path) -> None:
    """`/mcp` must list servers added to config.json after the loop started."""
    config_path = tmp_path / "config.json"
    config_path.write_text(json.dumps({"tools": {}}), encoding="utf-8")
    loop = _make_loop(tmp_path, mcp_servers={}, config_path=config_path)
    # Simulate an out-of-band edit: a "docs" MCP server appears on disk
    # after the loop was constructed with an empty server set.
    config_path.write_text(
        json.dumps(
            {
                "tools": {
                    "mcpServers": {
                        "docs": {
                            "command": "npx",
                            "args": ["-y", "@demo/docs"],
                        }
                    }
                }
            }
        ),
        encoding="utf-8",
    )
    # Stub out the real connection step; only the config reload matters here.
    with patch.object(loop, "_connect_mcp", AsyncMock()) as connect_mcp:
        response = await loop._process_message(
            InboundMessage(channel="cli", sender_id="user", chat_id="direct", content="/mcp")
        )
    assert response is not None
    assert "Configured MCP servers:" in response.content
    assert "- docs" in response.content
    connect_mcp.assert_awaited_once()
@pytest.mark.asyncio
async def test_mcp_config_reload_resets_connections_and_tools(tmp_path: Path) -> None:
    """Changing the MCP server set on disk must tear down old connections and tools."""
    config_path = tmp_path / "config.json"
    config_path.write_text(
        json.dumps(
            {
                "tools": {
                    "mcpServers": {
                        "old": {
                            "command": "npx",
                            "args": ["-y", "@demo/old"],
                        }
                    }
                }
            }
        ),
        encoding="utf-8",
    )
    # SimpleNamespace stands in for a pydantic server config (model_dump()).
    loop = _make_loop(
        tmp_path,
        mcp_servers={"old": SimpleNamespace(model_dump=lambda: {"command": "npx", "args": ["-y", "@demo/old"]})},
        config_path=config_path,
    )
    # Pretend we are already connected with one registered MCP tool.
    stack = SimpleNamespace(aclose=AsyncMock())
    loop._mcp_stack = stack
    loop._mcp_connected = True
    loop.tools.register(_FakeTool("mcp_old_lookup"))
    # Swap the configured server from "old" to "new" on disk.
    config_path.write_text(
        json.dumps(
            {
                "tools": {
                    "mcpServers": {
                        "new": {
                            "command": "npx",
                            "args": ["-y", "@demo/new"],
                        }
                    }
                }
            }
        ),
        encoding="utf-8",
    )
    await loop._reload_mcp_servers_if_needed(force=True)
    # The new server set is active; the old connection and tools are gone.
    assert list(loop._mcp_servers) == ["new"]
    assert loop._mcp_connected is False
    assert loop.tools.get("mcp_old_lookup") is None
    stack.aclose.assert_awaited_once()
@pytest.mark.asyncio
async def test_regular_messages_pick_up_reloaded_mcp_config(tmp_path: Path, monkeypatch) -> None:
    """A normal chat message should trigger the hot reload and connect to MCP."""
    config_path = tmp_path / "config.json"
    config_path.write_text(json.dumps({"tools": {}}), encoding="utf-8")
    loop = _make_loop(tmp_path, mcp_servers={}, config_path=config_path)
    # Stub the LLM call so the agent loop finishes without any tool use.
    loop.provider.chat_with_retry = AsyncMock(
        return_value=SimpleNamespace(
            has_tool_calls=False,
            content="ok",
            finish_reason="stop",
            reasoning_content=None,
            thinking_blocks=None,
        )
    )
    # A "docs" MCP server appears in config.json after startup.
    config_path.write_text(
        json.dumps(
            {
                "tools": {
                    "mcpServers": {
                        "docs": {
                            "command": "npx",
                            "args": ["-y", "@demo/docs"],
                        }
                    }
                }
            }
        ),
        encoding="utf-8",
    )
    # Intercept the real MCP connection routine.
    connect_mcp_servers = AsyncMock()
    monkeypatch.setattr("nanobot.agent.tools.mcp.connect_mcp_servers", connect_mcp_servers)
    response = await loop._process_message(
        InboundMessage(channel="cli", sender_id="user", chat_id="direct", content="hello")
    )
    assert response is not None
    assert response.content == "ok"
    # The reloaded server set was picked up and a connection was attempted.
    assert list(loop._mcp_servers) == ["docs"]
    connect_mcp_servers.assert_awaited_once()
@pytest.mark.asyncio
async def test_runtime_config_reload_updates_agent_and_tool_settings(tmp_path: Path) -> None:
    """A config rewrite must propagate to the agent, provider, subagents, and tools."""
    config_path = tmp_path / "config.json"
    # Initial config the loop starts from.
    config_path.write_text(
        json.dumps(
            {
                "agents": {
                    "defaults": {
                        "model": "initial-model",
                        "maxToolIterations": 4,
                        "contextWindowTokens": 4096,
                        "maxTokens": 1000,
                        "temperature": 0.2,
                        "reasoningEffort": "low",
                    }
                },
                "tools": {
                    "restrictToWorkspace": False,
                    "exec": {"timeout": 20, "pathAppend": ""},
                    "web": {
                        "proxy": "",
                        "search": {
                            "provider": "brave",
                            "apiKey": "",
                            "baseUrl": "",
                            "maxResults": 3,
                        }
                    },
                },
                "channels": {
                    "sendProgress": True,
                    "sendToolHints": False,
                },
            }
        ),
        encoding="utf-8",
    )
    loop = _make_loop(tmp_path, mcp_servers={}, config_path=config_path)
    # Rewrite every hot-reloadable field with a distinct new value.
    config_path.write_text(
        json.dumps(
            {
                "agents": {
                    "defaults": {
                        "model": "reloaded-model",
                        "maxToolIterations": 9,
                        "contextWindowTokens": 8192,
                        "maxTokens": 2222,
                        "temperature": 0.7,
                        "reasoningEffort": "high",
                    }
                },
                "tools": {
                    "restrictToWorkspace": True,
                    "exec": {"timeout": 45, "pathAppend": "/usr/local/bin"},
                    "web": {
                        "proxy": "http://127.0.0.1:7890",
                        "search": {
                            "provider": "searxng",
                            "apiKey": "demo-key",
                            "baseUrl": "https://search.example.com",
                            "maxResults": 7,
                        }
                    },
                },
                "channels": {
                    "sendProgress": False,
                    "sendToolHints": True,
                },
            }
        ),
        encoding="utf-8",
    )
    await loop._reload_runtime_config_if_needed(force=True)
    exec_tool = loop.tools.get("exec")
    web_search_tool = loop.tools.get("web_search")
    web_fetch_tool = loop.tools.get("web_fetch")
    read_tool = loop.tools.get("read_file")
    # Agent-level settings.
    assert loop.model == "reloaded-model"
    assert loop.max_iterations == 9
    assert loop.context_window_tokens == 8192
    # Provider generation settings.
    assert loop.provider.generation.max_tokens == 2222
    assert loop.provider.generation.temperature == 0.7
    assert loop.provider.generation.reasoning_effort == "high"
    # The memory consolidator follows the new model/context window.
    assert loop.memory_consolidator.model == "reloaded-model"
    assert loop.memory_consolidator.context_window_tokens == 8192
    # Channel behavior flags.
    assert loop.channels_config.send_progress is False
    assert loop.channels_config.send_tool_hints is True
    # The subagent manager receives the full runtime bundle (SubagentManager
    # is patched in _make_loop, so the call can be inspected as a mock).
    loop.subagents.apply_runtime_config.assert_called_once_with(
        model="reloaded-model",
        brave_api_key="demo-key",
        web_proxy="http://127.0.0.1:7890",
        web_search_provider="searxng",
        web_search_base_url="https://search.example.com",
        web_search_max_results=7,
        exec_config=loop.exec_config,
        restrict_to_workspace=True,
    )
    # Live tool instances were mutated in place.
    assert exec_tool.timeout == 45
    assert exec_tool.path_append == "/usr/local/bin"
    assert exec_tool.restrict_to_workspace is True
    assert web_search_tool._init_provider == "searxng"
    assert web_search_tool._init_api_key == "demo-key"
    assert web_search_tool._init_base_url == "https://search.example.com"
    assert web_search_tool.max_results == 7
    assert web_search_tool.proxy == "http://127.0.0.1:7890"
    assert web_fetch_tool.proxy == "http://127.0.0.1:7890"
    assert read_tool._allowed_dir == tmp_path