Merge remote-tracking branch 'origin/main' into pr-1796
This commit is contained in:
63
tests/test_evaluator.py
Normal file
63
tests/test_evaluator.py
Normal file
@@ -0,0 +1,63 @@
|
||||
import pytest
|
||||
|
||||
from nanobot.utils.evaluator import evaluate_response
|
||||
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
|
||||
|
||||
|
||||
class DummyProvider(LLMProvider):
|
||||
def __init__(self, responses: list[LLMResponse]):
|
||||
super().__init__()
|
||||
self._responses = list(responses)
|
||||
|
||||
async def chat(self, *args, **kwargs) -> LLMResponse:
|
||||
if self._responses:
|
||||
return self._responses.pop(0)
|
||||
return LLMResponse(content="", tool_calls=[])
|
||||
|
||||
def get_default_model(self) -> str:
|
||||
return "test-model"
|
||||
|
||||
|
||||
def _eval_tool_call(should_notify: bool, reason: str = "") -> LLMResponse:
|
||||
return LLMResponse(
|
||||
content="",
|
||||
tool_calls=[
|
||||
ToolCallRequest(
|
||||
id="eval_1",
|
||||
name="evaluate_notification",
|
||||
arguments={"should_notify": should_notify, "reason": reason},
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_should_notify_true() -> None:
|
||||
provider = DummyProvider([_eval_tool_call(True, "user asked to be reminded")])
|
||||
result = await evaluate_response("Task completed with results", "check emails", provider, "m")
|
||||
assert result is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_should_notify_false() -> None:
|
||||
provider = DummyProvider([_eval_tool_call(False, "routine check, nothing new")])
|
||||
result = await evaluate_response("All clear, no updates", "check status", provider, "m")
|
||||
assert result is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_fallback_on_error() -> None:
|
||||
class FailingProvider(DummyProvider):
|
||||
async def chat(self, *args, **kwargs) -> LLMResponse:
|
||||
raise RuntimeError("provider down")
|
||||
|
||||
provider = FailingProvider([])
|
||||
result = await evaluate_response("some response", "some task", provider, "m")
|
||||
assert result is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_tool_call_fallback() -> None:
|
||||
provider = DummyProvider([LLMResponse(content="I think you should notify", tool_calls=[])])
|
||||
result = await evaluate_response("some response", "some task", provider, "m")
|
||||
assert result is True
|
||||
@@ -123,6 +123,98 @@ async def test_trigger_now_returns_none_when_decision_is_skip(tmp_path) -> None:
|
||||
assert await service.trigger_now() is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tick_notifies_when_evaluator_says_yes(tmp_path, monkeypatch) -> None:
|
||||
"""Phase 1 run -> Phase 2 execute -> Phase 3 evaluate=notify -> on_notify called."""
|
||||
(tmp_path / "HEARTBEAT.md").write_text("- [ ] check deployments", encoding="utf-8")
|
||||
|
||||
provider = DummyProvider([
|
||||
LLMResponse(
|
||||
content="",
|
||||
tool_calls=[
|
||||
ToolCallRequest(
|
||||
id="hb_1",
|
||||
name="heartbeat",
|
||||
arguments={"action": "run", "tasks": "check deployments"},
|
||||
)
|
||||
],
|
||||
),
|
||||
])
|
||||
|
||||
executed: list[str] = []
|
||||
notified: list[str] = []
|
||||
|
||||
async def _on_execute(tasks: str) -> str:
|
||||
executed.append(tasks)
|
||||
return "deployment failed on staging"
|
||||
|
||||
async def _on_notify(response: str) -> None:
|
||||
notified.append(response)
|
||||
|
||||
service = HeartbeatService(
|
||||
workspace=tmp_path,
|
||||
provider=provider,
|
||||
model="openai/gpt-4o-mini",
|
||||
on_execute=_on_execute,
|
||||
on_notify=_on_notify,
|
||||
)
|
||||
|
||||
async def _eval_notify(*a, **kw):
|
||||
return True
|
||||
|
||||
monkeypatch.setattr("nanobot.utils.evaluator.evaluate_response", _eval_notify)
|
||||
|
||||
await service._tick()
|
||||
assert executed == ["check deployments"]
|
||||
assert notified == ["deployment failed on staging"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tick_suppresses_when_evaluator_says_no(tmp_path, monkeypatch) -> None:
|
||||
"""Phase 1 run -> Phase 2 execute -> Phase 3 evaluate=silent -> on_notify NOT called."""
|
||||
(tmp_path / "HEARTBEAT.md").write_text("- [ ] check status", encoding="utf-8")
|
||||
|
||||
provider = DummyProvider([
|
||||
LLMResponse(
|
||||
content="",
|
||||
tool_calls=[
|
||||
ToolCallRequest(
|
||||
id="hb_1",
|
||||
name="heartbeat",
|
||||
arguments={"action": "run", "tasks": "check status"},
|
||||
)
|
||||
],
|
||||
),
|
||||
])
|
||||
|
||||
executed: list[str] = []
|
||||
notified: list[str] = []
|
||||
|
||||
async def _on_execute(tasks: str) -> str:
|
||||
executed.append(tasks)
|
||||
return "everything is fine, no issues"
|
||||
|
||||
async def _on_notify(response: str) -> None:
|
||||
notified.append(response)
|
||||
|
||||
service = HeartbeatService(
|
||||
workspace=tmp_path,
|
||||
provider=provider,
|
||||
model="openai/gpt-4o-mini",
|
||||
on_execute=_on_execute,
|
||||
on_notify=_on_notify,
|
||||
)
|
||||
|
||||
async def _eval_silent(*a, **kw):
|
||||
return False
|
||||
|
||||
monkeypatch.setattr("nanobot.utils.evaluator.evaluate_response", _eval_silent)
|
||||
|
||||
await service._tick()
|
||||
assert executed == ["check status"]
|
||||
assert notified == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_decide_retries_transient_error_then_succeeds(tmp_path, monkeypatch) -> None:
|
||||
provider = DummyProvider([
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from contextlib import AsyncExitStack, asynccontextmanager
|
||||
import sys
|
||||
from types import ModuleType, SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from nanobot.agent.tools.mcp import MCPToolWrapper
|
||||
from nanobot.agent.tools.mcp import MCPToolWrapper, connect_mcp_servers
|
||||
from nanobot.agent.tools.registry import ToolRegistry
|
||||
from nanobot.config.schema import MCPServerConfig
|
||||
|
||||
|
||||
class _FakeTextContent:
|
||||
@@ -14,12 +17,63 @@ class _FakeTextContent:
|
||||
self.text = text
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def fake_mcp_runtime() -> dict[str, object | None]:
|
||||
return {"session": None}
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _fake_mcp_module(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
def _fake_mcp_module(
|
||||
monkeypatch: pytest.MonkeyPatch, fake_mcp_runtime: dict[str, object | None]
|
||||
) -> None:
|
||||
mod = ModuleType("mcp")
|
||||
mod.types = SimpleNamespace(TextContent=_FakeTextContent)
|
||||
|
||||
class _FakeStdioServerParameters:
|
||||
def __init__(self, command: str, args: list[str], env: dict | None = None) -> None:
|
||||
self.command = command
|
||||
self.args = args
|
||||
self.env = env
|
||||
|
||||
class _FakeClientSession:
|
||||
def __init__(self, _read: object, _write: object) -> None:
|
||||
self._session = fake_mcp_runtime["session"]
|
||||
|
||||
async def __aenter__(self) -> object:
|
||||
return self._session
|
||||
|
||||
async def __aexit__(self, exc_type, exc, tb) -> bool:
|
||||
return False
|
||||
|
||||
@asynccontextmanager
|
||||
async def _fake_stdio_client(_params: object):
|
||||
yield object(), object()
|
||||
|
||||
@asynccontextmanager
|
||||
async def _fake_sse_client(_url: str, httpx_client_factory=None):
|
||||
yield object(), object()
|
||||
|
||||
@asynccontextmanager
|
||||
async def _fake_streamable_http_client(_url: str, http_client=None):
|
||||
yield object(), object(), object()
|
||||
|
||||
mod.ClientSession = _FakeClientSession
|
||||
mod.StdioServerParameters = _FakeStdioServerParameters
|
||||
monkeypatch.setitem(sys.modules, "mcp", mod)
|
||||
|
||||
client_mod = ModuleType("mcp.client")
|
||||
stdio_mod = ModuleType("mcp.client.stdio")
|
||||
stdio_mod.stdio_client = _fake_stdio_client
|
||||
sse_mod = ModuleType("mcp.client.sse")
|
||||
sse_mod.sse_client = _fake_sse_client
|
||||
streamable_http_mod = ModuleType("mcp.client.streamable_http")
|
||||
streamable_http_mod.streamable_http_client = _fake_streamable_http_client
|
||||
|
||||
monkeypatch.setitem(sys.modules, "mcp.client", client_mod)
|
||||
monkeypatch.setitem(sys.modules, "mcp.client.stdio", stdio_mod)
|
||||
monkeypatch.setitem(sys.modules, "mcp.client.sse", sse_mod)
|
||||
monkeypatch.setitem(sys.modules, "mcp.client.streamable_http", streamable_http_mod)
|
||||
|
||||
|
||||
def _make_wrapper(session: object, *, timeout: float = 0.1) -> MCPToolWrapper:
|
||||
tool_def = SimpleNamespace(
|
||||
@@ -97,3 +151,132 @@ async def test_execute_handles_generic_exception() -> None:
|
||||
result = await wrapper.execute()
|
||||
|
||||
assert result == "(MCP tool call failed: RuntimeError)"
|
||||
|
||||
|
||||
def _make_tool_def(name: str) -> SimpleNamespace:
|
||||
return SimpleNamespace(
|
||||
name=name,
|
||||
description=f"{name} tool",
|
||||
inputSchema={"type": "object", "properties": {}},
|
||||
)
|
||||
|
||||
|
||||
def _make_fake_session(tool_names: list[str]) -> SimpleNamespace:
|
||||
async def initialize() -> None:
|
||||
return None
|
||||
|
||||
async def list_tools() -> SimpleNamespace:
|
||||
return SimpleNamespace(tools=[_make_tool_def(name) for name in tool_names])
|
||||
|
||||
return SimpleNamespace(initialize=initialize, list_tools=list_tools)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_connect_mcp_servers_enabled_tools_supports_raw_names(
|
||||
fake_mcp_runtime: dict[str, object | None],
|
||||
) -> None:
|
||||
fake_mcp_runtime["session"] = _make_fake_session(["demo", "other"])
|
||||
registry = ToolRegistry()
|
||||
stack = AsyncExitStack()
|
||||
await stack.__aenter__()
|
||||
try:
|
||||
await connect_mcp_servers(
|
||||
{"test": MCPServerConfig(command="fake", enabled_tools=["demo"])},
|
||||
registry,
|
||||
stack,
|
||||
)
|
||||
finally:
|
||||
await stack.aclose()
|
||||
|
||||
assert registry.tool_names == ["mcp_test_demo"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_connect_mcp_servers_enabled_tools_defaults_to_all(
|
||||
fake_mcp_runtime: dict[str, object | None],
|
||||
) -> None:
|
||||
fake_mcp_runtime["session"] = _make_fake_session(["demo", "other"])
|
||||
registry = ToolRegistry()
|
||||
stack = AsyncExitStack()
|
||||
await stack.__aenter__()
|
||||
try:
|
||||
await connect_mcp_servers(
|
||||
{"test": MCPServerConfig(command="fake")},
|
||||
registry,
|
||||
stack,
|
||||
)
|
||||
finally:
|
||||
await stack.aclose()
|
||||
|
||||
assert registry.tool_names == ["mcp_test_demo", "mcp_test_other"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_connect_mcp_servers_enabled_tools_supports_wrapped_names(
|
||||
fake_mcp_runtime: dict[str, object | None],
|
||||
) -> None:
|
||||
fake_mcp_runtime["session"] = _make_fake_session(["demo", "other"])
|
||||
registry = ToolRegistry()
|
||||
stack = AsyncExitStack()
|
||||
await stack.__aenter__()
|
||||
try:
|
||||
await connect_mcp_servers(
|
||||
{"test": MCPServerConfig(command="fake", enabled_tools=["mcp_test_demo"])},
|
||||
registry,
|
||||
stack,
|
||||
)
|
||||
finally:
|
||||
await stack.aclose()
|
||||
|
||||
assert registry.tool_names == ["mcp_test_demo"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_connect_mcp_servers_enabled_tools_empty_list_registers_none(
|
||||
fake_mcp_runtime: dict[str, object | None],
|
||||
) -> None:
|
||||
fake_mcp_runtime["session"] = _make_fake_session(["demo", "other"])
|
||||
registry = ToolRegistry()
|
||||
stack = AsyncExitStack()
|
||||
await stack.__aenter__()
|
||||
try:
|
||||
await connect_mcp_servers(
|
||||
{"test": MCPServerConfig(command="fake", enabled_tools=[])},
|
||||
registry,
|
||||
stack,
|
||||
)
|
||||
finally:
|
||||
await stack.aclose()
|
||||
|
||||
assert registry.tool_names == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_connect_mcp_servers_enabled_tools_warns_on_unknown_entries(
|
||||
fake_mcp_runtime: dict[str, object | None], monkeypatch: pytest.MonkeyPatch
|
||||
) -> None:
|
||||
fake_mcp_runtime["session"] = _make_fake_session(["demo"])
|
||||
registry = ToolRegistry()
|
||||
warnings: list[str] = []
|
||||
|
||||
def _warning(message: str, *args: object) -> None:
|
||||
warnings.append(message.format(*args))
|
||||
|
||||
monkeypatch.setattr("nanobot.agent.tools.mcp.logger.warning", _warning)
|
||||
|
||||
stack = AsyncExitStack()
|
||||
await stack.__aenter__()
|
||||
try:
|
||||
await connect_mcp_servers(
|
||||
{"test": MCPServerConfig(command="fake", enabled_tools=["unknown"])},
|
||||
registry,
|
||||
stack,
|
||||
)
|
||||
finally:
|
||||
await stack.aclose()
|
||||
|
||||
assert registry.tool_names == []
|
||||
assert warnings
|
||||
assert "enabledTools entries not found: unknown" in warnings[-1]
|
||||
assert "Available raw names: demo" in warnings[-1]
|
||||
assert "Available wrapped names: mcp_test_demo" in warnings[-1]
|
||||
|
||||
@@ -647,3 +647,19 @@ async def test_forward_command_does_not_inject_reply_context() -> None:
|
||||
|
||||
assert len(handled) == 1
|
||||
assert handled[0]["content"] == "/new"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_on_help_includes_restart_command() -> None:
|
||||
channel = TelegramChannel(
|
||||
TelegramConfig(enabled=True, token="123:abc", allow_from=["*"], group_policy="open"),
|
||||
MessageBus(),
|
||||
)
|
||||
update = _make_telegram_update(text="/help", chat_type="private")
|
||||
update.message.reply_text = AsyncMock()
|
||||
|
||||
await channel._on_help(update, None)
|
||||
|
||||
update.message.reply_text.assert_awaited_once()
|
||||
help_text = update.message.reply_text.await_args.args[0]
|
||||
assert "/restart" in help_text
|
||||
|
||||
Reference in New Issue
Block a user