refactor(tests): optimize unit test structure

This commit is contained in:
chengyongru
2026-03-24 13:37:06 +08:00
committed by Xubin Ren
parent d25985be0b
commit 72acba5d27
52 changed files with 104 additions and 13 deletions

View File

@@ -0,0 +1,619 @@
"""Test session management with cache-friendly message handling."""
import asyncio
from unittest.mock import AsyncMock, MagicMock
import pytest
from pathlib import Path
from nanobot.session.manager import Session, SessionManager
# Test constants mirroring the production consolidation configuration.
MEMORY_WINDOW = 50  # message count above which consolidation should trigger
KEEP_COUNT = MEMORY_WINDOW // 2  # 25 — recent messages retained after consolidation
def create_session_with_messages(key: str, count: int, role: str = "user") -> Session:
    """Build a session pre-populated with sequentially numbered messages.

    Args:
        key: Session identifier
        count: Number of messages to add
        role: Message role (default: "user")

    Returns:
        Session containing messages "msg0" .. f"msg{count-1}"
    """
    session = Session(key=key)
    for index in range(count):
        session.add_message(role, f"msg{index}")
    return session
def assert_messages_content(messages: list, start_index: int, end_index: int) -> None:
    """Verify a message slice spans the expected msg{start}..msg{end} range.

    Args:
        messages: List of message dictionaries
        start_index: Expected index embedded in the first message's content
        end_index: Expected index embedded in the last message's content
    """
    assert len(messages) != 0
    first, last = messages[0], messages[-1]
    assert first["content"] == f"msg{start_index}"
    assert last["content"] == f"msg{end_index}"
def get_old_messages(session: Session, last_consolidated: int, keep_count: int) -> list:
    """Return the slice of messages that consolidation would archive.

    Mirrors the production slice ``messages[last_consolidated:-keep_count]``.

    Args:
        session: The session containing messages
        last_consolidated: Index of last consolidated message
        keep_count: Number of recent messages to keep

    Returns:
        List of messages that would be consolidated
    """
    start, stop = last_consolidated, -keep_count
    return session.messages[start:stop]
class TestSessionLastConsolidated:
    """Verify last_consolidated bookkeeping that prevents duplicate archiving."""

    def test_initial_last_consolidated_zero(self) -> None:
        """A freshly created session has nothing consolidated yet."""
        assert Session(key="test:initial").last_consolidated == 0

    def test_last_consolidated_persistence(self, tmp_path) -> None:
        """last_consolidated survives a save/load round trip."""
        manager = SessionManager(Path(tmp_path))
        saved = create_session_with_messages("test:persist", 20)
        saved.last_consolidated = 15
        manager.save(saved)
        reloaded = manager.get_or_create("test:persist")
        assert reloaded.last_consolidated == 15
        assert len(reloaded.messages) == 20

    def test_clear_resets_last_consolidated(self) -> None:
        """clear() wipes both messages and the consolidation cursor."""
        session = create_session_with_messages("test:clear", 10)
        session.last_consolidated = 5
        session.clear()
        assert len(session.messages) == 0
        assert session.last_consolidated == 0
class TestSessionImmutableHistory:
    """Session.messages must behave append-only so prompt caches stay valid."""

    def test_initial_state(self) -> None:
        """A new session carries no messages."""
        assert len(Session(key="test:initial").messages) == 0

    def test_add_messages_appends_only(self) -> None:
        """Adding messages grows the list without touching earlier entries."""
        session = Session(key="test:preserve")
        for role, text in [("user", "msg1"), ("assistant", "resp1"), ("user", "msg2")]:
            session.add_message(role, text)
        assert len(session.messages) == 3
        assert session.messages[0]["content"] == "msg1"

    def test_get_history_returns_most_recent(self) -> None:
        """get_history returns the tail of the conversation."""
        session = Session(key="test:history")
        for i in range(10):
            session.add_message("user", f"msg{i}")
            session.add_message("assistant", f"resp{i}")
        tail = session.get_history(max_messages=6)
        assert len(tail) == 6
        assert tail[0]["content"] == "msg7"
        assert tail[-1]["content"] == "resp9"

    def test_get_history_with_all_messages(self) -> None:
        """Asking for more history than exists returns everything."""
        session = create_session_with_messages("test:all", 5)
        everything = session.get_history(max_messages=100)
        assert len(everything) == 5
        assert everything[0]["content"] == "msg0"

    def test_get_history_stable_for_same_session(self) -> None:
        """Repeated get_history calls with the same limit are identical."""
        session = create_session_with_messages("test:stable", 20)
        assert session.get_history(max_messages=10) == session.get_history(max_messages=10)

    def test_messages_list_never_modified(self) -> None:
        """get_history is read-only with respect to session.messages."""
        session = create_session_with_messages("test:immutable", 5)
        before = len(session.messages)
        session.get_history(max_messages=2)
        assert len(session.messages) == before
        for _ in range(10):
            session.get_history(max_messages=3)
        assert len(session.messages) == before
class TestSessionPersistence:
    """Round-trip persistence behaviour of SessionManager."""

    @pytest.fixture
    def temp_manager(self, tmp_path):
        """SessionManager rooted in a throwaway directory."""
        return SessionManager(Path(tmp_path))

    def test_persistence_roundtrip(self, temp_manager):
        """Messages written by save() come back via get_or_create()."""
        original = create_session_with_messages("test:persistence", 20)
        temp_manager.save(original)
        restored = temp_manager.get_or_create("test:persistence")
        assert len(restored.messages) == 20
        assert restored.messages[0]["content"] == "msg0"
        assert restored.messages[-1]["content"] == "msg19"

    def test_get_history_after_reload(self, temp_manager):
        """History slicing still works on a reloaded session."""
        temp_manager.save(create_session_with_messages("test:reload", 30))
        restored = temp_manager.get_or_create("test:reload")
        tail = restored.get_history(max_messages=10)
        assert len(tail) == 10
        assert tail[0]["content"] == "msg20"
        assert tail[-1]["content"] == "msg29"

    def test_clear_resets_session(self, temp_manager):
        """clear() empties the message list."""
        session = create_session_with_messages("test:clear", 10)
        assert len(session.messages) == 10
        session.clear()
        assert len(session.messages) == 0
class TestConsolidationTriggerConditions:
    """When consolidation should fire, and when it must be a no-op."""

    def test_consolidation_needed_when_messages_exceed_window(self):
        """More messages than MEMORY_WINDOW means there is work to do."""
        session = create_session_with_messages("test:trigger", 60)
        total = len(session.messages)
        pending = total - session.last_consolidated
        assert total > MEMORY_WINDOW
        assert pending > 0
        # Everything except the KEEP_COUNT tail would be archived.
        assert total - KEEP_COUNT == 35

    def test_consolidation_skipped_when_within_keep_count(self):
        """Nothing to archive while the session fits inside keep_count."""
        session = create_session_with_messages("test:skip", 20)
        assert len(session.messages) <= KEEP_COUNT
        assert get_old_messages(session, session.last_consolidated, KEEP_COUNT) == []

    def test_consolidation_skipped_when_no_new_messages(self):
        """Once the cursor catches up, the archival slice is empty."""
        session = create_session_with_messages("test:already_consolidated", 40)
        session.last_consolidated = len(session.messages) - KEEP_COUNT  # 15
        # A couple of fresh messages arrive after consolidation.
        for i in (40, 41):
            session.add_message("user", f"msg{i}")
        total = len(session.messages)
        assert total - session.last_consolidated > 0
        # Simulate the cursor catching up again.
        session.last_consolidated = total - KEEP_COUNT
        assert get_old_messages(session, session.last_consolidated, KEEP_COUNT) == []
class TestLastConsolidatedEdgeCases:
    """Corrupted or unusual last_consolidated values must not explode."""

    def test_last_consolidated_exceeds_message_count(self):
        """Cursor beyond the end of the list (corruption) yields nothing to do."""
        session = create_session_with_messages("test:corruption", 10)
        session.last_consolidated = 20
        assert len(session.messages) - session.last_consolidated <= 0
        assert get_old_messages(session, session.last_consolidated, 5) == []

    def test_last_consolidated_negative_value(self):
        """A negative cursor falls back to Python slice semantics."""
        session = create_session_with_messages("test:negative", 10)
        session.last_consolidated = -5
        sliced = get_old_messages(session, session.last_consolidated, 3)
        # messages[-5:-3] over 10 messages selects indices 5 and 6.
        assert len(sliced) == 2
        assert sliced[0]["content"] == "msg5"
        assert sliced[-1]["content"] == "msg6"

    def test_messages_added_after_consolidation(self):
        """New arrivals after consolidation are picked up on the next pass."""
        session = create_session_with_messages("test:new_messages", 40)
        session.last_consolidated = len(session.messages) - KEEP_COUNT  # 15
        for i in range(40, 50):
            session.add_message("user", f"msg{i}")
        sliced = get_old_messages(session, session.last_consolidated, KEEP_COUNT)
        expected = len(session.messages) - KEEP_COUNT - session.last_consolidated
        assert len(sliced) == expected
        assert_messages_content(sliced, 15, 24)

    def test_slice_behavior_when_indices_overlap(self):
        """Cursor past the archival boundary produces an empty slice."""
        session = create_session_with_messages("test:overlap", 30)
        session.last_consolidated = 12
        assert get_old_messages(session, session.last_consolidated, 20) == []
class TestArchiveAllMode:
    """Behaviour of archive_all, the mode behind the /new command."""

    def test_archive_all_consolidates_everything(self):
        """archive_all targets the full message list."""
        session = create_session_with_messages("test:archive_all", 50)
        archive_all = True
        old_messages = session.messages if archive_all else []
        assert len(old_messages) == 50
        assert session.last_consolidated == 0

    def test_archive_all_resets_last_consolidated(self):
        """archive_all rewinds the consolidation cursor to zero."""
        session = create_session_with_messages("test:reset", 40)
        session.last_consolidated = 15
        archive_all = True
        if archive_all:
            session.last_consolidated = 0
        assert session.last_consolidated == 0
        assert len(session.messages) == 40

    def test_archive_all_vs_normal_consolidation(self):
        """Normal consolidation advances the cursor; archive_all leaves it at 0."""
        normal = create_session_with_messages("test:normal", 60)
        normal.last_consolidated = len(normal.messages) - KEEP_COUNT
        archived = create_session_with_messages("test:all", 60)
        archived.last_consolidated = 0
        assert normal.last_consolidated == 35
        assert len(normal.messages) == 60
        assert archived.last_consolidated == 0
        assert len(archived.messages) == 60
class TestCacheImmutability:
    """Consolidation must never rewrite session.messages (prompt-cache safety)."""

    def test_consolidation_does_not_modify_messages_list(self):
        """Advancing the cursor leaves the message list untouched."""
        session = create_session_with_messages("test:immutable", 50)
        snapshot = session.messages.copy()
        session.last_consolidated = len(snapshot) - KEEP_COUNT
        assert len(session.messages) == len(snapshot)
        assert session.messages == snapshot

    def test_get_history_does_not_modify_messages(self):
        """Reading history repeatedly leaves every message intact."""
        session = create_session_with_messages("test:history_immutable", 40)
        snapshot = [m.copy() for m in session.messages]
        for _ in range(5):
            assert len(session.get_history(max_messages=10)) == 10
        assert len(session.messages) == 40
        for original, current in zip(snapshot, session.messages):
            assert current["content"] == original["content"]

    def test_consolidation_only_updates_last_consolidated(self):
        """Only the cursor moves; key, metadata and messages stay put."""
        session = create_session_with_messages("test:field_only", 60)
        snapshot = session.messages.copy()
        key_before = session.key
        metadata_before = session.metadata.copy()
        session.last_consolidated = len(session.messages) - KEEP_COUNT
        assert session.messages == snapshot
        assert session.key == key_before
        assert session.metadata == metadata_before
        assert session.last_consolidated == 35
class TestSliceLogic:
    """The archival slice messages[last_consolidated:-keep_count]."""

    def test_slice_extracts_correct_range(self):
        """Archived and retained halves partition the list cleanly."""
        session = create_session_with_messages("test:slice", 60)
        archived = get_old_messages(session, 0, KEEP_COUNT)
        assert len(archived) == 35
        assert_messages_content(archived, 0, 34)
        retained = session.messages[-KEEP_COUNT:]
        assert len(retained) == 25
        assert_messages_content(retained, 35, 59)

    def test_slice_with_partial_consolidation(self):
        """Slicing starts from the cursor, not from zero."""
        session = create_session_with_messages("test:partial", 70)
        archived = get_old_messages(session, 30, KEEP_COUNT)
        assert len(archived) == 15
        assert_messages_content(archived, 30, 44)

    def test_slice_with_various_keep_counts(self):
        """Archived count is always total minus keep_count."""
        session = create_session_with_messages("test:keep_counts", 50)
        for keep_count, expected in [(10, 40), (20, 30), (30, 20), (40, 10)]:
            assert len(session.messages[0:-keep_count]) == expected

    def test_slice_when_keep_count_exceeds_messages(self):
        """keep_count larger than the list gives an empty archival slice."""
        session = create_session_with_messages("test:exceed", 10)
        assert session.messages[0:-20] == []
class TestEmptyAndBoundarySessions:
    """Empty sessions and exact-boundary message counts."""

    def test_empty_session_consolidation(self):
        """An empty session has nothing pending and nothing to archive."""
        session = Session(key="test:empty")
        assert len(session.messages) == 0
        assert session.last_consolidated == 0
        assert len(session.messages) - session.last_consolidated == 0
        assert get_old_messages(session, session.last_consolidated, KEEP_COUNT) == []

    def test_single_message_session(self):
        """One message is well inside the keep window."""
        session = Session(key="test:single")
        session.add_message("user", "only message")
        assert len(session.messages) == 1
        assert get_old_messages(session, session.last_consolidated, KEEP_COUNT) == []

    def test_exactly_keep_count_messages(self):
        """Exactly keep_count messages: still nothing to archive."""
        session = create_session_with_messages("test:exact", KEEP_COUNT)
        assert len(session.messages) == KEEP_COUNT
        assert get_old_messages(session, session.last_consolidated, KEEP_COUNT) == []

    def test_just_over_keep_count(self):
        """One past the boundary archives exactly the oldest message."""
        session = create_session_with_messages("test:over", KEEP_COUNT + 1)
        assert len(session.messages) == 26
        archived = get_old_messages(session, session.last_consolidated, KEEP_COUNT)
        assert len(archived) == 1
        assert archived[0]["content"] == "msg0"

    def test_very_large_session(self):
        """The slice logic scales to large histories."""
        session = create_session_with_messages("test:large", 1000)
        assert len(session.messages) == 1000
        archived = get_old_messages(session, session.last_consolidated, KEEP_COUNT)
        assert len(archived) == 975
        assert_messages_content(archived, 0, 974)
        retained = session.messages[-KEEP_COUNT:]
        assert len(retained) == 25
        assert_messages_content(retained, 975, 999)

    def test_session_with_gaps_in_consolidation(self):
        """A stale cursor plus new messages still archives the right span."""
        session = create_session_with_messages("test:gaps", 50)
        session.last_consolidated = 10
        for i in range(50, 60):
            session.add_message("user", f"msg{i}")
        archived = get_old_messages(session, session.last_consolidated, KEEP_COUNT)
        assert len(archived) == 60 - KEEP_COUNT - 10
        assert_messages_content(archived, 10, 34)
class TestNewCommandArchival:
    """Test /new archival behavior with the simplified consolidation flow."""

    @staticmethod
    def _make_loop(tmp_path: Path):
        """Build an AgentLoop with a mocked provider and no real tools."""
        from nanobot.agent.loop import AgentLoop
        from nanobot.bus.queue import MessageBus
        from nanobot.providers.base import LLMResponse
        bus = MessageBus()
        provider = MagicMock()
        provider.get_default_model.return_value = "test-model"
        # Large token estimate paired with context_window_tokens=1 below —
        # presumably to make every prompt look oversized; TODO confirm intent.
        provider.estimate_prompt_tokens.return_value = (10_000, "test")
        loop = AgentLoop(
            bus=bus,
            provider=provider,
            workspace=tmp_path,
            model="test-model",
            context_window_tokens=1,
        )
        loop.provider.chat_with_retry = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
        loop.tools.get_definitions = MagicMock(return_value=[])
        return loop

    @pytest.mark.asyncio
    async def test_new_clears_session_immediately_even_if_archive_fails(self, tmp_path: Path) -> None:
        """/new clears session immediately; archive_messages retries until raw dump."""
        from nanobot.bus.events import InboundMessage
        loop = self._make_loop(tmp_path)
        session = loop.sessions.get_or_create("cli:test")
        for i in range(5):
            session.add_message("user", f"msg{i}")
            session.add_message("assistant", f"resp{i}")
        loop.sessions.save(session)
        call_count = 0

        # Consolidation always fails, so the loop must retry and eventually
        # fall back to its raw-archive path.
        async def _failing_consolidate(_messages) -> bool:
            nonlocal call_count
            call_count += 1
            return False

        loop.memory_consolidator.consolidate_messages = _failing_consolidate  # type: ignore[method-assign]
        new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
        response = await loop._process_message(new_msg)
        assert response is not None
        assert "new session started" in response.content.lower()
        # The session is cleared synchronously, before archiving settles.
        session_after = loop.sessions.get_or_create("cli:test")
        assert len(session_after.messages) == 0
        # close_mcp drains the background archive task so call_count is final.
        await loop.close_mcp()
        assert call_count == 3  # retried up to raw-archive threshold

    @pytest.mark.asyncio
    async def test_new_archives_only_unconsolidated_messages(self, tmp_path: Path) -> None:
        """/new archives only the tail after last_consolidated."""
        from nanobot.bus.events import InboundMessage
        loop = self._make_loop(tmp_path)
        session = loop.sessions.get_or_create("cli:test")
        for i in range(15):
            session.add_message("user", f"msg{i}")
            session.add_message("assistant", f"resp{i}")
        # Mark all but the last 3 messages as already consolidated.
        session.last_consolidated = len(session.messages) - 3
        loop.sessions.save(session)
        archived_count = -1

        # Record how many messages the archiver is handed.
        async def _fake_consolidate(messages) -> bool:
            nonlocal archived_count
            archived_count = len(messages)
            return True

        loop.memory_consolidator.consolidate_messages = _fake_consolidate  # type: ignore[method-assign]
        new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
        response = await loop._process_message(new_msg)
        assert response is not None
        assert "new session started" in response.content.lower()
        await loop.close_mcp()
        assert archived_count == 3

    @pytest.mark.asyncio
    async def test_new_clears_session_and_responds(self, tmp_path: Path) -> None:
        """/new empties the session and responds with a confirmation."""
        from nanobot.bus.events import InboundMessage
        loop = self._make_loop(tmp_path)
        session = loop.sessions.get_or_create("cli:test")
        for i in range(3):
            session.add_message("user", f"msg{i}")
            session.add_message("assistant", f"resp{i}")
        loop.sessions.save(session)

        async def _ok_consolidate(_messages) -> bool:
            return True

        loop.memory_consolidator.consolidate_messages = _ok_consolidate  # type: ignore[method-assign]
        new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
        response = await loop._process_message(new_msg)
        assert response is not None
        assert "new session started" in response.content.lower()
        assert loop.sessions.get_or_create("cli:test").messages == []

    @pytest.mark.asyncio
    async def test_close_mcp_drains_background_tasks(self, tmp_path: Path) -> None:
        """close_mcp waits for background tasks to complete."""
        from nanobot.bus.events import InboundMessage
        loop = self._make_loop(tmp_path)
        session = loop.sessions.get_or_create("cli:test")
        for i in range(3):
            session.add_message("user", f"msg{i}")
            session.add_message("assistant", f"resp{i}")
        loop.sessions.save(session)
        archived = asyncio.Event()

        # Slow enough that it is still pending when _process_message returns.
        async def _slow_consolidate(_messages) -> bool:
            await asyncio.sleep(0.1)
            archived.set()
            return True

        loop.memory_consolidator.consolidate_messages = _slow_consolidate  # type: ignore[method-assign]
        new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
        await loop._process_message(new_msg)
        # Archival runs in the background, so it has not finished yet...
        assert not archived.is_set()
        await loop.close_mcp()
        # ...but close_mcp must block until it has.
        assert archived.is_set()

View File

@@ -0,0 +1,73 @@
"""Tests for cache-friendly prompt construction."""
from __future__ import annotations
from datetime import datetime as real_datetime
from importlib.resources import files as pkg_files
from pathlib import Path
import datetime as datetime_module
from nanobot.agent.context import ContextBuilder
class _FakeDatetime(real_datetime):
current = real_datetime(2026, 2, 24, 13, 59)
@classmethod
def now(cls, tz=None): # type: ignore[override]
return cls.current
def _make_workspace(tmp_path: Path) -> Path:
workspace = tmp_path / "workspace"
workspace.mkdir(parents=True)
return workspace
def test_bootstrap_files_are_backed_by_templates() -> None:
    """Every bootstrap file ContextBuilder expects must ship as a package template."""
    template_dir = pkg_files("nanobot") / "templates"
    for filename in ContextBuilder.BOOTSTRAP_FILES:
        # Name the offending file in the failure message — the original
        # message contained a placeholder artifact instead of the filename,
        # making a failing CI run impossible to diagnose at a glance.
        assert (template_dir / filename).is_file(), f"missing bootstrap template: {filename}"
def test_system_prompt_stays_stable_when_clock_changes(tmp_path, monkeypatch) -> None:
    """System prompt should not change just because wall clock minute changes."""
    monkeypatch.setattr(datetime_module, "datetime", _FakeDatetime)
    builder = ContextBuilder(_make_workspace(tmp_path))
    _FakeDatetime.current = real_datetime(2026, 2, 24, 13, 59)
    before = builder.build_system_prompt()
    # Tick the fake clock across a minute (and hour) boundary.
    _FakeDatetime.current = real_datetime(2026, 2, 24, 14, 0)
    after = builder.build_system_prompt()
    assert before == after
def test_runtime_context_is_separate_untrusted_user_message(tmp_path) -> None:
    """Runtime metadata should be merged with the user message."""
    # NOTE(review): the function name still says "separate ... user message"
    # while the assertions verify merged content — consider renaming.
    builder = ContextBuilder(_make_workspace(tmp_path))
    messages = builder.build_messages(
        history=[],
        current_message="Return exactly: OK",
        channel="cli",
        chat_id="direct",
    )
    system, user = messages[0], messages[-1]
    assert system["role"] == "system"
    assert "## Current Session" not in system["content"]
    # Runtime context and the user's text travel in a single user message.
    assert user["role"] == "user"
    body = user["content"]
    assert isinstance(body, str)
    assert ContextBuilder._RUNTIME_CONTEXT_TAG in body
    for expected in (
        "Current Time:",
        "Channel: cli",
        "Chat ID: direct",
        "Return exactly: OK",
    ):
        assert expected in body

View File

@@ -0,0 +1,63 @@
import pytest
from nanobot.utils.evaluator import evaluate_response
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
class DummyProvider(LLMProvider):
    """Provider stub that replays a scripted queue of responses."""

    def __init__(self, responses: list[LLMResponse]):
        super().__init__()
        self._responses = list(responses)

    async def chat(self, *args, **kwargs) -> LLMResponse:
        """Pop the next scripted response, or an empty one when exhausted."""
        if not self._responses:
            return LLMResponse(content="", tool_calls=[])
        return self._responses.pop(0)

    def get_default_model(self) -> str:
        return "test-model"
def _eval_tool_call(should_notify: bool, reason: str = "") -> LLMResponse:
    """Build an LLMResponse carrying a single evaluate_notification tool call."""
    call = ToolCallRequest(
        id="eval_1",
        name="evaluate_notification",
        arguments={"should_notify": should_notify, "reason": reason},
    )
    return LLMResponse(content="", tool_calls=[call])
@pytest.mark.asyncio
async def test_should_notify_true() -> None:
    """A notify=True tool call makes the evaluator return True."""
    provider = DummyProvider([_eval_tool_call(True, "user asked to be reminded")])
    assert await evaluate_response("Task completed with results", "check emails", provider, "m") is True
@pytest.mark.asyncio
async def test_should_notify_false() -> None:
    """A notify=False tool call makes the evaluator return False."""
    provider = DummyProvider([_eval_tool_call(False, "routine check, nothing new")])
    assert await evaluate_response("All clear, no updates", "check status", provider, "m") is False
@pytest.mark.asyncio
async def test_fallback_on_error() -> None:
    """Provider failure falls back to notifying (fail-open)."""

    class FailingProvider(DummyProvider):
        async def chat(self, *args, **kwargs) -> LLMResponse:
            raise RuntimeError("provider down")

    assert await evaluate_response("some response", "some task", FailingProvider([]), "m") is True
@pytest.mark.asyncio
async def test_no_tool_call_fallback() -> None:
    """A plain-text answer without the expected tool call defaults to notifying."""
    provider = DummyProvider([LLMResponse(content="I think you should notify", tool_calls=[])])
    assert await evaluate_response("some response", "some task", provider, "m") is True

View File

@@ -0,0 +1,53 @@
from types import SimpleNamespace
from nanobot.providers.base import ToolCallRequest
from nanobot.providers.litellm_provider import LiteLLMProvider
def test_litellm_parse_response_preserves_tool_call_provider_fields() -> None:
    """_parse_response must carry provider_specific_fields through intact."""
    provider = LiteLLMProvider(default_model="gemini/gemini-3-flash")
    # Build the fake LiteLLM response bottom-up with named intermediates.
    function = SimpleNamespace(
        name="read_file",
        arguments='{"path":"todo.md"}',
        provider_specific_fields={"inner": "value"},
    )
    tool_call = SimpleNamespace(
        id="call_123",
        function=function,
        provider_specific_fields={"thought_signature": "signed-token"},
    )
    message = SimpleNamespace(content=None, tool_calls=[tool_call])
    choice = SimpleNamespace(finish_reason="tool_calls", message=message)
    raw = SimpleNamespace(choices=[choice], usage=None)
    parsed = provider._parse_response(raw)
    assert len(parsed.tool_calls) == 1
    first = parsed.tool_calls[0]
    assert first.provider_specific_fields == {"thought_signature": "signed-token"}
    assert first.function_provider_specific_fields == {"inner": "value"}
def test_tool_call_request_serializes_provider_fields() -> None:
    """to_openai_tool_call must serialize both levels of provider fields."""
    request = ToolCallRequest(
        id="abc123xyz",
        name="read_file",
        arguments={"path": "todo.md"},
        provider_specific_fields={"thought_signature": "signed-token"},
        function_provider_specific_fields={"inner": "value"},
    )
    serialized = request.to_openai_tool_call()
    assert serialized["provider_specific_fields"] == {"thought_signature": "signed-token"}
    assert serialized["function"]["provider_specific_fields"] == {"inner": "value"}
    assert serialized["function"]["arguments"] == '{"path": "todo.md"}'

View File

@@ -0,0 +1,289 @@
import asyncio
import pytest
from nanobot.heartbeat.service import HeartbeatService
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
class DummyProvider(LLMProvider):
    """Scripted provider that also counts how many chat calls were made."""

    def __init__(self, responses: list[LLMResponse]):
        super().__init__()
        self._responses = list(responses)
        self.calls = 0  # incremented on every chat() invocation

    async def chat(self, *args, **kwargs) -> LLMResponse:
        """Count the call, then replay the next scripted response."""
        self.calls += 1
        if not self._responses:
            return LLMResponse(content="", tool_calls=[])
        return self._responses.pop(0)

    def get_default_model(self) -> str:
        return "test-model"
@pytest.mark.asyncio
async def test_start_is_idempotent(tmp_path) -> None:
    """Calling start() twice must not spawn a second background task."""
    service = HeartbeatService(
        workspace=tmp_path,
        provider=DummyProvider([]),
        model="openai/gpt-4o-mini",
        interval_s=9999,
        enabled=True,
    )
    await service.start()
    task_after_first_start = service._task
    await service.start()
    assert service._task is task_after_first_start
    service.stop()
    # Yield once so the cancelled task can unwind.
    await asyncio.sleep(0)
@pytest.mark.asyncio
async def test_decide_returns_skip_when_no_tool_call(tmp_path) -> None:
    """A plain-text decision (no tool call) is treated as skip."""
    service = HeartbeatService(
        workspace=tmp_path,
        provider=DummyProvider([LLMResponse(content="no tool call", tool_calls=[])]),
        model="openai/gpt-4o-mini",
    )
    action, tasks = await service._decide("heartbeat content")
    assert (action, tasks) == ("skip", "")
@pytest.mark.asyncio
async def test_trigger_now_executes_when_decision_is_run(tmp_path) -> None:
    """A 'run' decision routes the tasks string into on_execute."""
    (tmp_path / "HEARTBEAT.md").write_text("- [ ] do thing", encoding="utf-8")
    decision = LLMResponse(
        content="",
        tool_calls=[
            ToolCallRequest(
                id="hb_1",
                name="heartbeat",
                arguments={"action": "run", "tasks": "check open tasks"},
            )
        ],
    )
    received: list[str] = []

    async def _on_execute(tasks: str) -> str:
        received.append(tasks)
        return "done"

    service = HeartbeatService(
        workspace=tmp_path,
        provider=DummyProvider([decision]),
        model="openai/gpt-4o-mini",
        on_execute=_on_execute,
    )
    assert await service.trigger_now() == "done"
    assert received == ["check open tasks"]
@pytest.mark.asyncio
async def test_trigger_now_returns_none_when_decision_is_skip(tmp_path) -> None:
    """A 'skip' decision short-circuits without executing anything."""
    (tmp_path / "HEARTBEAT.md").write_text("- [ ] do thing", encoding="utf-8")
    decision = LLMResponse(
        content="",
        tool_calls=[
            ToolCallRequest(id="hb_1", name="heartbeat", arguments={"action": "skip"})
        ],
    )

    async def _on_execute(tasks: str) -> str:
        return tasks

    service = HeartbeatService(
        workspace=tmp_path,
        provider=DummyProvider([decision]),
        model="openai/gpt-4o-mini",
        on_execute=_on_execute,
    )
    assert await service.trigger_now() is None
@pytest.mark.asyncio
async def test_tick_notifies_when_evaluator_says_yes(tmp_path, monkeypatch) -> None:
    """Phase 1 run -> Phase 2 execute -> Phase 3 evaluate=notify -> on_notify called."""
    (tmp_path / "HEARTBEAT.md").write_text("- [ ] check deployments", encoding="utf-8")
    # Phase 1: the decision LLM answers with a single "run" tool call.
    provider = DummyProvider([
        LLMResponse(
            content="",
            tool_calls=[
                ToolCallRequest(
                    id="hb_1",
                    name="heartbeat",
                    arguments={"action": "run", "tasks": "check deployments"},
                )
            ],
        ),
    ])
    executed: list[str] = []
    notified: list[str] = []

    async def _on_execute(tasks: str) -> str:
        executed.append(tasks)
        return "deployment failed on staging"

    async def _on_notify(response: str) -> None:
        notified.append(response)

    service = HeartbeatService(
        workspace=tmp_path,
        provider=provider,
        model="openai/gpt-4o-mini",
        on_execute=_on_execute,
        on_notify=_on_notify,
    )

    async def _eval_notify(*a, **kw):
        return True

    # NOTE(review): patching at the evaluator's defining module assumes the
    # service resolves nanobot.utils.evaluator.evaluate_response at call time
    # (not via a from-import bound at import time) — confirm in service code.
    monkeypatch.setattr("nanobot.utils.evaluator.evaluate_response", _eval_notify)
    await service._tick()
    assert executed == ["check deployments"]
    assert notified == ["deployment failed on staging"]
@pytest.mark.asyncio
async def test_tick_suppresses_when_evaluator_says_no(tmp_path, monkeypatch) -> None:
    """Phase 1 run -> Phase 2 execute -> Phase 3 evaluate=silent -> on_notify NOT called."""
    (tmp_path / "HEARTBEAT.md").write_text("- [ ] check status", encoding="utf-8")
    # Phase 1: the decision LLM answers with a single "run" tool call.
    provider = DummyProvider([
        LLMResponse(
            content="",
            tool_calls=[
                ToolCallRequest(
                    id="hb_1",
                    name="heartbeat",
                    arguments={"action": "run", "tasks": "check status"},
                )
            ],
        ),
    ])
    executed: list[str] = []
    notified: list[str] = []

    async def _on_execute(tasks: str) -> str:
        executed.append(tasks)
        return "everything is fine, no issues"

    async def _on_notify(response: str) -> None:
        notified.append(response)

    service = HeartbeatService(
        workspace=tmp_path,
        provider=provider,
        model="openai/gpt-4o-mini",
        on_execute=_on_execute,
        on_notify=_on_notify,
    )

    async def _eval_silent(*a, **kw):
        return False

    # NOTE(review): patch target assumes late binding through the
    # nanobot.utils.evaluator module attribute — confirm in service code.
    monkeypatch.setattr("nanobot.utils.evaluator.evaluate_response", _eval_silent)
    await service._tick()
    assert executed == ["check status"]
    # Evaluator said "silent", so the notification hook stays untouched.
    assert notified == []
@pytest.mark.asyncio
async def test_decide_retries_transient_error_then_succeeds(tmp_path, monkeypatch) -> None:
    """A transient provider error is retried once before the tool call lands."""
    provider = DummyProvider([
        # First reply simulates a rate-limit failure...
        LLMResponse(content="429 rate limit", finish_reason="error"),
        # ...second reply is the real "run" decision.
        LLMResponse(
            content="",
            tool_calls=[
                ToolCallRequest(
                    id="hb_1",
                    name="heartbeat",
                    arguments={"action": "run", "tasks": "check open tasks"},
                )
            ],
        ),
    ])
    delays: list[int] = []

    # Capture backoff delays instead of actually sleeping.
    async def _fake_sleep(delay: int) -> None:
        delays.append(delay)

    monkeypatch.setattr(asyncio, "sleep", _fake_sleep)
    service = HeartbeatService(
        workspace=tmp_path,
        provider=provider,
        model="openai/gpt-4o-mini",
    )
    action, tasks = await service._decide("heartbeat content")
    assert action == "run"
    assert tasks == "check open tasks"
    assert provider.calls == 2  # one failure + one success
    assert delays == [1]  # a single 1-second backoff was requested
@pytest.mark.asyncio
async def test_decide_prompt_includes_current_time(tmp_path) -> None:
    """Phase 1 user prompt must contain current time so the LLM can judge task urgency."""
    captured_messages: list[dict] = []

    class CapturingProvider(LLMProvider):
        """Records every prompt message, then answers with a 'skip' tool call."""

        async def chat(self, *, messages=None, **kwargs) -> LLMResponse:
            if messages:
                captured_messages.extend(messages)
            skip_call = ToolCallRequest(
                id="hb_1", name="heartbeat",
                arguments={"action": "skip"},
            )
            return LLMResponse(content="", tool_calls=[skip_call])

        def get_default_model(self) -> str:
            return "test-model"

    service = HeartbeatService(
        workspace=tmp_path,
        provider=CapturingProvider(),
        model="test-model",
    )
    await service._decide("- [ ] check servers at 10:00 UTC")
    user_msg = captured_messages[1]
    assert user_msg["role"] == "user"
    assert "Current Time:" in user_msg["content"]

View File

@@ -0,0 +1,196 @@
from unittest.mock import AsyncMock, MagicMock
import pytest
from nanobot.agent.loop import AgentLoop
import nanobot.agent.memory as memory_module
from nanobot.bus.queue import MessageBus
from nanobot.providers.base import LLMResponse
def _make_loop(tmp_path, *, estimated_tokens: int, context_window_tokens: int) -> AgentLoop:
    """Build an AgentLoop wired to a mock provider with a fixed prompt-token estimate."""
    from nanobot.providers.base import GenerationSettings

    provider = MagicMock()
    provider.get_default_model.return_value = "test-model"
    provider.generation = GenerationSettings(max_tokens=0)
    provider.estimate_prompt_tokens.return_value = (estimated_tokens, "test-counter")
    canned_reply = LLMResponse(content="ok", tool_calls=[])
    provider.chat_with_retry = AsyncMock(return_value=canned_reply)
    provider.chat_stream_with_retry = AsyncMock(return_value=canned_reply)

    loop = AgentLoop(
        bus=MessageBus(),
        provider=provider,
        workspace=tmp_path,
        model="test-model",
        context_window_tokens=context_window_tokens,
    )
    loop.tools.get_definitions = MagicMock(return_value=[])
    # Zero the safety margin so the token thresholds in tests are exact.
    loop.memory_consolidator._SAFETY_BUFFER = 0
    return loop
@pytest.mark.asyncio
async def test_prompt_below_threshold_does_not_consolidate(tmp_path) -> None:
    """A prompt estimate well under the context window must not trigger consolidation."""
    loop = _make_loop(tmp_path, estimated_tokens=100, context_window_tokens=200)
    consolidate = AsyncMock(return_value=True)
    loop.memory_consolidator.consolidate_messages = consolidate  # type: ignore[method-assign]
    await loop.process_direct("hello", session_key="cli:test")
    consolidate.assert_not_awaited()
@pytest.mark.asyncio
async def test_prompt_above_threshold_triggers_consolidation(tmp_path, monkeypatch) -> None:
    """A prompt estimate exceeding the context window triggers at least one consolidation."""
    loop = _make_loop(tmp_path, estimated_tokens=1000, context_window_tokens=200)
    consolidate = AsyncMock(return_value=True)
    loop.memory_consolidator.consolidate_messages = consolidate  # type: ignore[method-assign]
    session = loop.sessions.get_or_create("cli:test")
    session.messages = [
        {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"},
        {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"},
        {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"},
    ]
    loop.sessions.save(session)
    # Every message counts as 500 tokens, keeping the session over budget.
    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _message: 500)
    await loop.process_direct("hello", session_key="cli:test")
    assert consolidate.await_count >= 1
@pytest.mark.asyncio
async def test_prompt_above_threshold_archives_until_next_user_boundary(tmp_path, monkeypatch) -> None:
    """Consolidation archives whole turns, stopping just before the next user message."""
    loop = _make_loop(tmp_path, estimated_tokens=1000, context_window_tokens=200)
    consolidate = AsyncMock(return_value=True)
    loop.memory_consolidator.consolidate_messages = consolidate  # type: ignore[method-assign]
    session = loop.sessions.get_or_create("cli:test")
    conversation = [
        ("user", "u1"), ("assistant", "a1"),
        ("user", "u2"), ("assistant", "a2"),
        ("user", "u3"),
    ]
    session.messages = [
        {"role": role, "content": text, "timestamp": f"2026-01-01T00:00:0{i}"}
        for i, (role, text) in enumerate(conversation)
    ]
    loop.sessions.save(session)
    token_map = {"u1": 120, "a1": 120, "u2": 120, "a2": 120, "u3": 120}
    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda message: token_map[message["content"]])
    await loop.memory_consolidator.maybe_consolidate_by_tokens(session)
    # The chunk handed to consolidate_messages ends at the u3 user boundary.
    archived_chunk = consolidate.await_args.args[0]
    assert [message["content"] for message in archived_chunk] == ["u1", "a1", "u2", "a2"]
    assert session.last_consolidated == 4
@pytest.mark.asyncio
async def test_consolidation_loops_until_target_met(tmp_path, monkeypatch) -> None:
    """Verify maybe_consolidate_by_tokens keeps looping until under threshold."""
    loop = _make_loop(tmp_path, estimated_tokens=0, context_window_tokens=200)
    consolidate = AsyncMock(return_value=True)
    loop.memory_consolidator.consolidate_messages = consolidate  # type: ignore[method-assign]
    session = loop.sessions.get_or_create("cli:test")
    conversation = [
        ("user", "u1"), ("assistant", "a1"),
        ("user", "u2"), ("assistant", "a2"),
        ("user", "u3"), ("assistant", "a3"),
        ("user", "u4"),
    ]
    session.messages = [
        {"role": role, "content": text, "timestamp": f"2026-01-01T00:00:0{i}"}
        for i, (role, text) in enumerate(conversation)
    ]
    loop.sessions.save(session)
    # Successive estimates: 500 (over) -> 300 (still over) -> 80 (under target).
    scripted_estimates = iter([(500, "test"), (300, "test")])

    def mock_estimate(_session):
        return next(scripted_estimates, (80, "test"))

    loop.memory_consolidator.estimate_session_prompt_tokens = mock_estimate  # type: ignore[method-assign]
    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _m: 100)
    await loop.memory_consolidator.maybe_consolidate_by_tokens(session)
    assert consolidate.await_count == 2
    assert session.last_consolidated == 6
@pytest.mark.asyncio
async def test_consolidation_continues_below_trigger_until_half_target(tmp_path, monkeypatch) -> None:
    """Once triggered, consolidation should continue until it drops below half threshold."""
    loop = _make_loop(tmp_path, estimated_tokens=0, context_window_tokens=200)
    consolidate = AsyncMock(return_value=True)
    loop.memory_consolidator.consolidate_messages = consolidate  # type: ignore[method-assign]
    session = loop.sessions.get_or_create("cli:test")
    conversation = [
        ("user", "u1"), ("assistant", "a1"),
        ("user", "u2"), ("assistant", "a2"),
        ("user", "u3"), ("assistant", "a3"),
        ("user", "u4"),
    ]
    session.messages = [
        {"role": role, "content": text, "timestamp": f"2026-01-01T00:00:0{i}"}
        for i, (role, text) in enumerate(conversation)
    ]
    loop.sessions.save(session)
    # 500 triggers; 150 is below the trigger yet above half the threshold; 80 stops.
    scripted_estimates = iter([(500, "test"), (150, "test")])

    def mock_estimate(_session):
        return next(scripted_estimates, (80, "test"))

    loop.memory_consolidator.estimate_session_prompt_tokens = mock_estimate  # type: ignore[method-assign]
    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _m: 100)
    await loop.memory_consolidator.maybe_consolidate_by_tokens(session)
    assert consolidate.await_count == 2
    assert session.last_consolidated == 6
@pytest.mark.asyncio
async def test_preflight_consolidation_before_llm_call(tmp_path, monkeypatch) -> None:
    """Verify preflight consolidation runs before the LLM call in process_direct."""
    call_order: list[str] = []
    loop = _make_loop(tmp_path, estimated_tokens=0, context_window_tokens=200)

    async def track_consolidate(messages):
        call_order.append("consolidate")
        return True

    loop.memory_consolidator.consolidate_messages = track_consolidate  # type: ignore[method-assign]

    async def track_llm(*args, **kwargs):
        call_order.append("llm")
        return LLMResponse(content="ok", tool_calls=[])

    loop.provider.chat_with_retry = track_llm
    loop.provider.chat_stream_with_retry = track_llm
    session = loop.sessions.get_or_create("cli:test")
    session.messages = [
        {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"},
        {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"},
        {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"},
    ]
    loop.sessions.save(session)
    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _m: 500)
    estimate_calls = [0]

    def mock_estimate(_session):
        estimate_calls[0] += 1
        # First estimate is over budget; subsequent ones are comfortably under.
        return (1000 if estimate_calls[0] <= 1 else 80, "test")

    loop.memory_consolidator.estimate_session_prompt_tokens = mock_estimate  # type: ignore[method-assign]
    await loop.process_direct("hello", session_key="cli:test")
    assert "consolidate" in call_order and "llm" in call_order
    assert call_order.index("consolidate") < call_order.index("llm")

View File

@@ -0,0 +1,74 @@
from nanobot.agent.context import ContextBuilder
from nanobot.agent.loop import AgentLoop
from nanobot.session.manager import Session
def _mk_loop() -> AgentLoop:
    """Create a bare AgentLoop without running __init__ (no bus/provider needed).

    Only the attribute that _save_turn reads is populated on the instance.
    """
    loop = AgentLoop.__new__(AgentLoop)
    # Mirror the class-level constant onto the instance so _save_turn can use it.
    loop._TOOL_RESULT_MAX_CHARS = AgentLoop._TOOL_RESULT_MAX_CHARS
    return loop
def test_save_turn_skips_multimodal_user_when_only_runtime_context() -> None:
    """A multimodal user turn that contains only injected runtime context is dropped."""
    loop = _mk_loop()
    session = Session(key="test:runtime-only")
    runtime = ContextBuilder._RUNTIME_CONTEXT_TAG + "\nCurrent Time: now (UTC)"
    turn = [{"role": "user", "content": [{"type": "text", "text": runtime}]}]
    loop._save_turn(session, turn, skip=0)
    assert session.messages == []
def test_save_turn_keeps_image_placeholder_with_path_after_runtime_strip() -> None:
    """Stripping runtime context keeps an image part, rewritten as a path placeholder."""
    loop = _mk_loop()
    session = Session(key="test:image")
    runtime = ContextBuilder._RUNTIME_CONTEXT_TAG + "\nCurrent Time: now (UTC)"
    image_part = {
        "type": "image_url",
        "image_url": {"url": "data:image/png;base64,abc"},
        "_meta": {"path": "/media/feishu/photo.jpg"},
    }
    turn = [{"role": "user", "content": [{"type": "text", "text": runtime}, image_part]}]
    loop._save_turn(session, turn, skip=0)
    assert session.messages[0]["content"] == [{"type": "text", "text": "[image: /media/feishu/photo.jpg]"}]
def test_save_turn_keeps_image_placeholder_without_meta() -> None:
    """An image part with no _meta path falls back to a bare [image] placeholder."""
    loop = _mk_loop()
    session = Session(key="test:image-no-meta")
    runtime = ContextBuilder._RUNTIME_CONTEXT_TAG + "\nCurrent Time: now (UTC)"
    image_part = {
        "type": "image_url",
        "image_url": {"url": "data:image/png;base64,abc"},
    }
    turn = [{"role": "user", "content": [{"type": "text", "text": runtime}, image_part]}]
    loop._save_turn(session, turn, skip=0)
    assert session.messages[0]["content"] == [{"type": "text", "text": "[image]"}]
def test_save_turn_keeps_tool_results_under_16k() -> None:
    """Tool results below the truncation limit are persisted verbatim."""
    loop = _mk_loop()
    session = Session(key="test:tool-result")
    payload = "x" * 12_000
    tool_message = {
        "role": "tool",
        "tool_call_id": "call_1",
        "name": "read_file",
        "content": payload,
    }
    loop._save_turn(session, [tool_message], skip=0)
    assert session.messages[0]["content"] == payload

View File

@@ -0,0 +1,478 @@
"""Test MemoryStore.consolidate() handles non-string tool call arguments.
Regression test for https://github.com/HKUDS/nanobot/issues/1042
When memory consolidation receives dict values instead of strings from the LLM
tool call response, it should serialize them to JSON instead of raising TypeError.
"""
import json
from pathlib import Path
from unittest.mock import AsyncMock
import pytest
from nanobot.agent.memory import MemoryStore
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
def _make_messages(message_count: int = 30):
"""Create a list of mock messages."""
return [
{"role": "user", "content": f"msg{i}", "timestamp": "2026-01-01 00:00"}
for i in range(message_count)
]
def _make_tool_response(history_entry, memory_update):
    """Create an LLMResponse with a save_memory tool call."""
    save_call = ToolCallRequest(
        id="call_1",
        name="save_memory",
        arguments={
            "history_entry": history_entry,
            "memory_update": memory_update,
        },
    )
    return LLMResponse(content=None, tool_calls=[save_call])
class ScriptedProvider(LLMProvider):
    """Provider that replays a fixed script of responses, then returns empty ones."""

    def __init__(self, responses: list[LLMResponse]):
        super().__init__()
        self._script = list(responses)
        self.calls = 0

    async def chat(self, *args, **kwargs) -> LLMResponse:
        self.calls += 1
        if not self._script:
            return LLMResponse(content="", tool_calls=[])
        return self._script.pop(0)

    def get_default_model(self) -> str:
        return "test-model"
class TestMemoryConsolidationTypeHandling:
    """Test that consolidation handles various argument types correctly."""
    @pytest.mark.asyncio
    async def test_string_arguments_work(self, tmp_path: Path) -> None:
        """Normal case: LLM returns string arguments."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()
        provider.chat = AsyncMock(
            return_value=_make_tool_response(
                history_entry="[2026-01-01] User discussed testing.",
                memory_update="# Memory\nUser likes testing.",
            )
        )
        # consolidate() calls chat_with_retry; alias it to the scripted chat mock.
        provider.chat_with_retry = provider.chat
        messages = _make_messages(message_count=60)
        result = await store.consolidate(messages, provider, "test-model")
        assert result is True
        assert store.history_file.exists()
        assert "[2026-01-01] User discussed testing." in store.history_file.read_text()
        assert "User likes testing." in store.memory_file.read_text()
    @pytest.mark.asyncio
    async def test_dict_arguments_serialized_to_json(self, tmp_path: Path) -> None:
        """Issue #1042: LLM returns dict instead of string — must not raise TypeError."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()
        provider.chat = AsyncMock(
            return_value=_make_tool_response(
                history_entry={"timestamp": "2026-01-01", "summary": "User discussed testing."},
                memory_update={"facts": ["User likes testing"], "topics": ["testing"]},
            )
        )
        provider.chat_with_retry = provider.chat
        messages = _make_messages(message_count=60)
        result = await store.consolidate(messages, provider, "test-model")
        assert result is True
        assert store.history_file.exists()
        # Dict inputs must have been serialized to parseable JSON on disk.
        history_content = store.history_file.read_text()
        parsed = json.loads(history_content.strip())
        assert parsed["summary"] == "User discussed testing."
        memory_content = store.memory_file.read_text()
        parsed_mem = json.loads(memory_content)
        assert "User likes testing" in parsed_mem["facts"]
    @pytest.mark.asyncio
    async def test_string_arguments_as_raw_json(self, tmp_path: Path) -> None:
        """Some providers return arguments as a JSON string instead of parsed dict."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()
        response = LLMResponse(
            content=None,
            tool_calls=[
                ToolCallRequest(
                    id="call_1",
                    name="save_memory",
                    arguments=json.dumps({
                        "history_entry": "[2026-01-01] User discussed testing.",
                        "memory_update": "# Memory\nUser likes testing.",
                    }),
                )
            ],
        )
        provider.chat = AsyncMock(return_value=response)
        provider.chat_with_retry = provider.chat
        messages = _make_messages(message_count=60)
        result = await store.consolidate(messages, provider, "test-model")
        assert result is True
        assert "User discussed testing." in store.history_file.read_text()
    @pytest.mark.asyncio
    async def test_no_tool_call_returns_false(self, tmp_path: Path) -> None:
        """When LLM doesn't use the save_memory tool, return False."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()
        provider.chat = AsyncMock(
            return_value=LLMResponse(content="I summarized the conversation.", tool_calls=[])
        )
        provider.chat_with_retry = provider.chat
        messages = _make_messages(message_count=60)
        result = await store.consolidate(messages, provider, "test-model")
        assert result is False
        assert not store.history_file.exists()
    @pytest.mark.asyncio
    async def test_skips_when_message_chunk_is_empty(self, tmp_path: Path) -> None:
        """Consolidation should be a no-op when the selected chunk is empty."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()
        provider.chat_with_retry = provider.chat
        messages: list[dict] = []
        result = await store.consolidate(messages, provider, "test-model")
        # Empty chunk: success without ever contacting the LLM.
        assert result is True
        provider.chat.assert_not_called()
    @pytest.mark.asyncio
    async def test_list_arguments_extracts_first_dict(self, tmp_path: Path) -> None:
        """Some providers return arguments as a list - extract first element if it's a dict."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()
        response = LLMResponse(
            content=None,
            tool_calls=[
                ToolCallRequest(
                    id="call_1",
                    name="save_memory",
                    arguments=[{
                        "history_entry": "[2026-01-01] User discussed testing.",
                        "memory_update": "# Memory\nUser likes testing.",
                    }],
                )
            ],
        )
        provider.chat = AsyncMock(return_value=response)
        provider.chat_with_retry = provider.chat
        messages = _make_messages(message_count=60)
        result = await store.consolidate(messages, provider, "test-model")
        assert result is True
        assert "User discussed testing." in store.history_file.read_text()
        assert "User likes testing." in store.memory_file.read_text()
    @pytest.mark.asyncio
    async def test_list_arguments_empty_list_returns_false(self, tmp_path: Path) -> None:
        """Empty list arguments should return False."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()
        response = LLMResponse(
            content=None,
            tool_calls=[
                ToolCallRequest(
                    id="call_1",
                    name="save_memory",
                    arguments=[],
                )
            ],
        )
        provider.chat = AsyncMock(return_value=response)
        provider.chat_with_retry = provider.chat
        messages = _make_messages(message_count=60)
        result = await store.consolidate(messages, provider, "test-model")
        assert result is False
    @pytest.mark.asyncio
    async def test_list_arguments_non_dict_content_returns_false(self, tmp_path: Path) -> None:
        """List with non-dict content should return False."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()
        response = LLMResponse(
            content=None,
            tool_calls=[
                ToolCallRequest(
                    id="call_1",
                    name="save_memory",
                    arguments=["string", "content"],
                )
            ],
        )
        provider.chat = AsyncMock(return_value=response)
        provider.chat_with_retry = provider.chat
        messages = _make_messages(message_count=60)
        result = await store.consolidate(messages, provider, "test-model")
        assert result is False
    @pytest.mark.asyncio
    async def test_missing_history_entry_returns_false_without_writing(self, tmp_path: Path) -> None:
        """Do not persist partial results when required fields are missing."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()
        provider.chat_with_retry = AsyncMock(
            return_value=LLMResponse(
                content=None,
                tool_calls=[
                    ToolCallRequest(
                        id="call_1",
                        name="save_memory",
                        arguments={"memory_update": "# Memory\nOnly memory update"},
                    )
                ],
            )
        )
        messages = _make_messages(message_count=60)
        result = await store.consolidate(messages, provider, "test-model")
        assert result is False
        # Neither file may exist: a partial write would corrupt the archive.
        assert not store.history_file.exists()
        assert not store.memory_file.exists()
    @pytest.mark.asyncio
    async def test_missing_memory_update_returns_false_without_writing(self, tmp_path: Path) -> None:
        """Do not append history if memory_update is missing."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()
        provider.chat_with_retry = AsyncMock(
            return_value=LLMResponse(
                content=None,
                tool_calls=[
                    ToolCallRequest(
                        id="call_1",
                        name="save_memory",
                        arguments={"history_entry": "[2026-01-01] Partial output."},
                    )
                ],
            )
        )
        messages = _make_messages(message_count=60)
        result = await store.consolidate(messages, provider, "test-model")
        assert result is False
        assert not store.history_file.exists()
        assert not store.memory_file.exists()
    @pytest.mark.asyncio
    async def test_null_required_field_returns_false_without_writing(self, tmp_path: Path) -> None:
        """Null required fields should be rejected before persistence."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()
        provider.chat_with_retry = AsyncMock(
            return_value=_make_tool_response(
                history_entry=None,
                memory_update="# Memory\nUser likes testing.",
            )
        )
        messages = _make_messages(message_count=60)
        result = await store.consolidate(messages, provider, "test-model")
        assert result is False
        assert not store.history_file.exists()
        assert not store.memory_file.exists()
    @pytest.mark.asyncio
    async def test_empty_history_entry_returns_false_without_writing(self, tmp_path: Path) -> None:
        """Empty history entries should be rejected to avoid blank archival records."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()
        provider.chat_with_retry = AsyncMock(
            return_value=_make_tool_response(
                history_entry="   ",
                memory_update="# Memory\nUser likes testing.",
            )
        )
        messages = _make_messages(message_count=60)
        result = await store.consolidate(messages, provider, "test-model")
        assert result is False
        assert not store.history_file.exists()
        assert not store.memory_file.exists()
    @pytest.mark.asyncio
    async def test_retries_transient_error_then_succeeds(self, tmp_path: Path, monkeypatch) -> None:
        # A transient 503 is retried once (with a 1s backoff) before succeeding.
        store = MemoryStore(tmp_path)
        provider = ScriptedProvider([
            LLMResponse(content="503 server error", finish_reason="error"),
            _make_tool_response(
                history_entry="[2026-01-01] User discussed testing.",
                memory_update="# Memory\nUser likes testing.",
            ),
        ])
        messages = _make_messages(message_count=60)
        delays: list[int] = []
        async def _fake_sleep(delay: int) -> None:
            delays.append(delay)
        monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep)
        result = await store.consolidate(messages, provider, "test-model")
        assert result is True
        assert provider.calls == 2
        assert delays == [1]
    @pytest.mark.asyncio
    async def test_consolidation_delegates_to_provider_defaults(self, tmp_path: Path) -> None:
        """Consolidation no longer passes generation params — the provider owns them."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()
        provider.chat_with_retry = AsyncMock(
            return_value=_make_tool_response(
                history_entry="[2026-01-01] User discussed testing.",
                memory_update="# Memory\nUser likes testing.",
            )
        )
        messages = _make_messages(message_count=60)
        result = await store.consolidate(messages, provider, "test-model")
        assert result is True
        provider.chat_with_retry.assert_awaited_once()
        _, kwargs = provider.chat_with_retry.await_args
        assert kwargs["model"] == "test-model"
        # Generation knobs must NOT be forwarded by the consolidator.
        assert "temperature" not in kwargs
        assert "max_tokens" not in kwargs
        assert "reasoning_effort" not in kwargs
    @pytest.mark.asyncio
    async def test_tool_choice_fallback_on_unsupported_error(self, tmp_path: Path) -> None:
        """Forced tool_choice rejected by provider -> retry with auto and succeed."""
        store = MemoryStore(tmp_path)
        error_resp = LLMResponse(
            content="Error calling LLM: litellm.BadRequestError: "
            "The tool_choice parameter does not support being set to required or object",
            finish_reason="error",
            tool_calls=[],
        )
        ok_resp = _make_tool_response(
            history_entry="[2026-01-01] Fallback worked.",
            memory_update="# Memory\nFallback OK.",
        )
        call_log: list[dict] = []
        async def _tracking_chat(**kwargs):
            call_log.append(kwargs)
            return error_resp if len(call_log) == 1 else ok_resp
        provider = AsyncMock()
        provider.chat_with_retry = AsyncMock(side_effect=_tracking_chat)
        messages = _make_messages(message_count=60)
        result = await store.consolidate(messages, provider, "test-model")
        assert result is True
        assert len(call_log) == 2
        # First attempt forces the tool (dict form); the retry relaxes to "auto".
        assert isinstance(call_log[0]["tool_choice"], dict)
        assert call_log[1]["tool_choice"] == "auto"
        assert "Fallback worked." in store.history_file.read_text()
    @pytest.mark.asyncio
    async def test_tool_choice_fallback_auto_no_tool_call(self, tmp_path: Path) -> None:
        """Forced rejected, auto retry also produces no tool call -> return False."""
        store = MemoryStore(tmp_path)
        error_resp = LLMResponse(
            content="Error: tool_choice must be none or auto",
            finish_reason="error",
            tool_calls=[],
        )
        no_tool_resp = LLMResponse(
            content="Here is a summary.",
            finish_reason="stop",
            tool_calls=[],
        )
        provider = AsyncMock()
        provider.chat_with_retry = AsyncMock(side_effect=[error_resp, no_tool_resp])
        messages = _make_messages(message_count=60)
        result = await store.consolidate(messages, provider, "test-model")
        assert result is False
        assert not store.history_file.exists()
    @pytest.mark.asyncio
    async def test_raw_archive_after_consecutive_failures(self, tmp_path: Path) -> None:
        """After 3 consecutive failures, raw-archive messages and return True."""
        store = MemoryStore(tmp_path)
        no_tool = LLMResponse(content="No tool call.", finish_reason="stop", tool_calls=[])
        provider = AsyncMock()
        provider.chat_with_retry = AsyncMock(return_value=no_tool)
        messages = _make_messages(message_count=10)
        # Third consecutive failure triggers the raw-archive escape hatch.
        assert await store.consolidate(messages, provider, "m") is False
        assert await store.consolidate(messages, provider, "m") is False
        assert await store.consolidate(messages, provider, "m") is True
        assert store.history_file.exists()
        content = store.history_file.read_text()
        assert "[RAW]" in content
        assert "10 messages" in content
        assert "msg0" in content
        # Raw archiving writes history only; memory stays untouched.
        assert not store.memory_file.exists()
    @pytest.mark.asyncio
    async def test_raw_archive_counter_resets_on_success(self, tmp_path: Path) -> None:
        """A successful consolidation resets the failure counter."""
        store = MemoryStore(tmp_path)
        no_tool = LLMResponse(content="Nope.", finish_reason="stop", tool_calls=[])
        ok_resp = _make_tool_response(
            history_entry="[2026-01-01] OK.",
            memory_update="# Memory\nOK.",
        )
        messages = _make_messages(message_count=10)
        provider = AsyncMock()
        provider.chat_with_retry = AsyncMock(return_value=no_tool)
        assert await store.consolidate(messages, provider, "m") is False
        assert await store.consolidate(messages, provider, "m") is False
        assert store._consecutive_failures == 2
        provider.chat_with_retry = AsyncMock(return_value=ok_resp)
        assert await store.consolidate(messages, provider, "m") is True
        assert store._consecutive_failures == 0
        provider.chat_with_retry = AsyncMock(return_value=no_tool)
        assert await store.consolidate(messages, provider, "m") is False
        assert store._consecutive_failures == 1

View File

@@ -0,0 +1,495 @@
"""Unit tests for onboard core logic functions.
These tests focus on the business logic behind the onboard wizard,
without testing the interactive UI components.
"""
import json
from pathlib import Path
from types import SimpleNamespace
from typing import Any, cast
import pytest
from pydantic import BaseModel, Field
from nanobot.cli import onboard as onboard_wizard
# Import functions to test
from nanobot.cli.commands import _merge_missing_defaults
from nanobot.cli.onboard import (
_BACK_PRESSED,
_configure_pydantic_model,
_format_value,
_get_field_display_name,
_get_field_type_info,
run_onboard,
)
from nanobot.config.schema import Config
from nanobot.utils.helpers import sync_workspace_templates
class TestMergeMissingDefaults:
    """Tests for _merge_missing_defaults recursive config merging."""
    def test_adds_missing_top_level_keys(self):
        existing = {"a": 1}
        defaults = {"a": 1, "b": 2, "c": 3}
        result = _merge_missing_defaults(existing, defaults)
        assert result == {"a": 1, "b": 2, "c": 3}
    def test_preserves_existing_values(self):
        # User-provided values must always win over defaults.
        existing = {"a": "custom_value"}
        defaults = {"a": "default_value"}
        result = _merge_missing_defaults(existing, defaults)
        assert result == {"a": "custom_value"}
    def test_merges_nested_dicts_recursively(self):
        existing = {
            "level1": {
                "level2": {
                    "existing": "kept",
                }
            }
        }
        defaults = {
            "level1": {
                "level2": {
                    "existing": "replaced",
                    "added": "new",
                },
                "level2b": "also_new",
            }
        }
        result = _merge_missing_defaults(existing, defaults)
        # Deep keys are backfilled while existing deep values are preserved.
        assert result == {
            "level1": {
                "level2": {
                    "existing": "kept",
                    "added": "new",
                },
                "level2b": "also_new",
            }
        }
    def test_returns_existing_if_not_dict(self):
        # Non-dict existing values pass through untouched, whatever the defaults.
        assert _merge_missing_defaults("string", {"a": 1}) == "string"
        assert _merge_missing_defaults([1, 2, 3], {"a": 1}) == [1, 2, 3]
        assert _merge_missing_defaults(None, {"a": 1}) is None
        assert _merge_missing_defaults(42, {"a": 1}) == 42
    def test_returns_existing_if_defaults_not_dict(self):
        assert _merge_missing_defaults({"a": 1}, "string") == {"a": 1}
        assert _merge_missing_defaults({"a": 1}, None) == {"a": 1}
    def test_handles_empty_dicts(self):
        assert _merge_missing_defaults({}, {"a": 1}) == {"a": 1}
        assert _merge_missing_defaults({"a": 1}, {}) == {"a": 1}
        assert _merge_missing_defaults({}, {}) == {}
    def test_backfills_channel_config(self):
        """Real-world scenario: backfill missing channel fields."""
        existing_channel = {
            "enabled": False,
            "appId": "",
            "secret": "",
        }
        default_channel = {
            "enabled": False,
            "appId": "",
            "secret": "",
            "msgFormat": "plain",
            "allowFrom": [],
        }
        result = _merge_missing_defaults(existing_channel, default_channel)
        assert result["msgFormat"] == "plain"
        assert result["allowFrom"] == []
class TestGetFieldTypeInfo:
    """Tests for _get_field_type_info type extraction.

    Each test builds a throwaway pydantic model and asserts on the
    (type_name, inner_type) pair extracted from its field info.
    """
    def test_extracts_str_type(self):
        class Model(BaseModel):
            field: str
        type_name, inner = _get_field_type_info(Model.model_fields["field"])
        assert type_name == "str"
        assert inner is None
    def test_extracts_int_type(self):
        class Model(BaseModel):
            count: int
        type_name, inner = _get_field_type_info(Model.model_fields["count"])
        assert type_name == "int"
        assert inner is None
    def test_extracts_bool_type(self):
        class Model(BaseModel):
            enabled: bool
        type_name, inner = _get_field_type_info(Model.model_fields["enabled"])
        assert type_name == "bool"
        assert inner is None
    def test_extracts_float_type(self):
        class Model(BaseModel):
            ratio: float
        type_name, inner = _get_field_type_info(Model.model_fields["ratio"])
        assert type_name == "float"
        assert inner is None
    def test_extracts_list_type_with_item_type(self):
        class Model(BaseModel):
            items: list[str]
        type_name, inner = _get_field_type_info(Model.model_fields["items"])
        # For parameterized lists the element type is surfaced as `inner`.
        assert type_name == "list"
        assert inner is str
    def test_extracts_list_type_without_item_type(self):
        # Plain list without type param falls back to str
        class Model(BaseModel):
            items: list  # type: ignore
        # Plain list annotation doesn't match list check, returns str
        type_name, inner = _get_field_type_info(Model.model_fields["items"])
        assert type_name == "str"  # Falls back to str for untyped list
        assert inner is None
    def test_extracts_dict_type(self):
        # Plain dict without type param falls back to str
        class Model(BaseModel):
            data: dict  # type: ignore
        # Plain dict annotation doesn't match dict check, returns str
        type_name, inner = _get_field_type_info(Model.model_fields["data"])
        assert type_name == "str"  # Falls back to str for untyped dict
        assert inner is None
    def test_extracts_optional_type(self):
        class Model(BaseModel):
            optional: str | None = None
        type_name, inner = _get_field_type_info(Model.model_fields["optional"])
        # Should unwrap Optional and get str
        assert type_name == "str"
        assert inner is None
    def test_extracts_nested_model_type(self):
        class Inner(BaseModel):
            x: int
        class Outer(BaseModel):
            nested: Inner
        type_name, inner = _get_field_type_info(Outer.model_fields["nested"])
        assert type_name == "model"
        assert inner is Inner
    def test_handles_none_annotation(self):
        """Field with None annotation defaults to str."""
        # NOTE(review): Model is defined but unused — the test drives the
        # function through a SimpleNamespace stand-in instead.
        class Model(BaseModel):
            field: Any = None
        # Create a mock field_info with None annotation
        field_info = SimpleNamespace(annotation=None)
        type_name, inner = _get_field_type_info(field_info)
        assert type_name == "str"
        assert inner is None
class TestGetFieldDisplayName:
    """Tests for _get_field_display_name human-readable name generation.

    A SimpleNamespace with description=None stands in for pydantic FieldInfo
    when only the snake_case-to-title conversion path is under test.
    """
    def test_uses_description_if_present(self):
        class Model(BaseModel):
            api_key: str = Field(description="API Key for authentication")
        name = _get_field_display_name("api_key", Model.model_fields["api_key"])
        # An explicit description wins over any derived name.
        assert name == "API Key for authentication"
    def test_converts_snake_case_to_title(self):
        field_info = SimpleNamespace(description=None)
        name = _get_field_display_name("user_name", field_info)
        assert name == "User Name"
    def test_adds_url_suffix(self):
        field_info = SimpleNamespace(description=None)
        name = _get_field_display_name("api_url", field_info)
        # Title case: "Api Url"
        assert "Url" in name and "Api" in name
    def test_adds_path_suffix(self):
        field_info = SimpleNamespace(description=None)
        name = _get_field_display_name("file_path", field_info)
        assert "Path" in name and "File" in name
    def test_adds_id_suffix(self):
        field_info = SimpleNamespace(description=None)
        name = _get_field_display_name("user_id", field_info)
        # Title case: "User Id"
        assert "Id" in name and "User" in name
    def test_adds_key_suffix(self):
        field_info = SimpleNamespace(description=None)
        name = _get_field_display_name("api_key", field_info)
        assert "Key" in name and "Api" in name
    def test_adds_token_suffix(self):
        field_info = SimpleNamespace(description=None)
        name = _get_field_display_name("auth_token", field_info)
        assert "Token" in name and "Auth" in name
    def test_adds_seconds_suffix(self):
        field_info = SimpleNamespace(description=None)
        name = _get_field_display_name("timeout_s", field_info)
        # Contains "(Seconds)" with title case
        assert "(Seconds)" in name or "(seconds)" in name
    def test_adds_ms_suffix(self):
        field_info = SimpleNamespace(description=None)
        name = _get_field_display_name("delay_ms", field_info)
        # Contains "(Ms)" or "(ms)"
        assert "(Ms)" in name or "(ms)" in name
class TestFormatValue:
    """Tests for _format_value display formatting."""

    def test_formats_none_as_not_set(self):
        assert "not set" in _format_value(None)

    def test_formats_empty_string_as_not_set(self):
        assert "not set" in _format_value("")

    def test_formats_empty_dict_as_not_set(self):
        assert "not set" in _format_value({})

    def test_formats_empty_list_as_not_set(self):
        assert "not set" in _format_value([])

    def test_formats_string_value(self):
        result = _format_value("hello")
        assert "hello" in result

    def test_formats_list_value(self):
        result = _format_value(["a", "b"])
        assert "a" in result or "b" in result

    def test_formats_dict_value(self):
        result = _format_value({"key": "value"})
        assert "key" in result or "value" in result

    def test_formats_int_value(self):
        result = _format_value(42)
        assert "42" in result

    def test_formats_bool_true(self):
        # Bug fix: the original asserted `"" in result`, which is vacuously
        # true for any string (likely a mojibake-stripped checkmark glyph),
        # so this test could never fail. Assert a non-empty rendering that
        # is distinguishable from the False rendering instead.
        result = _format_value(True)
        assert result
        assert result != _format_value(False)

    def test_formats_bool_false(self):
        # Same vacuous-assert fix as the True case above.
        result = _format_value(False)
        assert result
        assert result != _format_value(True)
class TestSyncWorkspaceTemplates:
    """Tests for sync_workspace_templates file synchronization."""
    def test_creates_missing_files(self, tmp_path):
        """Should create template files that don't exist."""
        workspace = tmp_path / "workspace"
        added = sync_workspace_templates(workspace, silent=True)
        # Check that some files were created
        assert isinstance(added, list)
        # The actual files depend on the templates directory
    def test_does_not_overwrite_existing_files(self, tmp_path):
        """Should not overwrite files that already exist."""
        workspace = tmp_path / "workspace"
        workspace.mkdir(parents=True)
        (workspace / "AGENTS.md").write_text("existing content")
        sync_workspace_templates(workspace, silent=True)
        # Existing file should not be changed
        content = (workspace / "AGENTS.md").read_text()
        assert content == "existing content"
    def test_creates_memory_directory(self, tmp_path):
        """Should create memory directory structure."""
        workspace = tmp_path / "workspace"
        sync_workspace_templates(workspace, silent=True)
        # NOTE(review): which directories are synced depends on the bundled
        # templates; either memory/ or skills/ is accepted here — confirm.
        assert (workspace / "memory").exists() or (workspace / "skills").exists()
    def test_returns_list_of_added_files(self, tmp_path):
        """Should return list of relative paths for added files."""
        workspace = tmp_path / "workspace"
        added = sync_workspace_templates(workspace, silent=True)
        assert isinstance(added, list)
        # All paths should be relative to workspace
        for path in added:
            assert not Path(path).is_absolute()
class TestProviderChannelInfo:
    """Tests for provider and channel info retrieval."""

    def test_get_provider_names_returns_dict(self):
        """Provider names include common providers but exclude OAuth-only ones."""
        from nanobot.cli.onboard import _get_provider_names
        names = _get_provider_names()
        assert isinstance(names, dict)
        assert len(names) > 0
        # Should include common providers
        assert "openai" in names or "anthropic" in names
        assert "openai_codex" not in names
        assert "github_copilot" not in names

    def test_get_channel_names_returns_dict(self):
        """Channel names mapping is a dict keyed by channel-name strings."""
        from nanobot.cli.onboard import _get_channel_names
        names = _get_channel_names()
        assert isinstance(names, dict)
        # Bug fix: the original `assert len(names) >= 0` is vacuously true
        # for any dict; assert something falsifiable about the keys instead.
        assert all(isinstance(key, str) for key in names)

    def test_get_provider_info_returns_valid_structure(self):
        """Each provider info entry is a 4-tuple."""
        from nanobot.cli.onboard import _get_provider_info
        info = _get_provider_info()
        assert isinstance(info, dict)
        # Each value should be a tuple with expected structure
        for provider_name, value in info.items():
            assert isinstance(value, tuple)
            assert len(value) == 4  # (display_name, needs_api_key, needs_api_base, env_var)
class _SimpleDraftModel(BaseModel):
    """Flat single-field model used to exercise draft editing."""
    api_key: str = ""
class _NestedDraftModel(BaseModel):
    """Inner model nested inside _OuterDraftModel."""
    api_key: str = ""
class _OuterDraftModel(BaseModel):
    """Model with a nested section, for nested-draft editing tests."""
    nested: _NestedDraftModel = Field(default_factory=_NestedDraftModel)
class TestConfigurePydanticModelDrafts:
    """Tests that _configure_pydantic_model edits a draft and commits only on Done."""
    @staticmethod
    def _patch_prompt_helpers(monkeypatch, tokens, text_value="secret"):
        """Drive the wizard with a scripted sequence of menu selections.

        Token protocol: "first" picks the first choice, "done" returns the
        [Done] sentinel, "back" returns the back-pressed sentinel; any other
        token is returned verbatim. Text inputs always yield `text_value`.
        """
        sequence = iter(tokens)
        def fake_select(_prompt, choices, default=None):
            token = next(sequence)
            if token == "first":
                return choices[0]
            if token == "done":
                return "[Done]"
            if token == "back":
                return _BACK_PRESSED
            return token
        monkeypatch.setattr(onboard_wizard, "_select_with_back", fake_select)
        monkeypatch.setattr(onboard_wizard, "_show_config_panel", lambda *_args, **_kwargs: None)
        monkeypatch.setattr(
            onboard_wizard, "_input_with_existing", lambda *_args, **_kwargs: text_value
        )
    def test_discarding_section_keeps_original_model_unchanged(self, monkeypatch):
        """Backing out returns None and leaves the input model untouched."""
        model = _SimpleDraftModel()
        self._patch_prompt_helpers(monkeypatch, ["first", "back"])
        result = _configure_pydantic_model(model, "Simple")
        assert result is None
        assert model.api_key == ""
    def test_completing_section_returns_updated_draft(self, monkeypatch):
        """Done returns an edited copy; the original model stays unchanged."""
        model = _SimpleDraftModel()
        self._patch_prompt_helpers(monkeypatch, ["first", "done"])
        result = _configure_pydantic_model(model, "Simple")
        assert result is not None
        updated = cast(_SimpleDraftModel, result)
        assert updated.api_key == "secret"
        assert model.api_key == ""
    def test_nested_section_back_discards_nested_edits(self, monkeypatch):
        """Backing out of a nested section discards edits made inside it."""
        model = _OuterDraftModel()
        self._patch_prompt_helpers(monkeypatch, ["first", "first", "back", "done"])
        result = _configure_pydantic_model(model, "Outer")
        assert result is not None
        updated = cast(_OuterDraftModel, result)
        assert updated.nested.api_key == ""
        assert model.nested.api_key == ""
    def test_nested_section_done_commits_nested_edits(self, monkeypatch):
        """Completing a nested section commits its edits into the outer draft."""
        model = _OuterDraftModel()
        self._patch_prompt_helpers(monkeypatch, ["first", "first", "done", "done"])
        result = _configure_pydantic_model(model, "Outer")
        assert result is not None
        updated = cast(_OuterDraftModel, result)
        assert updated.nested.api_key == "secret"
        assert model.nested.api_key == ""
class TestRunOnboardExitBehavior:
    """Tests that interrupting the wizard can discard unsaved edits."""
    def test_main_menu_interrupt_can_discard_unsaved_session_changes(self, monkeypatch):
        """Ctrl-C at the main menu then 'Exit Without Saving' keeps the original config."""
        initial_config = Config()
        # Scripted prompt answers: open Agent Settings, then simulate a
        # KeyboardInterrupt at the menu, then choose to discard changes.
        responses = iter(
            [
                "[A] Agent Settings",
                KeyboardInterrupt(),
                "[X] Exit Without Saving",
            ]
        )
        class FakePrompt:
            # Stand-in for a questionary prompt: .ask() raises when the
            # scripted response is an exception instance.
            def __init__(self, response):
                self.response = response
            def ask(self):
                if isinstance(self.response, BaseException):
                    raise self.response
                return self.response
        def fake_select(*_args, **_kwargs):
            return FakePrompt(next(responses))
        def fake_configure_general_settings(config, section):
            # Mutate the draft so there is a pending change to discard.
            if section == "Agent Settings":
                config.agents.defaults.model = "test/provider-model"
        monkeypatch.setattr(onboard_wizard, "_show_main_menu_header", lambda: None)
        monkeypatch.setattr(onboard_wizard, "questionary", SimpleNamespace(select=fake_select))
        monkeypatch.setattr(onboard_wizard, "_configure_general_settings", fake_configure_general_settings)
        result = run_onboard(initial_config=initial_config)
        assert result.should_save is False
        assert result.config.model_dump(by_alias=True) == initial_config.model_dump(by_alias=True)

View File

@@ -0,0 +1,198 @@
from nanobot.session.manager import Session
def _assert_no_orphans(history: list[dict]) -> None:
"""Assert every tool result in history has a matching assistant tool_call."""
declared = {
tc["id"]
for m in history if m.get("role") == "assistant"
for tc in (m.get("tool_calls") or [])
}
orphans = [
m.get("tool_call_id") for m in history
if m.get("role") == "tool" and m.get("tool_call_id") not in declared
]
assert orphans == [], f"orphan tool_call_ids: {orphans}"
def _tool_turn(prefix: str, idx: int) -> list[dict]:
"""Helper: one assistant with 2 tool_calls + 2 tool results."""
return [
{
"role": "assistant",
"content": None,
"tool_calls": [
{"id": f"{prefix}_{idx}_a", "type": "function", "function": {"name": "x", "arguments": "{}"}},
{"id": f"{prefix}_{idx}_b", "type": "function", "function": {"name": "y", "arguments": "{}"}},
],
},
{"role": "tool", "tool_call_id": f"{prefix}_{idx}_a", "name": "x", "content": "ok"},
{"role": "tool", "tool_call_id": f"{prefix}_{idx}_b", "name": "y", "content": "ok"},
]
# --- Original regression test (from PR 2075) ---
def test_get_history_drops_orphan_tool_results_when_window_cuts_tool_calls():
    """Regression (PR 2075): the history window must not strand tool results."""
    session = Session(key="telegram:test")
    # Build the transcript first, then load it into the session in one go.
    transcript = [{"role": "user", "content": "old turn"}]
    for turn_idx in range(20):
        transcript += _tool_turn("old", turn_idx)
    transcript.append({"role": "user", "content": "problem turn"})
    for turn_idx in range(25):
        transcript += _tool_turn("cur", turn_idx)
    transcript.append({"role": "user", "content": "new telegram question"})
    session.messages.extend(transcript)
    _assert_no_orphans(session.get_history(max_messages=100))
# --- Positive test: legitimate pairs survive trimming ---
def test_legitimate_tool_pairs_preserved_after_trim():
    """Complete tool-call groups within the window must not be dropped."""
    session = Session(key="test:positive")
    session.messages.append({"role": "user", "content": "hello"})
    for turn_idx in range(5):
        session.messages.extend(_tool_turn("ok", turn_idx))
    session.messages.append({"role": "assistant", "content": "done"})

    history = session.get_history(max_messages=500)

    _assert_no_orphans(history)
    # 5 turns x 2 tool results each must all survive.
    result_ids = [msg["tool_call_id"] for msg in history if msg.get("role") == "tool"]
    assert len(result_ids) == 10
    assert history[0]["role"] == "user"
def test_retain_recent_legal_suffix_keeps_recent_messages():
    """Trimming to 4 keeps exactly the last four messages."""
    session = Session(key="test:trim")
    session.messages.extend({"role": "user", "content": f"msg{n}"} for n in range(10))
    session.retain_recent_legal_suffix(4)
    kept = session.messages
    assert len(kept) == 4
    assert kept[0]["content"] == "msg6"
    assert kept[-1]["content"] == "msg9"
def test_retain_recent_legal_suffix_adjusts_last_consolidated():
    """The consolidation cursor shifts by the number of dropped messages."""
    session = Session(key="test:trim-cons")
    session.messages.extend({"role": "user", "content": f"msg{n}"} for n in range(10))
    session.last_consolidated = 7
    session.retain_recent_legal_suffix(4)
    # Six messages were dropped, so the cursor moves 7 -> 1.
    assert len(session.messages) == 4
    assert session.last_consolidated == 1
def test_retain_recent_legal_suffix_zero_clears_session():
    """Retaining zero messages empties the session and resets the cursor."""
    session = Session(key="test:trim-zero")
    session.messages.extend({"role": "user", "content": f"msg{n}"} for n in range(10))
    session.last_consolidated = 5
    session.retain_recent_legal_suffix(0)
    assert session.messages == []
    assert session.last_consolidated == 0
def test_retain_recent_legal_suffix_keeps_legal_tool_boundary():
    """Trimming snaps to a boundary that keeps tool-call groups intact."""
    session = Session(key="test:trim-tools")
    session.messages += [{"role": "user", "content": "old"}, *_tool_turn("old", 0)]
    session.messages += [{"role": "user", "content": "keep"}, *_tool_turn("keep", 0)]
    session.messages.append({"role": "assistant", "content": "done"})
    session.retain_recent_legal_suffix(4)
    history = session.get_history(max_messages=500)
    _assert_no_orphans(history)
    first = history[0]
    assert (first["role"], first["content"]) == ("user", "keep")
# --- last_consolidated > 0 ---
def test_orphan_trim_with_last_consolidated():
    """Orphan trimming works correctly when session is partially consolidated."""
    session = Session(key="test:consolidated")
    for n in range(10):
        session.messages.append({"role": "user", "content": f"old {n}"})
        session.messages.extend(_tool_turn("cons", n))
    session.last_consolidated = 30
    session.messages.append({"role": "user", "content": "recent"})
    for n in range(15):
        session.messages.extend(_tool_turn("new", n))
    session.messages.append({"role": "user", "content": "latest"})

    history = session.get_history(max_messages=20)

    _assert_no_orphans(history)
    # Only tool results from the unconsolidated "new" turns may remain.
    for msg in history:
        if msg.get("role") == "tool":
            assert msg["tool_call_id"].startswith("new_")
# --- Edge: no tool messages at all ---
def test_no_tool_messages_unchanged():
    """Plain user/assistant history passes through the window untouched."""
    session = Session(key="test:plain")
    for n in range(5):
        session.messages += [
            {"role": "user", "content": f"q{n}"},
            {"role": "assistant", "content": f"a{n}"},
        ]
    history = session.get_history(max_messages=6)
    assert len(history) == 6
    _assert_no_orphans(history)
# --- Edge: all leading messages are orphan tool results ---
def test_all_orphan_prefix_stripped():
    """If the window starts with orphan tool results and nothing else, they're all dropped."""
    session = Session(key="test:all-orphan")
    session.messages += [
        {"role": "tool", "tool_call_id": "gone_1", "name": "x", "content": "ok"},
        {"role": "tool", "tool_call_id": "gone_2", "name": "y", "content": "ok"},
        {"role": "user", "content": "fresh start"},
        {"role": "assistant", "content": "hi"},
    ]
    history = session.get_history(max_messages=500)
    _assert_no_orphans(history)
    # Only the user/assistant pair survives.
    assert len(history) == 2
    assert history[0]["role"] == "user"
# --- Edge: empty session ---
def test_empty_session_history():
    """A session with no messages yields an empty history."""
    assert Session(key="test:empty").get_history(max_messages=500) == []
# --- Window cuts mid-group: assistant present but some tool results orphaned ---
def test_window_cuts_mid_tool_group():
    """If the window starts between an assistant's tool results, the partial group is trimmed."""
    session = Session(key="test:mid-cut")
    session.messages.append({"role": "user", "content": "setup"})
    # One assistant declaring two tool calls whose results will straddle
    # the window edge below.
    session.messages.append({
        "role": "assistant", "content": None,
        "tool_calls": [
            {"id": "split_a", "type": "function", "function": {"name": "x", "arguments": "{}"}},
            {"id": "split_b", "type": "function", "function": {"name": "y", "arguments": "{}"}},
        ],
    })
    session.messages.append({"role": "tool", "tool_call_id": "split_a", "name": "x", "content": "ok"})
    session.messages.append({"role": "tool", "tool_call_id": "split_b", "name": "y", "content": "ok"})
    session.messages.append({"role": "user", "content": "next"})
    # A complete tool turn that fits entirely inside the window.
    session.messages.extend(_tool_turn("intact", 0))
    session.messages.append({"role": "assistant", "content": "final"})
    # Window of 6 should cut off the "setup" user msg and the assistant with split_a/split_b,
    # leaving orphan tool results for split_a at the front.
    history = session.get_history(max_messages=6)
    _assert_no_orphans(history)

View File

@@ -0,0 +1,127 @@
import importlib
import shutil
import sys
import zipfile
from pathlib import Path
# Make the skill-creator helper scripts importable as top-level modules.
SCRIPT_DIR = Path("nanobot/skills/skill-creator/scripts").resolve()
if str(SCRIPT_DIR) not in sys.path:
    sys.path.insert(0, str(SCRIPT_DIR))
# NOTE(review): the path resolves relative to the current working directory,
# so these imports assume pytest is invoked from the repository root.
init_skill = importlib.import_module("init_skill")
package_skill = importlib.import_module("package_skill")
quick_validate = importlib.import_module("quick_validate")
def test_init_skill_creates_expected_files(tmp_path: Path) -> None:
    """init_skill scaffolds SKILL.md plus example files in each requested dir."""
    created = init_skill.init_skill(
        "demo-skill",
        tmp_path,
        ["scripts", "references", "assets"],
        include_examples=True,
    )
    assert created == tmp_path / "demo-skill"
    expected = [
        Path("SKILL.md"),
        Path("scripts") / "example.py",
        Path("references") / "api_reference.md",
        Path("assets") / "example_asset.txt",
    ]
    for relative in expected:
        assert (created / relative).exists()
def test_validate_skill_accepts_existing_skill_creator() -> None:
    """The bundled skill-creator skill must pass its own validator."""
    # NOTE(review): resolves relative to the current working directory —
    # assumes pytest runs from the repository root; confirm in CI config.
    valid, message = quick_validate.validate_skill(
        Path("nanobot/skills/skill-creator").resolve()
    )
    assert valid, message
def test_validate_skill_rejects_placeholder_description(tmp_path: Path) -> None:
    """A SKILL.md whose description is still a TODO placeholder fails validation."""
    skill_dir = tmp_path / "placeholder-skill"
    skill_dir.mkdir()
    manifest = (
        "---\n"
        "name: placeholder-skill\n"
        'description: "[TODO: fill me in]"\n'
        "---\n"
        "# Placeholder\n"
    )
    (skill_dir / "SKILL.md").write_text(manifest, encoding="utf-8")
    valid, message = quick_validate.validate_skill(skill_dir)
    assert not valid
    assert "TODO placeholder" in message
def test_validate_skill_rejects_root_files_outside_allowed_dirs(tmp_path: Path) -> None:
    """Stray files in the skill root (e.g. README.md) fail validation."""
    skill_dir = tmp_path / "bad-root-skill"
    skill_dir.mkdir()
    manifest = (
        "---\n"
        "name: bad-root-skill\n"
        "description: Valid description\n"
        "---\n"
        "# Skill\n"
    )
    (skill_dir / "SKILL.md").write_text(manifest, encoding="utf-8")
    (skill_dir / "README.md").write_text("extra\n", encoding="utf-8")
    valid, message = quick_validate.validate_skill(skill_dir)
    assert not valid
    assert "Unexpected file or directory in skill root" in message
def test_package_skill_creates_archive(tmp_path: Path) -> None:
    """package_skill zips the skill under its own top-level folder name."""
    skill_dir = tmp_path / "package-me"
    skill_dir.mkdir()
    (skill_dir / "SKILL.md").write_text(
        "---\n"
        "name: package-me\n"
        "description: Package this skill.\n"
        "---\n"
        "# Skill\n",
        encoding="utf-8",
    )
    scripts_dir = skill_dir / "scripts"
    scripts_dir.mkdir()
    (scripts_dir / "helper.py").write_text("print('ok')\n", encoding="utf-8")

    archive = package_skill.package_skill(skill_dir, tmp_path / "dist")

    assert archive == tmp_path / "dist" / "package-me.skill"
    assert archive.exists()
    with zipfile.ZipFile(archive, "r") as zf:
        entries = set(zf.namelist())
    assert {"package-me/SKILL.md", "package-me/scripts/helper.py"} <= entries
def test_package_skill_rejects_symlink(tmp_path: Path) -> None:
    """package_skill must refuse to archive a skill containing symlinks."""
    import pytest

    skill_dir = tmp_path / "symlink-skill"
    skill_dir.mkdir()
    (skill_dir / "SKILL.md").write_text(
        "---\n"
        "name: symlink-skill\n"
        "description: Reject symlinks during packaging.\n"
        "---\n"
        "# Skill\n",
        encoding="utf-8",
    )
    scripts_dir = skill_dir / "scripts"
    scripts_dir.mkdir()
    target = tmp_path / "outside.txt"
    target.write_text("secret\n", encoding="utf-8")
    link = scripts_dir / "outside.txt"
    try:
        link.symlink_to(target)
    except (OSError, NotImplementedError):
        # Bug fix: the original silently `return`ed here, making the test
        # look green on platforms without symlink support; skip explicitly.
        pytest.skip("platform does not support symlinks")
    archive_path = package_skill.package_skill(skill_dir, tmp_path / "dist")
    assert archive_path is None
    assert not (tmp_path / "dist" / "symlink-skill.skill").exists()

View File

@@ -0,0 +1,223 @@
"""Tests for /stop task cancellation."""
from __future__ import annotations
import asyncio
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
def _make_loop(*, exec_config=None):
    """Create a minimal AgentLoop with mocked dependencies."""
    from nanobot.agent.loop import AgentLoop
    from nanobot.bus.queue import MessageBus

    bus = MessageBus()
    fake_provider = MagicMock()
    fake_provider.get_default_model.return_value = "test-model"
    fake_workspace = MagicMock()
    fake_workspace.__truediv__ = MagicMock(return_value=MagicMock())
    # Patch the loop's heavyweight collaborators while it is constructed.
    ctx_builder = patch("nanobot.agent.loop.ContextBuilder")
    session_mgr = patch("nanobot.agent.loop.SessionManager")
    subagent_mgr = patch("nanobot.agent.loop.SubagentManager")
    with ctx_builder, session_mgr, subagent_mgr as MockSubMgr:
        MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0)
        agent_loop = AgentLoop(
            bus=bus, provider=fake_provider, workspace=fake_workspace, exec_config=exec_config
        )
    return agent_loop, bus
class TestHandleStop:
    """Tests for the /stop builtin command cancelling in-flight agent tasks."""
    @pytest.mark.asyncio
    async def test_stop_no_active_task(self):
        """/stop with nothing running reports that there is no active task."""
        from nanobot.bus.events import InboundMessage
        from nanobot.command.builtin import cmd_stop
        from nanobot.command.router import CommandContext
        loop, bus = _make_loop()
        msg = InboundMessage(channel="test", sender_id="u1", chat_id="c1", content="/stop")
        ctx = CommandContext(msg=msg, session=None, key=msg.session_key, raw="/stop", loop=loop)
        out = await cmd_stop(ctx)
        assert "No active task" in out.content
    @pytest.mark.asyncio
    async def test_stop_cancels_active_task(self):
        """/stop cancels a registered task and reports the stop."""
        from nanobot.bus.events import InboundMessage
        from nanobot.command.builtin import cmd_stop
        from nanobot.command.router import CommandContext
        loop, bus = _make_loop()
        cancelled = asyncio.Event()
        async def slow_task():
            try:
                await asyncio.sleep(60)
            except asyncio.CancelledError:
                cancelled.set()
                raise
        task = asyncio.create_task(slow_task())
        # Yield once so slow_task actually starts before we stop it.
        await asyncio.sleep(0)
        loop._active_tasks["test:c1"] = [task]
        msg = InboundMessage(channel="test", sender_id="u1", chat_id="c1", content="/stop")
        ctx = CommandContext(msg=msg, session=None, key=msg.session_key, raw="/stop", loop=loop)
        out = await cmd_stop(ctx)
        assert cancelled.is_set()
        assert "stopped" in out.content.lower()
    @pytest.mark.asyncio
    async def test_stop_cancels_multiple_tasks(self):
        """/stop cancels every task registered for the session and counts them."""
        from nanobot.bus.events import InboundMessage
        from nanobot.command.builtin import cmd_stop
        from nanobot.command.router import CommandContext
        loop, bus = _make_loop()
        events = [asyncio.Event(), asyncio.Event()]
        async def slow(idx):
            try:
                await asyncio.sleep(60)
            except asyncio.CancelledError:
                events[idx].set()
                raise
        tasks = [asyncio.create_task(slow(i)) for i in range(2)]
        # Yield once so both tasks start before we stop them.
        await asyncio.sleep(0)
        loop._active_tasks["test:c1"] = tasks
        msg = InboundMessage(channel="test", sender_id="u1", chat_id="c1", content="/stop")
        ctx = CommandContext(msg=msg, session=None, key=msg.session_key, raw="/stop", loop=loop)
        out = await cmd_stop(ctx)
        assert all(e.is_set() for e in events)
        assert "2 task" in out.content
class TestDispatch:
    """Tests for AgentLoop dispatch wiring and per-session serialization."""
    def test_exec_tool_not_registered_when_disabled(self):
        """Disabling the exec tool in config keeps it out of the registry."""
        from nanobot.config.schema import ExecToolConfig
        loop, _bus = _make_loop(exec_config=ExecToolConfig(enable=False))
        assert loop.tools.get("exec") is None
    @pytest.mark.asyncio
    async def test_dispatch_processes_and_publishes(self):
        """_dispatch runs _process_message and publishes its result on the bus."""
        from nanobot.bus.events import InboundMessage, OutboundMessage
        loop, bus = _make_loop()
        msg = InboundMessage(channel="test", sender_id="u1", chat_id="c1", content="hello")
        loop._process_message = AsyncMock(
            return_value=OutboundMessage(channel="test", chat_id="c1", content="hi")
        )
        await loop._dispatch(msg)
        out = await asyncio.wait_for(bus.consume_outbound(), timeout=1.0)
        assert out.content == "hi"
    @pytest.mark.asyncio
    async def test_processing_lock_serializes(self):
        """Concurrent dispatches for one session run strictly one after another."""
        from nanobot.bus.events import InboundMessage, OutboundMessage
        loop, bus = _make_loop()
        order = []
        async def mock_process(m, **kwargs):
            # Record interleaving; the sleep gives the other task a chance
            # to run if dispatch were not serialized.
            order.append(f"start-{m.content}")
            await asyncio.sleep(0.05)
            order.append(f"end-{m.content}")
            return OutboundMessage(channel="test", chat_id="c1", content=m.content)
        loop._process_message = mock_process
        msg1 = InboundMessage(channel="test", sender_id="u1", chat_id="c1", content="a")
        msg2 = InboundMessage(channel="test", sender_id="u1", chat_id="c1", content="b")
        t1 = asyncio.create_task(loop._dispatch(msg1))
        t2 = asyncio.create_task(loop._dispatch(msg2))
        await asyncio.gather(t1, t2)
        assert order == ["start-a", "end-a", "start-b", "end-b"]
class TestSubagentCancellation:
    """Tests for SubagentManager task cancellation and history replay."""
    @pytest.mark.asyncio
    async def test_cancel_by_session(self):
        """cancel_by_session cancels every subagent task registered for the session."""
        from nanobot.agent.subagent import SubagentManager
        from nanobot.bus.queue import MessageBus
        bus = MessageBus()
        provider = MagicMock()
        provider.get_default_model.return_value = "test-model"
        mgr = SubagentManager(provider=provider, workspace=MagicMock(), bus=bus)
        cancelled = asyncio.Event()
        async def slow():
            try:
                await asyncio.sleep(60)
            except asyncio.CancelledError:
                cancelled.set()
                raise
        task = asyncio.create_task(slow())
        # Yield once so the task starts before cancellation.
        await asyncio.sleep(0)
        mgr._running_tasks["sub-1"] = task
        mgr._session_tasks["test:c1"] = {"sub-1"}
        count = await mgr.cancel_by_session("test:c1")
        assert count == 1
        assert cancelled.is_set()
    @pytest.mark.asyncio
    async def test_cancel_by_session_no_tasks(self):
        """cancel_by_session on an unknown session is a no-op returning 0."""
        from nanobot.agent.subagent import SubagentManager
        from nanobot.bus.queue import MessageBus
        bus = MessageBus()
        provider = MagicMock()
        provider.get_default_model.return_value = "test-model"
        mgr = SubagentManager(provider=provider, workspace=MagicMock(), bus=bus)
        assert await mgr.cancel_by_session("nonexistent") == 0
    @pytest.mark.asyncio
    async def test_subagent_preserves_reasoning_fields_in_tool_turn(self, monkeypatch, tmp_path):
        """Reasoning/thinking fields from the first LLM turn are replayed in the second call."""
        from nanobot.agent.subagent import SubagentManager
        from nanobot.bus.queue import MessageBus
        from nanobot.providers.base import LLMResponse, ToolCallRequest
        bus = MessageBus()
        provider = MagicMock()
        provider.get_default_model.return_value = "test-model"
        captured_second_call: list[dict] = []
        call_count = {"n": 0}
        async def scripted_chat_with_retry(*, messages, **kwargs):
            # First call: request a tool and attach reasoning fields.
            # Second call: capture the replayed history for assertions.
            call_count["n"] += 1
            if call_count["n"] == 1:
                return LLMResponse(
                    content="thinking",
                    tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={})],
                    reasoning_content="hidden reasoning",
                    thinking_blocks=[{"type": "thinking", "thinking": "step"}],
                )
            captured_second_call[:] = messages
            return LLMResponse(content="done", tool_calls=[])
        provider.chat_with_retry = scripted_chat_with_retry
        mgr = SubagentManager(provider=provider, workspace=tmp_path, bus=bus)
        async def fake_execute(self, name, arguments):
            return "tool result"
        monkeypatch.setattr("nanobot.agent.tools.registry.ToolRegistry.execute", fake_execute)
        await mgr._run_subagent("sub-1", "do task", "label", {"channel": "test", "chat_id": "c1"})
        assistant_messages = [
            msg for msg in captured_second_call
            if msg.get("role") == "assistant" and msg.get("tool_calls")
        ]
        assert len(assistant_messages) == 1
        assert assistant_messages[0]["reasoning_content"] == "hidden reasoning"
        assert assistant_messages[0]["thinking_blocks"] == [{"type": "thinking", "thinking": "step"}]