refactor(tests): optimize unit test structure
tests/agent/test_consolidate_offset.py (new file, 619 lines)
@@ -0,0 +1,619 @@
"""Test session management with cache-friendly message handling."""

import asyncio
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock

import pytest

from nanobot.session.manager import Session, SessionManager

# Test constants
MEMORY_WINDOW = 50
KEEP_COUNT = MEMORY_WINDOW // 2  # 25


def create_session_with_messages(key: str, count: int, role: str = "user") -> Session:
    """Create a session and add the specified number of messages.

    Args:
        key: Session identifier
        count: Number of messages to add
        role: Message role (default: "user")

    Returns:
        Session with the specified messages
    """
    session = Session(key=key)
    for i in range(count):
        session.add_message(role, f"msg{i}")
    return session


def assert_messages_content(messages: list, start_index: int, end_index: int) -> None:
    """Assert that messages contain expected content from start to end index.

    Args:
        messages: List of message dictionaries
        start_index: Expected first message index
        end_index: Expected last message index
    """
    assert len(messages) > 0
    assert messages[0]["content"] == f"msg{start_index}"
    assert messages[-1]["content"] == f"msg{end_index}"


def get_old_messages(session: Session, last_consolidated: int, keep_count: int) -> list:
    """Extract messages that would be consolidated using the standard slice logic.

    Args:
        session: The session containing messages
        last_consolidated: Index of last consolidated message
        keep_count: Number of recent messages to keep

    Returns:
        List of messages that would be consolidated
    """
    return session.messages[last_consolidated:-keep_count]
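

# Editor's note: a minimal, standard-library-only illustration of the slice
# semantics the helper above relies on (no nanobot assumptions).
def test_slice_semantics_worked_example() -> None:
    items = list(range(10))
    # last_consolidated=2, keep_count=3 -> the consolidated middle
    assert items[2:-3] == [2, 3, 4, 5, 6]
    # a negative start behaves like len(items) + start, so -5 means index 5
    assert items[-5:-3] == [5, 6]
    # keep_count >= len(items) -> empty slice, nothing to consolidate
    assert items[2:-20] == []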


class TestSessionLastConsolidated:
    """Test last_consolidated tracking to avoid duplicate processing."""

    def test_initial_last_consolidated_zero(self) -> None:
        """Test that new session starts with last_consolidated=0."""
        session = Session(key="test:initial")
        assert session.last_consolidated == 0

    def test_last_consolidated_persistence(self, tmp_path) -> None:
        """Test that last_consolidated persists across save/load."""
        manager = SessionManager(Path(tmp_path))
        session1 = create_session_with_messages("test:persist", 20)
        session1.last_consolidated = 15
        manager.save(session1)

        session2 = manager.get_or_create("test:persist")
        assert session2.last_consolidated == 15
        assert len(session2.messages) == 20

    def test_clear_resets_last_consolidated(self) -> None:
        """Test that clear() resets last_consolidated to 0."""
        session = create_session_with_messages("test:clear", 10)
        session.last_consolidated = 5

        session.clear()
        assert len(session.messages) == 0
        assert session.last_consolidated == 0


class TestSessionImmutableHistory:
    """Test Session message immutability for cache efficiency."""

    def test_initial_state(self) -> None:
        """Test that new session has empty messages list."""
        session = Session(key="test:initial")
        assert len(session.messages) == 0

    def test_add_messages_appends_only(self) -> None:
        """Test that adding messages only appends, never modifies."""
        session = Session(key="test:preserve")
        session.add_message("user", "msg1")
        session.add_message("assistant", "resp1")
        session.add_message("user", "msg2")
        assert len(session.messages) == 3
        assert session.messages[0]["content"] == "msg1"

    def test_get_history_returns_most_recent(self) -> None:
        """Test get_history returns the most recent messages."""
        session = Session(key="test:history")
        for i in range(10):
            session.add_message("user", f"msg{i}")
            session.add_message("assistant", f"resp{i}")

        history = session.get_history(max_messages=6)
        assert len(history) == 6
        assert history[0]["content"] == "msg7"
        assert history[-1]["content"] == "resp9"

    def test_get_history_with_all_messages(self) -> None:
        """Test get_history with max_messages larger than actual."""
        session = create_session_with_messages("test:all", 5)
        history = session.get_history(max_messages=100)
        assert len(history) == 5
        assert history[0]["content"] == "msg0"

    def test_get_history_stable_for_same_session(self) -> None:
        """Test that get_history returns same content for same max_messages."""
        session = create_session_with_messages("test:stable", 20)
        history1 = session.get_history(max_messages=10)
        history2 = session.get_history(max_messages=10)
        assert history1 == history2

    def test_messages_list_never_modified(self) -> None:
        """Test that messages list is never modified after creation."""
        session = create_session_with_messages("test:immutable", 5)
        original_len = len(session.messages)

        session.get_history(max_messages=2)
        assert len(session.messages) == original_len

        for _ in range(10):
            session.get_history(max_messages=3)
        assert len(session.messages) == original_len


class TestSessionPersistence:
    """Test Session persistence and reload."""

    @pytest.fixture
    def temp_manager(self, tmp_path):
        return SessionManager(Path(tmp_path))

    def test_persistence_roundtrip(self, temp_manager):
        """Test that messages persist across save/load."""
        session1 = create_session_with_messages("test:persistence", 20)
        temp_manager.save(session1)

        session2 = temp_manager.get_or_create("test:persistence")
        assert len(session2.messages) == 20
        assert session2.messages[0]["content"] == "msg0"
        assert session2.messages[-1]["content"] == "msg19"

    def test_get_history_after_reload(self, temp_manager):
        """Test that get_history works correctly after reload."""
        session1 = create_session_with_messages("test:reload", 30)
        temp_manager.save(session1)

        session2 = temp_manager.get_or_create("test:reload")
        history = session2.get_history(max_messages=10)
        assert len(history) == 10
        assert history[0]["content"] == "msg20"
        assert history[-1]["content"] == "msg29"

    def test_clear_resets_session(self, temp_manager):
        """Test that clear() properly resets session."""
        session = create_session_with_messages("test:clear", 10)
        assert len(session.messages) == 10

        session.clear()
        assert len(session.messages) == 0


class TestConsolidationTriggerConditions:
    """Test consolidation trigger conditions and logic."""

    def test_consolidation_needed_when_messages_exceed_window(self):
        """Test consolidation logic: should trigger when messages exceed the window."""
        session = create_session_with_messages("test:trigger", 60)

        total_messages = len(session.messages)
        messages_to_process = total_messages - session.last_consolidated

        assert total_messages > MEMORY_WINDOW
        assert messages_to_process > 0

        expected_consolidate_count = total_messages - KEEP_COUNT
        assert expected_consolidate_count == 35

    def test_consolidation_skipped_when_within_keep_count(self):
        """Test consolidation skipped when total messages <= keep_count."""
        session = create_session_with_messages("test:skip", 20)

        total_messages = len(session.messages)
        assert total_messages <= KEEP_COUNT

        old_messages = get_old_messages(session, session.last_consolidated, KEEP_COUNT)
        assert len(old_messages) == 0

    def test_consolidation_skipped_when_no_new_messages(self):
        """Test consolidation skipped when messages_to_process <= 0."""
        session = create_session_with_messages("test:already_consolidated", 40)
        session.last_consolidated = len(session.messages) - KEEP_COUNT  # 15

        # Add a few more messages
        for i in range(40, 42):
            session.add_message("user", f"msg{i}")

        total_messages = len(session.messages)
        messages_to_process = total_messages - session.last_consolidated
        assert messages_to_process > 0

        # Simulate last_consolidated catching up
        session.last_consolidated = total_messages - KEEP_COUNT
        old_messages = get_old_messages(session, session.last_consolidated, KEEP_COUNT)
        assert len(old_messages) == 0
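

# Editor's note: the trigger arithmetic above as one self-contained check,
# using only the module constants (MEMORY_WINDOW = 50, KEEP_COUNT = 25).
def test_trigger_arithmetic_worked_example() -> None:
    total = 60
    assert total > MEMORY_WINDOW     # exceeds the window, so it should trigger
    assert total - KEEP_COUNT == 35  # messages eligible for consolidation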


class TestLastConsolidatedEdgeCases:
    """Test last_consolidated edge cases and data corruption scenarios."""

    def test_last_consolidated_exceeds_message_count(self):
        """Test behavior when last_consolidated > len(messages) (data corruption)."""
        session = create_session_with_messages("test:corruption", 10)
        session.last_consolidated = 20

        total_messages = len(session.messages)
        messages_to_process = total_messages - session.last_consolidated
        assert messages_to_process <= 0

        old_messages = get_old_messages(session, session.last_consolidated, 5)
        assert len(old_messages) == 0

    def test_last_consolidated_negative_value(self):
        """Test behavior with negative last_consolidated (invalid state)."""
        session = create_session_with_messages("test:negative", 10)
        session.last_consolidated = -5

        keep_count = 3
        old_messages = get_old_messages(session, session.last_consolidated, keep_count)

        # messages[-5:-3] with 10 messages gives indices 5,6
        assert len(old_messages) == 2
        assert old_messages[0]["content"] == "msg5"
        assert old_messages[-1]["content"] == "msg6"

    def test_messages_added_after_consolidation(self):
        """Test correct behavior when new messages arrive after consolidation."""
        session = create_session_with_messages("test:new_messages", 40)
        session.last_consolidated = len(session.messages) - KEEP_COUNT  # 15

        # Add new messages after consolidation
        for i in range(40, 50):
            session.add_message("user", f"msg{i}")

        total_messages = len(session.messages)
        old_messages = get_old_messages(session, session.last_consolidated, KEEP_COUNT)
        expected_consolidate_count = total_messages - KEEP_COUNT - session.last_consolidated

        assert len(old_messages) == expected_consolidate_count
        assert_messages_content(old_messages, 15, 24)

    def test_slice_behavior_when_indices_overlap(self):
        """Test slice behavior when last_consolidated >= total - keep_count."""
        session = create_session_with_messages("test:overlap", 30)
        session.last_consolidated = 12

        old_messages = get_old_messages(session, session.last_consolidated, 20)
        assert len(old_messages) == 0


class TestArchiveAllMode:
    """Test archive_all mode (used by /new command)."""

    def test_archive_all_consolidates_everything(self):
        """Test archive_all=True consolidates all messages."""
        session = create_session_with_messages("test:archive_all", 50)

        archive_all = True
        if archive_all:
            old_messages = session.messages
            assert len(old_messages) == 50

        assert session.last_consolidated == 0

    def test_archive_all_resets_last_consolidated(self):
        """Test that archive_all mode resets last_consolidated to 0."""
        session = create_session_with_messages("test:reset", 40)
        session.last_consolidated = 15

        archive_all = True
        if archive_all:
            session.last_consolidated = 0

        assert session.last_consolidated == 0
        assert len(session.messages) == 40

    def test_archive_all_vs_normal_consolidation(self):
        """Test difference between archive_all and normal consolidation."""
        # Normal consolidation
        session1 = create_session_with_messages("test:normal", 60)
        session1.last_consolidated = len(session1.messages) - KEEP_COUNT

        # archive_all mode
        session2 = create_session_with_messages("test:all", 60)
        session2.last_consolidated = 0

        assert session1.last_consolidated == 35
        assert len(session1.messages) == 60
        assert session2.last_consolidated == 0
        assert len(session2.messages) == 60


class TestCacheImmutability:
    """Test that consolidation doesn't modify session.messages (cache safety)."""

    def test_consolidation_does_not_modify_messages_list(self):
        """Test that consolidation leaves messages list unchanged."""
        session = create_session_with_messages("test:immutable", 50)

        original_messages = session.messages.copy()
        original_len = len(session.messages)
        session.last_consolidated = original_len - KEEP_COUNT

        assert len(session.messages) == original_len
        assert session.messages == original_messages

    def test_get_history_does_not_modify_messages(self):
        """Test that get_history doesn't modify messages list."""
        session = create_session_with_messages("test:history_immutable", 40)
        original_messages = [m.copy() for m in session.messages]

        for _ in range(5):
            history = session.get_history(max_messages=10)
            assert len(history) == 10

        assert len(session.messages) == 40
        for i, msg in enumerate(session.messages):
            assert msg["content"] == original_messages[i]["content"]

    def test_consolidation_only_updates_last_consolidated(self):
        """Test that consolidation only updates the last_consolidated field."""
        session = create_session_with_messages("test:field_only", 60)

        original_messages = session.messages.copy()
        original_key = session.key
        original_metadata = session.metadata.copy()

        session.last_consolidated = len(session.messages) - KEEP_COUNT

        assert session.messages == original_messages
        assert session.key == original_key
        assert session.metadata == original_metadata
        assert session.last_consolidated == 35


class TestSliceLogic:
    """Test the slice logic: messages[last_consolidated:-keep_count]."""

    def test_slice_extracts_correct_range(self):
        """Test that slice extracts the correct message range."""
        session = create_session_with_messages("test:slice", 60)

        old_messages = get_old_messages(session, 0, KEEP_COUNT)

        assert len(old_messages) == 35
        assert_messages_content(old_messages, 0, 34)

        remaining = session.messages[-KEEP_COUNT:]
        assert len(remaining) == 25
        assert_messages_content(remaining, 35, 59)

    def test_slice_with_partial_consolidation(self):
        """Test slice when some messages already consolidated."""
        session = create_session_with_messages("test:partial", 70)

        last_consolidated = 30
        old_messages = get_old_messages(session, last_consolidated, KEEP_COUNT)

        assert len(old_messages) == 15
        assert_messages_content(old_messages, 30, 44)

    def test_slice_with_various_keep_counts(self):
        """Test slice behavior with different keep_count values."""
        session = create_session_with_messages("test:keep_counts", 50)

        test_cases = [(10, 40), (20, 30), (30, 20), (40, 10)]

        for keep_count, expected_count in test_cases:
            old_messages = session.messages[0:-keep_count]
            assert len(old_messages) == expected_count

    def test_slice_when_keep_count_exceeds_messages(self):
        """Test slice when keep_count > len(messages)."""
        session = create_session_with_messages("test:exceed", 10)

        old_messages = session.messages[0:-20]
        assert len(old_messages) == 0


class TestEmptyAndBoundarySessions:
    """Test empty sessions and boundary conditions."""

    def test_empty_session_consolidation(self):
        """Test consolidation behavior with empty session."""
        session = Session(key="test:empty")

        assert len(session.messages) == 0
        assert session.last_consolidated == 0

        messages_to_process = len(session.messages) - session.last_consolidated
        assert messages_to_process == 0

        old_messages = get_old_messages(session, session.last_consolidated, KEEP_COUNT)
        assert len(old_messages) == 0

    def test_single_message_session(self):
        """Test consolidation with single message."""
        session = Session(key="test:single")
        session.add_message("user", "only message")

        assert len(session.messages) == 1

        old_messages = get_old_messages(session, session.last_consolidated, KEEP_COUNT)
        assert len(old_messages) == 0

    def test_exactly_keep_count_messages(self):
        """Test session with exactly keep_count messages."""
        session = create_session_with_messages("test:exact", KEEP_COUNT)

        assert len(session.messages) == KEEP_COUNT

        old_messages = get_old_messages(session, session.last_consolidated, KEEP_COUNT)
        assert len(old_messages) == 0

    def test_just_over_keep_count(self):
        """Test session with one message over keep_count."""
        session = create_session_with_messages("test:over", KEEP_COUNT + 1)

        assert len(session.messages) == 26

        old_messages = get_old_messages(session, session.last_consolidated, KEEP_COUNT)
        assert len(old_messages) == 1
        assert old_messages[0]["content"] == "msg0"

    def test_very_large_session(self):
        """Test consolidation with very large message count."""
        session = create_session_with_messages("test:large", 1000)

        assert len(session.messages) == 1000

        old_messages = get_old_messages(session, session.last_consolidated, KEEP_COUNT)
        assert len(old_messages) == 975
        assert_messages_content(old_messages, 0, 974)

        remaining = session.messages[-KEEP_COUNT:]
        assert len(remaining) == 25
        assert_messages_content(remaining, 975, 999)

    def test_session_with_gaps_in_consolidation(self):
        """Test session with potential gaps in consolidation history."""
        session = create_session_with_messages("test:gaps", 50)
        session.last_consolidated = 10

        # Add more messages
        for i in range(50, 60):
            session.add_message("user", f"msg{i}")

        old_messages = get_old_messages(session, session.last_consolidated, KEEP_COUNT)

        expected_count = 60 - KEEP_COUNT - 10
        assert len(old_messages) == expected_count
        assert_messages_content(old_messages, 10, 34)


class TestNewCommandArchival:
    """Test /new archival behavior with the simplified consolidation flow."""

    @staticmethod
    def _make_loop(tmp_path: Path):
        from nanobot.agent.loop import AgentLoop
        from nanobot.bus.queue import MessageBus
        from nanobot.providers.base import LLMResponse

        bus = MessageBus()
        provider = MagicMock()
        provider.get_default_model.return_value = "test-model"
        provider.estimate_prompt_tokens.return_value = (10_000, "test")
        loop = AgentLoop(
            bus=bus,
            provider=provider,
            workspace=tmp_path,
            model="test-model",
            context_window_tokens=1,
        )
        loop.provider.chat_with_retry = AsyncMock(return_value=LLMResponse(content="ok", tool_calls=[]))
        loop.tools.get_definitions = MagicMock(return_value=[])
        return loop

    @pytest.mark.asyncio
    async def test_new_clears_session_immediately_even_if_archive_fails(self, tmp_path: Path) -> None:
        """/new clears the session immediately; archive_messages retries until the raw dump."""
        from nanobot.bus.events import InboundMessage

        loop = self._make_loop(tmp_path)
        session = loop.sessions.get_or_create("cli:test")
        for i in range(5):
            session.add_message("user", f"msg{i}")
            session.add_message("assistant", f"resp{i}")
        loop.sessions.save(session)

        call_count = 0

        async def _failing_consolidate(_messages) -> bool:
            nonlocal call_count
            call_count += 1
            return False

        loop.memory_consolidator.consolidate_messages = _failing_consolidate  # type: ignore[method-assign]

        new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
        response = await loop._process_message(new_msg)

        assert response is not None
        assert "new session started" in response.content.lower()

        session_after = loop.sessions.get_or_create("cli:test")
        assert len(session_after.messages) == 0

        await loop.close_mcp()
        assert call_count == 3  # retried up to raw-archive threshold

    @pytest.mark.asyncio
    async def test_new_archives_only_unconsolidated_messages(self, tmp_path: Path) -> None:
        from nanobot.bus.events import InboundMessage

        loop = self._make_loop(tmp_path)
        session = loop.sessions.get_or_create("cli:test")
        for i in range(15):
            session.add_message("user", f"msg{i}")
            session.add_message("assistant", f"resp{i}")
        session.last_consolidated = len(session.messages) - 3
        loop.sessions.save(session)

        archived_count = -1

        async def _fake_consolidate(messages) -> bool:
            nonlocal archived_count
            archived_count = len(messages)
            return True

        loop.memory_consolidator.consolidate_messages = _fake_consolidate  # type: ignore[method-assign]

        new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
        response = await loop._process_message(new_msg)

        assert response is not None
        assert "new session started" in response.content.lower()

        await loop.close_mcp()
        assert archived_count == 3

    @pytest.mark.asyncio
    async def test_new_clears_session_and_responds(self, tmp_path: Path) -> None:
        from nanobot.bus.events import InboundMessage

        loop = self._make_loop(tmp_path)
        session = loop.sessions.get_or_create("cli:test")
        for i in range(3):
            session.add_message("user", f"msg{i}")
            session.add_message("assistant", f"resp{i}")
        loop.sessions.save(session)

        async def _ok_consolidate(_messages) -> bool:
            return True

        loop.memory_consolidator.consolidate_messages = _ok_consolidate  # type: ignore[method-assign]

        new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
        response = await loop._process_message(new_msg)

        assert response is not None
        assert "new session started" in response.content.lower()
        assert loop.sessions.get_or_create("cli:test").messages == []

    @pytest.mark.asyncio
    async def test_close_mcp_drains_background_tasks(self, tmp_path: Path) -> None:
        """close_mcp waits for background tasks to complete."""
        from nanobot.bus.events import InboundMessage

        loop = self._make_loop(tmp_path)
        session = loop.sessions.get_or_create("cli:test")
        for i in range(3):
            session.add_message("user", f"msg{i}")
            session.add_message("assistant", f"resp{i}")
        loop.sessions.save(session)

        archived = asyncio.Event()

        async def _slow_consolidate(_messages) -> bool:
            await asyncio.sleep(0.1)
            archived.set()
            return True

        loop.memory_consolidator.consolidate_messages = _slow_consolidate  # type: ignore[method-assign]

        new_msg = InboundMessage(channel="cli", sender_id="user", chat_id="test", content="/new")
        await loop._process_message(new_msg)

        assert not archived.is_set()
        await loop.close_mcp()
        assert archived.is_set()
tests/agent/test_context_prompt_cache.py (new file, 73 lines)
@@ -0,0 +1,73 @@
"""Tests for cache-friendly prompt construction."""

from __future__ import annotations

import datetime as datetime_module
from datetime import datetime as real_datetime
from importlib.resources import files as pkg_files
from pathlib import Path

from nanobot.agent.context import ContextBuilder


class _FakeDatetime(real_datetime):
    current = real_datetime(2026, 2, 24, 13, 59)

    @classmethod
    def now(cls, tz=None):  # type: ignore[override]
        return cls.current
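

# Editor's note: attributes of the built-in datetime type cannot be
# monkeypatched directly, so the test below swaps the whole module attribute
# for this subclass and moves its `current` value to simulate the clock:
#
#     monkeypatch.setattr(datetime_module, "datetime", _FakeDatetime)
#     _FakeDatetime.current = real_datetime(2026, 2, 24, 14, 0)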


def _make_workspace(tmp_path: Path) -> Path:
    workspace = tmp_path / "workspace"
    workspace.mkdir(parents=True)
    return workspace


def test_bootstrap_files_are_backed_by_templates() -> None:
    template_dir = pkg_files("nanobot") / "templates"

    for filename in ContextBuilder.BOOTSTRAP_FILES:
        assert (template_dir / filename).is_file(), f"missing bootstrap template: {filename}"


def test_system_prompt_stays_stable_when_clock_changes(tmp_path, monkeypatch) -> None:
    """System prompt should not change just because the wall-clock minute changes."""
    monkeypatch.setattr(datetime_module, "datetime", _FakeDatetime)

    workspace = _make_workspace(tmp_path)
    builder = ContextBuilder(workspace)

    _FakeDatetime.current = real_datetime(2026, 2, 24, 13, 59)
    prompt1 = builder.build_system_prompt()

    _FakeDatetime.current = real_datetime(2026, 2, 24, 14, 0)
    prompt2 = builder.build_system_prompt()

    assert prompt1 == prompt2


def test_runtime_context_merged_into_user_message(tmp_path) -> None:
    """Runtime metadata should be merged into the user message, not the system prompt."""
    workspace = _make_workspace(tmp_path)
    builder = ContextBuilder(workspace)

    messages = builder.build_messages(
        history=[],
        current_message="Return exactly: OK",
        channel="cli",
        chat_id="direct",
    )

    assert messages[0]["role"] == "system"
    assert "## Current Session" not in messages[0]["content"]

    # Runtime context is merged with the user message into a single message
    assert messages[-1]["role"] == "user"
    user_content = messages[-1]["content"]
    assert isinstance(user_content, str)
    assert ContextBuilder._RUNTIME_CONTEXT_TAG in user_content
    assert "Current Time:" in user_content
    assert "Channel: cli" in user_content
    assert "Chat ID: direct" in user_content
    assert "Return exactly: OK" in user_content
tests/agent/test_evaluator.py (new file, 63 lines)
@@ -0,0 +1,63 @@
import pytest

from nanobot.utils.evaluator import evaluate_response
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest


class DummyProvider(LLMProvider):
    def __init__(self, responses: list[LLMResponse]):
        super().__init__()
        self._responses = list(responses)

    async def chat(self, *args, **kwargs) -> LLMResponse:
        if self._responses:
            return self._responses.pop(0)
        return LLMResponse(content="", tool_calls=[])

    def get_default_model(self) -> str:
        return "test-model"


def _eval_tool_call(should_notify: bool, reason: str = "") -> LLMResponse:
    return LLMResponse(
        content="",
        tool_calls=[
            ToolCallRequest(
                id="eval_1",
                name="evaluate_notification",
                arguments={"should_notify": should_notify, "reason": reason},
            )
        ],
    )


@pytest.mark.asyncio
async def test_should_notify_true() -> None:
    provider = DummyProvider([_eval_tool_call(True, "user asked to be reminded")])
    result = await evaluate_response("Task completed with results", "check emails", provider, "m")
    assert result is True


@pytest.mark.asyncio
async def test_should_notify_false() -> None:
    provider = DummyProvider([_eval_tool_call(False, "routine check, nothing new")])
    result = await evaluate_response("All clear, no updates", "check status", provider, "m")
    assert result is False


@pytest.mark.asyncio
async def test_fallback_on_error() -> None:
    class FailingProvider(DummyProvider):
        async def chat(self, *args, **kwargs) -> LLMResponse:
            raise RuntimeError("provider down")

    provider = FailingProvider([])
    result = await evaluate_response("some response", "some task", provider, "m")
    assert result is True


@pytest.mark.asyncio
async def test_no_tool_call_fallback() -> None:
    provider = DummyProvider([LLMResponse(content="I think you should notify", tool_calls=[])])
    result = await evaluate_response("some response", "some task", provider, "m")
    assert result is True
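

# Editor's note: the two fallback tests above encode a fail-open policy:
# when the evaluator provider errors out or returns no tool call,
# evaluate_response defaults to True, so genuine alerts are never silently
# dropped at the cost of an occasional extra notification.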
tests/agent/test_gemini_thought_signature.py (new file, 53 lines)
@@ -0,0 +1,53 @@
from types import SimpleNamespace

from nanobot.providers.base import ToolCallRequest
from nanobot.providers.litellm_provider import LiteLLMProvider


def test_litellm_parse_response_preserves_tool_call_provider_fields() -> None:
    provider = LiteLLMProvider(default_model="gemini/gemini-3-flash")

    response = SimpleNamespace(
        choices=[
            SimpleNamespace(
                finish_reason="tool_calls",
                message=SimpleNamespace(
                    content=None,
                    tool_calls=[
                        SimpleNamespace(
                            id="call_123",
                            function=SimpleNamespace(
                                name="read_file",
                                arguments='{"path":"todo.md"}',
                                provider_specific_fields={"inner": "value"},
                            ),
                            provider_specific_fields={"thought_signature": "signed-token"},
                        )
                    ],
                ),
            )
        ],
        usage=None,
    )

    parsed = provider._parse_response(response)

    assert len(parsed.tool_calls) == 1
    assert parsed.tool_calls[0].provider_specific_fields == {"thought_signature": "signed-token"}
    assert parsed.tool_calls[0].function_provider_specific_fields == {"inner": "value"}


def test_tool_call_request_serializes_provider_fields() -> None:
    tool_call = ToolCallRequest(
        id="abc123xyz",
        name="read_file",
        arguments={"path": "todo.md"},
        provider_specific_fields={"thought_signature": "signed-token"},
        function_provider_specific_fields={"inner": "value"},
    )

    message = tool_call.to_openai_tool_call()

    assert message["provider_specific_fields"] == {"thought_signature": "signed-token"}
    assert message["function"]["provider_specific_fields"] == {"inner": "value"}
    assert message["function"]["arguments"] == '{"path": "todo.md"}'
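

# Editor's note: SimpleNamespace stands in for the litellm response object;
# it mimics just the attribute shape _parse_response reads
# (choices[0].message.tool_calls[*].function plus the provider_specific_fields
# Gemini uses for thought signatures) without pulling litellm into the test.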
tests/agent/test_heartbeat_service.py (new file, 289 lines)
@@ -0,0 +1,289 @@
import asyncio

import pytest

from nanobot.heartbeat.service import HeartbeatService
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest


class DummyProvider(LLMProvider):
    def __init__(self, responses: list[LLMResponse]):
        super().__init__()
        self._responses = list(responses)
        self.calls = 0

    async def chat(self, *args, **kwargs) -> LLMResponse:
        self.calls += 1
        if self._responses:
            return self._responses.pop(0)
        return LLMResponse(content="", tool_calls=[])

    def get_default_model(self) -> str:
        return "test-model"
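

# Editor's note: DummyProvider pops one scripted LLMResponse per chat() call,
# so a test's response list is consumed in order across the heartbeat phases,
# and `calls` doubles as a retry counter (see the transient-error test below).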


@pytest.mark.asyncio
async def test_start_is_idempotent(tmp_path) -> None:
    provider = DummyProvider([])

    service = HeartbeatService(
        workspace=tmp_path,
        provider=provider,
        model="openai/gpt-4o-mini",
        interval_s=9999,
        enabled=True,
    )

    await service.start()
    first_task = service._task
    await service.start()

    assert service._task is first_task

    service.stop()
    await asyncio.sleep(0)


@pytest.mark.asyncio
async def test_decide_returns_skip_when_no_tool_call(tmp_path) -> None:
    provider = DummyProvider([LLMResponse(content="no tool call", tool_calls=[])])
    service = HeartbeatService(
        workspace=tmp_path,
        provider=provider,
        model="openai/gpt-4o-mini",
    )

    action, tasks = await service._decide("heartbeat content")
    assert action == "skip"
    assert tasks == ""


@pytest.mark.asyncio
async def test_trigger_now_executes_when_decision_is_run(tmp_path) -> None:
    (tmp_path / "HEARTBEAT.md").write_text("- [ ] do thing", encoding="utf-8")

    provider = DummyProvider([
        LLMResponse(
            content="",
            tool_calls=[
                ToolCallRequest(
                    id="hb_1",
                    name="heartbeat",
                    arguments={"action": "run", "tasks": "check open tasks"},
                )
            ],
        )
    ])

    called_with: list[str] = []

    async def _on_execute(tasks: str) -> str:
        called_with.append(tasks)
        return "done"

    service = HeartbeatService(
        workspace=tmp_path,
        provider=provider,
        model="openai/gpt-4o-mini",
        on_execute=_on_execute,
    )

    result = await service.trigger_now()
    assert result == "done"
    assert called_with == ["check open tasks"]


@pytest.mark.asyncio
async def test_trigger_now_returns_none_when_decision_is_skip(tmp_path) -> None:
    (tmp_path / "HEARTBEAT.md").write_text("- [ ] do thing", encoding="utf-8")

    provider = DummyProvider([
        LLMResponse(
            content="",
            tool_calls=[
                ToolCallRequest(
                    id="hb_1",
                    name="heartbeat",
                    arguments={"action": "skip"},
                )
            ],
        )
    ])

    async def _on_execute(tasks: str) -> str:
        return tasks

    service = HeartbeatService(
        workspace=tmp_path,
        provider=provider,
        model="openai/gpt-4o-mini",
        on_execute=_on_execute,
    )

    assert await service.trigger_now() is None


@pytest.mark.asyncio
async def test_tick_notifies_when_evaluator_says_yes(tmp_path, monkeypatch) -> None:
    """Phase 1 run -> Phase 2 execute -> Phase 3 evaluate=notify -> on_notify called."""
    (tmp_path / "HEARTBEAT.md").write_text("- [ ] check deployments", encoding="utf-8")

    provider = DummyProvider([
        LLMResponse(
            content="",
            tool_calls=[
                ToolCallRequest(
                    id="hb_1",
                    name="heartbeat",
                    arguments={"action": "run", "tasks": "check deployments"},
                )
            ],
        ),
    ])

    executed: list[str] = []
    notified: list[str] = []

    async def _on_execute(tasks: str) -> str:
        executed.append(tasks)
        return "deployment failed on staging"

    async def _on_notify(response: str) -> None:
        notified.append(response)

    service = HeartbeatService(
        workspace=tmp_path,
        provider=provider,
        model="openai/gpt-4o-mini",
        on_execute=_on_execute,
        on_notify=_on_notify,
    )

    async def _eval_notify(*a, **kw):
        return True

    monkeypatch.setattr("nanobot.utils.evaluator.evaluate_response", _eval_notify)

    await service._tick()
    assert executed == ["check deployments"]
    assert notified == ["deployment failed on staging"]


@pytest.mark.asyncio
async def test_tick_suppresses_when_evaluator_says_no(tmp_path, monkeypatch) -> None:
    """Phase 1 run -> Phase 2 execute -> Phase 3 evaluate=silent -> on_notify NOT called."""
    (tmp_path / "HEARTBEAT.md").write_text("- [ ] check status", encoding="utf-8")

    provider = DummyProvider([
        LLMResponse(
            content="",
            tool_calls=[
                ToolCallRequest(
                    id="hb_1",
                    name="heartbeat",
                    arguments={"action": "run", "tasks": "check status"},
                )
            ],
        ),
    ])

    executed: list[str] = []
    notified: list[str] = []

    async def _on_execute(tasks: str) -> str:
        executed.append(tasks)
        return "everything is fine, no issues"

    async def _on_notify(response: str) -> None:
        notified.append(response)

    service = HeartbeatService(
        workspace=tmp_path,
        provider=provider,
        model="openai/gpt-4o-mini",
        on_execute=_on_execute,
        on_notify=_on_notify,
    )

    async def _eval_silent(*a, **kw):
        return False

    monkeypatch.setattr("nanobot.utils.evaluator.evaluate_response", _eval_silent)

    await service._tick()
    assert executed == ["check status"]
    assert notified == []
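

# Editor's note: the monkeypatch target above is the evaluator's defining
# module ("nanobot.utils.evaluator.evaluate_response"); the two _tick tests
# assume HeartbeatService resolves that function at call time (for example
# through a module import) rather than binding the name once at import time.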


@pytest.mark.asyncio
async def test_decide_retries_transient_error_then_succeeds(tmp_path, monkeypatch) -> None:
    provider = DummyProvider([
        LLMResponse(content="429 rate limit", finish_reason="error"),
        LLMResponse(
            content="",
            tool_calls=[
                ToolCallRequest(
                    id="hb_1",
                    name="heartbeat",
                    arguments={"action": "run", "tasks": "check open tasks"},
                )
            ],
        ),
    ])

    delays: list[int] = []

    async def _fake_sleep(delay: int) -> None:
        delays.append(delay)

    monkeypatch.setattr(asyncio, "sleep", _fake_sleep)

    service = HeartbeatService(
        workspace=tmp_path,
        provider=provider,
        model="openai/gpt-4o-mini",
    )

    action, tasks = await service._decide("heartbeat content")

    assert action == "run"
    assert tasks == "check open tasks"
    assert provider.calls == 2
    assert delays == [1]


@pytest.mark.asyncio
async def test_decide_prompt_includes_current_time(tmp_path) -> None:
    """Phase 1 user prompt must contain current time so the LLM can judge task urgency."""

    captured_messages: list[dict] = []

    class CapturingProvider(LLMProvider):
        async def chat(self, *, messages=None, **kwargs) -> LLMResponse:
            if messages:
                captured_messages.extend(messages)
            return LLMResponse(
                content="",
                tool_calls=[
                    ToolCallRequest(
                        id="hb_1", name="heartbeat",
                        arguments={"action": "skip"},
                    )
                ],
            )

        def get_default_model(self) -> str:
            return "test-model"

    service = HeartbeatService(
        workspace=tmp_path,
        provider=CapturingProvider(),
        model="test-model",
    )

    await service._decide("- [ ] check servers at 10:00 UTC")

    user_msg = captured_messages[1]
    assert user_msg["role"] == "user"
    assert "Current Time:" in user_msg["content"]
tests/agent/test_loop_consolidation_tokens.py (new file, 196 lines)
@@ -0,0 +1,196 @@
from unittest.mock import AsyncMock, MagicMock

import pytest

from nanobot.agent.loop import AgentLoop
import nanobot.agent.memory as memory_module
from nanobot.bus.queue import MessageBus
from nanobot.providers.base import LLMResponse


def _make_loop(tmp_path, *, estimated_tokens: int, context_window_tokens: int) -> AgentLoop:
    from nanobot.providers.base import GenerationSettings
    provider = MagicMock()
    provider.get_default_model.return_value = "test-model"
    provider.generation = GenerationSettings(max_tokens=0)
    provider.estimate_prompt_tokens.return_value = (estimated_tokens, "test-counter")
    _response = LLMResponse(content="ok", tool_calls=[])
    provider.chat_with_retry = AsyncMock(return_value=_response)
    provider.chat_stream_with_retry = AsyncMock(return_value=_response)

    loop = AgentLoop(
        bus=MessageBus(),
        provider=provider,
        workspace=tmp_path,
        model="test-model",
        context_window_tokens=context_window_tokens,
    )
    loop.tools.get_definitions = MagicMock(return_value=[])
    loop.memory_consolidator._SAFETY_BUFFER = 0
    return loop
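

# Editor's note: threshold semantics exercised below, as described by the
# test docstrings: consolidation triggers once the estimated prompt exceeds
# context_window_tokens, then keeps archiving chunks that end on a user
# boundary until the estimate drops below roughly half that threshold
# (500 -> 150 still continues at threshold 200, since 150 > 100).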


@pytest.mark.asyncio
async def test_prompt_below_threshold_does_not_consolidate(tmp_path) -> None:
    loop = _make_loop(tmp_path, estimated_tokens=100, context_window_tokens=200)
    loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True)  # type: ignore[method-assign]

    await loop.process_direct("hello", session_key="cli:test")

    loop.memory_consolidator.consolidate_messages.assert_not_awaited()


@pytest.mark.asyncio
async def test_prompt_above_threshold_triggers_consolidation(tmp_path, monkeypatch) -> None:
    loop = _make_loop(tmp_path, estimated_tokens=1000, context_window_tokens=200)
    loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True)  # type: ignore[method-assign]
    session = loop.sessions.get_or_create("cli:test")
    session.messages = [
        {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"},
        {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"},
        {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"},
    ]
    loop.sessions.save(session)
    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _message: 500)

    await loop.process_direct("hello", session_key="cli:test")

    assert loop.memory_consolidator.consolidate_messages.await_count >= 1


@pytest.mark.asyncio
async def test_prompt_above_threshold_archives_until_next_user_boundary(tmp_path, monkeypatch) -> None:
    loop = _make_loop(tmp_path, estimated_tokens=1000, context_window_tokens=200)
    loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True)  # type: ignore[method-assign]

    session = loop.sessions.get_or_create("cli:test")
    session.messages = [
        {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"},
        {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"},
        {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"},
        {"role": "assistant", "content": "a2", "timestamp": "2026-01-01T00:00:03"},
        {"role": "user", "content": "u3", "timestamp": "2026-01-01T00:00:04"},
    ]
    loop.sessions.save(session)

    token_map = {"u1": 120, "a1": 120, "u2": 120, "a2": 120, "u3": 120}
    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda message: token_map[message["content"]])

    await loop.memory_consolidator.maybe_consolidate_by_tokens(session)

    archived_chunk = loop.memory_consolidator.consolidate_messages.await_args.args[0]
    assert [message["content"] for message in archived_chunk] == ["u1", "a1", "u2", "a2"]
    assert session.last_consolidated == 4


@pytest.mark.asyncio
async def test_consolidation_loops_until_target_met(tmp_path, monkeypatch) -> None:
    """Verify maybe_consolidate_by_tokens keeps looping until under threshold."""
    loop = _make_loop(tmp_path, estimated_tokens=0, context_window_tokens=200)
    loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True)  # type: ignore[method-assign]

    session = loop.sessions.get_or_create("cli:test")
    session.messages = [
        {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"},
        {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"},
        {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"},
        {"role": "assistant", "content": "a2", "timestamp": "2026-01-01T00:00:03"},
        {"role": "user", "content": "u3", "timestamp": "2026-01-01T00:00:04"},
        {"role": "assistant", "content": "a3", "timestamp": "2026-01-01T00:00:05"},
        {"role": "user", "content": "u4", "timestamp": "2026-01-01T00:00:06"},
    ]
    loop.sessions.save(session)

    call_count = [0]
    def mock_estimate(_session):
        call_count[0] += 1
        if call_count[0] == 1:
            return (500, "test")
        if call_count[0] == 2:
            return (300, "test")
        return (80, "test")

    loop.memory_consolidator.estimate_session_prompt_tokens = mock_estimate  # type: ignore[method-assign]
    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _m: 100)

    await loop.memory_consolidator.maybe_consolidate_by_tokens(session)

    assert loop.memory_consolidator.consolidate_messages.await_count == 2
    assert session.last_consolidated == 6


@pytest.mark.asyncio
async def test_consolidation_continues_below_trigger_until_half_target(tmp_path, monkeypatch) -> None:
    """Once triggered, consolidation should continue until it drops below half the threshold."""
    loop = _make_loop(tmp_path, estimated_tokens=0, context_window_tokens=200)
    loop.memory_consolidator.consolidate_messages = AsyncMock(return_value=True)  # type: ignore[method-assign]

    session = loop.sessions.get_or_create("cli:test")
    session.messages = [
        {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"},
        {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"},
        {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"},
        {"role": "assistant", "content": "a2", "timestamp": "2026-01-01T00:00:03"},
        {"role": "user", "content": "u3", "timestamp": "2026-01-01T00:00:04"},
        {"role": "assistant", "content": "a3", "timestamp": "2026-01-01T00:00:05"},
        {"role": "user", "content": "u4", "timestamp": "2026-01-01T00:00:06"},
    ]
    loop.sessions.save(session)

    call_count = [0]

    def mock_estimate(_session):
        call_count[0] += 1
        if call_count[0] == 1:
            return (500, "test")
        if call_count[0] == 2:
            return (150, "test")
        return (80, "test")

    loop.memory_consolidator.estimate_session_prompt_tokens = mock_estimate  # type: ignore[method-assign]
    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _m: 100)

    await loop.memory_consolidator.maybe_consolidate_by_tokens(session)

    assert loop.memory_consolidator.consolidate_messages.await_count == 2
    assert session.last_consolidated == 6


@pytest.mark.asyncio
async def test_preflight_consolidation_before_llm_call(tmp_path, monkeypatch) -> None:
    """Verify preflight consolidation runs before the LLM call in process_direct."""
    order: list[str] = []

    loop = _make_loop(tmp_path, estimated_tokens=0, context_window_tokens=200)

    async def track_consolidate(messages):
        order.append("consolidate")
        return True
    loop.memory_consolidator.consolidate_messages = track_consolidate  # type: ignore[method-assign]

    async def track_llm(*args, **kwargs):
        order.append("llm")
        return LLMResponse(content="ok", tool_calls=[])
    loop.provider.chat_with_retry = track_llm
    loop.provider.chat_stream_with_retry = track_llm

    session = loop.sessions.get_or_create("cli:test")
    session.messages = [
        {"role": "user", "content": "u1", "timestamp": "2026-01-01T00:00:00"},
        {"role": "assistant", "content": "a1", "timestamp": "2026-01-01T00:00:01"},
        {"role": "user", "content": "u2", "timestamp": "2026-01-01T00:00:02"},
    ]
    loop.sessions.save(session)
    monkeypatch.setattr(memory_module, "estimate_message_tokens", lambda _m: 500)

    call_count = [0]
    def mock_estimate(_session):
        call_count[0] += 1
        return (1000 if call_count[0] <= 1 else 80, "test")
    loop.memory_consolidator.estimate_session_prompt_tokens = mock_estimate  # type: ignore[method-assign]

    await loop.process_direct("hello", session_key="cli:test")

    assert "consolidate" in order
    assert "llm" in order
    assert order.index("consolidate") < order.index("llm")
tests/agent/test_loop_save_turn.py (new file, 74 lines)
@@ -0,0 +1,74 @@
from nanobot.agent.context import ContextBuilder
from nanobot.agent.loop import AgentLoop
from nanobot.session.manager import Session


def _mk_loop() -> AgentLoop:
    loop = AgentLoop.__new__(AgentLoop)
    loop._TOOL_RESULT_MAX_CHARS = AgentLoop._TOOL_RESULT_MAX_CHARS
    return loop
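

# Editor's note: AgentLoop.__new__ builds an instance without running
# __init__, so _save_turn can be unit-tested in isolation; only the class
# attribute the method actually reads is copied onto the instance.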


def test_save_turn_skips_multimodal_user_when_only_runtime_context() -> None:
    loop = _mk_loop()
    session = Session(key="test:runtime-only")
    runtime = ContextBuilder._RUNTIME_CONTEXT_TAG + "\nCurrent Time: now (UTC)"

    loop._save_turn(
        session,
        [{"role": "user", "content": [{"type": "text", "text": runtime}]}],
        skip=0,
    )
    assert session.messages == []


def test_save_turn_keeps_image_placeholder_with_path_after_runtime_strip() -> None:
    loop = _mk_loop()
    session = Session(key="test:image")
    runtime = ContextBuilder._RUNTIME_CONTEXT_TAG + "\nCurrent Time: now (UTC)"

    loop._save_turn(
        session,
        [{
            "role": "user",
            "content": [
                {"type": "text", "text": runtime},
                {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}, "_meta": {"path": "/media/feishu/photo.jpg"}},
            ],
        }],
        skip=0,
    )
    assert session.messages[0]["content"] == [{"type": "text", "text": "[image: /media/feishu/photo.jpg]"}]


def test_save_turn_keeps_image_placeholder_without_meta() -> None:
    loop = _mk_loop()
    session = Session(key="test:image-no-meta")
    runtime = ContextBuilder._RUNTIME_CONTEXT_TAG + "\nCurrent Time: now (UTC)"

    loop._save_turn(
        session,
        [{
            "role": "user",
            "content": [
                {"type": "text", "text": runtime},
                {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
            ],
        }],
        skip=0,
    )
    assert session.messages[0]["content"] == [{"type": "text", "text": "[image]"}]


def test_save_turn_keeps_tool_results_under_16k() -> None:
    loop = _mk_loop()
    session = Session(key="test:tool-result")
    content = "x" * 12_000

    loop._save_turn(
        session,
        [{"role": "tool", "tool_call_id": "call_1", "name": "read_file", "content": content}],
        skip=0,
    )

    assert session.messages[0]["content"] == content
tests/agent/test_memory_consolidation_types.py (new file, 478 lines)
@@ -0,0 +1,478 @@
"""Test MemoryStore.consolidate() handles non-string tool call arguments.

Regression test for https://github.com/HKUDS/nanobot/issues/1042
When memory consolidation receives dict values instead of strings from the LLM
tool call response, it should serialize them to JSON instead of raising TypeError.
"""

import json
from pathlib import Path
from unittest.mock import AsyncMock

import pytest

from nanobot.agent.memory import MemoryStore
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest


def _make_messages(message_count: int = 30):
    """Create a list of mock messages."""
    return [
        {"role": "user", "content": f"msg{i}", "timestamp": "2026-01-01 00:00"}
        for i in range(message_count)
    ]


def _make_tool_response(history_entry, memory_update):
    """Create an LLMResponse with a save_memory tool call."""
    return LLMResponse(
        content=None,
        tool_calls=[
            ToolCallRequest(
                id="call_1",
                name="save_memory",
                arguments={
                    "history_entry": history_entry,
                    "memory_update": memory_update,
                },
            )
        ],
    )


class ScriptedProvider(LLMProvider):
    def __init__(self, responses: list[LLMResponse]):
        super().__init__()
        self._responses = list(responses)
        self.calls = 0

    async def chat(self, *args, **kwargs) -> LLMResponse:
        self.calls += 1
        if self._responses:
            return self._responses.pop(0)
        return LLMResponse(content="", tool_calls=[])

    def get_default_model(self) -> str:
        return "test-model"
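

# Editor's note: the crux of issue #1042 in two lines: writing a text file
# needs a str, so dict arguments must be serialized first.
#
#     Path("memory.md").write_text(json.dumps(update))  # ok
#     Path("memory.md").write_text(update)              # TypeError for a dict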


class TestMemoryConsolidationTypeHandling:
    """Test that consolidation handles various argument types correctly."""

    @pytest.mark.asyncio
    async def test_string_arguments_work(self, tmp_path: Path) -> None:
        """Normal case: LLM returns string arguments."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()
        provider.chat = AsyncMock(
            return_value=_make_tool_response(
                history_entry="[2026-01-01] User discussed testing.",
                memory_update="# Memory\nUser likes testing.",
            )
        )
        provider.chat_with_retry = provider.chat
        messages = _make_messages(message_count=60)

        result = await store.consolidate(messages, provider, "test-model")

        assert result is True
        assert store.history_file.exists()
        assert "[2026-01-01] User discussed testing." in store.history_file.read_text()
        assert "User likes testing." in store.memory_file.read_text()

    @pytest.mark.asyncio
    async def test_dict_arguments_serialized_to_json(self, tmp_path: Path) -> None:
        """Issue #1042: LLM returns dict instead of string — must not raise TypeError."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()
        provider.chat = AsyncMock(
            return_value=_make_tool_response(
                history_entry={"timestamp": "2026-01-01", "summary": "User discussed testing."},
                memory_update={"facts": ["User likes testing"], "topics": ["testing"]},
            )
        )
        provider.chat_with_retry = provider.chat
        messages = _make_messages(message_count=60)

        result = await store.consolidate(messages, provider, "test-model")

        assert result is True
        assert store.history_file.exists()
        history_content = store.history_file.read_text()
        parsed = json.loads(history_content.strip())
        assert parsed["summary"] == "User discussed testing."

        memory_content = store.memory_file.read_text()
        parsed_mem = json.loads(memory_content)
        assert "User likes testing" in parsed_mem["facts"]

    @pytest.mark.asyncio
    async def test_string_arguments_as_raw_json(self, tmp_path: Path) -> None:
        """Some providers return arguments as a JSON string instead of a parsed dict."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()

        response = LLMResponse(
            content=None,
            tool_calls=[
                ToolCallRequest(
                    id="call_1",
                    name="save_memory",
                    arguments=json.dumps({
                        "history_entry": "[2026-01-01] User discussed testing.",
                        "memory_update": "# Memory\nUser likes testing.",
                    }),
                )
            ],
        )
        provider.chat = AsyncMock(return_value=response)
        provider.chat_with_retry = provider.chat
        messages = _make_messages(message_count=60)

        result = await store.consolidate(messages, provider, "test-model")

        assert result is True
        assert "User discussed testing." in store.history_file.read_text()

    @pytest.mark.asyncio
    async def test_no_tool_call_returns_false(self, tmp_path: Path) -> None:
        """When the LLM doesn't use the save_memory tool, return False."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()
        provider.chat = AsyncMock(
            return_value=LLMResponse(content="I summarized the conversation.", tool_calls=[])
        )
        provider.chat_with_retry = provider.chat
        messages = _make_messages(message_count=60)

        result = await store.consolidate(messages, provider, "test-model")

        assert result is False
        assert not store.history_file.exists()

    @pytest.mark.asyncio
    async def test_skips_when_message_chunk_is_empty(self, tmp_path: Path) -> None:
        """Consolidation should be a no-op when the selected chunk is empty."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()
        provider.chat_with_retry = provider.chat
        messages: list[dict] = []

        result = await store.consolidate(messages, provider, "test-model")

        assert result is True
        provider.chat.assert_not_called()

    @pytest.mark.asyncio
    async def test_list_arguments_extracts_first_dict(self, tmp_path: Path) -> None:
        """Some providers return arguments as a list - extract the first element if it's a dict."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()

        response = LLMResponse(
            content=None,
            tool_calls=[
                ToolCallRequest(
                    id="call_1",
                    name="save_memory",
                    arguments=[{
                        "history_entry": "[2026-01-01] User discussed testing.",
                        "memory_update": "# Memory\nUser likes testing.",
                    }],
                )
            ],
        )
        provider.chat = AsyncMock(return_value=response)
        provider.chat_with_retry = provider.chat
        messages = _make_messages(message_count=60)

        result = await store.consolidate(messages, provider, "test-model")

        assert result is True
        assert "User discussed testing." in store.history_file.read_text()
        assert "User likes testing." in store.memory_file.read_text()

    @pytest.mark.asyncio
    async def test_list_arguments_empty_list_returns_false(self, tmp_path: Path) -> None:
        """Empty list arguments should return False."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()

        response = LLMResponse(
            content=None,
            tool_calls=[
                ToolCallRequest(
                    id="call_1",
                    name="save_memory",
                    arguments=[],
                )
            ],
        )
        provider.chat = AsyncMock(return_value=response)
        provider.chat_with_retry = provider.chat
        messages = _make_messages(message_count=60)

        result = await store.consolidate(messages, provider, "test-model")
|
||||
|
||||
assert result is False
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_arguments_non_dict_content_returns_false(self, tmp_path: Path) -> None:
|
||||
"""List with non-dict content should return False."""
|
||||
store = MemoryStore(tmp_path)
|
||||
provider = AsyncMock()
|
||||
|
||||
response = LLMResponse(
|
||||
content=None,
|
||||
tool_calls=[
|
||||
ToolCallRequest(
|
||||
id="call_1",
|
||||
name="save_memory",
|
||||
arguments=["string", "content"],
|
||||
)
|
||||
],
|
||||
)
|
||||
provider.chat = AsyncMock(return_value=response)
|
||||
provider.chat_with_retry = provider.chat
|
||||
messages = _make_messages(message_count=60)
|
||||
|
||||
result = await store.consolidate(messages, provider, "test-model")
|
||||
|
||||
assert result is False
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_missing_history_entry_returns_false_without_writing(self, tmp_path: Path) -> None:
|
||||
"""Do not persist partial results when required fields are missing."""
|
||||
store = MemoryStore(tmp_path)
|
||||
provider = AsyncMock()
|
||||
provider.chat_with_retry = AsyncMock(
|
||||
return_value=LLMResponse(
|
||||
content=None,
|
||||
tool_calls=[
|
||||
ToolCallRequest(
|
||||
id="call_1",
|
||||
name="save_memory",
|
||||
arguments={"memory_update": "# Memory\nOnly memory update"},
|
||||
)
|
||||
],
|
||||
)
|
||||
)
|
||||
messages = _make_messages(message_count=60)
|
||||
|
||||
result = await store.consolidate(messages, provider, "test-model")
|
||||
|
||||
assert result is False
|
||||
assert not store.history_file.exists()
|
||||
assert not store.memory_file.exists()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_missing_memory_update_returns_false_without_writing(self, tmp_path: Path) -> None:
|
||||
"""Do not append history if memory_update is missing."""
|
||||
store = MemoryStore(tmp_path)
|
||||
provider = AsyncMock()
|
||||
provider.chat_with_retry = AsyncMock(
|
||||
return_value=LLMResponse(
|
||||
content=None,
|
||||
tool_calls=[
|
||||
ToolCallRequest(
|
||||
id="call_1",
|
||||
name="save_memory",
|
||||
arguments={"history_entry": "[2026-01-01] Partial output."},
|
||||
)
|
||||
],
|
||||
)
|
||||
)
|
||||
messages = _make_messages(message_count=60)
|
||||
|
||||
result = await store.consolidate(messages, provider, "test-model")
|
||||
|
||||
assert result is False
|
||||
assert not store.history_file.exists()
|
||||
assert not store.memory_file.exists()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_null_required_field_returns_false_without_writing(self, tmp_path: Path) -> None:
|
||||
"""Null required fields should be rejected before persistence."""
|
||||
store = MemoryStore(tmp_path)
|
||||
provider = AsyncMock()
|
||||
provider.chat_with_retry = AsyncMock(
|
||||
return_value=_make_tool_response(
|
||||
history_entry=None,
|
||||
memory_update="# Memory\nUser likes testing.",
|
||||
)
|
||||
)
|
||||
messages = _make_messages(message_count=60)
|
||||
|
||||
result = await store.consolidate(messages, provider, "test-model")
|
||||
|
||||
assert result is False
|
||||
assert not store.history_file.exists()
|
||||
assert not store.memory_file.exists()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_empty_history_entry_returns_false_without_writing(self, tmp_path: Path) -> None:
|
||||
"""Empty history entries should be rejected to avoid blank archival records."""
|
||||
store = MemoryStore(tmp_path)
|
||||
provider = AsyncMock()
|
||||
provider.chat_with_retry = AsyncMock(
|
||||
return_value=_make_tool_response(
|
||||
history_entry=" ",
|
||||
memory_update="# Memory\nUser likes testing.",
|
||||
)
|
||||
)
|
||||
messages = _make_messages(message_count=60)
|
||||
|
||||
result = await store.consolidate(messages, provider, "test-model")
|
||||
|
||||
assert result is False
|
||||
assert not store.history_file.exists()
|
||||
assert not store.memory_file.exists()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_retries_transient_error_then_succeeds(self, tmp_path: Path, monkeypatch) -> None:
|
||||
store = MemoryStore(tmp_path)
|
||||
provider = ScriptedProvider([
|
||||
LLMResponse(content="503 server error", finish_reason="error"),
|
||||
_make_tool_response(
|
||||
history_entry="[2026-01-01] User discussed testing.",
|
||||
memory_update="# Memory\nUser likes testing.",
|
||||
),
|
||||
])
|
||||
messages = _make_messages(message_count=60)
|
||||
delays: list[int] = []
|
||||
|
||||
async def _fake_sleep(delay: int) -> None:
|
||||
delays.append(delay)
|
||||
|
||||
monkeypatch.setattr("nanobot.providers.base.asyncio.sleep", _fake_sleep)
|
||||
|
||||
result = await store.consolidate(messages, provider, "test-model")
|
||||
|
||||
assert result is True
|
||||
assert provider.calls == 2
|
||||
assert delays == [1]
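
    # The retry path is asserted indirectly: `asyncio.sleep` is monkeypatched,
    # so `delays == [1]` pins the first backoff at one second. The backoff
    # schedule beyond the first retry is not covered by this test.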

    @pytest.mark.asyncio
    async def test_consolidation_delegates_to_provider_defaults(self, tmp_path: Path) -> None:
        """Consolidation no longer passes generation params — the provider owns them."""
        store = MemoryStore(tmp_path)
        provider = AsyncMock()
        provider.chat_with_retry = AsyncMock(
            return_value=_make_tool_response(
                history_entry="[2026-01-01] User discussed testing.",
                memory_update="# Memory\nUser likes testing.",
            )
        )
        messages = _make_messages(message_count=60)

        result = await store.consolidate(messages, provider, "test-model")

        assert result is True
        provider.chat_with_retry.assert_awaited_once()
        _, kwargs = provider.chat_with_retry.await_args
        assert kwargs["model"] == "test-model"
        assert "temperature" not in kwargs
        assert "max_tokens" not in kwargs
        assert "reasoning_effort" not in kwargs

    @pytest.mark.asyncio
    async def test_tool_choice_fallback_on_unsupported_error(self, tmp_path: Path) -> None:
        """Forced tool_choice rejected by the provider -> retry with "auto" and succeed."""
        store = MemoryStore(tmp_path)
        error_resp = LLMResponse(
            content="Error calling LLM: litellm.BadRequestError: "
            "The tool_choice parameter does not support being set to required or object",
            finish_reason="error",
            tool_calls=[],
        )
        ok_resp = _make_tool_response(
            history_entry="[2026-01-01] Fallback worked.",
            memory_update="# Memory\nFallback OK.",
        )

        call_log: list[dict] = []

        async def _tracking_chat(**kwargs):
            call_log.append(kwargs)
            return error_resp if len(call_log) == 1 else ok_resp

        provider = AsyncMock()
        provider.chat_with_retry = AsyncMock(side_effect=_tracking_chat)
        messages = _make_messages(message_count=60)

        result = await store.consolidate(messages, provider, "test-model")

        assert result is True
        assert len(call_log) == 2
        assert isinstance(call_log[0]["tool_choice"], dict)
        assert call_log[1]["tool_choice"] == "auto"
        assert "Fallback worked." in store.history_file.read_text()

    @pytest.mark.asyncio
    async def test_tool_choice_fallback_auto_no_tool_call(self, tmp_path: Path) -> None:
        """Forced tool_choice rejected, and the "auto" retry also produces no tool call -> return False."""
        store = MemoryStore(tmp_path)
        error_resp = LLMResponse(
            content="Error: tool_choice must be none or auto",
            finish_reason="error",
            tool_calls=[],
        )
        no_tool_resp = LLMResponse(
            content="Here is a summary.",
            finish_reason="stop",
            tool_calls=[],
        )

        provider = AsyncMock()
        provider.chat_with_retry = AsyncMock(side_effect=[error_resp, no_tool_resp])
        messages = _make_messages(message_count=60)

        result = await store.consolidate(messages, provider, "test-model")

        assert result is False
        assert not store.history_file.exists()

    @pytest.mark.asyncio
    async def test_raw_archive_after_consecutive_failures(self, tmp_path: Path) -> None:
        """After 3 consecutive failures, raw-archive the messages and return True."""
        store = MemoryStore(tmp_path)
        no_tool = LLMResponse(content="No tool call.", finish_reason="stop", tool_calls=[])
        provider = AsyncMock()
        provider.chat_with_retry = AsyncMock(return_value=no_tool)
        messages = _make_messages(message_count=10)

        assert await store.consolidate(messages, provider, "m") is False
        assert await store.consolidate(messages, provider, "m") is False
        assert await store.consolidate(messages, provider, "m") is True

        assert store.history_file.exists()
        content = store.history_file.read_text()
        assert "[RAW]" in content
        assert "10 messages" in content
        assert "msg0" in content
        assert not store.memory_file.exists()
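
    # Raw archiving is a last-resort fallback: after three consecutive failed
    # consolidations the store appends the untouched messages to history_file
    # under a "[RAW]" marker and reports success, presumably so the caller can
    # advance past the chunk instead of retrying it forever. (That rationale is
    # an inference from these assertions, not documented behavior.)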

    @pytest.mark.asyncio
    async def test_raw_archive_counter_resets_on_success(self, tmp_path: Path) -> None:
        """A successful consolidation resets the failure counter."""
        store = MemoryStore(tmp_path)
        no_tool = LLMResponse(content="Nope.", finish_reason="stop", tool_calls=[])
        ok_resp = _make_tool_response(
            history_entry="[2026-01-01] OK.",
            memory_update="# Memory\nOK.",
        )
        messages = _make_messages(message_count=10)

        provider = AsyncMock()
        provider.chat_with_retry = AsyncMock(return_value=no_tool)
        assert await store.consolidate(messages, provider, "m") is False
        assert await store.consolidate(messages, provider, "m") is False
        assert store._consecutive_failures == 2

        provider.chat_with_retry = AsyncMock(return_value=ok_resp)
        assert await store.consolidate(messages, provider, "m") is True
        assert store._consecutive_failures == 0

        provider.chat_with_retry = AsyncMock(return_value=no_tool)
        assert await store.consolidate(messages, provider, "m") is False
        assert store._consecutive_failures == 1
495
tests/agent/test_onboard_logic.py
Normal file
@@ -0,0 +1,495 @@
"""Unit tests for onboard core logic functions.

These tests focus on the business logic behind the onboard wizard,
without testing the interactive UI components.
"""

import json
from pathlib import Path
from types import SimpleNamespace
from typing import Any, cast

import pytest
from pydantic import BaseModel, Field

from nanobot.cli import onboard as onboard_wizard

# Import functions to test
from nanobot.cli.commands import _merge_missing_defaults
from nanobot.cli.onboard import (
    _BACK_PRESSED,
    _configure_pydantic_model,
    _format_value,
    _get_field_display_name,
    _get_field_type_info,
    run_onboard,
)
from nanobot.config.schema import Config
from nanobot.utils.helpers import sync_workspace_templates


class TestMergeMissingDefaults:
    """Tests for _merge_missing_defaults recursive config merging."""

    def test_adds_missing_top_level_keys(self):
        existing = {"a": 1}
        defaults = {"a": 1, "b": 2, "c": 3}

        result = _merge_missing_defaults(existing, defaults)

        assert result == {"a": 1, "b": 2, "c": 3}

    def test_preserves_existing_values(self):
        existing = {"a": "custom_value"}
        defaults = {"a": "default_value"}

        result = _merge_missing_defaults(existing, defaults)

        assert result == {"a": "custom_value"}

    def test_merges_nested_dicts_recursively(self):
        existing = {
            "level1": {
                "level2": {
                    "existing": "kept",
                }
            }
        }
        defaults = {
            "level1": {
                "level2": {
                    "existing": "replaced",
                    "added": "new",
                },
                "level2b": "also_new",
            }
        }

        result = _merge_missing_defaults(existing, defaults)

        assert result == {
            "level1": {
                "level2": {
                    "existing": "kept",
                    "added": "new",
                },
                "level2b": "also_new",
            }
        }

    def test_returns_existing_if_not_dict(self):
        assert _merge_missing_defaults("string", {"a": 1}) == "string"
        assert _merge_missing_defaults([1, 2, 3], {"a": 1}) == [1, 2, 3]
        assert _merge_missing_defaults(None, {"a": 1}) is None
        assert _merge_missing_defaults(42, {"a": 1}) == 42

    def test_returns_existing_if_defaults_not_dict(self):
        assert _merge_missing_defaults({"a": 1}, "string") == {"a": 1}
        assert _merge_missing_defaults({"a": 1}, None) == {"a": 1}

    def test_handles_empty_dicts(self):
        assert _merge_missing_defaults({}, {"a": 1}) == {"a": 1}
        assert _merge_missing_defaults({"a": 1}, {}) == {"a": 1}
        assert _merge_missing_defaults({}, {}) == {}

    def test_backfills_channel_config(self):
        """Real-world scenario: backfill missing channel fields."""
        existing_channel = {
            "enabled": False,
            "appId": "",
            "secret": "",
        }
        default_channel = {
            "enabled": False,
            "appId": "",
            "secret": "",
            "msgFormat": "plain",
            "allowFrom": [],
        }

        result = _merge_missing_defaults(existing_channel, default_channel)

        assert result["msgFormat"] == "plain"
        assert result["allowFrom"] == []


class TestGetFieldTypeInfo:
    """Tests for _get_field_type_info type extraction."""

    def test_extracts_str_type(self):
        class Model(BaseModel):
            field: str

        type_name, inner = _get_field_type_info(Model.model_fields["field"])
        assert type_name == "str"
        assert inner is None

    def test_extracts_int_type(self):
        class Model(BaseModel):
            count: int

        type_name, inner = _get_field_type_info(Model.model_fields["count"])
        assert type_name == "int"
        assert inner is None

    def test_extracts_bool_type(self):
        class Model(BaseModel):
            enabled: bool

        type_name, inner = _get_field_type_info(Model.model_fields["enabled"])
        assert type_name == "bool"
        assert inner is None

    def test_extracts_float_type(self):
        class Model(BaseModel):
            ratio: float

        type_name, inner = _get_field_type_info(Model.model_fields["ratio"])
        assert type_name == "float"
        assert inner is None

    def test_extracts_list_type_with_item_type(self):
        class Model(BaseModel):
            items: list[str]

        type_name, inner = _get_field_type_info(Model.model_fields["items"])
        assert type_name == "list"
        assert inner is str

    def test_extracts_list_type_without_item_type(self):
        # Plain list without type param falls back to str
        class Model(BaseModel):
            items: list  # type: ignore

        # Plain list annotation doesn't match list check, returns str
        type_name, inner = _get_field_type_info(Model.model_fields["items"])
        assert type_name == "str"  # Falls back to str for untyped list
        assert inner is None

    def test_extracts_dict_type(self):
        # Plain dict without type param falls back to str
        class Model(BaseModel):
            data: dict  # type: ignore

        # Plain dict annotation doesn't match dict check, returns str
        type_name, inner = _get_field_type_info(Model.model_fields["data"])
        assert type_name == "str"  # Falls back to str for untyped dict
        assert inner is None

    def test_extracts_optional_type(self):
        class Model(BaseModel):
            optional: str | None = None

        type_name, inner = _get_field_type_info(Model.model_fields["optional"])
        # Should unwrap Optional and get str
        assert type_name == "str"
        assert inner is None

    def test_extracts_nested_model_type(self):
        class Inner(BaseModel):
            x: int

        class Outer(BaseModel):
            nested: Inner

        type_name, inner = _get_field_type_info(Outer.model_fields["nested"])
        assert type_name == "model"
        assert inner is Inner

    def test_handles_none_annotation(self):
        """Field with None annotation defaults to str."""
        class Model(BaseModel):
            field: Any = None

        # Create a mock field_info with None annotation
        field_info = SimpleNamespace(annotation=None)
        type_name, inner = _get_field_type_info(field_info)
        assert type_name == "str"
        assert inner is None


class TestGetFieldDisplayName:
    """Tests for _get_field_display_name human-readable name generation."""

    def test_uses_description_if_present(self):
        class Model(BaseModel):
            api_key: str = Field(description="API Key for authentication")

        name = _get_field_display_name("api_key", Model.model_fields["api_key"])
        assert name == "API Key for authentication"

    def test_converts_snake_case_to_title(self):
        field_info = SimpleNamespace(description=None)
        name = _get_field_display_name("user_name", field_info)
        assert name == "User Name"

    def test_adds_url_suffix(self):
        field_info = SimpleNamespace(description=None)
        name = _get_field_display_name("api_url", field_info)
        # Title case: "Api Url"
        assert "Url" in name and "Api" in name

    def test_adds_path_suffix(self):
        field_info = SimpleNamespace(description=None)
        name = _get_field_display_name("file_path", field_info)
        assert "Path" in name and "File" in name

    def test_adds_id_suffix(self):
        field_info = SimpleNamespace(description=None)
        name = _get_field_display_name("user_id", field_info)
        # Title case: "User Id"
        assert "Id" in name and "User" in name

    def test_adds_key_suffix(self):
        field_info = SimpleNamespace(description=None)
        name = _get_field_display_name("api_key", field_info)
        assert "Key" in name and "Api" in name

    def test_adds_token_suffix(self):
        field_info = SimpleNamespace(description=None)
        name = _get_field_display_name("auth_token", field_info)
        assert "Token" in name and "Auth" in name

    def test_adds_seconds_suffix(self):
        field_info = SimpleNamespace(description=None)
        name = _get_field_display_name("timeout_s", field_info)
        # Contains "(Seconds)" with title case
        assert "(Seconds)" in name or "(seconds)" in name

    def test_adds_ms_suffix(self):
        field_info = SimpleNamespace(description=None)
        name = _get_field_display_name("delay_ms", field_info)
        # Contains "(Ms)" or "(ms)"
        assert "(Ms)" in name or "(ms)" in name


class TestFormatValue:
    """Tests for _format_value display formatting."""

    def test_formats_none_as_not_set(self):
        assert "not set" in _format_value(None)

    def test_formats_empty_string_as_not_set(self):
        assert "not set" in _format_value("")

    def test_formats_empty_dict_as_not_set(self):
        assert "not set" in _format_value({})

    def test_formats_empty_list_as_not_set(self):
        assert "not set" in _format_value([])

    def test_formats_string_value(self):
        result = _format_value("hello")
        assert "hello" in result

    def test_formats_list_value(self):
        result = _format_value(["a", "b"])
        assert "a" in result or "b" in result

    def test_formats_dict_value(self):
        result = _format_value({"key": "value"})
        assert "key" in result or "value" in result

    def test_formats_int_value(self):
        result = _format_value(42)
        assert "42" in result

    def test_formats_bool_true(self):
        result = _format_value(True)
        assert "true" in result.lower() or "✓" in result

    def test_formats_bool_false(self):
        result = _format_value(False)
        assert "false" in result.lower() or "✗" in result


class TestSyncWorkspaceTemplates:
    """Tests for sync_workspace_templates file synchronization."""

    def test_creates_missing_files(self, tmp_path):
        """Should create template files that don't exist."""
        workspace = tmp_path / "workspace"

        added = sync_workspace_templates(workspace, silent=True)

        # Check that some files were created
        assert isinstance(added, list)
        # The actual files depend on the templates directory

    def test_does_not_overwrite_existing_files(self, tmp_path):
        """Should not overwrite files that already exist."""
        workspace = tmp_path / "workspace"
        workspace.mkdir(parents=True)
        (workspace / "AGENTS.md").write_text("existing content")

        sync_workspace_templates(workspace, silent=True)

        # Existing file should not be changed
        content = (workspace / "AGENTS.md").read_text()
        assert content == "existing content"

    def test_creates_memory_directory(self, tmp_path):
        """Should create memory directory structure."""
        workspace = tmp_path / "workspace"

        sync_workspace_templates(workspace, silent=True)

        assert (workspace / "memory").exists() or (workspace / "skills").exists()

    def test_returns_list_of_added_files(self, tmp_path):
        """Should return list of relative paths for added files."""
        workspace = tmp_path / "workspace"

        added = sync_workspace_templates(workspace, silent=True)

        assert isinstance(added, list)
        # All paths should be relative to workspace
        for path in added:
            assert not Path(path).is_absolute()


class TestProviderChannelInfo:
    """Tests for provider and channel info retrieval."""

    def test_get_provider_names_returns_dict(self):
        from nanobot.cli.onboard import _get_provider_names

        names = _get_provider_names()
        assert isinstance(names, dict)
        assert len(names) > 0
        # Should include common providers
        assert "openai" in names or "anthropic" in names
        assert "openai_codex" not in names
        assert "github_copilot" not in names

    def test_get_channel_names_returns_dict(self):
        from nanobot.cli.onboard import _get_channel_names

        names = _get_channel_names()
        assert isinstance(names, dict)
        # The channel registry may legitimately be empty here, so only the shape is checked
        assert len(names) >= 0

    def test_get_provider_info_returns_valid_structure(self):
        from nanobot.cli.onboard import _get_provider_info

        info = _get_provider_info()
        assert isinstance(info, dict)
        # Each value should be a tuple with expected structure
        for provider_name, value in info.items():
            assert isinstance(value, tuple)
            assert len(value) == 4  # (display_name, needs_api_key, needs_api_base, env_var)


class _SimpleDraftModel(BaseModel):
    api_key: str = ""


class _NestedDraftModel(BaseModel):
    api_key: str = ""


class _OuterDraftModel(BaseModel):
    nested: _NestedDraftModel = Field(default_factory=_NestedDraftModel)


class TestConfigurePydanticModelDrafts:
    @staticmethod
    def _patch_prompt_helpers(monkeypatch, tokens, text_value="secret"):
        sequence = iter(tokens)

        def fake_select(_prompt, choices, default=None):
            token = next(sequence)
            if token == "first":
                return choices[0]
            if token == "done":
                return "[Done]"
            if token == "back":
                return _BACK_PRESSED
            return token

        monkeypatch.setattr(onboard_wizard, "_select_with_back", fake_select)
        monkeypatch.setattr(onboard_wizard, "_show_config_panel", lambda *_args, **_kwargs: None)
        monkeypatch.setattr(
            onboard_wizard, "_input_with_existing", lambda *_args, **_kwargs: text_value
        )

    def test_discarding_section_keeps_original_model_unchanged(self, monkeypatch):
        model = _SimpleDraftModel()
        self._patch_prompt_helpers(monkeypatch, ["first", "back"])

        result = _configure_pydantic_model(model, "Simple")

        assert result is None
        assert model.api_key == ""

    def test_completing_section_returns_updated_draft(self, monkeypatch):
        model = _SimpleDraftModel()
        self._patch_prompt_helpers(monkeypatch, ["first", "done"])

        result = _configure_pydantic_model(model, "Simple")

        assert result is not None
        updated = cast(_SimpleDraftModel, result)
        assert updated.api_key == "secret"
        assert model.api_key == ""

    def test_nested_section_back_discards_nested_edits(self, monkeypatch):
        model = _OuterDraftModel()
        self._patch_prompt_helpers(monkeypatch, ["first", "first", "back", "done"])

        result = _configure_pydantic_model(model, "Outer")

        assert result is not None
        updated = cast(_OuterDraftModel, result)
        assert updated.nested.api_key == ""
        assert model.nested.api_key == ""

    def test_nested_section_done_commits_nested_edits(self, monkeypatch):
        model = _OuterDraftModel()
        self._patch_prompt_helpers(monkeypatch, ["first", "first", "done", "done"])

        result = _configure_pydantic_model(model, "Outer")

        assert result is not None
        updated = cast(_OuterDraftModel, result)
        assert updated.nested.api_key == "secret"
        assert model.nested.api_key == ""


class TestRunOnboardExitBehavior:
    def test_main_menu_interrupt_can_discard_unsaved_session_changes(self, monkeypatch):
        initial_config = Config()

        responses = iter(
            [
                "[A] Agent Settings",
                KeyboardInterrupt(),
                "[X] Exit Without Saving",
            ]
        )

        class FakePrompt:
            def __init__(self, response):
                self.response = response

            def ask(self):
                if isinstance(self.response, BaseException):
                    raise self.response
                return self.response

        def fake_select(*_args, **_kwargs):
            return FakePrompt(next(responses))

        def fake_configure_general_settings(config, section):
            if section == "Agent Settings":
                config.agents.defaults.model = "test/provider-model"

        monkeypatch.setattr(onboard_wizard, "_show_main_menu_header", lambda: None)
        monkeypatch.setattr(onboard_wizard, "questionary", SimpleNamespace(select=fake_select))
        monkeypatch.setattr(onboard_wizard, "_configure_general_settings", fake_configure_general_settings)

        result = run_onboard(initial_config=initial_config)

        assert result.should_save is False
        assert result.config.model_dump(by_alias=True) == initial_config.model_dump(by_alias=True)
198
tests/agent/test_session_manager_history.py
Normal file
@@ -0,0 +1,198 @@
from nanobot.session.manager import Session


def _assert_no_orphans(history: list[dict]) -> None:
    """Assert every tool result in history has a matching assistant tool_call."""
    declared = {
        tc["id"]
        for m in history if m.get("role") == "assistant"
        for tc in (m.get("tool_calls") or [])
    }
    orphans = [
        m.get("tool_call_id") for m in history
        if m.get("role") == "tool" and m.get("tool_call_id") not in declared
    ]
    assert orphans == [], f"orphan tool_call_ids: {orphans}"


def _tool_turn(prefix: str, idx: int) -> list[dict]:
    """Helper: one assistant message with 2 tool_calls + 2 tool results."""
    return [
        {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {"id": f"{prefix}_{idx}_a", "type": "function", "function": {"name": "x", "arguments": "{}"}},
                {"id": f"{prefix}_{idx}_b", "type": "function", "function": {"name": "y", "arguments": "{}"}},
            ],
        },
        {"role": "tool", "tool_call_id": f"{prefix}_{idx}_a", "name": "x", "content": "ok"},
        {"role": "tool", "tool_call_id": f"{prefix}_{idx}_b", "name": "y", "content": "ok"},
    ]
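
# The dict shape above mirrors the OpenAI-style chat format: an assistant
# message declares tool_calls by id, and each "tool" message answers one id
# via tool_call_id. get_history() must keep these pairs intact when trimming.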


# --- Original regression test (from PR 2075) ---

def test_get_history_drops_orphan_tool_results_when_window_cuts_tool_calls():
    session = Session(key="telegram:test")
    session.messages.append({"role": "user", "content": "old turn"})
    for i in range(20):
        session.messages.extend(_tool_turn("old", i))
    session.messages.append({"role": "user", "content": "problem turn"})
    for i in range(25):
        session.messages.extend(_tool_turn("cur", i))
    session.messages.append({"role": "user", "content": "new telegram question"})

    history = session.get_history(max_messages=100)
    _assert_no_orphans(history)


# --- Positive test: legitimate pairs survive trimming ---

def test_legitimate_tool_pairs_preserved_after_trim():
    """Complete tool-call groups within the window must not be dropped."""
    session = Session(key="test:positive")
    session.messages.append({"role": "user", "content": "hello"})
    for i in range(5):
        session.messages.extend(_tool_turn("ok", i))
    session.messages.append({"role": "assistant", "content": "done"})

    history = session.get_history(max_messages=500)
    _assert_no_orphans(history)
    tool_ids = [m["tool_call_id"] for m in history if m.get("role") == "tool"]
    assert len(tool_ids) == 10
    assert history[0]["role"] == "user"


def test_retain_recent_legal_suffix_keeps_recent_messages():
    session = Session(key="test:trim")
    for i in range(10):
        session.messages.append({"role": "user", "content": f"msg{i}"})

    session.retain_recent_legal_suffix(4)

    assert len(session.messages) == 4
    assert session.messages[0]["content"] == "msg6"
    assert session.messages[-1]["content"] == "msg9"


def test_retain_recent_legal_suffix_adjusts_last_consolidated():
    session = Session(key="test:trim-cons")
    for i in range(10):
        session.messages.append({"role": "user", "content": f"msg{i}"})
    session.last_consolidated = 7

    session.retain_recent_legal_suffix(4)

    assert len(session.messages) == 4
    assert session.last_consolidated == 1


def test_retain_recent_legal_suffix_zero_clears_session():
    session = Session(key="test:trim-zero")
    for i in range(10):
        session.messages.append({"role": "user", "content": f"msg{i}"})
    session.last_consolidated = 5

    session.retain_recent_legal_suffix(0)

    assert session.messages == []
    assert session.last_consolidated == 0


def test_retain_recent_legal_suffix_keeps_legal_tool_boundary():
    session = Session(key="test:trim-tools")
    session.messages.append({"role": "user", "content": "old"})
    session.messages.extend(_tool_turn("old", 0))
    session.messages.append({"role": "user", "content": "keep"})
    session.messages.extend(_tool_turn("keep", 0))
    session.messages.append({"role": "assistant", "content": "done"})

    session.retain_recent_legal_suffix(4)

    history = session.get_history(max_messages=500)
    _assert_no_orphans(history)
    assert history[0]["role"] == "user"
    assert history[0]["content"] == "keep"


# --- last_consolidated > 0 ---

def test_orphan_trim_with_last_consolidated():
    """Orphan trimming works correctly when the session is partially consolidated."""
    session = Session(key="test:consolidated")
    for i in range(10):
        session.messages.append({"role": "user", "content": f"old {i}"})
        session.messages.extend(_tool_turn("cons", i))
    session.last_consolidated = 30

    session.messages.append({"role": "user", "content": "recent"})
    for i in range(15):
        session.messages.extend(_tool_turn("new", i))
    session.messages.append({"role": "user", "content": "latest"})

    history = session.get_history(max_messages=20)
    _assert_no_orphans(history)
    assert all(m.get("role") != "tool" or m["tool_call_id"].startswith("new_") for m in history)


# --- Edge: no tool messages at all ---

def test_no_tool_messages_unchanged():
    session = Session(key="test:plain")
    for i in range(5):
        session.messages.append({"role": "user", "content": f"q{i}"})
        session.messages.append({"role": "assistant", "content": f"a{i}"})

    history = session.get_history(max_messages=6)
    assert len(history) == 6
    _assert_no_orphans(history)


# --- Edge: all leading messages are orphan tool results ---

def test_all_orphan_prefix_stripped():
    """If the window starts with orphan tool results and nothing else, they're all dropped."""
    session = Session(key="test:all-orphan")
    session.messages.append({"role": "tool", "tool_call_id": "gone_1", "name": "x", "content": "ok"})
    session.messages.append({"role": "tool", "tool_call_id": "gone_2", "name": "y", "content": "ok"})
    session.messages.append({"role": "user", "content": "fresh start"})
    session.messages.append({"role": "assistant", "content": "hi"})

    history = session.get_history(max_messages=500)
    _assert_no_orphans(history)
    assert history[0]["role"] == "user"
    assert len(history) == 2


# --- Edge: empty session ---

def test_empty_session_history():
    session = Session(key="test:empty")
    history = session.get_history(max_messages=500)
    assert history == []


# --- Window cuts mid-group: assistant present but some tool results orphaned ---

def test_window_cuts_mid_tool_group():
    """If the window starts between an assistant's tool results, the partial group is trimmed."""
    session = Session(key="test:mid-cut")
    session.messages.append({"role": "user", "content": "setup"})
    session.messages.append({
        "role": "assistant", "content": None,
        "tool_calls": [
            {"id": "split_a", "type": "function", "function": {"name": "x", "arguments": "{}"}},
            {"id": "split_b", "type": "function", "function": {"name": "y", "arguments": "{}"}},
        ],
    })
    session.messages.append({"role": "tool", "tool_call_id": "split_a", "name": "x", "content": "ok"})
    session.messages.append({"role": "tool", "tool_call_id": "split_b", "name": "y", "content": "ok"})
    session.messages.append({"role": "user", "content": "next"})
    session.messages.extend(_tool_turn("intact", 0))
    session.messages.append({"role": "assistant", "content": "final"})

    # A window of 6 cuts off the "setup" user message, the assistant that
    # declared split_a/split_b, and the split_a tool result, leaving the
    # split_b tool result orphaned at the front of the window.
    history = session.get_history(max_messages=6)
    _assert_no_orphans(history)
127
tests/agent/test_skill_creator_scripts.py
Normal file
@@ -0,0 +1,127 @@
import importlib
import shutil
import sys
import zipfile
from pathlib import Path


SCRIPT_DIR = Path("nanobot/skills/skill-creator/scripts").resolve()
if str(SCRIPT_DIR) not in sys.path:
    sys.path.insert(0, str(SCRIPT_DIR))
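
# The skill-creator scripts live outside the package tree, so their directory
# is put on sys.path and the scripts are loaded by module name below; a plain
# `import init_skill` would otherwise fail.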

init_skill = importlib.import_module("init_skill")
package_skill = importlib.import_module("package_skill")
quick_validate = importlib.import_module("quick_validate")


def test_init_skill_creates_expected_files(tmp_path: Path) -> None:
    skill_dir = init_skill.init_skill(
        "demo-skill",
        tmp_path,
        ["scripts", "references", "assets"],
        include_examples=True,
    )

    assert skill_dir == tmp_path / "demo-skill"
    assert (skill_dir / "SKILL.md").exists()
    assert (skill_dir / "scripts" / "example.py").exists()
    assert (skill_dir / "references" / "api_reference.md").exists()
    assert (skill_dir / "assets" / "example_asset.txt").exists()


def test_validate_skill_accepts_existing_skill_creator() -> None:
    valid, message = quick_validate.validate_skill(
        Path("nanobot/skills/skill-creator").resolve()
    )

    assert valid, message


def test_validate_skill_rejects_placeholder_description(tmp_path: Path) -> None:
    skill_dir = tmp_path / "placeholder-skill"
    skill_dir.mkdir()
    (skill_dir / "SKILL.md").write_text(
        "---\n"
        "name: placeholder-skill\n"
        'description: "[TODO: fill me in]"\n'
        "---\n"
        "# Placeholder\n",
        encoding="utf-8",
    )

    valid, message = quick_validate.validate_skill(skill_dir)

    assert not valid
    assert "TODO placeholder" in message


def test_validate_skill_rejects_root_files_outside_allowed_dirs(tmp_path: Path) -> None:
    skill_dir = tmp_path / "bad-root-skill"
    skill_dir.mkdir()
    (skill_dir / "SKILL.md").write_text(
        "---\n"
        "name: bad-root-skill\n"
        "description: Valid description\n"
        "---\n"
        "# Skill\n",
        encoding="utf-8",
    )
    (skill_dir / "README.md").write_text("extra\n", encoding="utf-8")

    valid, message = quick_validate.validate_skill(skill_dir)

    assert not valid
    assert "Unexpected file or directory in skill root" in message


def test_package_skill_creates_archive(tmp_path: Path) -> None:
    skill_dir = tmp_path / "package-me"
    skill_dir.mkdir()
    (skill_dir / "SKILL.md").write_text(
        "---\n"
        "name: package-me\n"
        "description: Package this skill.\n"
        "---\n"
        "# Skill\n",
        encoding="utf-8",
    )
    scripts_dir = skill_dir / "scripts"
    scripts_dir.mkdir()
    (scripts_dir / "helper.py").write_text("print('ok')\n", encoding="utf-8")

    archive_path = package_skill.package_skill(skill_dir, tmp_path / "dist")

    assert archive_path == (tmp_path / "dist" / "package-me.skill")
    assert archive_path.exists()
    with zipfile.ZipFile(archive_path, "r") as archive:
        names = set(archive.namelist())
        assert "package-me/SKILL.md" in names
        assert "package-me/scripts/helper.py" in names


def test_package_skill_rejects_symlink(tmp_path: Path) -> None:
    skill_dir = tmp_path / "symlink-skill"
    skill_dir.mkdir()
    (skill_dir / "SKILL.md").write_text(
        "---\n"
        "name: symlink-skill\n"
        "description: Reject symlinks during packaging.\n"
        "---\n"
        "# Skill\n",
        encoding="utf-8",
    )
    scripts_dir = skill_dir / "scripts"
    scripts_dir.mkdir()
    target = tmp_path / "outside.txt"
    target.write_text("secret\n", encoding="utf-8")
    link = scripts_dir / "outside.txt"

    try:
        link.symlink_to(target)
    except (OSError, NotImplementedError):
        # Symlinks unsupported on this platform (e.g. Windows without privileges)
        return

    archive_path = package_skill.package_skill(skill_dir, tmp_path / "dist")

    assert archive_path is None
    assert not (tmp_path / "dist" / "symlink-skill.skill").exists()
223
tests/agent/test_task_cancel.py
Normal file
@@ -0,0 +1,223 @@
"""Tests for /stop task cancellation."""

from __future__ import annotations

import asyncio
from unittest.mock import AsyncMock, MagicMock, patch

import pytest


def _make_loop(*, exec_config=None):
    """Create a minimal AgentLoop with mocked dependencies."""
    from nanobot.agent.loop import AgentLoop
    from nanobot.bus.queue import MessageBus

    bus = MessageBus()
    provider = MagicMock()
    provider.get_default_model.return_value = "test-model"
    workspace = MagicMock()
    workspace.__truediv__ = MagicMock(return_value=MagicMock())

    with patch("nanobot.agent.loop.ContextBuilder"), \
         patch("nanobot.agent.loop.SessionManager"), \
         patch("nanobot.agent.loop.SubagentManager") as MockSubMgr:
        MockSubMgr.return_value.cancel_by_session = AsyncMock(return_value=0)
        loop = AgentLoop(bus=bus, provider=provider, workspace=workspace, exec_config=exec_config)
    return loop, bus
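
# ContextBuilder, SessionManager, and SubagentManager are patched out so the
# loop can be constructed without touching the filesystem; `workspace / x`
# also has to work, hence the __truediv__ stub on the workspace MagicMock.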


class TestHandleStop:
    @pytest.mark.asyncio
    async def test_stop_no_active_task(self):
        from nanobot.bus.events import InboundMessage
        from nanobot.command.builtin import cmd_stop
        from nanobot.command.router import CommandContext

        loop, bus = _make_loop()
        msg = InboundMessage(channel="test", sender_id="u1", chat_id="c1", content="/stop")
        ctx = CommandContext(msg=msg, session=None, key=msg.session_key, raw="/stop", loop=loop)
        out = await cmd_stop(ctx)
        assert "No active task" in out.content

    @pytest.mark.asyncio
    async def test_stop_cancels_active_task(self):
        from nanobot.bus.events import InboundMessage
        from nanobot.command.builtin import cmd_stop
        from nanobot.command.router import CommandContext

        loop, bus = _make_loop()
        cancelled = asyncio.Event()

        async def slow_task():
            try:
                await asyncio.sleep(60)
            except asyncio.CancelledError:
                cancelled.set()
                raise

        task = asyncio.create_task(slow_task())
        await asyncio.sleep(0)
        loop._active_tasks["test:c1"] = [task]

        msg = InboundMessage(channel="test", sender_id="u1", chat_id="c1", content="/stop")
        ctx = CommandContext(msg=msg, session=None, key=msg.session_key, raw="/stop", loop=loop)
        out = await cmd_stop(ctx)

        assert cancelled.is_set()
        assert "stopped" in out.content.lower()

    @pytest.mark.asyncio
    async def test_stop_cancels_multiple_tasks(self):
        from nanobot.bus.events import InboundMessage
        from nanobot.command.builtin import cmd_stop
        from nanobot.command.router import CommandContext

        loop, bus = _make_loop()
        events = [asyncio.Event(), asyncio.Event()]

        async def slow(idx):
            try:
                await asyncio.sleep(60)
            except asyncio.CancelledError:
                events[idx].set()
                raise

        tasks = [asyncio.create_task(slow(i)) for i in range(2)]
        await asyncio.sleep(0)
        loop._active_tasks["test:c1"] = tasks

        msg = InboundMessage(channel="test", sender_id="u1", chat_id="c1", content="/stop")
        ctx = CommandContext(msg=msg, session=None, key=msg.session_key, raw="/stop", loop=loop)
        out = await cmd_stop(ctx)

        assert all(e.is_set() for e in events)
        assert "2 task" in out.content


class TestDispatch:
    def test_exec_tool_not_registered_when_disabled(self):
        from nanobot.config.schema import ExecToolConfig

        loop, _bus = _make_loop(exec_config=ExecToolConfig(enable=False))

        assert loop.tools.get("exec") is None

    @pytest.mark.asyncio
    async def test_dispatch_processes_and_publishes(self):
        from nanobot.bus.events import InboundMessage, OutboundMessage

        loop, bus = _make_loop()
        msg = InboundMessage(channel="test", sender_id="u1", chat_id="c1", content="hello")
        loop._process_message = AsyncMock(
            return_value=OutboundMessage(channel="test", chat_id="c1", content="hi")
        )
        await loop._dispatch(msg)
        out = await asyncio.wait_for(bus.consume_outbound(), timeout=1.0)
        assert out.content == "hi"

    @pytest.mark.asyncio
    async def test_processing_lock_serializes(self):
        from nanobot.bus.events import InboundMessage, OutboundMessage

        loop, bus = _make_loop()
        order = []

        async def mock_process(m, **kwargs):
            order.append(f"start-{m.content}")
            await asyncio.sleep(0.05)
            order.append(f"end-{m.content}")
            return OutboundMessage(channel="test", chat_id="c1", content=m.content)

        loop._process_message = mock_process
        msg1 = InboundMessage(channel="test", sender_id="u1", chat_id="c1", content="a")
        msg2 = InboundMessage(channel="test", sender_id="u1", chat_id="c1", content="b")

        t1 = asyncio.create_task(loop._dispatch(msg1))
        t2 = asyncio.create_task(loop._dispatch(msg2))
        await asyncio.gather(t1, t2)
        assert order == ["start-a", "end-a", "start-b", "end-b"]


class TestSubagentCancellation:
    @pytest.mark.asyncio
    async def test_cancel_by_session(self):
        from nanobot.agent.subagent import SubagentManager
        from nanobot.bus.queue import MessageBus

        bus = MessageBus()
        provider = MagicMock()
        provider.get_default_model.return_value = "test-model"
        mgr = SubagentManager(provider=provider, workspace=MagicMock(), bus=bus)

        cancelled = asyncio.Event()

        async def slow():
            try:
                await asyncio.sleep(60)
            except asyncio.CancelledError:
                cancelled.set()
                raise

        task = asyncio.create_task(slow())
        await asyncio.sleep(0)
        mgr._running_tasks["sub-1"] = task
        mgr._session_tasks["test:c1"] = {"sub-1"}

        count = await mgr.cancel_by_session("test:c1")
        assert count == 1
        assert cancelled.is_set()

    @pytest.mark.asyncio
    async def test_cancel_by_session_no_tasks(self):
        from nanobot.agent.subagent import SubagentManager
        from nanobot.bus.queue import MessageBus

        bus = MessageBus()
        provider = MagicMock()
        provider.get_default_model.return_value = "test-model"
        mgr = SubagentManager(provider=provider, workspace=MagicMock(), bus=bus)
        assert await mgr.cancel_by_session("nonexistent") == 0

    @pytest.mark.asyncio
    async def test_subagent_preserves_reasoning_fields_in_tool_turn(self, monkeypatch, tmp_path):
        from nanobot.agent.subagent import SubagentManager
        from nanobot.bus.queue import MessageBus
        from nanobot.providers.base import LLMResponse, ToolCallRequest

        bus = MessageBus()
        provider = MagicMock()
        provider.get_default_model.return_value = "test-model"

        captured_second_call: list[dict] = []

        call_count = {"n": 0}

        async def scripted_chat_with_retry(*, messages, **kwargs):
            call_count["n"] += 1
            if call_count["n"] == 1:
                return LLMResponse(
                    content="thinking",
                    tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={})],
                    reasoning_content="hidden reasoning",
                    thinking_blocks=[{"type": "thinking", "thinking": "step"}],
                )
            captured_second_call[:] = messages
            return LLMResponse(content="done", tool_calls=[])

        provider.chat_with_retry = scripted_chat_with_retry
        mgr = SubagentManager(provider=provider, workspace=tmp_path, bus=bus)

        async def fake_execute(self, name, arguments):
            return "tool result"

        monkeypatch.setattr("nanobot.agent.tools.registry.ToolRegistry.execute", fake_execute)

        await mgr._run_subagent("sub-1", "do task", "label", {"channel": "test", "chat_id": "c1"})

        assistant_messages = [
            msg for msg in captured_second_call
            if msg.get("role") == "assistant" and msg.get("tool_calls")
        ]
        assert len(assistant_messages) == 1
        assert assistant_messages[0]["reasoning_content"] == "hidden reasoning"
        assert assistant_messages[0]["thinking_blocks"] == [{"type": "thinking", "thinking": "step"}]