fix(memory): reserve completion headroom for consolidation
Trigger token consolidation before prompt usage reaches the full context window so response tokens and tokenizer estimation drift still fit safely within the model budget. Made-with: Cursor
This commit is contained in:
@@ -9,8 +9,10 @@ from nanobot.providers.base import LLMResponse
|
||||
|
||||
|
||||
def _make_loop(tmp_path, *, estimated_tokens: int, context_window_tokens: int) -> AgentLoop:
|
||||
from nanobot.providers.base import GenerationSettings
|
||||
provider = MagicMock()
|
||||
provider.get_default_model.return_value = "test-model"
|
||||
provider.generation = GenerationSettings(max_tokens=0)
|
||||
provider.estimate_prompt_tokens.return_value = (estimated_tokens, "test-counter")
|
||||
_response = LLMResponse(content="ok", tool_calls=[])
|
||||
provider.chat_with_retry = AsyncMock(return_value=_response)
|
||||
@@ -24,6 +26,7 @@ def _make_loop(tmp_path, *, estimated_tokens: int, context_window_tokens: int) -
|
||||
context_window_tokens=context_window_tokens,
|
||||
)
|
||||
loop.tools.get_definitions = MagicMock(return_value=[])
|
||||
loop.memory_consolidator._SAFETY_BUFFER = 0
|
||||
return loop
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user