From 1c71489121172f8ec307db5e7de8c816f2e10bad Mon Sep 17 00:00:00 2001
From: Xubin Ren <xubinrencs@gmail.com>
Date: Sun, 22 Mar 2026 03:38:58 +0000
Subject: [PATCH] fix(agent): count all message fields in token estimation

estimate_prompt_tokens() only counted the `content` text field, completely
missing tool_calls JSON (~72% of actual payload), reasoning_content,
tool_call_id, name, and per-message framing overhead. This caused the
memory consolidator to never trigger for tool-heavy sessions (e.g. cron
jobs), leading to context window overflow errors from the LLM provider.

Also adds reasoning_content counting and a fixed 4-token per-message
overhead to estimate_message_tokens(), raising its minimum estimate from
1 to 4, for consistent boundary detection.

Made-with: Cursor
---
 nanobot/utils/helpers.py | 34 +++++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py
index c0cf083..f89b956 100644
--- a/nanobot/utils/helpers.py
+++ b/nanobot/utils/helpers.py
@@ -115,7 +115,11 @@ def estimate_prompt_tokens(
     messages: list[dict[str, Any]],
     tools: list[dict[str, Any]] | None = None,
 ) -> int:
-    """Estimate prompt tokens with tiktoken."""
+    """Estimate prompt tokens with tiktoken.
+
+    Counts all fields that providers send to the LLM: content, tool_calls,
+    reasoning_content, tool_call_id, name, plus per-message framing overhead.
+    """
     try:
         enc = tiktoken.get_encoding("cl100k_base")
         parts: list[str] = []
@@ -129,9 +133,25 @@ def estimate_prompt_tokens(
                         txt = part.get("text", "")
                         if txt:
                             parts.append(txt)
+
+            tc = msg.get("tool_calls")
+            if tc:
+                parts.append(json.dumps(tc, ensure_ascii=False))
+
+            rc = msg.get("reasoning_content")
+            if isinstance(rc, str) and rc:
+                parts.append(rc)
+
+            for key in ("name", "tool_call_id"):
+                value = msg.get(key)
+                if isinstance(value, str) and value:
+                    parts.append(value)
+
         if tools:
             parts.append(json.dumps(tools, ensure_ascii=False))
-        return len(enc.encode("\n".join(parts)))
+
+        per_message_overhead = len(messages) * 4
+        return len(enc.encode("\n".join(parts))) + per_message_overhead
     except Exception:
         return 0
 
@@ -160,14 +180,18 @@ def estimate_message_tokens(message: dict[str, Any]) -> int:
     if message.get("tool_calls"):
         parts.append(json.dumps(message["tool_calls"], ensure_ascii=False))
 
+    rc = message.get("reasoning_content")
+    if isinstance(rc, str) and rc:
+        parts.append(rc)
+
     payload = "\n".join(parts)
     if not payload:
-        return 1
+        return 4
     try:
         enc = tiktoken.get_encoding("cl100k_base")
-        return max(1, len(enc.encode(payload)))
+        return max(4, len(enc.encode(payload)) + 4)
     except Exception:
-        return max(1, len(payload) // 4)
+        return max(4, len(payload) // 4 + 4)
 
 
 def estimate_prompt_tokens_chain(