diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py index 010b126..df4825f 100644 --- a/nanobot/agent/context.py +++ b/nanobot/agent/context.py @@ -112,11 +112,20 @@ Reply directly with text for conversations. Only use the 'message' tool to send chat_id: str | None = None, ) -> list[dict[str, Any]]: """Build the complete message list for an LLM call.""" + runtime_ctx = self._build_runtime_context(channel, chat_id) + user_content = self._build_user_content(current_message, media) + + # Merge runtime context and user content into a single user message + # to avoid consecutive same-role messages that some providers reject. + if isinstance(user_content, str): + merged = f"{runtime_ctx}\n\n{user_content}" + else: + merged = [{"type": "text", "text": runtime_ctx}] + user_content + return [ {"role": "system", "content": self.build_system_prompt(skill_names)}, *history, - {"role": "user", "content": self._build_runtime_context(channel, chat_id)}, - {"role": "user", "content": self._build_user_content(current_message, media)}, + {"role": "user", "content": merged}, ] def _build_user_content(self, text: str, media: list[str] | None) -> str | list[dict[str, Any]]: diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index 488615d..65a62e5 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -464,14 +464,25 @@ class AgentLoop: entry["content"] = content[:self._TOOL_RESULT_MAX_CHARS] + "\n... (truncated)" elif role == "user": if isinstance(content, str) and content.startswith(ContextBuilder._RUNTIME_CONTEXT_TAG): - continue + # Strip the runtime-context prefix, keep only the user text. + parts = content.split("\n\n", 1) + if len(parts) > 1 and parts[1].strip(): + entry["content"] = parts[1] + else: + continue if isinstance(content, list): - entry["content"] = [ - {"type": "text", "text": "[image]"} if ( - c.get("type") == "image_url" - and c.get("image_url", {}).get("url", "").startswith("data:image/") - ) else c for c in content - ] + filtered = [] + for c in content: + if c.get("type") == "text" and isinstance(c.get("text"), str) and c["text"].startswith(ContextBuilder._RUNTIME_CONTEXT_TAG): + continue # Strip runtime context from multimodal messages + if (c.get("type") == "image_url" + and c.get("image_url", {}).get("url", "").startswith("data:image/")): + filtered.append({"type": "text", "text": "[image]"}) + else: + filtered.append(c) + if not filtered: + continue + entry["content"] = filtered entry.setdefault("timestamp", datetime.now().isoformat()) session.messages.append(entry) session.updated_at = datetime.now() diff --git a/tests/test_loop_save_turn.py b/tests/test_loop_save_turn.py new file mode 100644 index 0000000..aec6d1a --- /dev/null +++ b/tests/test_loop_save_turn.py @@ -0,0 +1,41 @@ +from nanobot.agent.context import ContextBuilder +from nanobot.agent.loop import AgentLoop +from nanobot.session.manager import Session + + +def _mk_loop() -> AgentLoop: + loop = AgentLoop.__new__(AgentLoop) + loop._TOOL_RESULT_MAX_CHARS = 500 + return loop + + +def test_save_turn_skips_multimodal_user_when_only_runtime_context() -> None: + loop = _mk_loop() + session = Session(key="test:runtime-only") + runtime = ContextBuilder._RUNTIME_CONTEXT_TAG + "\nCurrent Time: now (UTC)" + + loop._save_turn( + session, + [{"role": "user", "content": [{"type": "text", "text": runtime}]}], + skip=0, + ) + assert session.messages == [] + + +def test_save_turn_keeps_image_placeholder_after_runtime_strip() -> None: + loop = _mk_loop() + session = Session(key="test:image") + runtime = ContextBuilder._RUNTIME_CONTEXT_TAG + "\nCurrent Time: now (UTC)" + + loop._save_turn( + session, + [{ + "role": "user", + "content": [ + {"type": "text", "text": runtime}, + {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}}, + ], + }], + skip=0, + ) + assert session.messages[0]["content"] == [{"type": "text", "text": "[image]"}]