From ad99d5aaa060655347ec593ee613837667045c77 Mon Sep 17 00:00:00 2001
From: Nikolas de Hor
Date: Tue, 3 Mar 2026 00:59:58 -0300
Subject: [PATCH] fix: merge consecutive user messages into single message

Some LLM providers (Minimax, Dashscope) strictly reject consecutive
messages with the same role. build_messages() was emitting two separate
user messages back-to-back: the runtime context and the actual user
content.

Merge them into a single user message, handling both plain text and
multimodal (image) content. Update _save_turn() to strip the runtime
context prefix from the merged message when persisting to session
history.

Fixes #1414
Fixes #1344
---
 nanobot/agent/context.py | 13 +++++++++++--
 nanobot/agent/loop.py    | 23 ++++++++++++++++-------
 2 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py
index 010b126..df4825f 100644
--- a/nanobot/agent/context.py
+++ b/nanobot/agent/context.py
@@ -112,11 +112,20 @@ Reply directly with text for conversations. Only use the 'message' tool to send
         chat_id: str | None = None,
     ) -> list[dict[str, Any]]:
         """Build the complete message list for an LLM call."""
+        runtime_ctx = self._build_runtime_context(channel, chat_id)
+        user_content = self._build_user_content(current_message, media)
+
+        # Merge runtime context and user content into a single user message
+        # to avoid consecutive same-role messages that some providers reject.
+        if isinstance(user_content, str):
+            merged = f"{runtime_ctx}\n\n{user_content}"
+        else:
+            merged = [{"type": "text", "text": runtime_ctx}] + user_content
+
         return [
             {"role": "system", "content": self.build_system_prompt(skill_names)},
             *history,
-            {"role": "user", "content": self._build_runtime_context(channel, chat_id)},
-            {"role": "user", "content": self._build_user_content(current_message, media)},
+            {"role": "user", "content": merged},
         ]
 
     def _build_user_content(self, text: str, media: list[str] | None) -> str | list[dict[str, Any]]:
diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index 488615d..825b11a 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -464,14 +464,23 @@ class AgentLoop:
                 entry["content"] = content[:self._TOOL_RESULT_MAX_CHARS] + "\n... (truncated)"
             elif role == "user":
                 if isinstance(content, str) and content.startswith(ContextBuilder._RUNTIME_CONTEXT_TAG):
-                    continue
+                    # Strip the runtime-context prefix, keep only the user text.
+                    parts = content.split("\n\n", 1)
+                    if len(parts) > 1 and parts[1].strip():
+                        entry["content"] = parts[1]
+                    else:
+                        continue
                 if isinstance(content, list):
-                    entry["content"] = [
-                        {"type": "text", "text": "[image]"} if (
-                            c.get("type") == "image_url"
-                            and c.get("image_url", {}).get("url", "").startswith("data:image/")
-                        ) else c for c in content
-                    ]
+                    filtered = []
+                    for c in content:
+                        if c.get("type") == "text" and isinstance(c.get("text"), str) and c["text"].startswith(ContextBuilder._RUNTIME_CONTEXT_TAG):
+                            continue  # Strip runtime context from multimodal messages
+                        if (c.get("type") == "image_url"
+                                and c.get("image_url", {}).get("url", "").startswith("data:image/")):
+                            filtered.append({"type": "text", "text": "[image]"})
+                        else:
+                            filtered.append(c)
+                    entry["content"] = filtered
             entry.setdefault("timestamp", datetime.now().isoformat())
             session.messages.append(entry)
         session.updated_at = datetime.now()