fix: merge consecutive user messages into single message
Some LLM providers (Minimax, Dashscope) strictly reject consecutive messages with the same role. build_messages() was emitting two separate user messages back-to-back: the runtime context and the actual user content.

Merge them into a single user message, handling both plain text and multimodal (image) content. Update _save_turn() to strip the runtime context prefix from the merged message when persisting to session history.

Fixes #1414
Fixes #1344
This commit is contained in:
@@ -112,11 +112,20 @@ Reply directly with text for conversations. Only use the 'message' tool to send
|
|||||||
chat_id: str | None = None,
|
chat_id: str | None = None,
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
"""Build the complete message list for an LLM call."""
|
"""Build the complete message list for an LLM call."""
|
||||||
|
runtime_ctx = self._build_runtime_context(channel, chat_id)
|
||||||
|
user_content = self._build_user_content(current_message, media)
|
||||||
|
|
||||||
|
# Merge runtime context and user content into a single user message
|
||||||
|
# to avoid consecutive same-role messages that some providers reject.
|
||||||
|
if isinstance(user_content, str):
|
||||||
|
merged = f"{runtime_ctx}\n\n{user_content}"
|
||||||
|
else:
|
||||||
|
merged = [{"type": "text", "text": runtime_ctx}] + user_content
|
||||||
|
|
||||||
return [
|
return [
|
||||||
{"role": "system", "content": self.build_system_prompt(skill_names)},
|
{"role": "system", "content": self.build_system_prompt(skill_names)},
|
||||||
*history,
|
*history,
|
||||||
{"role": "user", "content": self._build_runtime_context(channel, chat_id)},
|
{"role": "user", "content": merged},
|
||||||
{"role": "user", "content": self._build_user_content(current_message, media)},
|
|
||||||
]
|
]
|
||||||
|
|
||||||
def _build_user_content(self, text: str, media: list[str] | None) -> str | list[dict[str, Any]]:
|
def _build_user_content(self, text: str, media: list[str] | None) -> str | list[dict[str, Any]]:
|
||||||
|
|||||||
@@ -464,14 +464,23 @@ class AgentLoop:
|
|||||||
entry["content"] = content[:self._TOOL_RESULT_MAX_CHARS] + "\n... (truncated)"
|
entry["content"] = content[:self._TOOL_RESULT_MAX_CHARS] + "\n... (truncated)"
|
||||||
elif role == "user":
|
elif role == "user":
|
||||||
if isinstance(content, str) and content.startswith(ContextBuilder._RUNTIME_CONTEXT_TAG):
|
if isinstance(content, str) and content.startswith(ContextBuilder._RUNTIME_CONTEXT_TAG):
|
||||||
continue
|
# Strip the runtime-context prefix, keep only the user text.
|
||||||
|
parts = content.split("\n\n", 1)
|
||||||
|
if len(parts) > 1 and parts[1].strip():
|
||||||
|
entry["content"] = parts[1]
|
||||||
|
else:
|
||||||
|
continue
|
||||||
if isinstance(content, list):
|
if isinstance(content, list):
|
||||||
entry["content"] = [
|
filtered = []
|
||||||
{"type": "text", "text": "[image]"} if (
|
for c in content:
|
||||||
c.get("type") == "image_url"
|
if c.get("type") == "text" and isinstance(c.get("text"), str) and c["text"].startswith(ContextBuilder._RUNTIME_CONTEXT_TAG):
|
||||||
and c.get("image_url", {}).get("url", "").startswith("data:image/")
|
continue # Strip runtime context from multimodal messages
|
||||||
) else c for c in content
|
if (c.get("type") == "image_url"
|
||||||
]
|
and c.get("image_url", {}).get("url", "").startswith("data:image/")):
|
||||||
|
filtered.append({"type": "text", "text": "[image]"})
|
||||||
|
else:
|
||||||
|
filtered.append(c)
|
||||||
|
entry["content"] = filtered
|
||||||
entry.setdefault("timestamp", datetime.now().isoformat())
|
entry.setdefault("timestamp", datetime.now().isoformat())
|
||||||
session.messages.append(entry)
|
session.messages.append(entry)
|
||||||
session.updated_at = datetime.now()
|
session.updated_at = datetime.now()
|
||||||
|
|||||||
Reference in New Issue
Block a user