fix: preserve image paths in fallback and session history
This commit is contained in:
@@ -159,7 +159,11 @@ Reply directly with text for conversations. Only use the 'message' tool to send
|
|||||||
if not mime or not mime.startswith("image/"):
|
if not mime or not mime.startswith("image/"):
|
||||||
continue
|
continue
|
||||||
b64 = base64.b64encode(raw).decode()
|
b64 = base64.b64encode(raw).decode()
|
||||||
images.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}})
|
images.append({
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {"url": f"data:{mime};base64,{b64}"},
|
||||||
|
"_meta": {"path": str(p)},
|
||||||
|
})
|
||||||
|
|
||||||
if not images:
|
if not images:
|
||||||
return text
|
return text
|
||||||
|
|||||||
@@ -480,7 +480,9 @@ class AgentLoop:
|
|||||||
continue # Strip runtime context from multimodal messages
|
continue # Strip runtime context from multimodal messages
|
||||||
if (c.get("type") == "image_url"
|
if (c.get("type") == "image_url"
|
||||||
and c.get("image_url", {}).get("url", "").startswith("data:image/")):
|
and c.get("image_url", {}).get("url", "").startswith("data:image/")):
|
||||||
filtered.append({"type": "text", "text": "[image]"})
|
path = (c.get("_meta") or {}).get("path", "")
|
||||||
|
placeholder = f"[image: {path}]" if path else "[image]"
|
||||||
|
filtered.append({"type": "text", "text": placeholder})
|
||||||
else:
|
else:
|
||||||
filtered.append(c)
|
filtered.append(c)
|
||||||
if not filtered:
|
if not filtered:
|
||||||
|
|||||||
@@ -89,14 +89,6 @@ class LLMProvider(ABC):
|
|||||||
"server error",
|
"server error",
|
||||||
"temporarily unavailable",
|
"temporarily unavailable",
|
||||||
)
|
)
|
||||||
_IMAGE_UNSUPPORTED_MARKERS = (
|
|
||||||
"image_url is only supported",
|
|
||||||
"does not support image",
|
|
||||||
"images are not supported",
|
|
||||||
"image input is not supported",
|
|
||||||
"image_url is not supported",
|
|
||||||
"unsupported image input",
|
|
||||||
)
|
|
||||||
|
|
||||||
_SENTINEL = object()
|
_SENTINEL = object()
|
||||||
|
|
||||||
@@ -107,11 +99,7 @@ class LLMProvider(ABC):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _sanitize_empty_content(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
def _sanitize_empty_content(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||||
"""Replace empty text content that causes provider 400 errors.
|
"""Sanitize message content: fix empty blocks, strip internal _meta fields."""
|
||||||
|
|
||||||
Empty content can appear when MCP tools return nothing. Most providers
|
|
||||||
reject empty-string content or empty text blocks in list content.
|
|
||||||
"""
|
|
||||||
result: list[dict[str, Any]] = []
|
result: list[dict[str, Any]] = []
|
||||||
for msg in messages:
|
for msg in messages:
|
||||||
content = msg.get("content")
|
content = msg.get("content")
|
||||||
@@ -123,18 +111,25 @@ class LLMProvider(ABC):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
if isinstance(content, list):
|
if isinstance(content, list):
|
||||||
filtered = [
|
new_items: list[Any] = []
|
||||||
item for item in content
|
changed = False
|
||||||
if not (
|
for item in content:
|
||||||
|
if (
|
||||||
isinstance(item, dict)
|
isinstance(item, dict)
|
||||||
and item.get("type") in ("text", "input_text", "output_text")
|
and item.get("type") in ("text", "input_text", "output_text")
|
||||||
and not item.get("text")
|
and not item.get("text")
|
||||||
)
|
):
|
||||||
]
|
changed = True
|
||||||
if len(filtered) != len(content):
|
continue
|
||||||
|
if isinstance(item, dict) and "_meta" in item:
|
||||||
|
new_items.append({k: v for k, v in item.items() if k != "_meta"})
|
||||||
|
changed = True
|
||||||
|
else:
|
||||||
|
new_items.append(item)
|
||||||
|
if changed:
|
||||||
clean = dict(msg)
|
clean = dict(msg)
|
||||||
if filtered:
|
if new_items:
|
||||||
clean["content"] = filtered
|
clean["content"] = new_items
|
||||||
elif msg.get("role") == "assistant" and msg.get("tool_calls"):
|
elif msg.get("role") == "assistant" and msg.get("tool_calls"):
|
||||||
clean["content"] = None
|
clean["content"] = None
|
||||||
else:
|
else:
|
||||||
@@ -197,11 +192,6 @@ class LLMProvider(ABC):
|
|||||||
err = (content or "").lower()
|
err = (content or "").lower()
|
||||||
return any(marker in err for marker in cls._TRANSIENT_ERROR_MARKERS)
|
return any(marker in err for marker in cls._TRANSIENT_ERROR_MARKERS)
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _is_image_unsupported_error(cls, content: str | None) -> bool:
|
|
||||||
err = (content or "").lower()
|
|
||||||
return any(marker in err for marker in cls._IMAGE_UNSUPPORTED_MARKERS)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _strip_image_content(messages: list[dict[str, Any]]) -> list[dict[str, Any]] | None:
|
def _strip_image_content(messages: list[dict[str, Any]]) -> list[dict[str, Any]] | None:
|
||||||
"""Replace image_url blocks with text placeholder. Returns None if no images found."""
|
"""Replace image_url blocks with text placeholder. Returns None if no images found."""
|
||||||
@@ -213,7 +203,9 @@ class LLMProvider(ABC):
|
|||||||
new_content = []
|
new_content = []
|
||||||
for b in content:
|
for b in content:
|
||||||
if isinstance(b, dict) and b.get("type") == "image_url":
|
if isinstance(b, dict) and b.get("type") == "image_url":
|
||||||
new_content.append({"type": "text", "text": "[image omitted]"})
|
path = (b.get("_meta") or {}).get("path", "")
|
||||||
|
placeholder = f"[image: {path}]" if path else "[image omitted]"
|
||||||
|
new_content.append({"type": "text", "text": placeholder})
|
||||||
found = True
|
found = True
|
||||||
else:
|
else:
|
||||||
new_content.append(b)
|
new_content.append(b)
|
||||||
@@ -267,11 +259,10 @@ class LLMProvider(ABC):
|
|||||||
return response
|
return response
|
||||||
|
|
||||||
if not self._is_transient_error(response.content):
|
if not self._is_transient_error(response.content):
|
||||||
if self._is_image_unsupported_error(response.content):
|
stripped = self._strip_image_content(messages)
|
||||||
stripped = self._strip_image_content(messages)
|
if stripped is not None:
|
||||||
if stripped is not None:
|
logger.warning("Non-transient LLM error with image content, retrying without images")
|
||||||
logger.warning("Model does not support image input, retrying without images")
|
return await self._safe_chat(**{**kw, "messages": stripped})
|
||||||
return await self._safe_chat(**{**kw, "messages": stripped})
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
logger.warning(
|
logger.warning(
|
||||||
|
|||||||
@@ -22,11 +22,30 @@ def test_save_turn_skips_multimodal_user_when_only_runtime_context() -> None:
|
|||||||
assert session.messages == []
|
assert session.messages == []
|
||||||
|
|
||||||
|
|
||||||
def test_save_turn_keeps_image_placeholder_after_runtime_strip() -> None:
|
def test_save_turn_keeps_image_placeholder_with_path_after_runtime_strip() -> None:
|
||||||
loop = _mk_loop()
|
loop = _mk_loop()
|
||||||
session = Session(key="test:image")
|
session = Session(key="test:image")
|
||||||
runtime = ContextBuilder._RUNTIME_CONTEXT_TAG + "\nCurrent Time: now (UTC)"
|
runtime = ContextBuilder._RUNTIME_CONTEXT_TAG + "\nCurrent Time: now (UTC)"
|
||||||
|
|
||||||
|
loop._save_turn(
|
||||||
|
session,
|
||||||
|
[{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{"type": "text", "text": runtime},
|
||||||
|
{"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}, "_meta": {"path": "/media/feishu/photo.jpg"}},
|
||||||
|
],
|
||||||
|
}],
|
||||||
|
skip=0,
|
||||||
|
)
|
||||||
|
assert session.messages[0]["content"] == [{"type": "text", "text": "[image: /media/feishu/photo.jpg]"}]
|
||||||
|
|
||||||
|
|
||||||
|
def test_save_turn_keeps_image_placeholder_without_meta() -> None:
|
||||||
|
loop = _mk_loop()
|
||||||
|
session = Session(key="test:image-no-meta")
|
||||||
|
runtime = ContextBuilder._RUNTIME_CONTEXT_TAG + "\nCurrent Time: now (UTC)"
|
||||||
|
|
||||||
loop._save_turn(
|
loop._save_turn(
|
||||||
session,
|
session,
|
||||||
[{
|
[{
|
||||||
|
|||||||
@@ -126,10 +126,17 @@ async def test_chat_with_retry_explicit_override_beats_defaults() -> None:
|
|||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Image-unsupported fallback tests
|
# Image fallback tests
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
_IMAGE_MSG = [
|
_IMAGE_MSG = [
|
||||||
|
{"role": "user", "content": [
|
||||||
|
{"type": "text", "text": "describe this"},
|
||||||
|
{"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}, "_meta": {"path": "/media/test.png"}},
|
||||||
|
]},
|
||||||
|
]
|
||||||
|
|
||||||
|
_IMAGE_MSG_NO_META = [
|
||||||
{"role": "user", "content": [
|
{"role": "user", "content": [
|
||||||
{"type": "text", "text": "describe this"},
|
{"type": "text", "text": "describe this"},
|
||||||
{"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
|
{"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
|
||||||
@@ -138,13 +145,10 @@ _IMAGE_MSG = [
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_image_unsupported_error_retries_without_images() -> None:
|
async def test_non_transient_error_with_images_retries_without_images() -> None:
|
||||||
"""If the model rejects image_url, retry once with images stripped."""
|
"""Any non-transient error retries once with images stripped when images are present."""
|
||||||
provider = ScriptedProvider([
|
provider = ScriptedProvider([
|
||||||
LLMResponse(
|
LLMResponse(content="API调用参数有误,请检查文档", finish_reason="error"),
|
||||||
content="Invalid content type. image_url is only supported by certain models",
|
|
||||||
finish_reason="error",
|
|
||||||
),
|
|
||||||
LLMResponse(content="ok, no image"),
|
LLMResponse(content="ok, no image"),
|
||||||
])
|
])
|
||||||
|
|
||||||
@@ -157,17 +161,14 @@ async def test_image_unsupported_error_retries_without_images() -> None:
|
|||||||
content = msg.get("content")
|
content = msg.get("content")
|
||||||
if isinstance(content, list):
|
if isinstance(content, list):
|
||||||
assert all(b.get("type") != "image_url" for b in content)
|
assert all(b.get("type") != "image_url" for b in content)
|
||||||
assert any("[image omitted]" in (b.get("text") or "") for b in content)
|
assert any("[image: /media/test.png]" in (b.get("text") or "") for b in content)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_image_unsupported_error_no_retry_without_image_content() -> None:
|
async def test_non_transient_error_without_images_no_retry() -> None:
|
||||||
"""If messages don't contain image_url blocks, don't retry on image error."""
|
"""Non-transient errors without image content are returned immediately."""
|
||||||
provider = ScriptedProvider([
|
provider = ScriptedProvider([
|
||||||
LLMResponse(
|
LLMResponse(content="401 unauthorized", finish_reason="error"),
|
||||||
content="image_url is only supported by certain models",
|
|
||||||
finish_reason="error",
|
|
||||||
),
|
|
||||||
])
|
])
|
||||||
|
|
||||||
response = await provider.chat_with_retry(
|
response = await provider.chat_with_retry(
|
||||||
@@ -179,31 +180,34 @@ async def test_image_unsupported_error_no_retry_without_image_content() -> None:
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_image_unsupported_fallback_returns_error_on_second_failure() -> None:
|
async def test_image_fallback_returns_error_on_second_failure() -> None:
|
||||||
"""If the image-stripped retry also fails, return that error."""
|
"""If the image-stripped retry also fails, return that error."""
|
||||||
provider = ScriptedProvider([
|
provider = ScriptedProvider([
|
||||||
LLMResponse(
|
LLMResponse(content="some model error", finish_reason="error"),
|
||||||
content="does not support image input",
|
LLMResponse(content="still failing", finish_reason="error"),
|
||||||
finish_reason="error",
|
|
||||||
),
|
|
||||||
LLMResponse(content="some other error", finish_reason="error"),
|
|
||||||
])
|
])
|
||||||
|
|
||||||
response = await provider.chat_with_retry(messages=_IMAGE_MSG)
|
response = await provider.chat_with_retry(messages=_IMAGE_MSG)
|
||||||
|
|
||||||
assert provider.calls == 2
|
assert provider.calls == 2
|
||||||
assert response.content == "some other error"
|
assert response.content == "still failing"
|
||||||
assert response.finish_reason == "error"
|
assert response.finish_reason == "error"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_non_image_error_does_not_trigger_image_fallback() -> None:
|
async def test_image_fallback_without_meta_uses_default_placeholder() -> None:
|
||||||
"""Regular non-transient errors must not trigger image stripping."""
|
"""When _meta is absent, fallback placeholder is '[image omitted]'."""
|
||||||
provider = ScriptedProvider([
|
provider = ScriptedProvider([
|
||||||
LLMResponse(content="401 unauthorized", finish_reason="error"),
|
LLMResponse(content="error", finish_reason="error"),
|
||||||
|
LLMResponse(content="ok"),
|
||||||
])
|
])
|
||||||
|
|
||||||
response = await provider.chat_with_retry(messages=_IMAGE_MSG)
|
response = await provider.chat_with_retry(messages=_IMAGE_MSG_NO_META)
|
||||||
|
|
||||||
assert provider.calls == 1
|
assert response.content == "ok"
|
||||||
assert response.content == "401 unauthorized"
|
assert provider.calls == 2
|
||||||
|
msgs_on_retry = provider.last_kwargs["messages"]
|
||||||
|
for msg in msgs_on_retry:
|
||||||
|
content = msg.get("content")
|
||||||
|
if isinstance(content, list):
|
||||||
|
assert any("[image omitted]" in (b.get("text") or "") for b in content)
|
||||||
|
|||||||
Reference in New Issue
Block a user