fix: filter image_url for non-vision models at provider layer
- Add `supports_vision` field to ProviderSpec (default True)
- Add `_supports_vision` and `_filter_image_url` methods in LiteLLMProvider
- Filter image_url content blocks before sending to non-vision models
- Reverts session-layer filtering from original PR (wrong layer)

This fixes the issue where switching from Claude (vision-capable) to non-vision models (e.g., Baidu Qianfan) causes API errors due to unsupported image_url content blocks. The provider layer is the correct place for this filtering because: 1. It has access to model/provider capabilities 2. It only affects non-vision models 3. It preserves session-layer purity (storage should not know about model capabilities)
This commit is contained in:
@@ -124,6 +124,32 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
spec = find_by_model(model)
|
spec = find_by_model(model)
|
||||||
return spec is not None and spec.supports_prompt_caching
|
return spec is not None and spec.supports_prompt_caching
|
||||||
|
|
||||||
|
def _supports_vision(self, model: str) -> bool:
|
||||||
|
"""Return True when the provider supports vision/image inputs."""
|
||||||
|
if self._gateway is not None:
|
||||||
|
return self._gateway.supports_vision
|
||||||
|
spec = find_by_model(model)
|
||||||
|
return spec is None or spec.supports_vision # default True for unknown providers
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _filter_image_url(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||||
|
"""Replace image_url content blocks with [image] placeholder for non-vision models."""
|
||||||
|
filtered = []
|
||||||
|
for msg in messages:
|
||||||
|
content = msg.get("content")
|
||||||
|
if isinstance(content, list):
|
||||||
|
new_content = []
|
||||||
|
for block in content:
|
||||||
|
if isinstance(block, dict) and block.get("type") == "image_url":
|
||||||
|
# Replace image with placeholder text
|
||||||
|
new_content.append({"type": "text", "text": "[image]"})
|
||||||
|
else:
|
||||||
|
new_content.append(block)
|
||||||
|
filtered.append({**msg, "content": new_content})
|
||||||
|
else:
|
||||||
|
filtered.append(msg)
|
||||||
|
return filtered
|
||||||
|
|
||||||
def _apply_cache_control(
|
def _apply_cache_control(
|
||||||
self,
|
self,
|
||||||
messages: list[dict[str, Any]],
|
messages: list[dict[str, Any]],
|
||||||
@@ -234,6 +260,10 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
model = self._resolve_model(original_model)
|
model = self._resolve_model(original_model)
|
||||||
extra_msg_keys = self._extra_msg_keys(original_model, model)
|
extra_msg_keys = self._extra_msg_keys(original_model, model)
|
||||||
|
|
||||||
|
# Filter image_url for non-vision models
|
||||||
|
if not self._supports_vision(original_model):
|
||||||
|
messages = self._filter_image_url(messages)
|
||||||
|
|
||||||
if self._supports_cache_control(original_model):
|
if self._supports_cache_control(original_model):
|
||||||
messages, tools = self._apply_cache_control(messages, tools)
|
messages, tools = self._apply_cache_control(messages, tools)
|
||||||
|
|
||||||
|
|||||||
@@ -61,6 +61,9 @@ class ProviderSpec:
|
|||||||
# Provider supports cache_control on content blocks (e.g. Anthropic prompt caching)
|
# Provider supports cache_control on content blocks (e.g. Anthropic prompt caching)
|
||||||
supports_prompt_caching: bool = False
|
supports_prompt_caching: bool = False
|
||||||
|
|
||||||
|
# Provider supports vision/image inputs (most modern models do)
|
||||||
|
supports_vision: bool = True
|
||||||
|
|
||||||
@property
def label(self) -> str:
    """Human-readable label: explicit display_name if set, else title-cased name."""
    if self.display_name:
        return self.display_name
    return self.name.title()
|
||||||
|
|||||||
Reference in New Issue
Block a user