From de0b5b3d91392263ebd061a3c3e365b0e823998d Mon Sep 17 00:00:00 2001 From: coldxiangyu Date: Thu, 12 Mar 2026 08:17:42 +0800 Subject: [PATCH 1/6] fix: filter image_url for non-vision models at provider layer - Add field to ProviderSpec (default True) - Add and methods in LiteLLMProvider - Filter image_url content blocks in before sending to non-vision models - Reverts session-layer filtering from original PR (wrong layer) This fixes the issue where switching from Claude (vision-capable) to non-vision models (e.g., Baidu Qianfan) causes API errors due to unsupported image_url content blocks. The provider layer is the correct place for this filtering because: 1. It has access to model/provider capabilities 2. It only affects non-vision models 3. It preserves session layer purity (storage should not know about model capabilities) --- nanobot/providers/litellm_provider.py | 30 +++++++++++++++++++++++++++ nanobot/providers/registry.py | 3 +++ 2 files changed, 33 insertions(+) diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py index d14e4c0..3dece89 100644 --- a/nanobot/providers/litellm_provider.py +++ b/nanobot/providers/litellm_provider.py @@ -124,6 +124,32 @@ class LiteLLMProvider(LLMProvider): spec = find_by_model(model) return spec is not None and spec.supports_prompt_caching + def _supports_vision(self, model: str) -> bool: + """Return True when the provider supports vision/image inputs.""" + if self._gateway is not None: + return self._gateway.supports_vision + spec = find_by_model(model) + return spec is None or spec.supports_vision # default True for unknown providers + + @staticmethod + def _filter_image_url(messages: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Replace image_url content blocks with [image] placeholder for non-vision models.""" + filtered = [] + for msg in messages: + content = msg.get("content") + if isinstance(content, list): + new_content = [] + for block in content: + if isinstance(block, dict) and block.get("type") == "image_url": + # Replace image with placeholder text + new_content.append({"type": "text", "text": "[image]"}) + else: + new_content.append(block) + filtered.append({**msg, "content": new_content}) + else: + filtered.append(msg) + return filtered + def _apply_cache_control( self, messages: list[dict[str, Any]], @@ -234,6 +260,10 @@ class LiteLLMProvider(LLMProvider): model = self._resolve_model(original_model) extra_msg_keys = self._extra_msg_keys(original_model, model) + # Filter image_url for non-vision models + if not self._supports_vision(original_model): + messages = self._filter_image_url(messages) + if self._supports_cache_control(original_model): messages, tools = self._apply_cache_control(messages, tools) diff --git a/nanobot/providers/registry.py b/nanobot/providers/registry.py index 42c1d24..a45f14a 100644 --- a/nanobot/providers/registry.py +++ b/nanobot/providers/registry.py @@ -61,6 +61,9 @@ class ProviderSpec: # Provider supports cache_control on content blocks (e.g. Anthropic prompt caching) supports_prompt_caching: bool = False + # Provider supports vision/image inputs (most modern models do) + supports_vision: bool = True + @property def label(self) -> str: return self.display_name or self.name.title() From c4628038c62ac8680226886a30a470423e54ea25 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Sun, 15 Mar 2026 14:18:08 +0000 Subject: [PATCH 2/6] fix: handle image_url rejection by retrying without images Replace the static provider-level supports_vision check with a reactive fallback: when a model returns an image-unsupported error, strip image_url blocks from messages and retry once. This avoids maintaining an inaccurate vision capability table and correctly handles gateway/unknown model scenarios. Also extract _safe_chat() to deduplicate try/except boilerplate in chat_with_retry(). --- nanobot/providers/base.py | 97 ++++++++++++++++----------- nanobot/providers/litellm_provider.py | 30 --------- nanobot/providers/registry.py | 3 - tests/test_provider_retry.py | 84 +++++++++++++++++++++++ 4 files changed, 142 insertions(+), 72 deletions(-) diff --git a/nanobot/providers/base.py b/nanobot/providers/base.py index 114a948..8b6956c 100644 --- a/nanobot/providers/base.py +++ b/nanobot/providers/base.py @@ -89,6 +89,14 @@ class LLMProvider(ABC): "server error", "temporarily unavailable", ) + _IMAGE_UNSUPPORTED_MARKERS = ( + "image_url is only supported", + "does not support image", + "images are not supported", + "image input is not supported", + "image_url is not supported", + "unsupported image input", + ) _SENTINEL = object() @@ -189,6 +197,40 @@ class LLMProvider(ABC): err = (content or "").lower() return any(marker in err for marker in cls._TRANSIENT_ERROR_MARKERS) + @classmethod + def _is_image_unsupported_error(cls, content: str | None) -> bool: + err = (content or "").lower() + return any(marker in err for marker in cls._IMAGE_UNSUPPORTED_MARKERS) + + @staticmethod + def _strip_image_content(messages: list[dict[str, Any]]) -> list[dict[str, Any]] | None: + """Replace image_url blocks with text placeholder. Returns None if no images found.""" + found = False + result = [] + for msg in messages: + content = msg.get("content") + if isinstance(content, list): + new_content = [] + for b in content: + if isinstance(b, dict) and b.get("type") == "image_url": + new_content.append({"type": "text", "text": "[image omitted]"}) + found = True + else: + new_content.append(b) + result.append({**msg, "content": new_content}) + else: + result.append(msg) + return result if found else None + + async def _safe_chat(self, **kwargs: Any) -> LLMResponse: + """Call chat() and convert unexpected exceptions to error responses.""" + try: + return await self.chat(**kwargs) + except asyncio.CancelledError: + raise + except Exception as exc: + return LLMResponse(content=f"Error calling LLM: {exc}", finish_reason="error") + async def chat_with_retry( self, messages: list[dict[str, Any]], @@ -212,57 +254,34 @@ class LLMProvider(ABC): if reasoning_effort is self._SENTINEL: reasoning_effort = self.generation.reasoning_effort + kw: dict[str, Any] = dict( + messages=messages, tools=tools, model=model, + max_tokens=max_tokens, temperature=temperature, + reasoning_effort=reasoning_effort, tool_choice=tool_choice, + ) + for attempt, delay in enumerate(self._CHAT_RETRY_DELAYS, start=1): - try: - response = await self.chat( - messages=messages, - tools=tools, - model=model, - max_tokens=max_tokens, - temperature=temperature, - reasoning_effort=reasoning_effort, - tool_choice=tool_choice, - ) - except asyncio.CancelledError: - raise - except Exception as exc: - response = LLMResponse( - content=f"Error calling LLM: {exc}", - finish_reason="error", - ) + response = await self._safe_chat(**kw) if response.finish_reason != "error": return response + if not self._is_transient_error(response.content): + if self._is_image_unsupported_error(response.content): + stripped = self._strip_image_content(messages) + if stripped is not None: + logger.warning("Model does not support image input, retrying without images") + return await self._safe_chat(**{**kw, "messages": stripped}) return response - err = (response.content or "").lower() logger.warning( "LLM transient error (attempt {}/{}), retrying in {}s: {}", - attempt, - len(self._CHAT_RETRY_DELAYS), - delay, - err[:120], + attempt, len(self._CHAT_RETRY_DELAYS), delay, + (response.content or "")[:120].lower(), ) await asyncio.sleep(delay) - try: - return await self.chat( - messages=messages, - tools=tools, - model=model, - max_tokens=max_tokens, - temperature=temperature, - reasoning_effort=reasoning_effort, - tool_choice=tool_choice, - ) - except asyncio.CancelledError: - raise - except Exception as exc: - return LLMResponse( - content=f"Error calling LLM: {exc}", - finish_reason="error", - ) + return await self._safe_chat(**kw) @abstractmethod def get_default_model(self) -> str: diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py index 3dece89..d14e4c0 100644 --- a/nanobot/providers/litellm_provider.py +++ b/nanobot/providers/litellm_provider.py @@ -124,32 +124,6 @@ class LiteLLMProvider(LLMProvider): spec = find_by_model(model) return spec is not None and spec.supports_prompt_caching - def _supports_vision(self, model: str) -> bool: - """Return True when the provider supports vision/image inputs.""" - if self._gateway is not None: - return self._gateway.supports_vision - spec = find_by_model(model) - return spec is None or spec.supports_vision # default True for unknown providers - - @staticmethod - def _filter_image_url(messages: list[dict[str, Any]]) -> list[dict[str, Any]]: - """Replace image_url content blocks with [image] placeholder for non-vision models.""" - filtered = [] - for msg in messages: - content = msg.get("content") - if isinstance(content, list): - new_content = [] - for block in content: - if isinstance(block, dict) and block.get("type") == "image_url": - # Replace image with placeholder text - new_content.append({"type": "text", "text": "[image]"}) - else: - new_content.append(block) - filtered.append({**msg, "content": new_content}) - else: - filtered.append(msg) - return filtered - def _apply_cache_control( self, messages: list[dict[str, Any]], @@ -260,10 +234,6 @@ class LiteLLMProvider(LLMProvider): model = self._resolve_model(original_model) extra_msg_keys = self._extra_msg_keys(original_model, model) - # Filter image_url for non-vision models - if not self._supports_vision(original_model): - messages = self._filter_image_url(messages) - if self._supports_cache_control(original_model): messages, tools = self._apply_cache_control(messages, tools) diff --git a/nanobot/providers/registry.py b/nanobot/providers/registry.py index a45f14a..42c1d24 100644 --- a/nanobot/providers/registry.py +++ b/nanobot/providers/registry.py @@ -61,9 +61,6 @@ class ProviderSpec: # Provider supports cache_control on content blocks (e.g. Anthropic prompt caching) supports_prompt_caching: bool = False - # Provider supports vision/image inputs (most modern models do) - supports_vision: bool = True - @property def label(self) -> str: return self.display_name or self.name.title() diff --git a/tests/test_provider_retry.py b/tests/test_provider_retry.py index 2420399..6f2c165 100644 --- a/tests/test_provider_retry.py +++ b/tests/test_provider_retry.py @@ -123,3 +123,87 @@ async def test_chat_with_retry_explicit_override_beats_defaults() -> None: assert provider.last_kwargs["temperature"] == 0.9 assert provider.last_kwargs["max_tokens"] == 9999 assert provider.last_kwargs["reasoning_effort"] == "low" + + +# --------------------------------------------------------------------------- +# Image-unsupported fallback tests +# --------------------------------------------------------------------------- + +_IMAGE_MSG = [ + {"role": "user", "content": [ + {"type": "text", "text": "describe this"}, + {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}}, + ]}, +] + + +@pytest.mark.asyncio +async def test_image_unsupported_error_retries_without_images() -> None: + """If the model rejects image_url, retry once with images stripped.""" + provider = ScriptedProvider([ + LLMResponse( + content="Invalid content type. image_url is only supported by certain models", + finish_reason="error", + ), + LLMResponse(content="ok, no image"), + ]) + + response = await provider.chat_with_retry(messages=_IMAGE_MSG) + + assert response.content == "ok, no image" + assert provider.calls == 2 + msgs_on_retry = provider.last_kwargs["messages"] + for msg in msgs_on_retry: + content = msg.get("content") + if isinstance(content, list): + assert all(b.get("type") != "image_url" for b in content) + assert any("[image omitted]" in (b.get("text") or "") for b in content) + + +@pytest.mark.asyncio +async def test_image_unsupported_error_no_retry_without_image_content() -> None: + """If messages don't contain image_url blocks, don't retry on image error.""" + provider = ScriptedProvider([ + LLMResponse( + content="image_url is only supported by certain models", + finish_reason="error", + ), + ]) + + response = await provider.chat_with_retry( + messages=[{"role": "user", "content": "hello"}], + ) + + assert provider.calls == 1 + assert response.finish_reason == "error" + + +@pytest.mark.asyncio +async def test_image_unsupported_fallback_returns_error_on_second_failure() -> None: + """If the image-stripped retry also fails, return that error.""" + provider = ScriptedProvider([ + LLMResponse( + content="does not support image input", + finish_reason="error", + ), + LLMResponse(content="some other error", finish_reason="error"), + ]) + + response = await provider.chat_with_retry(messages=_IMAGE_MSG) + + assert provider.calls == 2 + assert response.content == "some other error" + assert response.finish_reason == "error" + + +@pytest.mark.asyncio +async def test_non_image_error_does_not_trigger_image_fallback() -> None: + """Regular non-transient errors must not trigger image stripping.""" + provider = ScriptedProvider([ + LLMResponse(content="401 unauthorized", finish_reason="error"), + ]) + + response = await provider.chat_with_retry(messages=_IMAGE_MSG) + + assert provider.calls == 1 + assert response.content == "401 unauthorized" From 45832ea499907a701913faa49fbded384abc1339 Mon Sep 17 00:00:00 2001 From: Ben Date: Sun, 15 Mar 2026 07:18:20 +0000 Subject: [PATCH 3/6] Add load_skill tool to bypass workspace restriction for builtin skills When restrictToWorkspace is enabled, the agent cannot read builtin skill files via read_file since they live outside the workspace. This adds a dedicated load_skill tool that reads skills by name through the SkillsLoader, which accesses files directly via Python without the workspace restriction. - Add LoadSkillTool to filesystem tools - Register it in the agent loop - Update system prompt to instruct agent to use load_skill instead of read_file - Remove raw filesystem paths from skills summary --- nanobot/agent/context.py | 2 +- nanobot/agent/loop.py | 3 ++- nanobot/agent/skills.py | 2 -- nanobot/agent/subagent.py | 2 +- nanobot/agent/tools/filesystem.py | 32 +++++++++++++++++++++++++++++++ 5 files changed, 36 insertions(+), 5 deletions(-) diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py index e47fcb8..a6c3eea 100644 --- a/nanobot/agent/context.py +++ b/nanobot/agent/context.py @@ -46,7 +46,7 @@ class ContextBuilder: if skills_summary: parts.append(f"""# Skills -The following skills extend your capabilities. To use a skill, read its SKILL.md file using the read_file tool. +The following skills extend your capabilities. To use a skill, call the load_skill tool with its name. Skills with available="false" need dependencies installed first - you can try installing them with apt/brew. {skills_summary}""") diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index d644845..9cbdaf8 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -17,7 +17,7 @@ from nanobot.agent.context import ContextBuilder from nanobot.agent.memory import MemoryConsolidator from nanobot.agent.subagent import SubagentManager from nanobot.agent.tools.cron import CronTool -from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, ReadFileTool, WriteFileTool +from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, LoadSkillTool, ReadFileTool, WriteFileTool from nanobot.agent.tools.message import MessageTool from nanobot.agent.tools.registry import ToolRegistry from nanobot.agent.tools.shell import ExecTool @@ -128,6 +128,7 @@ class AgentLoop: self.tools.register(SpawnTool(manager=self.subagents)) if self.cron_service: self.tools.register(CronTool(self.cron_service)) + self.tools.register(LoadSkillTool(skills_loader=self.context.skills)) async def _connect_mcp(self) -> None: """Connect to configured MCP servers (one-time, lazy).""" diff --git a/nanobot/agent/skills.py b/nanobot/agent/skills.py index 9afee82..2b869fa 100644 --- a/nanobot/agent/skills.py +++ b/nanobot/agent/skills.py @@ -118,7 +118,6 @@ class SkillsLoader: lines = [""] for s in all_skills: name = escape_xml(s["name"]) - path = s["path"] desc = escape_xml(self._get_skill_description(s["name"])) skill_meta = self._get_skill_meta(s["name"]) available = self._check_requirements(skill_meta) @@ -126,7 +125,6 @@ class SkillsLoader: lines.append(f" ") lines.append(f" {name}") lines.append(f" {desc}") - lines.append(f" {path}") # Show missing requirements for unavailable skills if not available: diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py index b6bef68..cdde30d 100644 --- a/nanobot/agent/subagent.py +++ b/nanobot/agent/subagent.py @@ -213,7 +213,7 @@ Stay focused on the assigned task. Your final response will be reported back to skills_summary = SkillsLoader(self.workspace).build_skills_summary() if skills_summary: - parts.append(f"## Skills\n\nRead SKILL.md with read_file to use a skill.\n\n{skills_summary}") + parts.append(f"## Skills\n\nUse load_skill tool to load a skill by name.\n\n{skills_summary}") return "\n\n".join(parts) diff --git a/nanobot/agent/tools/filesystem.py b/nanobot/agent/tools/filesystem.py index 02c8331..95bc980 100644 --- a/nanobot/agent/tools/filesystem.py +++ b/nanobot/agent/tools/filesystem.py @@ -363,3 +363,35 @@ class ListDirTool(_FsTool): return f"Error: {e}" except Exception as e: return f"Error listing directory: {e}" + + +class LoadSkillTool(Tool): + """Tool to load a skill by name, bypassing workspace restriction.""" + + def __init__(self, skills_loader): + self._skills_loader = skills_loader + + @property + def name(self) -> str: + return "load_skill" + + @property + def description(self) -> str: + return "Load a skill by name. Returns the full SKILL.md content." + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": {"name": {"type": "string", "description": "The skill name to load"}}, + "required": ["name"], + } + + async def execute(self, name: str, **kwargs: Any) -> str: + try: + content = self._skills_loader.load_skill(name) + if content is None: + return f"Error: Skill not found: {name}" + return content + except Exception as e: + return f"Error loading skill: {str(e)}" From d684fec27aeee55be0bb7a1251313190275fef31 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Sun, 15 Mar 2026 15:13:41 +0000 Subject: [PATCH 4/6] Replace load_skill tool with read_file extra_allowed_dirs for builtin skills access Instead of adding a separate load_skill tool to bypass workspace restrictions, extend ReadFileTool with extra_allowed_dirs so it can read builtin skill paths while keeping write/edit tools locked to the workspace. Fixes the original issue for both main agent and subagents. Made-with: Cursor --- nanobot/agent/context.py | 2 +- nanobot/agent/loop.py | 8 ++- nanobot/agent/skills.py | 2 + nanobot/agent/subagent.py | 6 +- nanobot/agent/tools/filesystem.py | 60 ++++++---------- tests/test_filesystem_tools.py | 111 ++++++++++++++++++++++++++++++ 6 files changed, 145 insertions(+), 44 deletions(-) diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py index a6c3eea..e47fcb8 100644 --- a/nanobot/agent/context.py +++ b/nanobot/agent/context.py @@ -46,7 +46,7 @@ class ContextBuilder: if skills_summary: parts.append(f"""# Skills -The following skills extend your capabilities. To use a skill, call the load_skill tool with its name. +The following skills extend your capabilities. To use a skill, read its SKILL.md file using the read_file tool. Skills with available="false" need dependencies installed first - you can try installing them with apt/brew. {skills_summary}""") diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index 9cbdaf8..2c0d29a 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -17,7 +17,8 @@ from nanobot.agent.context import ContextBuilder from nanobot.agent.memory import MemoryConsolidator from nanobot.agent.subagent import SubagentManager from nanobot.agent.tools.cron import CronTool -from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, LoadSkillTool, ReadFileTool, WriteFileTool +from nanobot.agent.skills import BUILTIN_SKILLS_DIR +from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, ReadFileTool, WriteFileTool from nanobot.agent.tools.message import MessageTool from nanobot.agent.tools.registry import ToolRegistry from nanobot.agent.tools.shell import ExecTool @@ -114,7 +115,9 @@ class AgentLoop: def _register_default_tools(self) -> None: """Register the default set of tools.""" allowed_dir = self.workspace if self.restrict_to_workspace else None - for cls in (ReadFileTool, WriteFileTool, EditFileTool, ListDirTool): + extra_read = [BUILTIN_SKILLS_DIR] if allowed_dir else None + self.tools.register(ReadFileTool(workspace=self.workspace, allowed_dir=allowed_dir, extra_allowed_dirs=extra_read)) + for cls in (WriteFileTool, EditFileTool, ListDirTool): self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir)) self.tools.register(ExecTool( working_dir=str(self.workspace), @@ -128,7 +131,6 @@ class AgentLoop: self.tools.register(SpawnTool(manager=self.subagents)) if self.cron_service: self.tools.register(CronTool(self.cron_service)) - self.tools.register(LoadSkillTool(skills_loader=self.context.skills)) async def _connect_mcp(self) -> None: """Connect to configured MCP servers (one-time, lazy).""" diff --git a/nanobot/agent/skills.py b/nanobot/agent/skills.py index 2b869fa..9afee82 100644 --- a/nanobot/agent/skills.py +++ b/nanobot/agent/skills.py @@ -118,6 +118,7 @@ class SkillsLoader: lines = [""] for s in all_skills: name = escape_xml(s["name"]) + path = s["path"] desc = escape_xml(self._get_skill_description(s["name"])) skill_meta = self._get_skill_meta(s["name"]) available = self._check_requirements(skill_meta) @@ -125,6 +126,7 @@ class SkillsLoader: lines.append(f" ") lines.append(f" {name}") lines.append(f" {desc}") + lines.append(f" {path}") # Show missing requirements for unavailable skills if not available: diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py index cdde30d..063b54c 100644 --- a/nanobot/agent/subagent.py +++ b/nanobot/agent/subagent.py @@ -8,6 +8,7 @@ from typing import Any from loguru import logger +from nanobot.agent.skills import BUILTIN_SKILLS_DIR from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, ReadFileTool, WriteFileTool from nanobot.agent.tools.registry import ToolRegistry from nanobot.agent.tools.shell import ExecTool @@ -92,7 +93,8 @@ class SubagentManager: # Build subagent tools (no message tool, no spawn tool) tools = ToolRegistry() allowed_dir = self.workspace if self.restrict_to_workspace else None - tools.register(ReadFileTool(workspace=self.workspace, allowed_dir=allowed_dir)) + extra_read = [BUILTIN_SKILLS_DIR] if allowed_dir else None + tools.register(ReadFileTool(workspace=self.workspace, allowed_dir=allowed_dir, extra_allowed_dirs=extra_read)) tools.register(WriteFileTool(workspace=self.workspace, allowed_dir=allowed_dir)) tools.register(EditFileTool(workspace=self.workspace, allowed_dir=allowed_dir)) tools.register(ListDirTool(workspace=self.workspace, allowed_dir=allowed_dir)) @@ -213,7 +215,7 @@ Stay focused on the assigned task. Your final response will be reported back to skills_summary = SkillsLoader(self.workspace).build_skills_summary() if skills_summary: - parts.append(f"## Skills\n\nUse load_skill tool to load a skill by name.\n\n{skills_summary}") + parts.append(f"## Skills\n\nRead SKILL.md with read_file to use a skill.\n\n{skills_summary}") return "\n\n".join(parts) diff --git a/nanobot/agent/tools/filesystem.py b/nanobot/agent/tools/filesystem.py index 95bc980..6443f28 100644 --- a/nanobot/agent/tools/filesystem.py +++ b/nanobot/agent/tools/filesystem.py @@ -8,7 +8,10 @@ from nanobot.agent.tools.base import Tool def _resolve_path( - path: str, workspace: Path | None = None, allowed_dir: Path | None = None + path: str, + workspace: Path | None = None, + allowed_dir: Path | None = None, + extra_allowed_dirs: list[Path] | None = None, ) -> Path: """Resolve path against workspace (if relative) and enforce directory restriction.""" p = Path(path).expanduser() @@ -16,22 +19,35 @@ def _resolve_path( p = workspace / p resolved = p.resolve() if allowed_dir: - try: - resolved.relative_to(allowed_dir.resolve()) - except ValueError: + all_dirs = [allowed_dir] + (extra_allowed_dirs or []) + if not any(_is_under(resolved, d) for d in all_dirs): raise PermissionError(f"Path {path} is outside allowed directory {allowed_dir}") return resolved +def _is_under(path: Path, directory: Path) -> bool: + try: + path.relative_to(directory.resolve()) + return True + except ValueError: + return False + + class _FsTool(Tool): """Shared base for filesystem tools — common init and path resolution.""" - def __init__(self, workspace: Path | None = None, allowed_dir: Path | None = None): + def __init__( + self, + workspace: Path | None = None, + allowed_dir: Path | None = None, + extra_allowed_dirs: list[Path] | None = None, + ): self._workspace = workspace self._allowed_dir = allowed_dir + self._extra_allowed_dirs = extra_allowed_dirs def _resolve(self, path: str) -> Path: - return _resolve_path(path, self._workspace, self._allowed_dir) + return _resolve_path(path, self._workspace, self._allowed_dir, self._extra_allowed_dirs) # --------------------------------------------------------------------------- @@ -363,35 +379,3 @@ class ListDirTool(_FsTool): return f"Error: {e}" except Exception as e: return f"Error listing directory: {e}" - - -class LoadSkillTool(Tool): - """Tool to load a skill by name, bypassing workspace restriction.""" - - def __init__(self, skills_loader): - self._skills_loader = skills_loader - - @property - def name(self) -> str: - return "load_skill" - - @property - def description(self) -> str: - return "Load a skill by name. Returns the full SKILL.md content." - - @property - def parameters(self) -> dict[str, Any]: - return { - "type": "object", - "properties": {"name": {"type": "string", "description": "The skill name to load"}}, - "required": ["name"], - } - - async def execute(self, name: str, **kwargs: Any) -> str: - try: - content = self._skills_loader.load_skill(name) - if content is None: - return f"Error: Skill not found: {name}" - return content - except Exception as e: - return f"Error loading skill: {str(e)}" diff --git a/tests/test_filesystem_tools.py b/tests/test_filesystem_tools.py index 0f0ba78..620aa75 100644 --- a/tests/test_filesystem_tools.py +++ b/tests/test_filesystem_tools.py @@ -251,3 +251,114 @@ class TestListDirTool: result = await tool.execute(path=str(tmp_path / "nope")) assert "Error" in result assert "not found" in result + + +# --------------------------------------------------------------------------- +# Workspace restriction + extra_allowed_dirs +# --------------------------------------------------------------------------- + +class TestWorkspaceRestriction: + + @pytest.mark.asyncio + async def test_read_blocked_outside_workspace(self, tmp_path): + workspace = tmp_path / "ws" + workspace.mkdir() + outside = tmp_path / "outside" + outside.mkdir() + secret = outside / "secret.txt" + secret.write_text("top secret") + + tool = ReadFileTool(workspace=workspace, allowed_dir=workspace) + result = await tool.execute(path=str(secret)) + assert "Error" in result + assert "outside" in result.lower() + + @pytest.mark.asyncio + async def test_read_allowed_with_extra_dir(self, tmp_path): + workspace = tmp_path / "ws" + workspace.mkdir() + skills_dir = tmp_path / "skills" + skills_dir.mkdir() + skill_file = skills_dir / "test_skill" / "SKILL.md" + skill_file.parent.mkdir() + skill_file.write_text("# Test Skill\nDo something.") + + tool = ReadFileTool( + workspace=workspace, allowed_dir=workspace, + extra_allowed_dirs=[skills_dir], + ) + result = await tool.execute(path=str(skill_file)) + assert "Test Skill" in result + assert "Error" not in result + + @pytest.mark.asyncio + async def test_extra_dirs_does_not_widen_write(self, tmp_path): + from nanobot.agent.tools.filesystem import WriteFileTool + + workspace = tmp_path / "ws" + workspace.mkdir() + outside = tmp_path / "outside" + outside.mkdir() + + tool = WriteFileTool(workspace=workspace, allowed_dir=workspace) + result = await tool.execute(path=str(outside / "hack.txt"), content="pwned") + assert "Error" in result + assert "outside" in result.lower() + + @pytest.mark.asyncio + async def test_read_still_blocked_for_unrelated_dir(self, tmp_path): + workspace = tmp_path / "ws" + workspace.mkdir() + skills_dir = tmp_path / "skills" + skills_dir.mkdir() + unrelated = tmp_path / "other" + unrelated.mkdir() + secret = unrelated / "secret.txt" + secret.write_text("nope") + + tool = ReadFileTool( + workspace=workspace, allowed_dir=workspace, + extra_allowed_dirs=[skills_dir], + ) + result = await tool.execute(path=str(secret)) + assert "Error" in result + assert "outside" in result.lower() + + @pytest.mark.asyncio + async def test_workspace_file_still_readable_with_extra_dirs(self, tmp_path): + """Adding extra_allowed_dirs must not break normal workspace reads.""" + workspace = tmp_path / "ws" + workspace.mkdir() + ws_file = workspace / "README.md" + ws_file.write_text("hello from workspace") + skills_dir = tmp_path / "skills" + skills_dir.mkdir() + + tool = ReadFileTool( + workspace=workspace, allowed_dir=workspace, + extra_allowed_dirs=[skills_dir], + ) + result = await tool.execute(path=str(ws_file)) + assert "hello from workspace" in result + assert "Error" not in result + + @pytest.mark.asyncio + async def test_edit_blocked_in_extra_dir(self, tmp_path): + """edit_file must not be able to modify files in extra_allowed_dirs.""" + workspace = tmp_path / "ws" + workspace.mkdir() + skills_dir = tmp_path / "skills" + skills_dir.mkdir() + skill_file = skills_dir / "weather" / "SKILL.md" + skill_file.parent.mkdir() + skill_file.write_text("# Weather\nOriginal content.") + + tool = EditFileTool(workspace=workspace, allowed_dir=workspace) + result = await tool.execute( + path=str(skill_file), + old_text="Original content.", + new_text="Hacked content.", + ) + assert "Error" in result + assert "outside" in result.lower() + assert skill_file.read_text() == "# Weather\nOriginal content." From 34358eabc9a3bcc73cdbdfac23a8ecee4d568be0 Mon Sep 17 00:00:00 2001 From: Meng Yuhang Date: Thu, 12 Mar 2026 14:31:27 +0800 Subject: [PATCH 5/6] feat: support file/image/richText message receiving for DingTalk --- nanobot/channels/dingtalk.py | 90 ++++++++++++++++++++++++++++ tests/test_dingtalk_channel.py | 105 ++++++++++++++++++++++++++++++++- 2 files changed, 192 insertions(+), 3 deletions(-) diff --git a/nanobot/channels/dingtalk.py b/nanobot/channels/dingtalk.py index f1b8407..8a822ff 100644 --- a/nanobot/channels/dingtalk.py +++ b/nanobot/channels/dingtalk.py @@ -63,6 +63,49 @@ class NanobotDingTalkHandler(CallbackHandler): if not content: content = message.data.get("text", {}).get("content", "").strip() + # Handle file/image messages + file_paths = [] + if chatbot_msg.message_type == "picture" and chatbot_msg.image_content: + download_code = chatbot_msg.image_content.download_code + if download_code: + sender_uid = chatbot_msg.sender_staff_id or chatbot_msg.sender_id or "unknown" + fp = await self.channel._download_dingtalk_file(download_code, "image.jpg", sender_uid) + if fp: + file_paths.append(fp) + content = content or "[Image]" + + elif chatbot_msg.message_type == "file": + download_code = message.data.get("content", {}).get("downloadCode") or message.data.get("downloadCode") + fname = message.data.get("content", {}).get("fileName") or message.data.get("fileName") or "file" + if download_code: + sender_uid = chatbot_msg.sender_staff_id or chatbot_msg.sender_id or "unknown" + fp = await self.channel._download_dingtalk_file(download_code, fname, sender_uid) + if fp: + file_paths.append(fp) + content = content or "[File]" + + elif chatbot_msg.message_type == "richText" and chatbot_msg.rich_text_content: + rich_list = chatbot_msg.rich_text_content.rich_text_list or [] + for item in rich_list: + if not isinstance(item, dict): + continue + if item.get("type") == "text": + t = item.get("text", "").strip() + if t: + content = (content + " " + t).strip() if content else t + elif item.get("downloadCode"): + dc = item["downloadCode"] + fname = item.get("fileName") or "file" + sender_uid = chatbot_msg.sender_staff_id or chatbot_msg.sender_id or "unknown" + fp = await self.channel._download_dingtalk_file(dc, fname, sender_uid) + if fp: + file_paths.append(fp) + content = content or "[File]" + + if file_paths: + file_list = "\n".join("- " + p for p in file_paths) + content = content + "\n\nReceived files:\n" + file_list + if not content: logger.warning( "Received empty or unsupported message type: {}", @@ -488,3 +531,50 @@ class DingTalkChannel(BaseChannel): ) except Exception as e: logger.error("Error publishing DingTalk message: {}", e) + + async def _download_dingtalk_file( + self, + download_code: str, + filename: str, + sender_id: str, + ) -> str | None: + """Download a DingTalk file to a local temp directory, return local path.""" + import tempfile + + try: + token = await self._get_access_token() + if not token or not self._http: + logger.error("DingTalk file download: no token or http client") + return None + + # Step 1: Exchange downloadCode for a temporary download URL + api_url = "https://api.dingtalk.com/v1.0/robot/messageFiles/download" + headers = {"x-acs-dingtalk-access-token": token, "Content-Type": "application/json"} + payload = {"downloadCode": download_code, "robotCode": self.config.client_id} + resp = await self._http.post(api_url, json=payload, headers=headers) + if resp.status_code != 200: + logger.error("DingTalk get download URL failed: status={}, body={}", resp.status_code, resp.text) + return None + + result = resp.json() + download_url = result.get("downloadUrl") + if not download_url: + logger.error("DingTalk download URL not found in response: {}", result) + return None + + # Step 2: Download the file content + file_resp = await self._http.get(download_url, follow_redirects=True) + if file_resp.status_code != 200: + logger.error("DingTalk file download failed: status={}", file_resp.status_code) + return None + + # Save to local temp directory + download_dir = Path(tempfile.gettempdir()) / "nanobot_dingtalk" / sender_id + download_dir.mkdir(parents=True, exist_ok=True) + file_path = download_dir / filename + await asyncio.to_thread(file_path.write_bytes, file_resp.content) + logger.info("DingTalk file saved: {}", file_path) + return str(file_path) + except Exception as e: + logger.error("DingTalk file download error: {}", e) + return None diff --git a/tests/test_dingtalk_channel.py b/tests/test_dingtalk_channel.py index 7b04e80..5bcbcfa 100644 --- a/tests/test_dingtalk_channel.py +++ b/tests/test_dingtalk_channel.py @@ -14,19 +14,31 @@ class _FakeResponse: self.status_code = status_code self._json_body = json_body or {} self.text = "{}" + self.content = b"" + self.headers = {"content-type": "application/json"} def json(self) -> dict: return self._json_body class _FakeHttp: - def __init__(self) -> None: + def __init__(self, responses: list[_FakeResponse] | None = None) -> None: self.calls: list[dict] = [] + self._responses = list(responses) if responses else [] - async def post(self, url: str, json=None, headers=None): - self.calls.append({"url": url, "json": json, "headers": headers}) + def _next_response(self) -> _FakeResponse: + if self._responses: + return self._responses.pop(0) return _FakeResponse() + async def post(self, url: str, json=None, headers=None, **kwargs): + self.calls.append({"method": "POST", "url": url, "json": json, "headers": headers}) + return self._next_response() + + async def get(self, url: str, **kwargs): + self.calls.append({"method": "GET", "url": url}) + return self._next_response() + @pytest.mark.asyncio async def test_group_message_keeps_sender_id_and_routes_chat_id() -> None: @@ -109,3 +121,90 @@ async def test_handler_uses_voice_recognition_text_when_text_is_empty(monkeypatc assert msg.content == "voice transcript" assert msg.sender_id == "user1" assert msg.chat_id == "group:conv123" + + +@pytest.mark.asyncio +async def test_handler_processes_file_message(monkeypatch) -> None: + """Test that file messages are handled and forwarded with downloaded path.""" + bus = MessageBus() + channel = DingTalkChannel( + DingTalkConfig(client_id="app", client_secret="secret", allow_from=["user1"]), + bus, + ) + handler = NanobotDingTalkHandler(channel) + + class _FakeFileChatbotMessage: + text = None + extensions = {} + image_content = None + rich_text_content = None + sender_staff_id = "user1" + sender_id = "fallback-user" + sender_nick = "Alice" + message_type = "file" + + @staticmethod + def from_dict(_data): + return _FakeFileChatbotMessage() + + async def fake_download(download_code, filename, sender_id): + return f"/tmp/nanobot_dingtalk/{sender_id}/{filename}" + + monkeypatch.setattr(dingtalk_module, "ChatbotMessage", _FakeFileChatbotMessage) + monkeypatch.setattr(dingtalk_module, "AckMessage", SimpleNamespace(STATUS_OK="OK")) + monkeypatch.setattr(channel, "_download_dingtalk_file", fake_download) + + status, body = await handler.process( + SimpleNamespace( + data={ + "conversationType": "1", + "content": {"downloadCode": "abc123", "fileName": "report.xlsx"}, + "text": {"content": ""}, + } + ) + ) + + await asyncio.gather(*list(channel._background_tasks)) + msg = await bus.consume_inbound() + + assert (status, body) == ("OK", "OK") + assert "[File]" in msg.content + assert "/tmp/nanobot_dingtalk/user1/report.xlsx" in msg.content + + +@pytest.mark.asyncio +async def test_download_dingtalk_file(tmp_path, monkeypatch) -> None: + """Test the two-step file download flow (get URL then download content).""" + channel = DingTalkChannel( + DingTalkConfig(client_id="app", client_secret="secret", allow_from=["*"]), + MessageBus(), + ) + + # Mock access token + async def fake_get_token(): + return "test-token" + + monkeypatch.setattr(channel, "_get_access_token", fake_get_token) + + # Mock HTTP: first POST returns downloadUrl, then GET returns file bytes + file_content = b"fake file content" + channel._http = _FakeHttp(responses=[ + _FakeResponse(200, {"downloadUrl": "https://example.com/tmpfile"}), + _FakeResponse(200), + ]) + channel._http._responses[1].content = file_content + + # Redirect temp dir to tmp_path + monkeypatch.setattr("tempfile.gettempdir", lambda: str(tmp_path)) + + result = await channel._download_dingtalk_file("code123", "test.xlsx", "user1") + + assert result is not None + assert result.endswith("test.xlsx") + assert (tmp_path / "nanobot_dingtalk" / "user1" / "test.xlsx").read_bytes() == file_content + + # Verify API calls + assert channel._http.calls[0]["method"] == "POST" + assert "messageFiles/download" in channel._http.calls[0]["url"] + assert channel._http.calls[0]["json"]["downloadCode"] == "code123" + assert channel._http.calls[1]["method"] == "GET" From f9ba6197de16ccb9afe07e944c4bc79b83068190 Mon Sep 17 00:00:00 2001 From: Meng Yuhang Date: Sat, 14 Mar 2026 23:24:36 +0800 Subject: [PATCH 6/6] fix: save DingTalk downloaded files to media dir instead of /tmp --- nanobot/channels/dingtalk.py | 8 ++++---- tests/test_dingtalk_channel.py | 9 ++++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/nanobot/channels/dingtalk.py b/nanobot/channels/dingtalk.py index 8a822ff..ab12211 100644 --- a/nanobot/channels/dingtalk.py +++ b/nanobot/channels/dingtalk.py @@ -538,8 +538,8 @@ class DingTalkChannel(BaseChannel): filename: str, sender_id: str, ) -> str | None: - """Download a DingTalk file to a local temp directory, return local path.""" - import tempfile + """Download a DingTalk file to the media directory, return local path.""" + from nanobot.config.paths import get_media_dir try: token = await self._get_access_token() @@ -568,8 +568,8 @@ class DingTalkChannel(BaseChannel): logger.error("DingTalk file download failed: status={}", file_resp.status_code) return None - # Save to local temp directory - download_dir = Path(tempfile.gettempdir()) / "nanobot_dingtalk" / sender_id + # Save to media directory (accessible under workspace) + download_dir = get_media_dir("dingtalk") / sender_id download_dir.mkdir(parents=True, exist_ok=True) file_path = download_dir / filename await asyncio.to_thread(file_path.write_bytes, file_resp.content) diff --git a/tests/test_dingtalk_channel.py b/tests/test_dingtalk_channel.py index 5bcbcfa..a0b866f 100644 --- a/tests/test_dingtalk_channel.py +++ b/tests/test_dingtalk_channel.py @@ -194,14 +194,17 @@ async def test_download_dingtalk_file(tmp_path, monkeypatch) -> None: ]) channel._http._responses[1].content = file_content - # Redirect temp dir to tmp_path - monkeypatch.setattr("tempfile.gettempdir", lambda: str(tmp_path)) + # Redirect media dir to tmp_path + monkeypatch.setattr( + "nanobot.config.paths.get_media_dir", + lambda channel_name=None: tmp_path / channel_name if channel_name else tmp_path, + ) result = await channel._download_dingtalk_file("code123", "test.xlsx", "user1") assert result is not None assert result.endswith("test.xlsx") - assert (tmp_path / "nanobot_dingtalk" / "user1" / "test.xlsx").read_bytes() == file_content + assert (tmp_path / "dingtalk" / "user1" / "test.xlsx").read_bytes() == file_content # Verify API calls assert channel._http.calls[0]["method"] == "POST"