feat: implement native multimodal autonomous sensory capabilities (ReadFileTool now returns image files as base64 image_url content blocks)

This commit is contained in:
vandazia
2026-03-20 22:00:38 +08:00
parent 214bf66a29
commit 71a88da186
7 changed files with 102 additions and 14 deletions

View File

@@ -1,10 +1,13 @@
"""File system tools: read, write, edit, list."""
import base64
import difflib
import mimetypes
from pathlib import Path
from typing import Any
from nanobot.agent.tools.base import Tool
from nanobot.utils.helpers import detect_image_mime
def _resolve_path(
@@ -91,7 +94,7 @@ class ReadFileTool(_FsTool):
"required": ["path"],
}
async def execute(self, path: str, offset: int = 1, limit: int | None = None, **kwargs: Any) -> str:
async def execute(self, path: str, offset: int = 1, limit: int | None = None, **kwargs: Any) -> Any:
try:
fp = self._resolve(path)
if not fp.exists():
@@ -99,13 +102,28 @@ class ReadFileTool(_FsTool):
if not fp.is_file():
return f"Error: Not a file: {path}"
all_lines = fp.read_text(encoding="utf-8").splitlines()
raw = fp.read_bytes()
if not raw:
return f"(Empty file: {path})"
mime = detect_image_mime(raw) or mimetypes.guess_type(path)[0]
if mime and mime.startswith("image/"):
b64 = base64.b64encode(raw).decode()
return [
{"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}, "_meta": {"path": str(fp)}},
{"type": "text", "text": f"(Image file: {path})"}
]
try:
text_content = raw.decode("utf-8")
except UnicodeDecodeError:
return f"Error: Cannot read binary file {path} (MIME: {mime or 'unknown'}). Only UTF-8 text and images are supported."
all_lines = text_content.splitlines()
total = len(all_lines)
if offset < 1:
offset = 1
if total == 0:
return f"(Empty file: {path})"
if offset > total:
return f"Error: offset {offset} is beyond end of file ({total} lines)"