🐈nanobot: hello world!
This commit is contained in:
8
nanobot/agent/__init__.py
Normal file
8
nanobot/agent/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
"""Agent core module."""
|
||||
|
||||
from nanobot.agent.loop import AgentLoop
|
||||
from nanobot.agent.context import ContextBuilder
|
||||
from nanobot.agent.memory import MemoryStore
|
||||
from nanobot.agent.skills import SkillsLoader
|
||||
|
||||
__all__ = ["AgentLoop", "ContextBuilder", "MemoryStore", "SkillsLoader"]
|
||||
196
nanobot/agent/context.py
Normal file
196
nanobot/agent/context.py
Normal file
@@ -0,0 +1,196 @@
|
||||
"""Context builder for assembling agent prompts."""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from nanobot.agent.memory import MemoryStore
|
||||
from nanobot.agent.skills import SkillsLoader
|
||||
|
||||
|
||||
class ContextBuilder:
|
||||
"""
|
||||
Builds the context (system prompt + messages) for the agent.
|
||||
|
||||
Assembles bootstrap files, memory, skills, and conversation history
|
||||
into a coherent prompt for the LLM.
|
||||
"""
|
||||
|
||||
BOOTSTRAP_FILES = ["AGENTS.md", "SOUL.md", "USER.md", "TOOLS.md", "IDENTITY.md"]
|
||||
|
||||
def __init__(self, workspace: Path):
|
||||
self.workspace = workspace
|
||||
self.memory = MemoryStore(workspace)
|
||||
self.skills = SkillsLoader(workspace)
|
||||
|
||||
def build_system_prompt(self, skill_names: list[str] | None = None) -> str:
|
||||
"""
|
||||
Build the system prompt from bootstrap files, memory, and skills.
|
||||
|
||||
Args:
|
||||
skill_names: Optional list of skills to include.
|
||||
|
||||
Returns:
|
||||
Complete system prompt.
|
||||
"""
|
||||
parts = []
|
||||
|
||||
# Core identity
|
||||
parts.append(self._get_identity())
|
||||
|
||||
# Bootstrap files
|
||||
bootstrap = self._load_bootstrap_files()
|
||||
if bootstrap:
|
||||
parts.append(bootstrap)
|
||||
|
||||
# Memory context
|
||||
memory = self.memory.get_memory_context()
|
||||
if memory:
|
||||
parts.append(f"# Memory\n\n{memory}")
|
||||
|
||||
# Skills - progressive loading
|
||||
# 1. Always-loaded skills: include full content
|
||||
always_skills = self.skills.get_always_skills()
|
||||
if always_skills:
|
||||
always_content = self.skills.load_skills_for_context(always_skills)
|
||||
if always_content:
|
||||
parts.append(f"# Active Skills\n\n{always_content}")
|
||||
|
||||
# 2. Available skills: only show summary (agent uses read_file to load)
|
||||
skills_summary = self.skills.build_skills_summary()
|
||||
if skills_summary:
|
||||
parts.append(f"""# Skills
|
||||
|
||||
The following skills extend your capabilities. To use a skill, read its SKILL.md file using the read_file tool.
|
||||
Skills with available="false" need dependencies installed first - you can try installing them with apt/brew.
|
||||
|
||||
{skills_summary}""")
|
||||
|
||||
return "\n\n---\n\n".join(parts)
|
||||
|
||||
def _get_identity(self) -> str:
|
||||
"""Get the core identity section."""
|
||||
from datetime import datetime
|
||||
now = datetime.now().strftime("%Y-%m-%d %H:%M (%A)")
|
||||
workspace_path = str(self.workspace.expanduser().resolve())
|
||||
|
||||
return f"""# nanobot 🐈
|
||||
|
||||
You are nanobot, a helpful AI assistant. You have access to tools that allow you to:
|
||||
- Read, write, and edit files
|
||||
- Execute shell commands
|
||||
- Search the web and fetch web pages
|
||||
- Send messages to users on chat channels
|
||||
|
||||
## Current Time
|
||||
{now}
|
||||
|
||||
## Workspace
|
||||
Your workspace is at: {workspace_path}
|
||||
- Memory files: {workspace_path}/memory/MEMORY.md
|
||||
- Daily notes: {workspace_path}/memory/YYYY-MM-DD.md
|
||||
- Custom skills: {workspace_path}/skills/{{skill-name}}/SKILL.md
|
||||
|
||||
IMPORTANT: When responding to direct questions or conversations, reply directly with your text response.
|
||||
Only use the 'message' tool when you need to send a message to a specific chat channel (like WhatsApp).
|
||||
For normal conversation, just respond with text - do not call the message tool.
|
||||
|
||||
Always be helpful, accurate, and concise. When using tools, explain what you're doing.
|
||||
When remembering something, write to {workspace_path}/memory/MEMORY.md"""
|
||||
|
||||
def _load_bootstrap_files(self) -> str:
|
||||
"""Load all bootstrap files from workspace."""
|
||||
parts = []
|
||||
|
||||
for filename in self.BOOTSTRAP_FILES:
|
||||
file_path = self.workspace / filename
|
||||
if file_path.exists():
|
||||
content = file_path.read_text(encoding="utf-8")
|
||||
parts.append(f"## {filename}\n\n{content}")
|
||||
|
||||
return "\n\n".join(parts) if parts else ""
|
||||
|
||||
def build_messages(
|
||||
self,
|
||||
history: list[dict[str, Any]],
|
||||
current_message: str,
|
||||
skill_names: list[str] | None = None
|
||||
) -> list[dict[str, Any]]:
|
||||
"""
|
||||
Build the complete message list for an LLM call.
|
||||
|
||||
Args:
|
||||
history: Previous conversation messages.
|
||||
current_message: The new user message.
|
||||
skill_names: Optional skills to include.
|
||||
|
||||
Returns:
|
||||
List of messages including system prompt.
|
||||
"""
|
||||
messages = []
|
||||
|
||||
# System prompt
|
||||
system_prompt = self.build_system_prompt(skill_names)
|
||||
messages.append({"role": "system", "content": system_prompt})
|
||||
|
||||
# History
|
||||
messages.extend(history)
|
||||
|
||||
# Current message
|
||||
messages.append({"role": "user", "content": current_message})
|
||||
|
||||
return messages
|
||||
|
||||
def add_tool_result(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
tool_call_id: str,
|
||||
tool_name: str,
|
||||
result: str
|
||||
) -> list[dict[str, Any]]:
|
||||
"""
|
||||
Add a tool result to the message list.
|
||||
|
||||
Args:
|
||||
messages: Current message list.
|
||||
tool_call_id: ID of the tool call.
|
||||
tool_name: Name of the tool.
|
||||
result: Tool execution result.
|
||||
|
||||
Returns:
|
||||
Updated message list.
|
||||
"""
|
||||
messages.append({
|
||||
"role": "tool",
|
||||
"tool_call_id": tool_call_id,
|
||||
"name": tool_name,
|
||||
"content": result
|
||||
})
|
||||
return messages
|
||||
|
||||
def add_assistant_message(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
content: str | None,
|
||||
tool_calls: list[dict[str, Any]] | None = None
|
||||
) -> list[dict[str, Any]]:
|
||||
"""
|
||||
Add an assistant message to the message list.
|
||||
|
||||
Args:
|
||||
messages: Current message list.
|
||||
content: Message content.
|
||||
tool_calls: Optional tool calls.
|
||||
|
||||
Returns:
|
||||
Updated message list.
|
||||
"""
|
||||
msg: dict[str, Any] = {"role": "assistant"}
|
||||
|
||||
if content:
|
||||
msg["content"] = content
|
||||
|
||||
if tool_calls:
|
||||
msg["tool_calls"] = tool_calls
|
||||
|
||||
messages.append(msg)
|
||||
return messages
|
||||
213
nanobot/agent/loop.py
Normal file
213
nanobot/agent/loop.py
Normal file
@@ -0,0 +1,213 @@
|
||||
"""Agent loop: the core processing engine."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from nanobot.bus.events import InboundMessage, OutboundMessage
|
||||
from nanobot.bus.queue import MessageBus
|
||||
from nanobot.providers.base import LLMProvider
|
||||
from nanobot.agent.context import ContextBuilder
|
||||
from nanobot.agent.tools.registry import ToolRegistry
|
||||
from nanobot.agent.tools.filesystem import ReadFileTool, WriteFileTool, EditFileTool, ListDirTool
|
||||
from nanobot.agent.tools.shell import ExecTool
|
||||
from nanobot.agent.tools.web import WebSearchTool, WebFetchTool
|
||||
from nanobot.agent.tools.message import MessageTool
|
||||
from nanobot.session.manager import SessionManager
|
||||
|
||||
|
||||
class AgentLoop:
|
||||
"""
|
||||
The agent loop is the core processing engine.
|
||||
|
||||
It:
|
||||
1. Receives messages from the bus
|
||||
2. Builds context with history, memory, skills
|
||||
3. Calls the LLM
|
||||
4. Executes tool calls
|
||||
5. Sends responses back
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
bus: MessageBus,
|
||||
provider: LLMProvider,
|
||||
workspace: Path,
|
||||
model: str | None = None,
|
||||
max_iterations: int = 20,
|
||||
brave_api_key: str | None = None
|
||||
):
|
||||
self.bus = bus
|
||||
self.provider = provider
|
||||
self.workspace = workspace
|
||||
self.model = model or provider.get_default_model()
|
||||
self.max_iterations = max_iterations
|
||||
self.brave_api_key = brave_api_key
|
||||
|
||||
self.context = ContextBuilder(workspace)
|
||||
self.sessions = SessionManager(workspace)
|
||||
self.tools = ToolRegistry()
|
||||
|
||||
self._running = False
|
||||
self._register_default_tools()
|
||||
|
||||
def _register_default_tools(self) -> None:
|
||||
"""Register the default set of tools."""
|
||||
# File tools
|
||||
self.tools.register(ReadFileTool())
|
||||
self.tools.register(WriteFileTool())
|
||||
self.tools.register(EditFileTool())
|
||||
self.tools.register(ListDirTool())
|
||||
|
||||
# Shell tool
|
||||
self.tools.register(ExecTool(working_dir=str(self.workspace)))
|
||||
|
||||
# Web tools
|
||||
self.tools.register(WebSearchTool(api_key=self.brave_api_key))
|
||||
self.tools.register(WebFetchTool())
|
||||
|
||||
# Message tool
|
||||
message_tool = MessageTool(send_callback=self.bus.publish_outbound)
|
||||
self.tools.register(message_tool)
|
||||
|
||||
async def run(self) -> None:
|
||||
"""Run the agent loop, processing messages from the bus."""
|
||||
self._running = True
|
||||
logger.info("Agent loop started")
|
||||
|
||||
while self._running:
|
||||
try:
|
||||
# Wait for next message
|
||||
msg = await asyncio.wait_for(
|
||||
self.bus.consume_inbound(),
|
||||
timeout=1.0
|
||||
)
|
||||
|
||||
# Process it
|
||||
try:
|
||||
response = await self._process_message(msg)
|
||||
if response:
|
||||
await self.bus.publish_outbound(response)
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing message: {e}")
|
||||
# Send error response
|
||||
await self.bus.publish_outbound(OutboundMessage(
|
||||
channel=msg.channel,
|
||||
chat_id=msg.chat_id,
|
||||
content=f"Sorry, I encountered an error: {str(e)}"
|
||||
))
|
||||
except asyncio.TimeoutError:
|
||||
continue
|
||||
|
||||
def stop(self) -> None:
|
||||
"""Stop the agent loop."""
|
||||
self._running = False
|
||||
logger.info("Agent loop stopping")
|
||||
|
||||
async def _process_message(self, msg: InboundMessage) -> OutboundMessage | None:
|
||||
"""
|
||||
Process a single inbound message.
|
||||
|
||||
Args:
|
||||
msg: The inbound message to process.
|
||||
|
||||
Returns:
|
||||
The response message, or None if no response needed.
|
||||
"""
|
||||
logger.info(f"Processing message from {msg.channel}:{msg.sender_id}")
|
||||
|
||||
# Get or create session
|
||||
session = self.sessions.get_or_create(msg.session_key)
|
||||
|
||||
# Update message tool context
|
||||
message_tool = self.tools.get("message")
|
||||
if isinstance(message_tool, MessageTool):
|
||||
message_tool.set_context(msg.channel, msg.chat_id)
|
||||
|
||||
# Build initial messages (use get_history for LLM-formatted messages)
|
||||
messages = self.context.build_messages(
|
||||
history=session.get_history(),
|
||||
current_message=msg.content
|
||||
)
|
||||
|
||||
# Agent loop
|
||||
iteration = 0
|
||||
final_content = None
|
||||
|
||||
while iteration < self.max_iterations:
|
||||
iteration += 1
|
||||
|
||||
# Call LLM
|
||||
response = await self.provider.chat(
|
||||
messages=messages,
|
||||
tools=self.tools.get_definitions(),
|
||||
model=self.model
|
||||
)
|
||||
|
||||
# Handle tool calls
|
||||
if response.has_tool_calls:
|
||||
# Add assistant message with tool calls
|
||||
tool_call_dicts = [
|
||||
{
|
||||
"id": tc.id,
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": tc.name,
|
||||
"arguments": json.dumps(tc.arguments) # Must be JSON string
|
||||
}
|
||||
}
|
||||
for tc in response.tool_calls
|
||||
]
|
||||
messages = self.context.add_assistant_message(
|
||||
messages, response.content, tool_call_dicts
|
||||
)
|
||||
|
||||
# Execute tools
|
||||
for tool_call in response.tool_calls:
|
||||
logger.debug(f"Executing tool: {tool_call.name}")
|
||||
result = await self.tools.execute(tool_call.name, tool_call.arguments)
|
||||
messages = self.context.add_tool_result(
|
||||
messages, tool_call.id, tool_call.name, result
|
||||
)
|
||||
else:
|
||||
# No tool calls, we're done
|
||||
final_content = response.content
|
||||
break
|
||||
|
||||
if final_content is None:
|
||||
final_content = "I've completed processing but have no response to give."
|
||||
|
||||
# Save to session
|
||||
session.add_message("user", msg.content)
|
||||
session.add_message("assistant", final_content)
|
||||
self.sessions.save(session)
|
||||
|
||||
return OutboundMessage(
|
||||
channel=msg.channel,
|
||||
chat_id=msg.chat_id,
|
||||
content=final_content
|
||||
)
|
||||
|
||||
async def process_direct(self, content: str, session_key: str = "cli:direct") -> str:
|
||||
"""
|
||||
Process a message directly (for CLI usage).
|
||||
|
||||
Args:
|
||||
content: The message content.
|
||||
session_key: Session identifier.
|
||||
|
||||
Returns:
|
||||
The agent's response.
|
||||
"""
|
||||
msg = InboundMessage(
|
||||
channel="cli",
|
||||
sender_id="user",
|
||||
chat_id="direct",
|
||||
content=content
|
||||
)
|
||||
|
||||
response = await self._process_message(msg)
|
||||
return response.content if response else ""
|
||||
110
nanobot/agent/memory.py
Normal file
110
nanobot/agent/memory.py
Normal file
@@ -0,0 +1,110 @@
|
||||
"""Memory system for persistent agent memory."""
|
||||
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
from nanobot.utils.helpers import ensure_dir, today_date
|
||||
|
||||
|
||||
class MemoryStore:
|
||||
"""
|
||||
Memory system for the agent.
|
||||
|
||||
Supports daily notes (memory/YYYY-MM-DD.md) and long-term memory (MEMORY.md).
|
||||
Compatible with clawbot memory format.
|
||||
"""
|
||||
|
||||
def __init__(self, workspace: Path):
|
||||
self.workspace = workspace
|
||||
self.memory_dir = ensure_dir(workspace / "memory")
|
||||
self.memory_file = self.memory_dir / "MEMORY.md"
|
||||
|
||||
def get_today_file(self) -> Path:
|
||||
"""Get path to today's memory file."""
|
||||
return self.memory_dir / f"{today_date()}.md"
|
||||
|
||||
def read_today(self) -> str:
|
||||
"""Read today's memory notes."""
|
||||
today_file = self.get_today_file()
|
||||
if today_file.exists():
|
||||
return today_file.read_text(encoding="utf-8")
|
||||
return ""
|
||||
|
||||
def append_today(self, content: str) -> None:
|
||||
"""Append content to today's memory notes."""
|
||||
today_file = self.get_today_file()
|
||||
|
||||
if today_file.exists():
|
||||
existing = today_file.read_text(encoding="utf-8")
|
||||
content = existing + "\n" + content
|
||||
else:
|
||||
# Add header for new day
|
||||
header = f"# {today_date()}\n\n"
|
||||
content = header + content
|
||||
|
||||
today_file.write_text(content, encoding="utf-8")
|
||||
|
||||
def read_long_term(self) -> str:
|
||||
"""Read long-term memory (MEMORY.md)."""
|
||||
if self.memory_file.exists():
|
||||
return self.memory_file.read_text(encoding="utf-8")
|
||||
return ""
|
||||
|
||||
def write_long_term(self, content: str) -> None:
|
||||
"""Write to long-term memory (MEMORY.md)."""
|
||||
self.memory_file.write_text(content, encoding="utf-8")
|
||||
|
||||
def get_recent_memories(self, days: int = 7) -> str:
|
||||
"""
|
||||
Get memories from the last N days.
|
||||
|
||||
Args:
|
||||
days: Number of days to look back.
|
||||
|
||||
Returns:
|
||||
Combined memory content.
|
||||
"""
|
||||
from datetime import timedelta
|
||||
|
||||
memories = []
|
||||
today = datetime.now().date()
|
||||
|
||||
for i in range(days):
|
||||
date = today - timedelta(days=i)
|
||||
date_str = date.strftime("%Y-%m-%d")
|
||||
file_path = self.memory_dir / f"{date_str}.md"
|
||||
|
||||
if file_path.exists():
|
||||
content = file_path.read_text(encoding="utf-8")
|
||||
memories.append(content)
|
||||
|
||||
return "\n\n---\n\n".join(memories)
|
||||
|
||||
def list_memory_files(self) -> list[Path]:
|
||||
"""List all memory files sorted by date (newest first)."""
|
||||
if not self.memory_dir.exists():
|
||||
return []
|
||||
|
||||
files = list(self.memory_dir.glob("????-??-??.md"))
|
||||
return sorted(files, reverse=True)
|
||||
|
||||
def get_memory_context(self) -> str:
|
||||
"""
|
||||
Get memory context for the agent.
|
||||
|
||||
Returns:
|
||||
Formatted memory context including long-term and recent memories.
|
||||
"""
|
||||
parts = []
|
||||
|
||||
# Long-term memory
|
||||
long_term = self.read_long_term()
|
||||
if long_term:
|
||||
parts.append("## Long-term Memory\n" + long_term)
|
||||
|
||||
# Today's notes
|
||||
today = self.read_today()
|
||||
if today:
|
||||
parts.append("## Today's Notes\n" + today)
|
||||
|
||||
return "\n\n".join(parts) if parts else ""
|
||||
228
nanobot/agent/skills.py
Normal file
228
nanobot/agent/skills.py
Normal file
@@ -0,0 +1,228 @@
|
||||
"""Skills loader for agent capabilities."""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
# Default builtin skills directory (relative to this file)
|
||||
BUILTIN_SKILLS_DIR = Path(__file__).parent.parent / "skills"
|
||||
|
||||
|
||||
class SkillsLoader:
|
||||
"""
|
||||
Loader for agent skills.
|
||||
|
||||
Skills are markdown files (SKILL.md) that teach the agent how to use
|
||||
specific tools or perform certain tasks.
|
||||
"""
|
||||
|
||||
def __init__(self, workspace: Path, builtin_skills_dir: Path | None = None):
|
||||
self.workspace = workspace
|
||||
self.workspace_skills = workspace / "skills"
|
||||
self.builtin_skills = builtin_skills_dir or BUILTIN_SKILLS_DIR
|
||||
|
||||
def list_skills(self, filter_unavailable: bool = True) -> list[dict[str, str]]:
|
||||
"""
|
||||
List all available skills.
|
||||
|
||||
Args:
|
||||
filter_unavailable: If True, filter out skills with unmet requirements.
|
||||
|
||||
Returns:
|
||||
List of skill info dicts with 'name', 'path', 'source'.
|
||||
"""
|
||||
skills = []
|
||||
|
||||
# Workspace skills (highest priority)
|
||||
if self.workspace_skills.exists():
|
||||
for skill_dir in self.workspace_skills.iterdir():
|
||||
if skill_dir.is_dir():
|
||||
skill_file = skill_dir / "SKILL.md"
|
||||
if skill_file.exists():
|
||||
skills.append({"name": skill_dir.name, "path": str(skill_file), "source": "workspace"})
|
||||
|
||||
# Built-in skills
|
||||
if self.builtin_skills and self.builtin_skills.exists():
|
||||
for skill_dir in self.builtin_skills.iterdir():
|
||||
if skill_dir.is_dir():
|
||||
skill_file = skill_dir / "SKILL.md"
|
||||
if skill_file.exists() and not any(s["name"] == skill_dir.name for s in skills):
|
||||
skills.append({"name": skill_dir.name, "path": str(skill_file), "source": "builtin"})
|
||||
|
||||
# Filter by requirements
|
||||
if filter_unavailable:
|
||||
return [s for s in skills if self._check_requirements(self._get_ocmeta(s["name"]))]
|
||||
return skills
|
||||
|
||||
def load_skill(self, name: str) -> str | None:
|
||||
"""
|
||||
Load a skill by name.
|
||||
|
||||
Args:
|
||||
name: Skill name (directory name).
|
||||
|
||||
Returns:
|
||||
Skill content or None if not found.
|
||||
"""
|
||||
# Check workspace first
|
||||
workspace_skill = self.workspace_skills / name / "SKILL.md"
|
||||
if workspace_skill.exists():
|
||||
return workspace_skill.read_text(encoding="utf-8")
|
||||
|
||||
# Check built-in
|
||||
if self.builtin_skills:
|
||||
builtin_skill = self.builtin_skills / name / "SKILL.md"
|
||||
if builtin_skill.exists():
|
||||
return builtin_skill.read_text(encoding="utf-8")
|
||||
|
||||
return None
|
||||
|
||||
def load_skills_for_context(self, skill_names: list[str]) -> str:
|
||||
"""
|
||||
Load specific skills for inclusion in agent context.
|
||||
|
||||
Args:
|
||||
skill_names: List of skill names to load.
|
||||
|
||||
Returns:
|
||||
Formatted skills content.
|
||||
"""
|
||||
parts = []
|
||||
for name in skill_names:
|
||||
content = self.load_skill(name)
|
||||
if content:
|
||||
content = self._strip_frontmatter(content)
|
||||
parts.append(f"### Skill: {name}\n\n{content}")
|
||||
|
||||
return "\n\n---\n\n".join(parts) if parts else ""
|
||||
|
||||
def build_skills_summary(self) -> str:
|
||||
"""
|
||||
Build a summary of all skills (name, description, path, availability).
|
||||
|
||||
This is used for progressive loading - the agent can read the full
|
||||
skill content using read_file when needed.
|
||||
|
||||
Returns:
|
||||
XML-formatted skills summary.
|
||||
"""
|
||||
all_skills = self.list_skills(filter_unavailable=False)
|
||||
if not all_skills:
|
||||
return ""
|
||||
|
||||
def escape_xml(s: str) -> str:
|
||||
return s.replace("&", "&").replace("<", "<").replace(">", ">")
|
||||
|
||||
lines = ["<skills>"]
|
||||
for s in all_skills:
|
||||
name = escape_xml(s["name"])
|
||||
path = s["path"]
|
||||
desc = escape_xml(self._get_skill_description(s["name"]))
|
||||
ocmeta = self._get_ocmeta(s["name"])
|
||||
available = self._check_requirements(ocmeta)
|
||||
|
||||
lines.append(f" <skill available=\"{str(available).lower()}\">")
|
||||
lines.append(f" <name>{name}</name>")
|
||||
lines.append(f" <description>{desc}</description>")
|
||||
lines.append(f" <location>{path}</location>")
|
||||
|
||||
# Show missing requirements for unavailable skills
|
||||
if not available:
|
||||
missing = self._get_missing_requirements(ocmeta)
|
||||
if missing:
|
||||
lines.append(f" <requires>{escape_xml(missing)}</requires>")
|
||||
|
||||
lines.append(f" </skill>")
|
||||
lines.append("</skills>")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
def _get_missing_requirements(self, ocmeta: dict) -> str:
|
||||
"""Get a description of missing requirements."""
|
||||
missing = []
|
||||
requires = ocmeta.get("requires", {})
|
||||
for b in requires.get("bins", []):
|
||||
if not shutil.which(b):
|
||||
missing.append(f"CLI: {b}")
|
||||
for env in requires.get("env", []):
|
||||
if not os.environ.get(env):
|
||||
missing.append(f"ENV: {env}")
|
||||
return ", ".join(missing)
|
||||
|
||||
def _get_skill_description(self, name: str) -> str:
|
||||
"""Get the description of a skill from its frontmatter."""
|
||||
meta = self.get_skill_metadata(name)
|
||||
if meta and meta.get("description"):
|
||||
return meta["description"]
|
||||
return name # Fallback to skill name
|
||||
|
||||
def _strip_frontmatter(self, content: str) -> str:
|
||||
"""Remove YAML frontmatter from markdown content."""
|
||||
if content.startswith("---"):
|
||||
match = re.match(r"^---\n.*?\n---\n", content, re.DOTALL)
|
||||
if match:
|
||||
return content[match.end():].strip()
|
||||
return content
|
||||
|
||||
def _parse_openclaw_metadata(self, raw: str) -> dict:
|
||||
"""Parse openclaw metadata JSON from frontmatter."""
|
||||
try:
|
||||
data = json.loads(raw)
|
||||
return data.get("openclaw", {}) if isinstance(data, dict) else {}
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
return {}
|
||||
|
||||
def _check_requirements(self, ocmeta: dict) -> bool:
|
||||
"""Check if skill requirements are met (bins, env vars)."""
|
||||
requires = ocmeta.get("requires", {})
|
||||
for b in requires.get("bins", []):
|
||||
if not shutil.which(b):
|
||||
return False
|
||||
for env in requires.get("env", []):
|
||||
if not os.environ.get(env):
|
||||
return False
|
||||
return True
|
||||
|
||||
def _get_ocmeta(self, name: str) -> dict:
|
||||
"""Get openclaw metadata for a skill (cached in frontmatter)."""
|
||||
meta = self.get_skill_metadata(name) or {}
|
||||
return self._parse_openclaw_metadata(meta.get("metadata", ""))
|
||||
|
||||
def get_always_skills(self) -> list[str]:
|
||||
"""Get skills marked as always=true that meet requirements."""
|
||||
result = []
|
||||
for s in self.list_skills(filter_unavailable=True):
|
||||
meta = self.get_skill_metadata(s["name"]) or {}
|
||||
ocmeta = self._parse_openclaw_metadata(meta.get("metadata", ""))
|
||||
if ocmeta.get("always") or meta.get("always"):
|
||||
result.append(s["name"])
|
||||
return result
|
||||
|
||||
def get_skill_metadata(self, name: str) -> dict | None:
|
||||
"""
|
||||
Get metadata from a skill's frontmatter.
|
||||
|
||||
Args:
|
||||
name: Skill name.
|
||||
|
||||
Returns:
|
||||
Metadata dict or None.
|
||||
"""
|
||||
content = self.load_skill(name)
|
||||
if not content:
|
||||
return None
|
||||
|
||||
if content.startswith("---"):
|
||||
match = re.match(r"^---\n(.*?)\n---", content, re.DOTALL)
|
||||
if match:
|
||||
# Simple YAML parsing
|
||||
metadata = {}
|
||||
for line in match.group(1).split("\n"):
|
||||
if ":" in line:
|
||||
key, value = line.split(":", 1)
|
||||
metadata[key.strip()] = value.strip().strip('"\'')
|
||||
return metadata
|
||||
|
||||
return None
|
||||
6
nanobot/agent/tools/__init__.py
Normal file
6
nanobot/agent/tools/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""Agent tools module."""
|
||||
|
||||
from nanobot.agent.tools.base import Tool
|
||||
from nanobot.agent.tools.registry import ToolRegistry
|
||||
|
||||
__all__ = ["Tool", "ToolRegistry"]
|
||||
55
nanobot/agent/tools/base.py
Normal file
55
nanobot/agent/tools/base.py
Normal file
@@ -0,0 +1,55 @@
|
||||
"""Base class for agent tools."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any
|
||||
|
||||
|
||||
class Tool(ABC):
|
||||
"""
|
||||
Abstract base class for agent tools.
|
||||
|
||||
Tools are capabilities that the agent can use to interact with
|
||||
the environment, such as reading files, executing commands, etc.
|
||||
"""
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def name(self) -> str:
|
||||
"""Tool name used in function calls."""
|
||||
pass
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def description(self) -> str:
|
||||
"""Description of what the tool does."""
|
||||
pass
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def parameters(self) -> dict[str, Any]:
|
||||
"""JSON Schema for tool parameters."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def execute(self, **kwargs: Any) -> str:
|
||||
"""
|
||||
Execute the tool with given parameters.
|
||||
|
||||
Args:
|
||||
**kwargs: Tool-specific parameters.
|
||||
|
||||
Returns:
|
||||
String result of the tool execution.
|
||||
"""
|
||||
pass
|
||||
|
||||
def to_schema(self) -> dict[str, Any]:
|
||||
"""Convert tool to OpenAI function schema format."""
|
||||
return {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": self.name,
|
||||
"description": self.description,
|
||||
"parameters": self.parameters,
|
||||
}
|
||||
}
|
||||
191
nanobot/agent/tools/filesystem.py
Normal file
191
nanobot/agent/tools/filesystem.py
Normal file
@@ -0,0 +1,191 @@
|
||||
"""File system tools: read, write, edit."""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from nanobot.agent.tools.base import Tool
|
||||
|
||||
|
||||
class ReadFileTool(Tool):
|
||||
"""Tool to read file contents."""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "read_file"
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return "Read the contents of a file at the given path."
|
||||
|
||||
@property
|
||||
def parameters(self) -> dict[str, Any]:
|
||||
return {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {
|
||||
"type": "string",
|
||||
"description": "The file path to read"
|
||||
}
|
||||
},
|
||||
"required": ["path"]
|
||||
}
|
||||
|
||||
async def execute(self, path: str, **kwargs: Any) -> str:
|
||||
try:
|
||||
file_path = Path(path).expanduser()
|
||||
if not file_path.exists():
|
||||
return f"Error: File not found: {path}"
|
||||
if not file_path.is_file():
|
||||
return f"Error: Not a file: {path}"
|
||||
|
||||
content = file_path.read_text(encoding="utf-8")
|
||||
return content
|
||||
except PermissionError:
|
||||
return f"Error: Permission denied: {path}"
|
||||
except Exception as e:
|
||||
return f"Error reading file: {str(e)}"
|
||||
|
||||
|
||||
class WriteFileTool(Tool):
|
||||
"""Tool to write content to a file."""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "write_file"
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return "Write content to a file at the given path. Creates parent directories if needed."
|
||||
|
||||
@property
|
||||
def parameters(self) -> dict[str, Any]:
|
||||
return {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {
|
||||
"type": "string",
|
||||
"description": "The file path to write to"
|
||||
},
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "The content to write"
|
||||
}
|
||||
},
|
||||
"required": ["path", "content"]
|
||||
}
|
||||
|
||||
async def execute(self, path: str, content: str, **kwargs: Any) -> str:
|
||||
try:
|
||||
file_path = Path(path).expanduser()
|
||||
file_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
file_path.write_text(content, encoding="utf-8")
|
||||
return f"Successfully wrote {len(content)} bytes to {path}"
|
||||
except PermissionError:
|
||||
return f"Error: Permission denied: {path}"
|
||||
except Exception as e:
|
||||
return f"Error writing file: {str(e)}"
|
||||
|
||||
|
||||
class EditFileTool(Tool):
|
||||
"""Tool to edit a file by replacing text."""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "edit_file"
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return "Edit a file by replacing old_text with new_text. The old_text must exist exactly in the file."
|
||||
|
||||
@property
|
||||
def parameters(self) -> dict[str, Any]:
|
||||
return {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {
|
||||
"type": "string",
|
||||
"description": "The file path to edit"
|
||||
},
|
||||
"old_text": {
|
||||
"type": "string",
|
||||
"description": "The exact text to find and replace"
|
||||
},
|
||||
"new_text": {
|
||||
"type": "string",
|
||||
"description": "The text to replace with"
|
||||
}
|
||||
},
|
||||
"required": ["path", "old_text", "new_text"]
|
||||
}
|
||||
|
||||
async def execute(self, path: str, old_text: str, new_text: str, **kwargs: Any) -> str:
|
||||
try:
|
||||
file_path = Path(path).expanduser()
|
||||
if not file_path.exists():
|
||||
return f"Error: File not found: {path}"
|
||||
|
||||
content = file_path.read_text(encoding="utf-8")
|
||||
|
||||
if old_text not in content:
|
||||
return f"Error: old_text not found in file. Make sure it matches exactly."
|
||||
|
||||
# Count occurrences
|
||||
count = content.count(old_text)
|
||||
if count > 1:
|
||||
return f"Warning: old_text appears {count} times. Please provide more context to make it unique."
|
||||
|
||||
new_content = content.replace(old_text, new_text, 1)
|
||||
file_path.write_text(new_content, encoding="utf-8")
|
||||
|
||||
return f"Successfully edited {path}"
|
||||
except PermissionError:
|
||||
return f"Error: Permission denied: {path}"
|
||||
except Exception as e:
|
||||
return f"Error editing file: {str(e)}"
|
||||
|
||||
|
||||
class ListDirTool(Tool):
|
||||
"""Tool to list directory contents."""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "list_dir"
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return "List the contents of a directory."
|
||||
|
||||
@property
|
||||
def parameters(self) -> dict[str, Any]:
|
||||
return {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {
|
||||
"type": "string",
|
||||
"description": "The directory path to list"
|
||||
}
|
||||
},
|
||||
"required": ["path"]
|
||||
}
|
||||
|
||||
async def execute(self, path: str, **kwargs: Any) -> str:
|
||||
try:
|
||||
dir_path = Path(path).expanduser()
|
||||
if not dir_path.exists():
|
||||
return f"Error: Directory not found: {path}"
|
||||
if not dir_path.is_dir():
|
||||
return f"Error: Not a directory: {path}"
|
||||
|
||||
items = []
|
||||
for item in sorted(dir_path.iterdir()):
|
||||
prefix = "📁 " if item.is_dir() else "📄 "
|
||||
items.append(f"{prefix}{item.name}")
|
||||
|
||||
if not items:
|
||||
return f"Directory {path} is empty"
|
||||
|
||||
return "\n".join(items)
|
||||
except PermissionError:
|
||||
return f"Error: Permission denied: {path}"
|
||||
except Exception as e:
|
||||
return f"Error listing directory: {str(e)}"
|
||||
86
nanobot/agent/tools/message.py
Normal file
86
nanobot/agent/tools/message.py
Normal file
@@ -0,0 +1,86 @@
|
||||
"""Message tool for sending messages to users."""
|
||||
|
||||
from typing import Any, Callable, Awaitable
|
||||
|
||||
from nanobot.agent.tools.base import Tool
|
||||
from nanobot.bus.events import OutboundMessage
|
||||
|
||||
|
||||
class MessageTool(Tool):
|
||||
"""Tool to send messages to users on chat channels."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
send_callback: Callable[[OutboundMessage], Awaitable[None]] | None = None,
|
||||
default_channel: str = "",
|
||||
default_chat_id: str = ""
|
||||
):
|
||||
self._send_callback = send_callback
|
||||
self._default_channel = default_channel
|
||||
self._default_chat_id = default_chat_id
|
||||
|
||||
def set_context(self, channel: str, chat_id: str) -> None:
|
||||
"""Set the current message context."""
|
||||
self._default_channel = channel
|
||||
self._default_chat_id = chat_id
|
||||
|
||||
def set_send_callback(self, callback: Callable[[OutboundMessage], Awaitable[None]]) -> None:
|
||||
"""Set the callback for sending messages."""
|
||||
self._send_callback = callback
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "message"
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return "Send a message to the user. Use this when you want to communicate something."
|
||||
|
||||
@property
|
||||
def parameters(self) -> dict[str, Any]:
|
||||
return {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "The message content to send"
|
||||
},
|
||||
"channel": {
|
||||
"type": "string",
|
||||
"description": "Optional: target channel (telegram, discord, etc.)"
|
||||
},
|
||||
"chat_id": {
|
||||
"type": "string",
|
||||
"description": "Optional: target chat/user ID"
|
||||
}
|
||||
},
|
||||
"required": ["content"]
|
||||
}
|
||||
|
||||
async def execute(
|
||||
self,
|
||||
content: str,
|
||||
channel: str | None = None,
|
||||
chat_id: str | None = None,
|
||||
**kwargs: Any
|
||||
) -> str:
|
||||
channel = channel or self._default_channel
|
||||
chat_id = chat_id or self._default_chat_id
|
||||
|
||||
if not channel or not chat_id:
|
||||
return "Error: No target channel/chat specified"
|
||||
|
||||
if not self._send_callback:
|
||||
return "Error: Message sending not configured"
|
||||
|
||||
msg = OutboundMessage(
|
||||
channel=channel,
|
||||
chat_id=chat_id,
|
||||
content=content
|
||||
)
|
||||
|
||||
try:
|
||||
await self._send_callback(msg)
|
||||
return f"Message sent to {channel}:{chat_id}"
|
||||
except Exception as e:
|
||||
return f"Error sending message: {str(e)}"
|
||||
70
nanobot/agent/tools/registry.py
Normal file
70
nanobot/agent/tools/registry.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""Tool registry for dynamic tool management."""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from nanobot.agent.tools.base import Tool
|
||||
|
||||
|
||||
class ToolRegistry:
|
||||
"""
|
||||
Registry for agent tools.
|
||||
|
||||
Allows dynamic registration and execution of tools.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self._tools: dict[str, Tool] = {}
|
||||
|
||||
def register(self, tool: Tool) -> None:
|
||||
"""Register a tool."""
|
||||
self._tools[tool.name] = tool
|
||||
|
||||
def unregister(self, name: str) -> None:
|
||||
"""Unregister a tool by name."""
|
||||
self._tools.pop(name, None)
|
||||
|
||||
def get(self, name: str) -> Tool | None:
|
||||
"""Get a tool by name."""
|
||||
return self._tools.get(name)
|
||||
|
||||
def has(self, name: str) -> bool:
|
||||
"""Check if a tool is registered."""
|
||||
return name in self._tools
|
||||
|
||||
def get_definitions(self) -> list[dict[str, Any]]:
|
||||
"""Get all tool definitions in OpenAI format."""
|
||||
return [tool.to_schema() for tool in self._tools.values()]
|
||||
|
||||
async def execute(self, name: str, params: dict[str, Any]) -> str:
|
||||
"""
|
||||
Execute a tool by name with given parameters.
|
||||
|
||||
Args:
|
||||
name: Tool name.
|
||||
params: Tool parameters.
|
||||
|
||||
Returns:
|
||||
Tool execution result as string.
|
||||
|
||||
Raises:
|
||||
KeyError: If tool not found.
|
||||
"""
|
||||
tool = self._tools.get(name)
|
||||
if not tool:
|
||||
return f"Error: Tool '{name}' not found"
|
||||
|
||||
try:
|
||||
return await tool.execute(**params)
|
||||
except Exception as e:
|
||||
return f"Error executing {name}: {str(e)}"
|
||||
|
||||
@property
|
||||
def tool_names(self) -> list[str]:
|
||||
"""Get list of registered tool names."""
|
||||
return list(self._tools.keys())
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self._tools)
|
||||
|
||||
def __contains__(self, name: str) -> bool:
|
||||
return name in self._tools
|
||||
85
nanobot/agent/tools/shell.py
Normal file
85
nanobot/agent/tools/shell.py
Normal file
@@ -0,0 +1,85 @@
|
||||
"""Shell execution tool."""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
from nanobot.agent.tools.base import Tool
|
||||
|
||||
|
||||
class ExecTool(Tool):
|
||||
"""Tool to execute shell commands."""
|
||||
|
||||
def __init__(self, timeout: int = 60, working_dir: str | None = None):
|
||||
self.timeout = timeout
|
||||
self.working_dir = working_dir
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "exec"
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return "Execute a shell command and return its output. Use with caution."
|
||||
|
||||
@property
|
||||
def parameters(self) -> dict[str, Any]:
|
||||
return {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"command": {
|
||||
"type": "string",
|
||||
"description": "The shell command to execute"
|
||||
},
|
||||
"working_dir": {
|
||||
"type": "string",
|
||||
"description": "Optional working directory for the command"
|
||||
}
|
||||
},
|
||||
"required": ["command"]
|
||||
}
|
||||
|
||||
async def execute(self, command: str, working_dir: str | None = None, **kwargs: Any) -> str:
|
||||
cwd = working_dir or self.working_dir or os.getcwd()
|
||||
|
||||
try:
|
||||
process = await asyncio.create_subprocess_shell(
|
||||
command,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
cwd=cwd,
|
||||
)
|
||||
|
||||
try:
|
||||
stdout, stderr = await asyncio.wait_for(
|
||||
process.communicate(),
|
||||
timeout=self.timeout
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
process.kill()
|
||||
return f"Error: Command timed out after {self.timeout} seconds"
|
||||
|
||||
output_parts = []
|
||||
|
||||
if stdout:
|
||||
output_parts.append(stdout.decode("utf-8", errors="replace"))
|
||||
|
||||
if stderr:
|
||||
stderr_text = stderr.decode("utf-8", errors="replace")
|
||||
if stderr_text.strip():
|
||||
output_parts.append(f"STDERR:\n{stderr_text}")
|
||||
|
||||
if process.returncode != 0:
|
||||
output_parts.append(f"\nExit code: {process.returncode}")
|
||||
|
||||
result = "\n".join(output_parts) if output_parts else "(no output)"
|
||||
|
||||
# Truncate very long output
|
||||
max_len = 10000
|
||||
if len(result) > max_len:
|
||||
result = result[:max_len] + f"\n... (truncated, {len(result) - max_len} more chars)"
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
return f"Error executing command: {str(e)}"
|
||||
139
nanobot/agent/tools/web.py
Normal file
139
nanobot/agent/tools/web.py
Normal file
@@ -0,0 +1,139 @@
|
||||
"""Web tools: web_search and web_fetch."""
|
||||
|
||||
import html
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from nanobot.agent.tools.base import Tool
|
||||
|
||||
# Shared constants
|
||||
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_2) AppleWebKit/537.36"
|
||||
|
||||
|
||||
def _strip_tags(text: str) -> str:
|
||||
"""Remove HTML tags and decode entities."""
|
||||
text = re.sub(r'<script[\s\S]*?</script>', '', text, flags=re.I)
|
||||
text = re.sub(r'<style[\s\S]*?</style>', '', text, flags=re.I)
|
||||
text = re.sub(r'<[^>]+>', '', text)
|
||||
return html.unescape(text).strip()
|
||||
|
||||
|
||||
def _normalize(text: str) -> str:
|
||||
"""Normalize whitespace."""
|
||||
text = re.sub(r'[ \t]+', ' ', text)
|
||||
return re.sub(r'\n{3,}', '\n\n', text).strip()
|
||||
|
||||
|
||||
class WebSearchTool(Tool):
|
||||
"""Search the web using Brave Search API."""
|
||||
|
||||
name = "web_search"
|
||||
description = "Search the web. Returns titles, URLs, and snippets."
|
||||
parameters = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {"type": "string", "description": "Search query"},
|
||||
"count": {"type": "integer", "description": "Results (1-10)", "minimum": 1, "maximum": 10}
|
||||
},
|
||||
"required": ["query"]
|
||||
}
|
||||
|
||||
def __init__(self, api_key: str | None = None, max_results: int = 5):
|
||||
self.api_key = api_key or os.environ.get("BRAVE_API_KEY", "")
|
||||
self.max_results = max_results
|
||||
|
||||
async def execute(self, query: str, count: int | None = None, **kwargs: Any) -> str:
|
||||
if not self.api_key:
|
||||
return "Error: BRAVE_API_KEY not configured"
|
||||
|
||||
try:
|
||||
n = min(max(count or self.max_results, 1), 10)
|
||||
async with httpx.AsyncClient() as client:
|
||||
r = await client.get(
|
||||
"https://api.search.brave.com/res/v1/web/search",
|
||||
params={"q": query, "count": n},
|
||||
headers={"Accept": "application/json", "X-Subscription-Token": self.api_key},
|
||||
timeout=10.0
|
||||
)
|
||||
r.raise_for_status()
|
||||
|
||||
results = r.json().get("web", {}).get("results", [])
|
||||
if not results:
|
||||
return f"No results for: {query}"
|
||||
|
||||
lines = [f"Results for: {query}\n"]
|
||||
for i, item in enumerate(results[:n], 1):
|
||||
lines.append(f"{i}. {item.get('title', '')}\n {item.get('url', '')}")
|
||||
if desc := item.get("description"):
|
||||
lines.append(f" {desc}")
|
||||
return "\n".join(lines)
|
||||
except Exception as e:
|
||||
return f"Error: {e}"
|
||||
|
||||
|
||||
class WebFetchTool(Tool):
|
||||
"""Fetch and extract content from a URL using Readability."""
|
||||
|
||||
name = "web_fetch"
|
||||
description = "Fetch URL and extract readable content (HTML → markdown/text)."
|
||||
parameters = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"url": {"type": "string", "description": "URL to fetch"},
|
||||
"extractMode": {"type": "string", "enum": ["markdown", "text"], "default": "markdown"},
|
||||
"maxChars": {"type": "integer", "minimum": 100}
|
||||
},
|
||||
"required": ["url"]
|
||||
}
|
||||
|
||||
def __init__(self, max_chars: int = 50000):
|
||||
self.max_chars = max_chars
|
||||
|
||||
async def execute(self, url: str, extractMode: str = "markdown", maxChars: int | None = None, **kwargs: Any) -> str:
|
||||
from readability import Document
|
||||
|
||||
max_chars = maxChars or self.max_chars
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient() as client:
|
||||
r = await client.get(url, headers={"User-Agent": USER_AGENT}, follow_redirects=True, timeout=30.0)
|
||||
r.raise_for_status()
|
||||
|
||||
ctype = r.headers.get("content-type", "")
|
||||
|
||||
# JSON
|
||||
if "application/json" in ctype:
|
||||
text, extractor = json.dumps(r.json(), indent=2), "json"
|
||||
# HTML
|
||||
elif "text/html" in ctype or r.text[:256].lower().startswith(("<!doctype", "<html")):
|
||||
doc = Document(r.text)
|
||||
content = self._to_markdown(doc.summary()) if extractMode == "markdown" else _strip_tags(doc.summary())
|
||||
text = f"# {doc.title()}\n\n{content}" if doc.title() else content
|
||||
extractor = "readability"
|
||||
else:
|
||||
text, extractor = r.text, "raw"
|
||||
|
||||
truncated = len(text) > max_chars
|
||||
if truncated:
|
||||
text = text[:max_chars]
|
||||
|
||||
return json.dumps({"url": url, "finalUrl": str(r.url), "status": r.status_code,
|
||||
"extractor": extractor, "truncated": truncated, "length": len(text), "text": text})
|
||||
except Exception as e:
|
||||
return json.dumps({"error": str(e), "url": url})
|
||||
|
||||
def _to_markdown(self, html: str) -> str:
|
||||
"""Convert HTML to markdown."""
|
||||
# Convert links, headings, lists before stripping tags
|
||||
text = re.sub(r'<a\s+[^>]*href=["\']([^"\']+)["\'][^>]*>([\s\S]*?)</a>',
|
||||
lambda m: f'[{_strip_tags(m[2])}]({m[1]})', html, flags=re.I)
|
||||
text = re.sub(r'<h([1-6])[^>]*>([\s\S]*?)</h\1>',
|
||||
lambda m: f'\n{"#" * int(m[1])} {_strip_tags(m[2])}\n', text, flags=re.I)
|
||||
text = re.sub(r'<li[^>]*>([\s\S]*?)</li>', lambda m: f'\n- {_strip_tags(m[1])}', text, flags=re.I)
|
||||
text = re.sub(r'</(p|div|section|article)>', '\n\n', text, flags=re.I)
|
||||
text = re.sub(r'<(br|hr)\s*/?>', '\n', text, flags=re.I)
|
||||
return _normalize(_strip_tags(text))
|
||||
Reference in New Issue
Block a user