From 71d90de31ba735cbb6f06c3a60274f3496b5fe6a Mon Sep 17 00:00:00 2001 From: Chris Alexander <2815297+chris-alexander@users.noreply.github.com> Date: Tue, 10 Feb 2026 13:06:56 +0000 Subject: [PATCH 01/11] feat(web): configurable web search providers with fallback Add multi-provider web search support: Brave (default), Tavily, DuckDuckGo, and SearXNG. Falls back to DuckDuckGo when provider credentials are missing. Providers are dispatched via a map with register_provider() for plugin extensibility. - WebSearchConfig with env-var resolution and from_legacy() bridge - Config migration for legacy flat keys (tavilyApiKey, searxngBaseUrl) - SearXNG URL validation, explicit error for unknown providers - ddgs package (replaces deprecated duckduckgo-search) - 16 tests covering all providers, fallback, env resolution, edge cases - docs/web-search.md with full config reference Co-Authored-By: Claude Opus 4.6 --- README.md | 17 +- docs/web-search.md | 95 ++++++++++ nanobot/agent/loop.py | 181 +++++++++++++------ nanobot/agent/subagent.py | 8 +- nanobot/agent/tools/web.py | 166 +++++++++++++---- nanobot/cli/commands.py | 4 +- nanobot/config/schema.py | 5 +- pyproject.toml | 1 + tests/test_tool_validation.py | 15 ++ tests/test_web_search_tool.py | 327 ++++++++++++++++++++++++++++++++++ 10 files changed, 722 insertions(+), 97 deletions(-) create mode 100644 docs/web-search.md create mode 100644 tests/test_web_search_tool.py diff --git a/README.md b/README.md index f169bd7..01d9511 100644 --- a/README.md +++ b/README.md @@ -150,7 +150,7 @@ nanobot channels login > [!TIP] > Set your API key in `~/.nanobot/config.json`. 
-> Get API keys: [OpenRouter](https://openrouter.ai/keys) (Global) · [Brave Search](https://brave.com/search/api/) (optional, for web search) +> Get API keys: [OpenRouter](https://openrouter.ai/keys) (Global) · [DashScope](https://dashscope.console.aliyun.com) (Qwen) · [Brave Search](https://brave.com/search/api/) or [Tavily](https://tavily.com/) (optional, for web search). SearXNG is supported via a base URL. **1. Initialize** @@ -185,6 +185,21 @@ Add or merge these **two parts** into your config (other options have defaults). } ``` +**Optional: Web search provider** — set `tools.web.search.provider` to `brave` (default), `duckduckgo`, `tavily`, or `searxng`. See [docs/web-search.md](docs/web-search.md) for full configuration. + +```json +{ + "tools": { + "web": { + "search": { + "provider": "tavily", + "apiKey": "tvly-..." + } + } + } +} +``` + **3. Chat** ```bash diff --git a/docs/web-search.md b/docs/web-search.md new file mode 100644 index 0000000..6e3802b --- /dev/null +++ b/docs/web-search.md @@ -0,0 +1,95 @@ +# Web Search Providers + +NanoBot supports multiple web search providers. Configure in `~/.nanobot/config.json` under `tools.web.search`. + +| Provider | Key | Env var | +|----------|-----|---------| +| `brave` (default) | `apiKey` | `BRAVE_API_KEY` | +| `tavily` | `apiKey` | `TAVILY_API_KEY` | +| `searxng` | `baseUrl` | `SEARXNG_BASE_URL` | +| `duckduckgo` | — | — | + +Each provider uses the same `apiKey` field — set the provider and key together. If no provider is specified but `apiKey` is given, Brave is assumed. + +When credentials are missing and `fallbackToDuckduckgo` is `true` (the default), searches fall back to DuckDuckGo automatically. + +## Examples + +**Brave** (default — just set the key): + +```json +{ + "tools": { + "web": { + "search": { + "apiKey": "BSA..." + } + } + } +} +``` + +**Tavily:** + +```json +{ + "tools": { + "web": { + "search": { + "provider": "tavily", + "apiKey": "tvly-..." 
+ } + } + } +} +``` + +**SearXNG** (self-hosted, no API key needed): + +```json +{ + "tools": { + "web": { + "search": { + "provider": "searxng", + "baseUrl": "https://searx.example" + } + } + } +} +``` + +**DuckDuckGo** (no credentials required): + +```json +{ + "tools": { + "web": { + "search": { + "provider": "duckduckgo" + } + } + } +} +``` + +## Options + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `provider` | string | `"brave"` | Search backend | +| `apiKey` | string | `""` | API key for the selected provider | +| `baseUrl` | string | `""` | Base URL for SearXNG (appends `/search`) | +| `maxResults` | integer | `5` | Default results per search | +| `fallbackToDuckduckgo` | boolean | `true` | Fall back to DuckDuckGo when credentials are missing | + +## Custom providers + +Plugins can register additional providers at runtime via the dispatch dict: + +```python +async def my_search(query: str, n: int) -> str: + ... + +tool._provider_dispatch["my-engine"] = my_search +``` diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index ca9a06e..937c74f 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -28,7 +28,7 @@ from nanobot.providers.base import LLMProvider from nanobot.session.manager import Session, SessionManager if TYPE_CHECKING: - from nanobot.config.schema import ChannelsConfig, ExecToolConfig + from nanobot.config.schema import ChannelsConfig, ExecToolConfig, WebSearchConfig from nanobot.cron.service import CronService @@ -57,7 +57,7 @@ class AgentLoop: max_tokens: int = 4096, memory_window: int = 100, reasoning_effort: str | None = None, - brave_api_key: str | None = None, + web_search_config: "WebSearchConfig | None" = None, web_proxy: str | None = None, exec_config: ExecToolConfig | None = None, cron_service: CronService | None = None, @@ -66,7 +66,9 @@ class AgentLoop: mcp_servers: dict | None = None, channels_config: ChannelsConfig | None = None, ): - from nanobot.config.schema import 
ExecToolConfig + from nanobot.config.schema import ExecToolConfig, WebSearchConfig + from nanobot.cron.service import CronService + self.bus = bus self.channels_config = channels_config self.provider = provider @@ -77,8 +79,8 @@ class AgentLoop: self.max_tokens = max_tokens self.memory_window = memory_window self.reasoning_effort = reasoning_effort - self.brave_api_key = brave_api_key self.web_proxy = web_proxy + self.web_search_config = web_search_config or WebSearchConfig() self.exec_config = exec_config or ExecToolConfig() self.cron_service = cron_service self.restrict_to_workspace = restrict_to_workspace @@ -94,7 +96,7 @@ class AgentLoop: temperature=self.temperature, max_tokens=self.max_tokens, reasoning_effort=reasoning_effort, - brave_api_key=brave_api_key, + web_search_config=self.web_search_config, web_proxy=web_proxy, exec_config=self.exec_config, restrict_to_workspace=restrict_to_workspace, @@ -107,7 +109,9 @@ class AgentLoop: self._mcp_connecting = False self._consolidating: set[str] = set() # Session keys with consolidation in progress self._consolidation_tasks: set[asyncio.Task] = set() # Strong refs to in-flight tasks - self._consolidation_locks: weakref.WeakValueDictionary[str, asyncio.Lock] = weakref.WeakValueDictionary() + self._consolidation_locks: weakref.WeakValueDictionary[str, asyncio.Lock] = ( + weakref.WeakValueDictionary() + ) self._active_tasks: dict[str, list[asyncio.Task]] = {} # session_key -> tasks self._processing_lock = asyncio.Lock() self._register_default_tools() @@ -117,13 +121,15 @@ class AgentLoop: allowed_dir = self.workspace if self.restrict_to_workspace else None for cls in (ReadFileTool, WriteFileTool, EditFileTool, ListDirTool): self.tools.register(cls(workspace=self.workspace, allowed_dir=allowed_dir)) - self.tools.register(ExecTool( - working_dir=str(self.workspace), - timeout=self.exec_config.timeout, - restrict_to_workspace=self.restrict_to_workspace, - path_append=self.exec_config.path_append, - )) - 
self.tools.register(WebSearchTool(api_key=self.brave_api_key, proxy=self.web_proxy)) + self.tools.register( + ExecTool( + working_dir=str(self.workspace), + timeout=self.exec_config.timeout, + restrict_to_workspace=self.restrict_to_workspace, + path_append=self.exec_config.path_append, + ) + ) + self.tools.register(WebSearchTool(config=self.web_search_config, proxy=self.web_proxy)) self.tools.register(WebFetchTool(proxy=self.web_proxy)) self.tools.register(MessageTool(send_callback=self.bus.publish_outbound)) self.tools.register(SpawnTool(manager=self.subagents)) @@ -136,6 +142,7 @@ class AgentLoop: return self._mcp_connecting = True from nanobot.agent.tools.mcp import connect_mcp_servers + try: self._mcp_stack = AsyncExitStack() await self._mcp_stack.__aenter__() @@ -169,12 +176,14 @@ class AgentLoop: @staticmethod def _tool_hint(tool_calls: list) -> str: """Format tool calls as concise hint, e.g. 'web_search("query")'.""" + def _fmt(tc): args = (tc.arguments[0] if isinstance(tc.arguments, list) else tc.arguments) or {} val = next(iter(args.values()), None) if isinstance(args, dict) else None if not isinstance(val, str): return tc.name return f'{tc.name}("{val[:40]}…")' if len(val) > 40 else f'{tc.name}("{val}")' + return ", ".join(_fmt(tc) for tc in tool_calls) async def _run_agent_loop( @@ -213,13 +222,15 @@ class AgentLoop: "type": "function", "function": { "name": tc.name, - "arguments": json.dumps(tc.arguments, ensure_ascii=False) - } + "arguments": json.dumps(tc.arguments, ensure_ascii=False), + }, } for tc in response.tool_calls ] messages = self.context.add_assistant_message( - messages, response.content, tool_call_dicts, + messages, + response.content, + tool_call_dicts, reasoning_content=response.reasoning_content, thinking_blocks=response.thinking_blocks, ) @@ -241,7 +252,9 @@ class AgentLoop: final_content = clean or "Sorry, I encountered an error calling the AI model." 
break messages = self.context.add_assistant_message( - messages, clean, reasoning_content=response.reasoning_content, + messages, + clean, + reasoning_content=response.reasoning_content, thinking_blocks=response.thinking_blocks, ) final_content = clean @@ -273,7 +286,12 @@ class AgentLoop: else: task = asyncio.create_task(self._dispatch(msg)) self._active_tasks.setdefault(msg.session_key, []).append(task) - task.add_done_callback(lambda t, k=msg.session_key: self._active_tasks.get(k, []) and self._active_tasks[k].remove(t) if t in self._active_tasks.get(k, []) else None) + task.add_done_callback( + lambda t, k=msg.session_key: self._active_tasks.get(k, []) + and self._active_tasks[k].remove(t) + if t in self._active_tasks.get(k, []) + else None + ) async def _handle_stop(self, msg: InboundMessage) -> None: """Cancel all active tasks and subagents for the session.""" @@ -287,9 +305,13 @@ class AgentLoop: sub_cancelled = await self.subagents.cancel_by_session(msg.session_key) total = cancelled + sub_cancelled content = f"⏹ Stopped {total} task(s)." if total else "No active task to stop." 
- await self.bus.publish_outbound(OutboundMessage( - channel=msg.channel, chat_id=msg.chat_id, content=content, - )) + await self.bus.publish_outbound( + OutboundMessage( + channel=msg.channel, + chat_id=msg.chat_id, + content=content, + ) + ) async def _dispatch(self, msg: InboundMessage) -> None: """Process a message under the global lock.""" @@ -299,19 +321,26 @@ class AgentLoop: if response is not None: await self.bus.publish_outbound(response) elif msg.channel == "cli": - await self.bus.publish_outbound(OutboundMessage( - channel=msg.channel, chat_id=msg.chat_id, - content="", metadata=msg.metadata or {}, - )) + await self.bus.publish_outbound( + OutboundMessage( + channel=msg.channel, + chat_id=msg.chat_id, + content="", + metadata=msg.metadata or {}, + ) + ) except asyncio.CancelledError: logger.info("Task cancelled for session {}", msg.session_key) raise except Exception: logger.exception("Error processing message for session {}", msg.session_key) - await self.bus.publish_outbound(OutboundMessage( - channel=msg.channel, chat_id=msg.chat_id, - content="Sorry, I encountered an error.", - )) + await self.bus.publish_outbound( + OutboundMessage( + channel=msg.channel, + chat_id=msg.chat_id, + content="Sorry, I encountered an error.", + ) + ) async def close_mcp(self) -> None: """Close MCP connections.""" @@ -336,8 +365,9 @@ class AgentLoop: """Process a single inbound message and return the response.""" # System messages: parse origin from chat_id ("channel:chat_id") if msg.channel == "system": - channel, chat_id = (msg.chat_id.split(":", 1) if ":" in msg.chat_id - else ("cli", msg.chat_id)) + channel, chat_id = ( + msg.chat_id.split(":", 1) if ":" in msg.chat_id else ("cli", msg.chat_id) + ) logger.info("Processing system message from {}", msg.sender_id) key = f"{channel}:{chat_id}" session = self.sessions.get_or_create(key) @@ -345,13 +375,18 @@ class AgentLoop: history = session.get_history(max_messages=self.memory_window) messages = 
self.context.build_messages( history=history, - current_message=msg.content, channel=channel, chat_id=chat_id, + current_message=msg.content, + channel=channel, + chat_id=chat_id, ) final_content, _, all_msgs = await self._run_agent_loop(messages) self._save_turn(session, all_msgs, 1 + len(history)) self.sessions.save(session) - return OutboundMessage(channel=channel, chat_id=chat_id, - content=final_content or "Background task completed.") + return OutboundMessage( + channel=channel, + chat_id=chat_id, + content=final_content or "Background task completed.", + ) preview = msg.content[:80] + "..." if len(msg.content) > 80 else msg.content logger.info("Processing message from {}:{}: {}", msg.channel, msg.sender_id, preview) @@ -366,19 +401,21 @@ class AgentLoop: self._consolidating.add(session.key) try: async with lock: - snapshot = session.messages[session.last_consolidated:] + snapshot = session.messages[session.last_consolidated :] if snapshot: temp = Session(key=session.key) temp.messages = list(snapshot) if not await self._consolidate_memory(temp, archive_all=True): return OutboundMessage( - channel=msg.channel, chat_id=msg.chat_id, + channel=msg.channel, + chat_id=msg.chat_id, content="Memory archival failed, session not cleared. Please try again.", ) except Exception: logger.exception("/new archival failed for {}", session.key) return OutboundMessage( - channel=msg.channel, chat_id=msg.chat_id, + channel=msg.channel, + chat_id=msg.chat_id, content="Memory archival failed, session not cleared. Please try again.", ) finally: @@ -387,14 +424,18 @@ class AgentLoop: session.clear() self.sessions.save(session) self.sessions.invalidate(session.key) - return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id, - content="New session started.") + return OutboundMessage( + channel=msg.channel, chat_id=msg.chat_id, content="New session started." 
+ ) if cmd == "/help": - return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id, - content="🐈 nanobot commands:\n/new — Start a new conversation\n/stop — Stop the current task\n/help — Show available commands") + return OutboundMessage( + channel=msg.channel, + chat_id=msg.chat_id, + content="🐈 nanobot commands:\n/new — Start a new conversation\n/stop — Stop the current task\n/help — Show available commands", + ) unconsolidated = len(session.messages) - session.last_consolidated - if (unconsolidated >= self.memory_window and session.key not in self._consolidating): + if unconsolidated >= self.memory_window and session.key not in self._consolidating: self._consolidating.add(session.key) lock = self._consolidation_locks.setdefault(session.key, asyncio.Lock()) @@ -421,19 +462,26 @@ class AgentLoop: history=history, current_message=msg.content, media=msg.media if msg.media else None, - channel=msg.channel, chat_id=msg.chat_id, + channel=msg.channel, + chat_id=msg.chat_id, ) async def _bus_progress(content: str, *, tool_hint: bool = False) -> None: meta = dict(msg.metadata or {}) meta["_progress"] = True meta["_tool_hint"] = tool_hint - await self.bus.publish_outbound(OutboundMessage( - channel=msg.channel, chat_id=msg.chat_id, content=content, metadata=meta, - )) + await self.bus.publish_outbound( + OutboundMessage( + channel=msg.channel, + chat_id=msg.chat_id, + content=content, + metadata=meta, + ) + ) final_content, _, all_msgs = await self._run_agent_loop( - initial_messages, on_progress=on_progress or _bus_progress, + initial_messages, + on_progress=on_progress or _bus_progress, ) if final_content is None: @@ -448,22 +496,31 @@ class AgentLoop: preview = final_content[:120] + "..." 
if len(final_content) > 120 else final_content logger.info("Response to {}:{}: {}", msg.channel, msg.sender_id, preview) return OutboundMessage( - channel=msg.channel, chat_id=msg.chat_id, content=final_content, + channel=msg.channel, + chat_id=msg.chat_id, + content=final_content, metadata=msg.metadata or {}, ) def _save_turn(self, session: Session, messages: list[dict], skip: int) -> None: """Save new-turn messages into session, truncating large tool results.""" from datetime import datetime + for m in messages[skip:]: entry = dict(m) role, content = entry.get("role"), entry.get("content") if role == "assistant" and not content and not entry.get("tool_calls"): continue # skip empty assistant messages — they poison session context - if role == "tool" and isinstance(content, str) and len(content) > self._TOOL_RESULT_MAX_CHARS: - entry["content"] = content[:self._TOOL_RESULT_MAX_CHARS] + "\n... (truncated)" + if ( + role == "tool" + and isinstance(content, str) + and len(content) > self._TOOL_RESULT_MAX_CHARS + ): + entry["content"] = content[: self._TOOL_RESULT_MAX_CHARS] + "\n... (truncated)" elif role == "user": - if isinstance(content, str) and content.startswith(ContextBuilder._RUNTIME_CONTEXT_TAG): + if isinstance(content, str) and content.startswith( + ContextBuilder._RUNTIME_CONTEXT_TAG + ): # Strip the runtime-context prefix, keep only the user text. 
parts = content.split("\n\n", 1) if len(parts) > 1 and parts[1].strip(): @@ -473,10 +530,15 @@ class AgentLoop: if isinstance(content, list): filtered = [] for c in content: - if c.get("type") == "text" and isinstance(c.get("text"), str) and c["text"].startswith(ContextBuilder._RUNTIME_CONTEXT_TAG): + if ( + c.get("type") == "text" + and isinstance(c.get("text"), str) + and c["text"].startswith(ContextBuilder._RUNTIME_CONTEXT_TAG) + ): continue # Strip runtime context from multimodal messages - if (c.get("type") == "image_url" - and c.get("image_url", {}).get("url", "").startswith("data:image/")): + if c.get("type") == "image_url" and c.get("image_url", {}).get( + "url", "" + ).startswith("data:image/"): filtered.append({"type": "text", "text": "[image]"}) else: filtered.append(c) @@ -490,8 +552,11 @@ class AgentLoop: async def _consolidate_memory(self, session, archive_all: bool = False) -> bool: """Delegate to MemoryStore.consolidate(). Returns True on success.""" return await MemoryStore(self.workspace).consolidate( - session, self.provider, self.model, - archive_all=archive_all, memory_window=self.memory_window, + session, + self.provider, + self.model, + archive_all=archive_all, + memory_window=self.memory_window, ) async def process_direct( @@ -505,5 +570,7 @@ class AgentLoop: """Process a message directly (for CLI or cron usage).""" await self._connect_mcp() msg = InboundMessage(channel=channel, sender_id="user", chat_id=chat_id, content=content) - response = await self._process_message(msg, session_key=session_key, on_progress=on_progress) + response = await self._process_message( + msg, session_key=session_key, on_progress=on_progress + ) return response.content if response else "" diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py index f2d6ee5..3d61962 100644 --- a/nanobot/agent/subagent.py +++ b/nanobot/agent/subagent.py @@ -30,12 +30,12 @@ class SubagentManager: temperature: float = 0.7, max_tokens: int = 4096, reasoning_effort: str | 
None = None, - brave_api_key: str | None = None, + web_search_config: "WebSearchConfig | None" = None, web_proxy: str | None = None, exec_config: "ExecToolConfig | None" = None, restrict_to_workspace: bool = False, ): - from nanobot.config.schema import ExecToolConfig + from nanobot.config.schema import ExecToolConfig, WebSearchConfig self.provider = provider self.workspace = workspace self.bus = bus @@ -43,8 +43,8 @@ class SubagentManager: self.temperature = temperature self.max_tokens = max_tokens self.reasoning_effort = reasoning_effort - self.brave_api_key = brave_api_key self.web_proxy = web_proxy + self.web_search_config = web_search_config or WebSearchConfig() self.exec_config = exec_config or ExecToolConfig() self.restrict_to_workspace = restrict_to_workspace self._running_tasks: dict[str, asyncio.Task[None]] = {} @@ -106,7 +106,7 @@ class SubagentManager: restrict_to_workspace=self.restrict_to_workspace, path_append=self.exec_config.path_append, )) - tools.register(WebSearchTool(api_key=self.brave_api_key, proxy=self.web_proxy)) + tools.register(WebSearchTool(config=self.web_search_config, proxy=self.web_proxy)) tools.register(WebFetchTool(proxy=self.web_proxy)) system_prompt = self._build_subagent_prompt() diff --git a/nanobot/agent/tools/web.py b/nanobot/agent/tools/web.py index 0d8f4d1..3adc8b9 100644 --- a/nanobot/agent/tools/web.py +++ b/nanobot/agent/tools/web.py @@ -1,13 +1,16 @@ """Web tools: web_search and web_fetch.""" +import asyncio import html import json import os import re +from collections.abc import Awaitable, Callable from typing import Any from urllib.parse import urlparse import httpx +from ddgs import DDGS from loguru import logger from nanobot.agent.tools.base import Tool @@ -44,8 +47,22 @@ def _validate_url(url: str) -> tuple[bool, str]: return False, str(e) +def _format_results(query: str, items: list[dict[str, Any]], n: int) -> str: + """Format provider results into a shared plaintext output.""" + if not items: + return f"No 
results for: {query}" + lines = [f"Results for: {query}\n"] + for i, item in enumerate(items[:n], 1): + title = _normalize(_strip_tags(item.get('title', ''))) + snippet = _normalize(_strip_tags(item.get('content', ''))) + lines.append(f"{i}. {title}\n {item.get('url', '')}") + if snippet: + lines.append(f" {snippet}") + return "\n".join(lines) + + class WebSearchTool(Tool): - """Search the web using Brave Search API.""" + """Search the web using configured provider.""" name = "web_search" description = "Search the web. Returns titles, URLs, and snippets." @@ -58,49 +75,133 @@ class WebSearchTool(Tool): "required": ["query"] } - def __init__(self, api_key: str | None = None, max_results: int = 5, proxy: str | None = None): - self._init_api_key = api_key - self.max_results = max_results - self.proxy = proxy + def __init__( + self, + config: "WebSearchConfig | None" = None, + transport: httpx.AsyncBaseTransport | None = None, + ddgs_factory: Callable[[], DDGS] | None = None, + proxy: str | None = None, + ): + from nanobot.config.schema import WebSearchConfig - @property - def api_key(self) -> str: - """Resolve API key at call time so env/config changes are picked up.""" - return self._init_api_key or os.environ.get("BRAVE_API_KEY", "") + self.config = config if config is not None else WebSearchConfig() + self._transport = transport + self._ddgs_factory = ddgs_factory or (lambda: DDGS(timeout=10)) + self.proxy = proxy + self._provider_dispatch: dict[str, Callable[[str, int], Awaitable[str]]] = { + "duckduckgo": self._search_duckduckgo, + "tavily": self._search_tavily, + "searxng": self._search_searxng, + "brave": self._search_brave, + } async def execute(self, query: str, count: int | None = None, **kwargs: Any) -> str: - if not self.api_key: - return ( - "Error: Brave Search API key not configured. Set it in " - "~/.nanobot/config.json under tools.web.search.apiKey " - "(or export BRAVE_API_KEY), then restart the gateway." 
- ) + provider = (self.config.provider or "brave").strip().lower() + n = min(max(count or self.config.max_results, 1), 10) + + search = self._provider_dispatch.get(provider) + if search is None: + return f"Error: unknown search provider '{provider}'" + return await search(query, n) + + async def _fallback_to_duckduckgo(self, missing_key: str, query: str, n: int) -> str: + logger.warning("Falling back to DuckDuckGo: {} not configured", missing_key) + ddg = await self._search_duckduckgo(query=query, n=n) + if ddg.startswith('Error:'): + return ddg + return f'Using DuckDuckGo fallback ({missing_key} missing).\n\n{ddg}' + + async def _search_brave(self, query: str, n: int) -> str: + api_key = self.config.api_key or os.environ.get("BRAVE_API_KEY", "") + if not api_key: + if self.config.fallback_to_duckduckgo: + return await self._fallback_to_duckduckgo('BRAVE_API_KEY', query, n) + return "Error: BRAVE_API_KEY not configured" try: - n = min(max(count or self.max_results, 1), 10) - logger.debug("WebSearch: {}", "proxy enabled" if self.proxy else "direct connection") - async with httpx.AsyncClient(proxy=self.proxy) as client: + async with httpx.AsyncClient(transport=self._transport, proxy=self.proxy) as client: r = await client.get( "https://api.search.brave.com/res/v1/web/search", params={"q": query, "count": n}, - headers={"Accept": "application/json", "X-Subscription-Token": self.api_key}, - timeout=10.0 + headers={"Accept": "application/json", "X-Subscription-Token": api_key}, + timeout=10.0, ) r.raise_for_status() - results = r.json().get("web", {}).get("results", [])[:n] - if not results: + items = [{"title": x.get("title", ""), "url": x.get("url", ""), + "content": x.get("description", "")} + for x in r.json().get("web", {}).get("results", [])] + return _format_results(query, items, n) + except Exception as e: + return f"Error: {e}" + + async def _search_tavily(self, query: str, n: int) -> str: + api_key = self.config.api_key or os.environ.get("TAVILY_API_KEY", "") 
+ if not api_key: + if self.config.fallback_to_duckduckgo: + return await self._fallback_to_duckduckgo('TAVILY_API_KEY', query, n) + return "Error: TAVILY_API_KEY not configured" + + try: + async with httpx.AsyncClient(transport=self._transport, proxy=self.proxy) as client: + r = await client.post( + "https://api.tavily.com/search", + headers={"Authorization": f"Bearer {api_key}"}, + json={"query": query, "max_results": n}, + timeout=15.0, + ) + r.raise_for_status() + + results = r.json().get("results", []) + return _format_results(query, results, n) + except Exception as e: + return f"Error: {e}" + + async def _search_duckduckgo(self, query: str, n: int) -> str: + try: + ddgs = self._ddgs_factory() + raw_results = await asyncio.to_thread(ddgs.text, query, max_results=n) + + if not raw_results: return f"No results for: {query}" - lines = [f"Results for: {query}\n"] - for i, item in enumerate(results, 1): - lines.append(f"{i}. {item.get('title', '')}\n {item.get('url', '')}") - if desc := item.get("description"): - lines.append(f" {desc}") - return "\n".join(lines) - except httpx.ProxyError as e: - logger.error("WebSearch proxy error: {}", e) - return f"Proxy error: {e}" + items = [ + { + "title": result.get("title", ""), + "url": result.get("href", ""), + "content": result.get("body", ""), + } + for result in raw_results + ] + return _format_results(query, items, n) + except Exception as e: + logger.warning("DuckDuckGo search failed: {}", e) + return f"Error: DuckDuckGo search failed ({e})" + + async def _search_searxng(self, query: str, n: int) -> str: + base_url = (self.config.base_url or os.environ.get("SEARXNG_BASE_URL", "")).strip() + if not base_url: + if self.config.fallback_to_duckduckgo: + return await self._fallback_to_duckduckgo('SEARXNG_BASE_URL', query, n) + return "Error: SEARXNG_BASE_URL not configured" + + endpoint = f"{base_url.rstrip('/')}/search" + is_valid, error_msg = _validate_url(endpoint) + if not is_valid: + return f"Error: invalid SearXNG 
URL: {error_msg}" + + try: + async with httpx.AsyncClient(transport=self._transport, proxy=self.proxy) as client: + r = await client.get( + endpoint, + params={"q": query, "format": "json"}, + headers={"User-Agent": USER_AGENT}, + timeout=10.0, + ) + r.raise_for_status() + + results = r.json().get("results", []) + return _format_results(query, results, n) except Exception as e: logger.error("WebSearch error: {}", e) return f"Error: {e}" @@ -157,7 +258,8 @@ class WebFetchTool(Tool): text, extractor = r.text, "raw" truncated = len(text) > max_chars - if truncated: text = text[:max_chars] + if truncated: + text = text[:max_chars] return json.dumps({"url": url, "finalUrl": str(r.url), "status": r.status_code, "extractor": extractor, "truncated": truncated, "length": len(text), "text": text}, ensure_ascii=False) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 2c8d6d3..218d66c 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -332,7 +332,7 @@ def gateway( max_iterations=config.agents.defaults.max_tool_iterations, memory_window=config.agents.defaults.memory_window, reasoning_effort=config.agents.defaults.reasoning_effort, - brave_api_key=config.tools.web.search.api_key or None, + web_search_config=config.tools.web.search, web_proxy=config.tools.web.proxy or None, exec_config=config.tools.exec, cron_service=cron, @@ -517,7 +517,7 @@ def agent( max_iterations=config.agents.defaults.max_tool_iterations, memory_window=config.agents.defaults.memory_window, reasoning_effort=config.agents.defaults.reasoning_effort, - brave_api_key=config.tools.web.search.api_key or None, + web_search_config=config.tools.web.search, web_proxy=config.tools.web.proxy or None, exec_config=config.tools.exec, cron_service=cron, diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index 803cb61..fb482aa 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -288,7 +288,10 @@ class GatewayConfig(Base): class WebSearchConfig(Base): 
"""Web search tool configuration.""" - api_key: str = "" # Brave Search API key + provider: str = "" # brave, tavily, searxng, duckduckgo (empty = brave) + api_key: str = "" # API key for selected provider + base_url: str = "" # Base URL (SearXNG) + fallback_to_duckduckgo: bool = True max_results: int = 5 diff --git a/pyproject.toml b/pyproject.toml index 62cf616..c756fbf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "websockets>=16.0,<17.0", "websocket-client>=1.9.0,<2.0.0", "httpx>=0.28.0,<1.0.0", + "ddgs>=9.5.5,<10.0.0", "oauth-cli-kit>=0.1.3,<1.0.0", "loguru>=0.7.3,<1.0.0", "readability-lxml>=0.8.4,<1.0.0", diff --git a/tests/test_tool_validation.py b/tests/test_tool_validation.py index c2b4b6a..7ec9a23 100644 --- a/tests/test_tool_validation.py +++ b/tests/test_tool_validation.py @@ -1,8 +1,10 @@ from typing import Any +from nanobot.agent.tools.web import WebSearchTool from nanobot.agent.tools.base import Tool from nanobot.agent.tools.registry import ToolRegistry from nanobot.agent.tools.shell import ExecTool +from nanobot.config.schema import WebSearchConfig class SampleTool(Tool): @@ -337,3 +339,16 @@ def test_cast_params_single_value_not_auto_wrapped_to_array() -> None: assert result["items"] == 5 # Not wrapped to [5] result = tool.cast_params({"items": "text"}) assert result["items"] == "text" # Not wrapped to ["text"] + + +async def test_web_search_no_fallback_returns_provider_error() -> None: + tool = WebSearchTool( + config=WebSearchConfig( + provider="brave", + api_key="", + fallback_to_duckduckgo=False, + ) + ) + + result = await tool.execute(query="fallback", count=1) + assert result == "Error: BRAVE_API_KEY not configured" diff --git a/tests/test_web_search_tool.py b/tests/test_web_search_tool.py new file mode 100644 index 0000000..0b95014 --- /dev/null +++ b/tests/test_web_search_tool.py @@ -0,0 +1,327 @@ +import httpx +import pytest +from collections.abc import Callable +from typing import Literal + +from 
nanobot.agent.tools.web import WebSearchTool +from nanobot.config.schema import WebSearchConfig + + +def _tool(config: WebSearchConfig, handler) -> WebSearchTool: + return WebSearchTool(config=config, transport=httpx.MockTransport(handler)) + + +def _assert_tavily_request(request: httpx.Request) -> bool: + return ( + request.method == "POST" + and str(request.url) == "https://api.tavily.com/search" + and request.headers.get("authorization") == "Bearer tavily-key" + and '"query":"openclaw"' in request.read().decode("utf-8") + ) + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + ("provider", "config_kwargs", "query", "count", "assert_request", "response", "assert_text"), + [ + ( + "brave", + {"api_key": "brave-key"}, + "nanobot", + 1, + lambda request: ( + request.method == "GET" + and str(request.url) + == "https://api.search.brave.com/res/v1/web/search?q=nanobot&count=1" + and request.headers["X-Subscription-Token"] == "brave-key" + ), + httpx.Response( + 200, + json={ + "web": { + "results": [ + { + "title": "NanoBot", + "url": "https://example.com/nanobot", + "description": "Ultra-lightweight assistant", + } + ] + } + }, + ), + ["Results for: nanobot", "1. NanoBot", "https://example.com/nanobot"], + ), + ( + "tavily", + {"api_key": "tavily-key"}, + "openclaw", + 2, + _assert_tavily_request, + httpx.Response( + 200, + json={ + "results": [ + { + "title": "OpenClaw", + "url": "https://example.com/openclaw", + "content": "Plugin-based assistant framework", + } + ] + }, + ), + ["Results for: openclaw", "1. 
OpenClaw", "https://example.com/openclaw"], + ), + ( + "searxng", + {"base_url": "https://searx.example"}, + "nanobot", + 1, + lambda request: ( + request.method == "GET" + and str(request.url) == "https://searx.example/search?q=nanobot&format=json" + ), + httpx.Response( + 200, + json={ + "results": [ + { + "title": "nanobot docs", + "url": "https://example.com/nanobot", + "content": "Lightweight assistant docs", + } + ] + }, + ), + ["Results for: nanobot", "1. nanobot docs", "https://example.com/nanobot"], + ), + ], +) +async def test_web_search_provider_formats_results( + provider: Literal["brave", "tavily", "searxng"], + config_kwargs: dict, + query: str, + count: int, + assert_request: Callable[[httpx.Request], bool], + response: httpx.Response, + assert_text: list[str], +) -> None: + def handler(request: httpx.Request) -> httpx.Response: + assert assert_request(request) + return response + + tool = _tool(WebSearchConfig(provider=provider, max_results=5, **config_kwargs), handler) + result = await tool.execute(query=query, count=count) + for text in assert_text: + assert text in result + + +@pytest.mark.asyncio +async def test_web_search_from_legacy_config_works() -> None: + def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response( + 200, + json={ + "web": { + "results": [ + {"title": "Legacy", "url": "https://example.com", "description": "ok"} + ] + } + }, + ) + + config = WebSearchConfig(api_key="legacy-key", max_results=3) + tool = WebSearchTool(config=config, transport=httpx.MockTransport(handler)) + result = await tool.execute(query="constructor", count=1) + assert "1. 
Legacy" in result + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + ("provider", "config", "missing_env", "expected_title"), + [ + ( + "brave", + WebSearchConfig(provider="brave", api_key="", max_results=5), + "BRAVE_API_KEY", + "Fallback Result", + ), + ( + "tavily", + WebSearchConfig(provider="tavily", api_key="", max_results=5), + "TAVILY_API_KEY", + "Tavily Fallback", + ), + ], +) +async def test_web_search_missing_key_falls_back_to_duckduckgo( + monkeypatch: pytest.MonkeyPatch, + provider: str, + config: WebSearchConfig, + missing_env: str, + expected_title: str, +) -> None: + monkeypatch.delenv(missing_env, raising=False) + + called = False + + class FakeDDGS: + def __init__(self, *args, **kwargs): + pass + + def text(self, keywords: str, max_results: int): + nonlocal called + called = True + return [ + { + "title": expected_title, + "href": f"https://example.com/{provider}-fallback", + "body": "Fallback snippet", + } + ] + + monkeypatch.setattr("nanobot.agent.tools.web.DDGS", FakeDDGS, raising=False) + + result = await WebSearchTool(config=config).execute(query="fallback", count=1) + assert called + assert "Using DuckDuckGo fallback" in result + assert f"1. 
{expected_title}" in result + + +@pytest.mark.asyncio +async def test_web_search_brave_missing_key_without_fallback_returns_error( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("BRAVE_API_KEY", raising=False) + tool = WebSearchTool( + config=WebSearchConfig( + provider="brave", + api_key="", + fallback_to_duckduckgo=False, + ) + ) + + result = await tool.execute(query="fallback", count=1) + assert result == "Error: BRAVE_API_KEY not configured" + + +@pytest.mark.asyncio +async def test_web_search_searxng_missing_base_url_falls_back_to_duckduckgo() -> None: + tool = WebSearchTool( + config=WebSearchConfig(provider="searxng", base_url="", max_results=5) + ) + + result = await tool.execute(query="nanobot", count=1) + assert "DuckDuckGo fallback" in result + assert "SEARXNG_BASE_URL" in result + + +@pytest.mark.asyncio +async def test_web_search_searxng_missing_base_url_no_fallback_returns_error() -> None: + tool = WebSearchTool( + config=WebSearchConfig( + provider="searxng", base_url="", + fallback_to_duckduckgo=False, max_results=5, + ) + ) + + result = await tool.execute(query="nanobot", count=1) + assert result == "Error: SEARXNG_BASE_URL not configured" + + +@pytest.mark.asyncio +async def test_web_search_searxng_uses_env_base_url( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("SEARXNG_BASE_URL", "https://searx.env") + + def handler(request: httpx.Request) -> httpx.Response: + assert request.method == "GET" + assert str(request.url) == "https://searx.env/search?q=nanobot&format=json" + return httpx.Response( + 200, + json={ + "results": [ + { + "title": "env result", + "url": "https://example.com/env", + "content": "from env", + } + ] + }, + ) + + config = WebSearchConfig(provider="searxng", base_url="", max_results=5) + result = await _tool(config, handler).execute(query="nanobot", count=1) + assert "1. 
env result" in result + + +@pytest.mark.asyncio +async def test_web_search_register_custom_provider() -> None: + config = WebSearchConfig(provider="custom", max_results=5) + tool = WebSearchTool(config=config) + + async def _custom_provider(query: str, n: int) -> str: + return f"custom:{query}:{n}" + + tool._provider_dispatch["custom"] = _custom_provider + + result = await tool.execute(query="nanobot", count=2) + assert result == "custom:nanobot:2" + + +@pytest.mark.asyncio +async def test_web_search_duckduckgo_uses_injected_ddgs_factory() -> None: + class FakeDDGS: + def text(self, keywords: str, max_results: int): + assert keywords == "nanobot" + assert max_results == 1 + return [ + { + "title": "NanoBot result", + "href": "https://example.com/nanobot", + "body": "Search content", + } + ] + + tool = WebSearchTool( + config=WebSearchConfig(provider="duckduckgo", max_results=5), + ddgs_factory=lambda: FakeDDGS(), + ) + + result = await tool.execute(query="nanobot", count=1) + assert "1. 
NanoBot result" in result + + +@pytest.mark.asyncio +async def test_web_search_unknown_provider_returns_error() -> None: + tool = WebSearchTool( + config=WebSearchConfig(provider="google", max_results=5), + ) + result = await tool.execute(query="nanobot", count=1) + assert result == "Error: unknown search provider 'google'" + + +@pytest.mark.asyncio +async def test_web_search_dispatch_dict_overwrites_builtin() -> None: + async def _custom_brave(query: str, n: int) -> str: + return f"custom-brave:{query}:{n}" + + tool = WebSearchTool( + config=WebSearchConfig(provider="brave", api_key="key", max_results=5), + ) + tool._provider_dispatch["brave"] = _custom_brave + result = await tool.execute(query="nanobot", count=2) + assert result == "custom-brave:nanobot:2" + + +@pytest.mark.asyncio +async def test_web_search_searxng_rejects_invalid_url() -> None: + tool = WebSearchTool( + config=WebSearchConfig( + provider="searxng", + base_url="ftp://internal.host", + max_results=5, + ), + ) + result = await tool.execute(query="nanobot", count=1) + assert "Error: invalid SearXNG URL" in result From d633ed6e519aab366743857154c16728682df26a Mon Sep 17 00:00:00 2001 From: Chris Alexander <2815297+chris-alexander@users.noreply.github.com> Date: Tue, 17 Feb 2026 21:59:02 +0000 Subject: [PATCH 02/11] fix(subagent): avoid missing from_legacy call --- nanobot/agent/subagent.py | 79 ++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 30 deletions(-) diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py index 3d61962..d6cbe2d 100644 --- a/nanobot/agent/subagent.py +++ b/nanobot/agent/subagent.py @@ -36,6 +36,7 @@ class SubagentManager: restrict_to_workspace: bool = False, ): from nanobot.config.schema import ExecToolConfig, WebSearchConfig + self.provider = provider self.workspace = workspace self.bus = bus @@ -63,9 +64,7 @@ class SubagentManager: display_label = label or task[:30] + ("..." 
if len(task) > 30 else "") origin = {"channel": origin_channel, "chat_id": origin_chat_id} - bg_task = asyncio.create_task( - self._run_subagent(task_id, task, display_label, origin) - ) + bg_task = asyncio.create_task(self._run_subagent(task_id, task, display_label, origin)) self._running_tasks[task_id] = bg_task if session_key: self._session_tasks.setdefault(session_key, set()).add(task_id) @@ -100,15 +99,17 @@ class SubagentManager: tools.register(WriteFileTool(workspace=self.workspace, allowed_dir=allowed_dir)) tools.register(EditFileTool(workspace=self.workspace, allowed_dir=allowed_dir)) tools.register(ListDirTool(workspace=self.workspace, allowed_dir=allowed_dir)) - tools.register(ExecTool( - working_dir=str(self.workspace), - timeout=self.exec_config.timeout, - restrict_to_workspace=self.restrict_to_workspace, - path_append=self.exec_config.path_append, - )) + tools.register( + ExecTool( + working_dir=str(self.workspace), + timeout=self.exec_config.timeout, + restrict_to_workspace=self.restrict_to_workspace, + path_append=self.exec_config.path_append, + ) + ) tools.register(WebSearchTool(config=self.web_search_config, proxy=self.web_proxy)) tools.register(WebFetchTool(proxy=self.web_proxy)) - + system_prompt = self._build_subagent_prompt() messages: list[dict[str, Any]] = [ {"role": "system", "content": system_prompt}, @@ -145,23 +146,32 @@ class SubagentManager: } for tc in response.tool_calls ] - messages.append({ - "role": "assistant", - "content": response.content or "", - "tool_calls": tool_call_dicts, - }) + messages.append( + { + "role": "assistant", + "content": response.content or "", + "tool_calls": tool_call_dicts, + } + ) # Execute tools for tool_call in response.tool_calls: args_str = json.dumps(tool_call.arguments, ensure_ascii=False) - logger.debug("Subagent [{}] executing: {} with arguments: {}", task_id, tool_call.name, args_str) + logger.debug( + "Subagent [{}] executing: {} with arguments: {}", + task_id, + tool_call.name, + args_str, + ) 
result = await tools.execute(tool_call.name, tool_call.arguments) - messages.append({ - "role": "tool", - "tool_call_id": tool_call.id, - "name": tool_call.name, - "content": result, - }) + messages.append( + { + "role": "tool", + "tool_call_id": tool_call.id, + "name": tool_call.name, + "content": result, + } + ) else: final_result = response.content break @@ -207,15 +217,18 @@ Summarize this naturally for the user. Keep it brief (1-2 sentences). Do not men ) await self.bus.publish_inbound(msg) - logger.debug("Subagent [{}] announced result to {}:{}", task_id, origin['channel'], origin['chat_id']) - + logger.debug( + "Subagent [{}] announced result to {}:{}", task_id, origin["channel"], origin["chat_id"] + ) + def _build_subagent_prompt(self) -> str: """Build a focused system prompt for the subagent.""" from nanobot.agent.context import ContextBuilder from nanobot.agent.skills import SkillsLoader time_ctx = ContextBuilder._build_runtime_context(None, None) - parts = [f"""# Subagent + parts = [ + f"""# Subagent {time_ctx} @@ -223,18 +236,24 @@ You are a subagent spawned by the main agent to complete a specific task. Stay focused on the assigned task. Your final response will be reported back to the main agent. ## Workspace -{self.workspace}"""] +{self.workspace}""" + ] skills_summary = SkillsLoader(self.workspace).build_skills_summary() if skills_summary: - parts.append(f"## Skills\n\nRead SKILL.md with read_file to use a skill.\n\n{skills_summary}") + parts.append( + f"## Skills\n\nRead SKILL.md with read_file to use a skill.\n\n{skills_summary}" + ) return "\n\n".join(parts) - + async def cancel_by_session(self, session_key: str) -> int: """Cancel all subagents for the given session. 
Returns count cancelled.""" - tasks = [self._running_tasks[tid] for tid in self._session_tasks.get(session_key, []) - if tid in self._running_tasks and not self._running_tasks[tid].done()] + tasks = [ + self._running_tasks[tid] + for tid in self._session_tasks.get(session_key, []) + if tid in self._running_tasks and not self._running_tasks[tid].done() + ] for t in tasks: t.cancel() if tasks: From b24d6ffc941f7ff755898fa94485bab51e4415d4 Mon Sep 17 00:00:00 2001 From: shenchengtsi Date: Tue, 10 Mar 2026 11:32:11 +0800 Subject: [PATCH 03/11] fix(memory): validate save_memory payload before persisting --- nanobot/agent/memory.py | 33 ++++++--- tests/test_memory_consolidation_types.py | 94 +++++++++++++++++++++++- 2 files changed, 116 insertions(+), 11 deletions(-) diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py index 21fe77d..add014b 100644 --- a/nanobot/agent/memory.py +++ b/nanobot/agent/memory.py @@ -139,15 +139,30 @@ class MemoryStore: logger.warning("Memory consolidation: unexpected arguments type {}", type(args).__name__) return False - if entry := args.get("history_entry"): - if not isinstance(entry, str): - entry = json.dumps(entry, ensure_ascii=False) - self.append_history(entry) - if update := args.get("memory_update"): - if not isinstance(update, str): - update = json.dumps(update, ensure_ascii=False) - if update != current_memory: - self.write_long_term(update) + if "history_entry" not in args or "memory_update" not in args: + logger.warning("Memory consolidation: save_memory payload missing required fields") + return False + + entry = args["history_entry"] + update = args["memory_update"] + + if entry is None or update is None: + logger.warning("Memory consolidation: save_memory payload contains null required fields") + return False + + if not isinstance(entry, str): + entry = json.dumps(entry, ensure_ascii=False) + if not isinstance(update, str): + update = json.dumps(update, ensure_ascii=False) + + entry = entry.strip() + if not entry: + 
logger.warning("Memory consolidation: history_entry is empty after normalization") + return False + + self.append_history(entry) + if update != current_memory: + self.write_long_term(update) session.last_consolidated = 0 if archive_all else len(session.messages) - keep_count logger.info("Memory consolidation done: {} messages, last_consolidated={}", len(session.messages), session.last_consolidated) diff --git a/tests/test_memory_consolidation_types.py b/tests/test_memory_consolidation_types.py index ff15584..4ba1ecd 100644 --- a/tests/test_memory_consolidation_types.py +++ b/tests/test_memory_consolidation_types.py @@ -97,7 +97,6 @@ class TestMemoryConsolidationTypeHandling: store = MemoryStore(tmp_path) provider = AsyncMock() - # Simulate arguments being a JSON string (not yet parsed) response = LLMResponse( content=None, tool_calls=[ @@ -152,7 +151,6 @@ class TestMemoryConsolidationTypeHandling: store = MemoryStore(tmp_path) provider = AsyncMock() - # Simulate arguments being a list containing a dict response = LLMResponse( content=None, tool_calls=[ @@ -220,3 +218,95 @@ class TestMemoryConsolidationTypeHandling: result = await store.consolidate(session, provider, "test-model", memory_window=50) assert result is False + + @pytest.mark.asyncio + async def test_missing_history_entry_returns_false_without_writing(self, tmp_path: Path) -> None: + """Do not persist partial results when required fields are missing.""" + store = MemoryStore(tmp_path) + provider = AsyncMock() + provider.chat = AsyncMock( + return_value=LLMResponse( + content=None, + tool_calls=[ + ToolCallRequest( + id="call_1", + name="save_memory", + arguments={"memory_update": "# Memory\nOnly memory update"}, + ) + ], + ) + ) + session = _make_session(message_count=60) + + result = await store.consolidate(session, provider, "test-model", memory_window=50) + + assert result is False + assert not store.history_file.exists() + assert not store.memory_file.exists() + assert session.last_consolidated == 0 
+ + @pytest.mark.asyncio + async def test_missing_memory_update_returns_false_without_writing(self, tmp_path: Path) -> None: + """Do not append history if memory_update is missing.""" + store = MemoryStore(tmp_path) + provider = AsyncMock() + provider.chat = AsyncMock( + return_value=LLMResponse( + content=None, + tool_calls=[ + ToolCallRequest( + id="call_1", + name="save_memory", + arguments={"history_entry": "[2026-01-01] Partial output."}, + ) + ], + ) + ) + session = _make_session(message_count=60) + + result = await store.consolidate(session, provider, "test-model", memory_window=50) + + assert result is False + assert not store.history_file.exists() + assert not store.memory_file.exists() + assert session.last_consolidated == 0 + + @pytest.mark.asyncio + async def test_null_required_field_returns_false_without_writing(self, tmp_path: Path) -> None: + """Null required fields should be rejected before persistence.""" + store = MemoryStore(tmp_path) + provider = AsyncMock() + provider.chat = AsyncMock( + return_value=_make_tool_response( + history_entry=None, + memory_update="# Memory\nUser likes testing.", + ) + ) + session = _make_session(message_count=60) + + result = await store.consolidate(session, provider, "test-model", memory_window=50) + + assert result is False + assert not store.history_file.exists() + assert not store.memory_file.exists() + assert session.last_consolidated == 0 + + @pytest.mark.asyncio + async def test_empty_history_entry_returns_false_without_writing(self, tmp_path: Path) -> None: + """Empty history entries should be rejected to avoid blank archival records.""" + store = MemoryStore(tmp_path) + provider = AsyncMock() + provider.chat = AsyncMock( + return_value=_make_tool_response( + history_entry=" ", + memory_update="# Memory\nUser likes testing.", + ) + ) + session = _make_session(message_count=60) + + result = await store.consolidate(session, provider, "test-model", memory_window=50) + + assert result is False + assert not 
store.history_file.exists() + assert not store.memory_file.exists() + assert session.last_consolidated == 0 From 9e9051229e63afb1a02c4b18ea17826a5321a9ec Mon Sep 17 00:00:00 2001 From: HuangMinlong Date: Thu, 12 Mar 2026 14:34:32 +0800 Subject: [PATCH 04/11] Integrate Langsmith for conversation tracking Added support for Langsmith API key to enable conversation viewing. --- nanobot/providers/litellm_provider.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py index b4508a4..a9a0517 100644 --- a/nanobot/providers/litellm_provider.py +++ b/nanobot/providers/litellm_provider.py @@ -250,6 +250,10 @@ class LiteLLMProvider(LLMProvider): # Apply model-specific overrides (e.g. kimi-k2.5 temperature) self._apply_model_overrides(model, kwargs) + # Use langsmith to view the conversation + if os.getenv("LANGSMITH_API_KEY"): + kwargs["callbacks"] = ["langsmith"] + # Pass api_key directly — more reliable than env vars alone if self.api_key: kwargs["api_key"] = self.api_key From ec6e099393c5e40dac238deeb82f3a2f33339980 Mon Sep 17 00:00:00 2001 From: Jiajun Xie Date: Thu, 12 Mar 2026 13:33:59 +0800 Subject: [PATCH 05/11] feat(ci): add GitHub Actions workflow for test directory - nanobot/channels/matrix.py: Add keyword-only parameters restrict_to_workspace/workspace to MatrixChannel.__init__ and assign them to _restrict_to_workspace/_workspace with proper type conversion and path resolution - tests/test_commands.py: Add _strip_ansi() function to remove ANSI escape codes, use regex assertions for --workspace/--config parameters to allow 1 or 2 dashes --- .github/workflows/ci.yml | 32 ++++++++++++++++++++++++++++++++ nanobot/channels/matrix.py | 15 ++++++++++++--- tests/test_commands.py | 16 ++++++++++++---- 3 files changed, 56 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 
0000000..98ec385 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,32 @@ +name: Test Suite + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.11, 3.12, 3.13] + continue-on-error: true + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install .[dev] + + - name: Run tests + run: | + python -m pytest tests/ -v \ No newline at end of file diff --git a/nanobot/channels/matrix.py b/nanobot/channels/matrix.py index 0d7a908..3f3f132 100644 --- a/nanobot/channels/matrix.py +++ b/nanobot/channels/matrix.py @@ -149,13 +149,22 @@ class MatrixChannel(BaseChannel): name = "matrix" display_name = "Matrix" - def __init__(self, config: Any, bus: MessageBus): + def __init__( + self, + config: Any, + bus: MessageBus, + *, + restrict_to_workspace: bool = False, + workspace: str | Path | None = None, + ): super().__init__(config, bus) self.client: AsyncClient | None = None self._sync_task: asyncio.Task | None = None self._typing_tasks: dict[str, asyncio.Task] = {} - self._restrict_to_workspace = False - self._workspace: Path | None = None + self._restrict_to_workspace = bool(restrict_to_workspace) + self._workspace = ( + Path(workspace).expanduser().resolve(strict=False) if workspace is not None else None + ) self._server_upload_limit_bytes: int | None = None self._server_upload_limit_checked = False diff --git a/tests/test_commands.py b/tests/test_commands.py index 583ef6f..9bd107d 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -1,3 +1,4 @@ +import re import shutil from pathlib import Path from unittest.mock import AsyncMock, MagicMock, patch @@ -11,6 +12,12 @@ from nanobot.providers.litellm_provider import LiteLLMProvider from 
nanobot.providers.openai_codex_provider import _strip_model_prefix from nanobot.providers.registry import find_by_model + +def _strip_ansi(text): + """Remove ANSI escape codes from text.""" + ansi_escape = re.compile(r'\x1b\[[0-9;]*m') + return ansi_escape.sub('', text) + runner = CliRunner() @@ -199,10 +206,11 @@ def test_agent_help_shows_workspace_and_config_options(): result = runner.invoke(app, ["agent", "--help"]) assert result.exit_code == 0 - assert "--workspace" in result.stdout - assert "-w" in result.stdout - assert "--config" in result.stdout - assert "-c" in result.stdout + stripped_output = _strip_ansi(result.stdout) + assert re.search(r'-{1,2}workspace', stripped_output) + assert re.search(r'-w', stripped_output) + assert re.search(r'-{1,2}config', stripped_output) + assert re.search(r'-c', stripped_output) def test_agent_uses_default_config_when_no_workspace_or_config_flags(mock_agent_runtime): From d48dd006823a42b13a336ee00dd3396e336d869d Mon Sep 17 00:00:00 2001 From: Frank <97429702+tsubasakong@users.noreply.github.com> Date: Thu, 12 Mar 2026 18:23:05 -0700 Subject: [PATCH 06/11] docs: correct BaiLian dashscope apiBase endpoint --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 629f59f..634222d 100644 --- a/README.md +++ b/README.md @@ -761,7 +761,7 @@ Config file: `~/.nanobot/config.json` > - **VolcEngine / BytePlus Coding Plan**: Use dedicated providers `volcengineCodingPlan` or `byteplusCodingPlan` instead of the pay-per-use `volcengine` / `byteplus` providers. > - **Zhipu Coding Plan**: If you're on Zhipu's coding plan, set `"apiBase": "https://open.bigmodel.cn/api/coding/paas/v4"` in your zhipu provider config. > - **MiniMax (Mainland China)**: If your API key is from MiniMax's mainland China platform (minimaxi.com), set `"apiBase": "https://api.minimaxi.com/v1"` in your minimax provider config. 
-> - **Alibaba Cloud Coding Plan**: If you're on the Alibaba Cloud Coding Plan (BaiLian), set `"apiBase": "https://coding.dashscope.aliyuncs.com/v1"` in your dashscope provider config. +> - **Alibaba Cloud BaiLian**: If you're using Alibaba Cloud BaiLian's OpenAI-compatible endpoint, set `"apiBase": "https://dashscope.aliyuncs.com/compatible-mode/v1"` in your dashscope provider config. | Provider | Purpose | Get API Key | |----------|---------|-------------| From 6d3a0ab6c93a7df0b04137b85ca560aba855bf83 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Fri, 13 Mar 2026 03:53:50 +0000 Subject: [PATCH 07/11] fix(memory): validate save_memory payload and raw-archive on repeated failure - Require both history_entry and memory_update, reject null/empty values - Fallback to tool_choice=auto when provider rejects forced function call - After 3 consecutive consolidation failures, raw-archive messages to HISTORY.md without LLM summarization to prevent context window overflow --- nanobot/agent/memory.py | 35 +++++++++++++++--- tests/test_memory_consolidation_types.py | 45 ++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 5 deletions(-) diff --git a/nanobot/agent/memory.py b/nanobot/agent/memory.py index 8cc68bc..f220f23 100644 --- a/nanobot/agent/memory.py +++ b/nanobot/agent/memory.py @@ -5,6 +5,7 @@ from __future__ import annotations import asyncio import json import weakref +from datetime import datetime from pathlib import Path from typing import TYPE_CHECKING, Any, Callable @@ -74,10 +75,13 @@ def _is_tool_choice_unsupported(content: str | None) -> bool: class MemoryStore: """Two-layer memory: MEMORY.md (long-term facts) + HISTORY.md (grep-searchable log).""" + _MAX_FAILURES_BEFORE_RAW_ARCHIVE = 3 + def __init__(self, workspace: Path): self.memory_dir = ensure_dir(workspace / "memory") self.memory_file = self.memory_dir / "MEMORY.md" self.history_file = self.memory_dir / "HISTORY.md" + self._consecutive_failures = 0 def read_long_term(self) -> str: if 
self.memory_file.exists(): @@ -159,39 +163,60 @@ class MemoryStore: len(response.content or ""), (response.content or "")[:200], ) - return False + return self._fail_or_raw_archive(messages) args = _normalize_save_memory_args(response.tool_calls[0].arguments) if args is None: logger.warning("Memory consolidation: unexpected save_memory arguments") - return False + return self._fail_or_raw_archive(messages) if "history_entry" not in args or "memory_update" not in args: logger.warning("Memory consolidation: save_memory payload missing required fields") - return False + return self._fail_or_raw_archive(messages) entry = args["history_entry"] update = args["memory_update"] if entry is None or update is None: logger.warning("Memory consolidation: save_memory payload contains null required fields") - return False + return self._fail_or_raw_archive(messages) entry = _ensure_text(entry).strip() if not entry: logger.warning("Memory consolidation: history_entry is empty after normalization") - return False + return self._fail_or_raw_archive(messages) self.append_history(entry) update = _ensure_text(update) if update != current_memory: self.write_long_term(update) + self._consecutive_failures = 0 logger.info("Memory consolidation done for {} messages", len(messages)) return True except Exception: logger.exception("Memory consolidation failed") + return self._fail_or_raw_archive(messages) + + def _fail_or_raw_archive(self, messages: list[dict]) -> bool: + """Increment failure count; after threshold, raw-archive messages and return True.""" + self._consecutive_failures += 1 + if self._consecutive_failures < self._MAX_FAILURES_BEFORE_RAW_ARCHIVE: return False + self._raw_archive(messages) + self._consecutive_failures = 0 + return True + + def _raw_archive(self, messages: list[dict]) -> None: + """Fallback: dump raw messages to HISTORY.md without LLM summarization.""" + ts = datetime.now().strftime("%Y-%m-%d %H:%M") + self.append_history( + f"[{ts}] [RAW] {len(messages)} 
messages\n" + f"{self._format_messages(messages)}" + ) + logger.warning( + "Memory consolidation degraded: raw-archived {} messages", len(messages) + ) class MemoryConsolidator: diff --git a/tests/test_memory_consolidation_types.py b/tests/test_memory_consolidation_types.py index a7c872e..d63cc90 100644 --- a/tests/test_memory_consolidation_types.py +++ b/tests/test_memory_consolidation_types.py @@ -431,3 +431,48 @@ class TestMemoryConsolidationTypeHandling: assert result is False assert not store.history_file.exists() + + @pytest.mark.asyncio + async def test_raw_archive_after_consecutive_failures(self, tmp_path: Path) -> None: + """After 3 consecutive failures, raw-archive messages and return True.""" + store = MemoryStore(tmp_path) + no_tool = LLMResponse(content="No tool call.", finish_reason="stop", tool_calls=[]) + provider = AsyncMock() + provider.chat_with_retry = AsyncMock(return_value=no_tool) + messages = _make_messages(message_count=10) + + assert await store.consolidate(messages, provider, "m") is False + assert await store.consolidate(messages, provider, "m") is False + assert await store.consolidate(messages, provider, "m") is True + + assert store.history_file.exists() + content = store.history_file.read_text() + assert "[RAW]" in content + assert "10 messages" in content + assert "msg0" in content + assert not store.memory_file.exists() + + @pytest.mark.asyncio + async def test_raw_archive_counter_resets_on_success(self, tmp_path: Path) -> None: + """A successful consolidation resets the failure counter.""" + store = MemoryStore(tmp_path) + no_tool = LLMResponse(content="Nope.", finish_reason="stop", tool_calls=[]) + ok_resp = _make_tool_response( + history_entry="[2026-01-01] OK.", + memory_update="# Memory\nOK.", + ) + messages = _make_messages(message_count=10) + + provider = AsyncMock() + provider.chat_with_retry = AsyncMock(return_value=no_tool) + assert await store.consolidate(messages, provider, "m") is False + assert await 
store.consolidate(messages, provider, "m") is False + assert store._consecutive_failures == 2 + + provider.chat_with_retry = AsyncMock(return_value=ok_resp) + assert await store.consolidate(messages, provider, "m") is True + assert store._consecutive_failures == 0 + + provider.chat_with_retry = AsyncMock(return_value=no_tool) + assert await store.consolidate(messages, provider, "m") is False + assert store._consecutive_failures == 1 From 84b107cf6ca1d56404a3b4b237442ce2670d0f04 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Fri, 13 Mar 2026 04:05:08 +0000 Subject: [PATCH 08/11] fix(ci): upgrade setup-python, add system deps, simplify test assertions --- .github/workflows/ci.yml | 17 +++++++++-------- tests/test_commands.py | 8 ++++---- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 98ec385..f55865f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,22 +11,23 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.11, 3.12, 3.13] - continue-on-error: true + python-version: ["3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 - + - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - + + - name: Install system dependencies + run: sudo apt-get update && sudo apt-get install -y libolm-dev build-essential + - name: Install dependencies run: | python -m pip install --upgrade pip pip install .[dev] - + - name: Run tests - run: | - python -m pytest tests/ -v \ No newline at end of file + run: python -m pytest tests/ -v diff --git a/tests/test_commands.py b/tests/test_commands.py index 8ccbe47..cb77bde 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -236,10 +236,10 @@ def test_agent_help_shows_workspace_and_config_options(): assert result.exit_code == 0 stripped_output = _strip_ansi(result.stdout) - assert 
re.search(r'-{1,2}workspace', stripped_output) - assert re.search(r'-w', stripped_output) - assert re.search(r'-{1,2}config', stripped_output) - assert re.search(r'-c', stripped_output) + assert "--workspace" in stripped_output + assert "-w" in stripped_output + assert "--config" in stripped_output + assert "-c" in stripped_output def test_agent_uses_default_config_when_no_workspace_or_config_flags(mock_agent_runtime): From 20b4fb3bff7aa3d1332c2870f5eab7cba43b8d4f Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Fri, 13 Mar 2026 04:54:22 +0000 Subject: [PATCH 09/11] =?UTF-8?q?fix:=20langsmith=20callback=20=E9=98=B2?= =?UTF-8?q?=E8=A6=86=E7=9B=96=20+=20=E5=8A=A0=20optional=20dep?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- nanobot/providers/litellm_provider.py | 9 +++++---- pyproject.toml | 3 +++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py index a9a0517..ebc8c9b 100644 --- a/nanobot/providers/litellm_provider.py +++ b/nanobot/providers/litellm_provider.py @@ -62,6 +62,8 @@ class LiteLLMProvider(LLMProvider): # Drop unsupported parameters for providers (e.g., gpt-5 rejects some params) litellm.drop_params = True + self._langsmith_enabled = bool(os.getenv("LANGSMITH_API_KEY")) + def _setup_env(self, api_key: str, api_base: str | None, model: str) -> None: """Set environment variables based on detected provider.""" spec = self._gateway or find_by_model(model) @@ -250,10 +252,9 @@ class LiteLLMProvider(LLMProvider): # Apply model-specific overrides (e.g. 
kimi-k2.5 temperature) self._apply_model_overrides(model, kwargs) - # Use langsmith to view the conversation - if os.getenv("LANGSMITH_API_KEY"): - kwargs["callbacks"] = ["langsmith"] - + if self._langsmith_enabled: + kwargs.setdefault("callbacks", []).append("langsmith") + # Pass api_key directly — more reliable than env vars alone if self.api_key: kwargs["api_key"] = self.api_key diff --git a/pyproject.toml b/pyproject.toml index 5eb77c3..dce9e26 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,9 @@ matrix = [ "mistune>=3.0.0,<4.0.0", "nh3>=0.2.17,<1.0.0", ] +langsmith = [ + "langsmith>=0.1.0", +] dev = [ "pytest>=9.0.0,<10.0.0", "pytest-asyncio>=1.3.0,<2.0.0", From ca5047b602f6de926e052e0f391fb822c667fb8d Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Fri, 13 Mar 2026 05:44:16 +0000 Subject: [PATCH 10/11] feat(web): multi-provider web search + Jina Reader fetch --- README.md | 100 +++++++++++++- nanobot/agent/loop.py | 13 +- nanobot/agent/subagent.py | 9 +- nanobot/agent/tools/web.py | 241 +++++++++++++++++++++++++++------- nanobot/cli/commands.py | 4 +- nanobot/config/schema.py | 4 +- pyproject.toml | 1 + tests/test_web_search_tool.py | 162 +++++++++++++++++++++++ 8 files changed, 470 insertions(+), 64 deletions(-) create mode 100644 tests/test_web_search_tool.py diff --git a/README.md b/README.md index 634222d..a9bad54 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,9 @@ nanobot channels login > [!TIP] > Set your API key in `~/.nanobot/config.json`. -> Get API keys: [OpenRouter](https://openrouter.ai/keys) (Global) · [Brave Search](https://brave.com/search/api/) (optional, for web search) +> Get API keys: [OpenRouter](https://openrouter.ai/keys) (Global) +> +> For web search capability setup, please see [Web Search](#web-search). **1. Initialize** @@ -960,6 +962,102 @@ That's it! Environment variables, model prefixing, config matching, and `nanobot +### Web Search + +nanobot supports multiple web search providers. 
Configure in `~/.nanobot/config.json` under `tools.web.search`. + +| Provider | Config fields | Env var fallback | Free | +|----------|--------------|------------------|------| +| `brave` (default) | `apiKey` | `BRAVE_API_KEY` | No | +| `tavily` | `apiKey` | `TAVILY_API_KEY` | No | +| `jina` | `apiKey` | `JINA_API_KEY` | Free tier (10M tokens) | +| `searxng` | `baseUrl` | `SEARXNG_BASE_URL` | Yes (self-hosted) | +| `duckduckgo` | — | — | Yes | + +When credentials are missing, nanobot automatically falls back to DuckDuckGo. + +**Brave** (default): +```json +{ + "tools": { + "web": { + "search": { + "provider": "brave", + "apiKey": "BSA..." + } + } + } +} +``` + +**Tavily:** +```json +{ + "tools": { + "web": { + "search": { + "provider": "tavily", + "apiKey": "tvly-..." + } + } + } +} +``` + +**Jina** (free tier with 10M tokens): +```json +{ + "tools": { + "web": { + "search": { + "provider": "jina", + "apiKey": "jina_..." + } + } + } +} +``` + +**SearXNG** (self-hosted, no API key needed): +```json +{ + "tools": { + "web": { + "search": { + "provider": "searxng", + "baseUrl": "https://searx.example" + } + } + } +} +``` + +**DuckDuckGo** (zero config): +```json +{ + "tools": { + "web": { + "search": { + "provider": "duckduckgo" + } + } + } +} +``` + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `provider` | string | `"brave"` | Search backend: `brave`, `tavily`, `jina`, `searxng`, `duckduckgo` | +| `apiKey` | string | `""` | API key for Brave, Tavily, or Jina | +| `baseUrl` | string | `""` | Base URL for SearXNG | +| `maxResults` | integer | `5` | Results per search (1–10) | + +> [!TIP] +> Use `proxy` in `tools.web` to route all web requests (search + fetch) through a proxy: +> ```json +> { "tools": { "web": { "proxy": "http://127.0.0.1:7890" } } } +> ``` + ### MCP (Model Context Protocol) > [!TIP] diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index b56017a..e05a73e 100644 --- a/nanobot/agent/loop.py +++
b/nanobot/agent/loop.py @@ -29,7 +29,7 @@ from nanobot.providers.base import LLMProvider from nanobot.session.manager import Session, SessionManager if TYPE_CHECKING: - from nanobot.config.schema import ChannelsConfig, ExecToolConfig + from nanobot.config.schema import ChannelsConfig, ExecToolConfig, WebSearchConfig from nanobot.cron.service import CronService @@ -55,7 +55,7 @@ class AgentLoop: model: str | None = None, max_iterations: int = 40, context_window_tokens: int = 65_536, - brave_api_key: str | None = None, + web_search_config: WebSearchConfig | None = None, web_proxy: str | None = None, exec_config: ExecToolConfig | None = None, cron_service: CronService | None = None, @@ -64,7 +64,8 @@ class AgentLoop: mcp_servers: dict | None = None, channels_config: ChannelsConfig | None = None, ): - from nanobot.config.schema import ExecToolConfig + from nanobot.config.schema import ExecToolConfig, WebSearchConfig + self.bus = bus self.channels_config = channels_config self.provider = provider @@ -72,7 +73,7 @@ class AgentLoop: self.model = model or provider.get_default_model() self.max_iterations = max_iterations self.context_window_tokens = context_window_tokens - self.brave_api_key = brave_api_key + self.web_search_config = web_search_config or WebSearchConfig() self.web_proxy = web_proxy self.exec_config = exec_config or ExecToolConfig() self.cron_service = cron_service @@ -86,7 +87,7 @@ class AgentLoop: workspace=workspace, bus=bus, model=self.model, - brave_api_key=brave_api_key, + web_search_config=self.web_search_config, web_proxy=web_proxy, exec_config=self.exec_config, restrict_to_workspace=restrict_to_workspace, @@ -121,7 +122,7 @@ class AgentLoop: restrict_to_workspace=self.restrict_to_workspace, path_append=self.exec_config.path_append, )) - self.tools.register(WebSearchTool(api_key=self.brave_api_key, proxy=self.web_proxy)) + self.tools.register(WebSearchTool(config=self.web_search_config, proxy=self.web_proxy)) 
self.tools.register(WebFetchTool(proxy=self.web_proxy)) self.tools.register(MessageTool(send_callback=self.bus.publish_outbound)) self.tools.register(SpawnTool(manager=self.subagents)) diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py index eb3b3b0..b6bef68 100644 --- a/nanobot/agent/subagent.py +++ b/nanobot/agent/subagent.py @@ -28,17 +28,18 @@ class SubagentManager: workspace: Path, bus: MessageBus, model: str | None = None, - brave_api_key: str | None = None, + web_search_config: "WebSearchConfig | None" = None, web_proxy: str | None = None, exec_config: "ExecToolConfig | None" = None, restrict_to_workspace: bool = False, ): - from nanobot.config.schema import ExecToolConfig + from nanobot.config.schema import ExecToolConfig, WebSearchConfig + self.provider = provider self.workspace = workspace self.bus = bus self.model = model or provider.get_default_model() - self.brave_api_key = brave_api_key + self.web_search_config = web_search_config or WebSearchConfig() self.web_proxy = web_proxy self.exec_config = exec_config or ExecToolConfig() self.restrict_to_workspace = restrict_to_workspace @@ -101,7 +102,7 @@ class SubagentManager: restrict_to_workspace=self.restrict_to_workspace, path_append=self.exec_config.path_append, )) - tools.register(WebSearchTool(api_key=self.brave_api_key, proxy=self.web_proxy)) + tools.register(WebSearchTool(config=self.web_search_config, proxy=self.web_proxy)) tools.register(WebFetchTool(proxy=self.web_proxy)) system_prompt = self._build_subagent_prompt() diff --git a/nanobot/agent/tools/web.py b/nanobot/agent/tools/web.py index 0d8f4d1..f1363e6 100644 --- a/nanobot/agent/tools/web.py +++ b/nanobot/agent/tools/web.py @@ -1,10 +1,13 @@ """Web tools: web_search and web_fetch.""" +from __future__ import annotations + +import asyncio import html import json import os import re -from typing import Any +from typing import TYPE_CHECKING, Any from urllib.parse import urlparse import httpx @@ -12,6 +15,9 @@ from loguru import 
logger from nanobot.agent.tools.base import Tool +if TYPE_CHECKING: + from nanobot.config.schema import WebSearchConfig + # Shared constants USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_2) AppleWebKit/537.36" MAX_REDIRECTS = 5 # Limit redirects to prevent DoS attacks @@ -44,8 +50,22 @@ def _validate_url(url: str) -> tuple[bool, str]: return False, str(e) +def _format_results(query: str, items: list[dict[str, Any]], n: int) -> str: + """Format provider results into shared plaintext output.""" + if not items: + return f"No results for: {query}" + lines = [f"Results for: {query}\n"] + for i, item in enumerate(items[:n], 1): + title = _normalize(_strip_tags(item.get("title", ""))) + snippet = _normalize(_strip_tags(item.get("content", ""))) + lines.append(f"{i}. {title}\n {item.get('url', '')}") + if snippet: + lines.append(f" {snippet}") + return "\n".join(lines) + + class WebSearchTool(Tool): - """Search the web using Brave Search API.""" + """Search the web using configured provider.""" name = "web_search" description = "Search the web. Returns titles, URLs, and snippets." 
@@ -53,61 +73,140 @@ class WebSearchTool(Tool): "type": "object", "properties": { "query": {"type": "string", "description": "Search query"}, - "count": {"type": "integer", "description": "Results (1-10)", "minimum": 1, "maximum": 10} + "count": {"type": "integer", "description": "Results (1-10)", "minimum": 1, "maximum": 10}, }, - "required": ["query"] + "required": ["query"], } - def __init__(self, api_key: str | None = None, max_results: int = 5, proxy: str | None = None): - self._init_api_key = api_key - self.max_results = max_results + def __init__(self, config: WebSearchConfig | None = None, proxy: str | None = None): + from nanobot.config.schema import WebSearchConfig + + self.config = config if config is not None else WebSearchConfig() self.proxy = proxy - @property - def api_key(self) -> str: - """Resolve API key at call time so env/config changes are picked up.""" - return self._init_api_key or os.environ.get("BRAVE_API_KEY", "") - async def execute(self, query: str, count: int | None = None, **kwargs: Any) -> str: - if not self.api_key: - return ( - "Error: Brave Search API key not configured. Set it in " - "~/.nanobot/config.json under tools.web.search.apiKey " - "(or export BRAVE_API_KEY), then restart the gateway." 
- ) + provider = self.config.provider.strip().lower() or "brave" + n = min(max(count or self.config.max_results, 1), 10) + if provider == "duckduckgo": + return await self._search_duckduckgo(query, n) + elif provider == "tavily": + return await self._search_tavily(query, n) + elif provider == "searxng": + return await self._search_searxng(query, n) + elif provider == "jina": + return await self._search_jina(query, n) + elif provider == "brave": + return await self._search_brave(query, n) + else: + return f"Error: unknown search provider '{provider}'" + + async def _search_brave(self, query: str, n: int) -> str: + api_key = self.config.api_key or os.environ.get("BRAVE_API_KEY", "") + if not api_key: + logger.warning("BRAVE_API_KEY not set, falling back to DuckDuckGo") + return await self._search_duckduckgo(query, n) try: - n = min(max(count or self.max_results, 1), 10) - logger.debug("WebSearch: {}", "proxy enabled" if self.proxy else "direct connection") async with httpx.AsyncClient(proxy=self.proxy) as client: r = await client.get( "https://api.search.brave.com/res/v1/web/search", params={"q": query, "count": n}, - headers={"Accept": "application/json", "X-Subscription-Token": self.api_key}, - timeout=10.0 + headers={"Accept": "application/json", "X-Subscription-Token": api_key}, + timeout=10.0, ) r.raise_for_status() - - results = r.json().get("web", {}).get("results", [])[:n] - if not results: - return f"No results for: {query}" - - lines = [f"Results for: {query}\n"] - for i, item in enumerate(results, 1): - lines.append(f"{i}. 
{item.get('title', '')}\n {item.get('url', '')}") - if desc := item.get("description"): - lines.append(f" {desc}") - return "\n".join(lines) - except httpx.ProxyError as e: - logger.error("WebSearch proxy error: {}", e) - return f"Proxy error: {e}" + items = [ + {"title": x.get("title", ""), "url": x.get("url", ""), "content": x.get("description", "")} + for x in r.json().get("web", {}).get("results", []) + ] + return _format_results(query, items, n) except Exception as e: - logger.error("WebSearch error: {}", e) return f"Error: {e}" + async def _search_tavily(self, query: str, n: int) -> str: + api_key = self.config.api_key or os.environ.get("TAVILY_API_KEY", "") + if not api_key: + logger.warning("TAVILY_API_KEY not set, falling back to DuckDuckGo") + return await self._search_duckduckgo(query, n) + try: + async with httpx.AsyncClient(proxy=self.proxy) as client: + r = await client.post( + "https://api.tavily.com/search", + headers={"Authorization": f"Bearer {api_key}"}, + json={"query": query, "max_results": n}, + timeout=15.0, + ) + r.raise_for_status() + return _format_results(query, r.json().get("results", []), n) + except Exception as e: + return f"Error: {e}" + + async def _search_searxng(self, query: str, n: int) -> str: + base_url = (self.config.base_url or os.environ.get("SEARXNG_BASE_URL", "")).strip() + if not base_url: + logger.warning("SEARXNG_BASE_URL not set, falling back to DuckDuckGo") + return await self._search_duckduckgo(query, n) + endpoint = f"{base_url.rstrip('/')}/search" + is_valid, error_msg = _validate_url(endpoint) + if not is_valid: + return f"Error: invalid SearXNG URL: {error_msg}" + try: + async with httpx.AsyncClient(proxy=self.proxy) as client: + r = await client.get( + endpoint, + params={"q": query, "format": "json"}, + headers={"User-Agent": USER_AGENT}, + timeout=10.0, + ) + r.raise_for_status() + return _format_results(query, r.json().get("results", []), n) + except Exception as e: + return f"Error: {e}" + + async def 
_search_jina(self, query: str, n: int) -> str: + api_key = self.config.api_key or os.environ.get("JINA_API_KEY", "") + if not api_key: + logger.warning("JINA_API_KEY not set, falling back to DuckDuckGo") + return await self._search_duckduckgo(query, n) + try: + headers = {"Accept": "application/json", "Authorization": f"Bearer {api_key}"} + async with httpx.AsyncClient(proxy=self.proxy) as client: + r = await client.get( + f"https://s.jina.ai/", + params={"q": query}, + headers=headers, + timeout=15.0, + ) + r.raise_for_status() + data = r.json().get("data", [])[:n] + items = [ + {"title": d.get("title", ""), "url": d.get("url", ""), "content": d.get("content", "")[:500]} + for d in data + ] + return _format_results(query, items, n) + except Exception as e: + return f"Error: {e}" + + async def _search_duckduckgo(self, query: str, n: int) -> str: + try: + from ddgs import DDGS + + ddgs = DDGS(timeout=10) + raw = await asyncio.to_thread(ddgs.text, query, max_results=n) + if not raw: + return f"No results for: {query}" + items = [ + {"title": r.get("title", ""), "url": r.get("href", ""), "content": r.get("body", "")} + for r in raw + ] + return _format_results(query, items, n) + except Exception as e: + logger.warning("DuckDuckGo search failed: {}", e) + return f"Error: DuckDuckGo search failed ({e})" + class WebFetchTool(Tool): - """Fetch and extract content from a URL using Readability.""" + """Fetch and extract content from a URL.""" name = "web_fetch" description = "Fetch URL and extract readable content (HTML → markdown/text)." 
@@ -116,9 +215,9 @@ class WebFetchTool(Tool): "properties": { "url": {"type": "string", "description": "URL to fetch"}, "extractMode": {"type": "string", "enum": ["markdown", "text"], "default": "markdown"}, - "maxChars": {"type": "integer", "minimum": 100} + "maxChars": {"type": "integer", "minimum": 100}, }, - "required": ["url"] + "required": ["url"], } def __init__(self, max_chars: int = 50000, proxy: str | None = None): @@ -126,15 +225,55 @@ class WebFetchTool(Tool): self.proxy = proxy async def execute(self, url: str, extractMode: str = "markdown", maxChars: int | None = None, **kwargs: Any) -> str: - from readability import Document - max_chars = maxChars or self.max_chars is_valid, error_msg = _validate_url(url) if not is_valid: return json.dumps({"error": f"URL validation failed: {error_msg}", "url": url}, ensure_ascii=False) + result = await self._fetch_jina(url, max_chars) + if result is None: + result = await self._fetch_readability(url, extractMode, max_chars) + return result + + async def _fetch_jina(self, url: str, max_chars: int) -> str | None: + """Try fetching via Jina Reader API. 
Returns None on failure.""" + try: + headers = {"Accept": "application/json", "User-Agent": USER_AGENT} + jina_key = os.environ.get("JINA_API_KEY", "") + if jina_key: + headers["Authorization"] = f"Bearer {jina_key}" + async with httpx.AsyncClient(proxy=self.proxy, timeout=20.0) as client: + r = await client.get(f"https://r.jina.ai/{url}", headers=headers) + if r.status_code == 429: + logger.debug("Jina Reader rate limited, falling back to readability") + return None + r.raise_for_status() + + data = r.json().get("data", {}) + title = data.get("title", "") + text = data.get("content", "") + if not text: + return None + + if title: + text = f"# {title}\n\n{text}" + truncated = len(text) > max_chars + if truncated: + text = text[:max_chars] + + return json.dumps({ + "url": url, "finalUrl": data.get("url", url), "status": r.status_code, + "extractor": "jina", "truncated": truncated, "length": len(text), "text": text, + }, ensure_ascii=False) + except Exception as e: + logger.debug("Jina Reader failed for {}, falling back to readability: {}", url, e) + return None + + async def _fetch_readability(self, url: str, extract_mode: str, max_chars: int) -> str: + """Local fallback using readability-lxml.""" + from readability import Document + try: - logger.debug("WebFetch: {}", "proxy enabled" if self.proxy else "direct connection") async with httpx.AsyncClient( follow_redirects=True, max_redirects=MAX_REDIRECTS, @@ -150,17 +289,20 @@ class WebFetchTool(Tool): text, extractor = json.dumps(r.json(), indent=2, ensure_ascii=False), "json" elif "text/html" in ctype or r.text[:256].lower().startswith((" max_chars - if truncated: text = text[:max_chars] + if truncated: + text = text[:max_chars] - return json.dumps({"url": url, "finalUrl": str(r.url), "status": r.status_code, - "extractor": extractor, "truncated": truncated, "length": len(text), "text": text}, ensure_ascii=False) + return json.dumps({ + "url": url, "finalUrl": str(r.url), "status": r.status_code, + "extractor": 
extractor, "truncated": truncated, "length": len(text), "text": text, + }, ensure_ascii=False) except httpx.ProxyError as e: logger.error("WebFetch proxy error for {}: {}", url, e) return json.dumps({"error": f"Proxy error: {e}", "url": url}, ensure_ascii=False) @@ -168,11 +310,10 @@ class WebFetchTool(Tool): logger.error("WebFetch error for {}: {}", url, e) return json.dumps({"error": str(e), "url": url}, ensure_ascii=False) - def _to_markdown(self, html: str) -> str: + def _to_markdown(self, html_content: str) -> str: """Convert HTML to markdown.""" - # Convert links, headings, lists before stripping tags text = re.sub(r']*href=["\']([^"\']+)["\'][^>]*>([\s\S]*?)', - lambda m: f'[{_strip_tags(m[2])}]({m[1]})', html, flags=re.I) + lambda m: f'[{_strip_tags(m[2])}]({m[1]})', html_content, flags=re.I) text = re.sub(r']*>([\s\S]*?)', lambda m: f'\n{"#" * int(m[1])} {_strip_tags(m[2])}\n', text, flags=re.I) text = re.sub(r']*>([\s\S]*?)', lambda m: f'\n- {_strip_tags(m[1])}', text, flags=re.I) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 7cc4fd5..06315bf 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -395,7 +395,7 @@ def gateway( model=config.agents.defaults.model, max_iterations=config.agents.defaults.max_tool_iterations, context_window_tokens=config.agents.defaults.context_window_tokens, - brave_api_key=config.tools.web.search.api_key or None, + web_search_config=config.tools.web.search, web_proxy=config.tools.web.proxy or None, exec_config=config.tools.exec, cron_service=cron, @@ -578,7 +578,7 @@ def agent( model=config.agents.defaults.model, max_iterations=config.agents.defaults.max_tool_iterations, context_window_tokens=config.agents.defaults.context_window_tokens, - brave_api_key=config.tools.web.search.api_key or None, + web_search_config=config.tools.web.search, web_proxy=config.tools.web.proxy or None, exec_config=config.tools.exec, cron_service=cron, diff --git a/nanobot/config/schema.py 
b/nanobot/config/schema.py index 4092eeb..2f70e05 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -310,7 +310,9 @@ class GatewayConfig(Base): class WebSearchConfig(Base): """Web search tool configuration.""" - api_key: str = "" # Brave Search API key + provider: str = "brave" # brave, tavily, duckduckgo, searxng, jina + api_key: str = "" + base_url: str = "" # SearXNG base URL max_results: int = 5 diff --git a/pyproject.toml b/pyproject.toml index dce9e26..0a81746 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "websockets>=16.0,<17.0", "websocket-client>=1.9.0,<2.0.0", "httpx>=0.28.0,<1.0.0", + "ddgs>=9.5.5,<10.0.0", "oauth-cli-kit>=0.1.3,<1.0.0", "loguru>=0.7.3,<1.0.0", "readability-lxml>=0.8.4,<1.0.0", diff --git a/tests/test_web_search_tool.py b/tests/test_web_search_tool.py new file mode 100644 index 0000000..02bf443 --- /dev/null +++ b/tests/test_web_search_tool.py @@ -0,0 +1,162 @@ +"""Tests for multi-provider web search.""" + +import httpx +import pytest + +from nanobot.agent.tools.web import WebSearchTool +from nanobot.config.schema import WebSearchConfig + + +def _tool(provider: str = "brave", api_key: str = "", base_url: str = "") -> WebSearchTool: + return WebSearchTool(config=WebSearchConfig(provider=provider, api_key=api_key, base_url=base_url)) + + +def _response(status: int = 200, json: dict | None = None) -> httpx.Response: + """Build a mock httpx.Response with a dummy request attached.""" + r = httpx.Response(status, json=json) + r._request = httpx.Request("GET", "https://mock") + return r + + +@pytest.mark.asyncio +async def test_brave_search(monkeypatch): + async def mock_get(self, url, **kw): + assert "brave" in url + assert kw["headers"]["X-Subscription-Token"] == "brave-key" + return _response(json={ + "web": {"results": [{"title": "NanoBot", "url": "https://example.com", "description": "AI assistant"}]} + }) + + monkeypatch.setattr(httpx.AsyncClient, "get", mock_get) + tool = 
_tool(provider="brave", api_key="brave-key") + result = await tool.execute(query="nanobot", count=1) + assert "NanoBot" in result + assert "https://example.com" in result + + +@pytest.mark.asyncio +async def test_tavily_search(monkeypatch): + async def mock_post(self, url, **kw): + assert "tavily" in url + assert kw["headers"]["Authorization"] == "Bearer tavily-key" + return _response(json={ + "results": [{"title": "OpenClaw", "url": "https://openclaw.io", "content": "Framework"}] + }) + + monkeypatch.setattr(httpx.AsyncClient, "post", mock_post) + tool = _tool(provider="tavily", api_key="tavily-key") + result = await tool.execute(query="openclaw") + assert "OpenClaw" in result + assert "https://openclaw.io" in result + + +@pytest.mark.asyncio +async def test_searxng_search(monkeypatch): + async def mock_get(self, url, **kw): + assert "searx.example" in url + return _response(json={ + "results": [{"title": "Result", "url": "https://example.com", "content": "SearXNG result"}] + }) + + monkeypatch.setattr(httpx.AsyncClient, "get", mock_get) + tool = _tool(provider="searxng", base_url="https://searx.example") + result = await tool.execute(query="test") + assert "Result" in result + + +@pytest.mark.asyncio +async def test_duckduckgo_search(monkeypatch): + class MockDDGS: + def __init__(self, **kw): + pass + + def text(self, query, max_results=5): + return [{"title": "DDG Result", "href": "https://ddg.example", "body": "From DuckDuckGo"}] + + monkeypatch.setattr("nanobot.agent.tools.web.DDGS", MockDDGS, raising=False) + import nanobot.agent.tools.web as web_mod + monkeypatch.setattr(web_mod, "DDGS", MockDDGS, raising=False) + + from ddgs import DDGS + monkeypatch.setattr("ddgs.DDGS", MockDDGS) + + tool = _tool(provider="duckduckgo") + result = await tool.execute(query="hello") + assert "DDG Result" in result + + +@pytest.mark.asyncio +async def test_brave_fallback_to_duckduckgo_when_no_key(monkeypatch): + class MockDDGS: + def __init__(self, **kw): + pass + + def 
text(self, query, max_results=5): + return [{"title": "Fallback", "href": "https://ddg.example", "body": "DuckDuckGo fallback"}] + + monkeypatch.setattr("ddgs.DDGS", MockDDGS) + monkeypatch.delenv("BRAVE_API_KEY", raising=False) + + tool = _tool(provider="brave", api_key="") + result = await tool.execute(query="test") + assert "Fallback" in result + + +@pytest.mark.asyncio +async def test_jina_search(monkeypatch): + async def mock_get(self, url, **kw): + assert "s.jina.ai" in str(url) + assert kw["headers"]["Authorization"] == "Bearer jina-key" + return _response(json={ + "data": [{"title": "Jina Result", "url": "https://jina.ai", "content": "AI search"}] + }) + + monkeypatch.setattr(httpx.AsyncClient, "get", mock_get) + tool = _tool(provider="jina", api_key="jina-key") + result = await tool.execute(query="test") + assert "Jina Result" in result + assert "https://jina.ai" in result + + +@pytest.mark.asyncio +async def test_unknown_provider(): + tool = _tool(provider="unknown") + result = await tool.execute(query="test") + assert "unknown" in result + assert "Error" in result + + +@pytest.mark.asyncio +async def test_default_provider_is_brave(monkeypatch): + async def mock_get(self, url, **kw): + assert "brave" in url + return _response(json={"web": {"results": []}}) + + monkeypatch.setattr(httpx.AsyncClient, "get", mock_get) + tool = _tool(provider="", api_key="test-key") + result = await tool.execute(query="test") + assert "No results" in result + + +@pytest.mark.asyncio +async def test_searxng_no_base_url_falls_back(monkeypatch): + class MockDDGS: + def __init__(self, **kw): + pass + + def text(self, query, max_results=5): + return [{"title": "Fallback", "href": "https://ddg.example", "body": "fallback"}] + + monkeypatch.setattr("ddgs.DDGS", MockDDGS) + monkeypatch.delenv("SEARXNG_BASE_URL", raising=False) + + tool = _tool(provider="searxng", base_url="") + result = await tool.execute(query="test") + assert "Fallback" in result + + +@pytest.mark.asyncio +async 
def test_searxng_invalid_url(): + tool = _tool(provider="searxng", base_url="not-a-url") + result = await tool.execute(query="test") + assert "Error" in result From 65cbd7eb78672e226a8108c81da3ed8ce50ab192 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Fri, 13 Mar 2026 05:54:51 +0000 Subject: [PATCH 11/11] docs: update web search configuration instruction --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a9bad54..07b7283 100644 --- a/README.md +++ b/README.md @@ -964,6 +964,12 @@ That's it! Environment variables, model prefixing, config matching, and `nanobot ### Web Search +> [!TIP] +> Use `proxy` in `tools.web` to route all web requests (search + fetch) through a proxy: +> ```json +> { "tools": { "web": { "proxy": "http://127.0.0.1:7890" } } } +> ``` + nanobot supports multiple web search providers. Configure in `~/.nanobot/config.json` under `tools.web.search`. | Provider | Config fields | Env var fallback | Free | @@ -1052,12 +1058,6 @@ When credentials are missing, nanobot automatically falls back to DuckDuckGo. | `baseUrl` | string | `""` | Base URL for SearXNG | | `maxResults` | integer | `5` | Results per search (1–10) | -> [!TIP] -> Use `proxy` in `tools.web` to route all web requests (search + fetch) through a proxy: -> ```json -> { "tools": { "web": { "proxy": "http://127.0.0.1:7890" } } } -> ``` - ### MCP (Model Context Protocol) > [!TIP]