diff --git a/README.md b/README.md
index 634222d..a9bad54 100644
--- a/README.md
+++ b/README.md
@@ -169,7 +169,9 @@ nanobot channels login
> [!TIP]
> Set your API key in `~/.nanobot/config.json`.
-> Get API keys: [OpenRouter](https://openrouter.ai/keys) (Global) · [Brave Search](https://brave.com/search/api/) (optional, for web search)
+> Get API keys: [OpenRouter](https://openrouter.ai/keys) (Global)
+>
+> To set up web search, see [Web Search](#web-search).
**1. Initialize**
@@ -960,6 +962,102 @@ That's it! Environment variables, model prefixing, config matching, and `nanobot
+### Web Search
+
+nanobot supports multiple web search providers. Configure in `~/.nanobot/config.json` under `tools.web.search`.
+
+| Provider | Config fields | Env var fallback | Free |
+|----------|--------------|------------------|------|
+| `brave` (default) | `apiKey` | `BRAVE_API_KEY` | No |
+| `tavily` | `apiKey` | `TAVILY_API_KEY` | No |
+| `jina` | `apiKey` | `JINA_API_KEY` | Free tier (10M tokens) |
+| `searxng` | `baseUrl` | `SEARXNG_BASE_URL` | Yes (self-hosted) |
+| `duckduckgo` | — | — | Yes |
+
+When credentials are missing, nanobot automatically falls back to DuckDuckGo.
+
+**Brave** (default):
+```json
+{
+ "tools": {
+ "web": {
+ "search": {
+ "provider": "brave",
+ "apiKey": "BSA..."
+ }
+ }
+ }
+}
+```
+
+**Tavily:**
+```json
+{
+ "tools": {
+ "web": {
+ "search": {
+ "provider": "tavily",
+ "apiKey": "tvly-..."
+ }
+ }
+ }
+}
+```
+
+**Jina** (free tier with 10M tokens):
+```json
+{
+ "tools": {
+ "web": {
+ "search": {
+ "provider": "jina",
+ "apiKey": "jina_..."
+ }
+ }
+ }
+}
+```
+
+**SearXNG** (self-hosted, no API key needed):
+```json
+{
+ "tools": {
+ "web": {
+ "search": {
+ "provider": "searxng",
+ "baseUrl": "https://searx.example"
+ }
+ }
+ }
+}
+```
+
+**DuckDuckGo** (zero config):
+```json
+{
+ "tools": {
+ "web": {
+ "search": {
+ "provider": "duckduckgo"
+ }
+ }
+ }
+}
+```
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `provider` | string | `"brave"` | Search backend: `brave`, `tavily`, `jina`, `searxng`, `duckduckgo` |
+| `apiKey` | string | `""` | API key for Brave, Tavily, or Jina |
+| `baseUrl` | string | `""` | Base URL for SearXNG |
+| `maxResults` | integer | `5` | Results per search (1–10) |
+
+> [!TIP]
+> Use `proxy` in `tools.web` to route all web requests (search + fetch) through a proxy:
+> ```json
+> { "tools": { "web": { "proxy": "http://127.0.0.1:7890" } } }
+> ```
+
### MCP (Model Context Protocol)
> [!TIP]
diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index b56017a..e05a73e 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -29,7 +29,7 @@ from nanobot.providers.base import LLMProvider
from nanobot.session.manager import Session, SessionManager
if TYPE_CHECKING:
- from nanobot.config.schema import ChannelsConfig, ExecToolConfig
+ from nanobot.config.schema import ChannelsConfig, ExecToolConfig, WebSearchConfig
from nanobot.cron.service import CronService
@@ -55,7 +55,7 @@ class AgentLoop:
model: str | None = None,
max_iterations: int = 40,
context_window_tokens: int = 65_536,
- brave_api_key: str | None = None,
+ web_search_config: WebSearchConfig | None = None,
web_proxy: str | None = None,
exec_config: ExecToolConfig | None = None,
cron_service: CronService | None = None,
@@ -64,7 +64,8 @@ class AgentLoop:
mcp_servers: dict | None = None,
channels_config: ChannelsConfig | None = None,
):
- from nanobot.config.schema import ExecToolConfig
+ from nanobot.config.schema import ExecToolConfig, WebSearchConfig
+
self.bus = bus
self.channels_config = channels_config
self.provider = provider
@@ -72,7 +73,7 @@ class AgentLoop:
self.model = model or provider.get_default_model()
self.max_iterations = max_iterations
self.context_window_tokens = context_window_tokens
- self.brave_api_key = brave_api_key
+ self.web_search_config = web_search_config or WebSearchConfig()
self.web_proxy = web_proxy
self.exec_config = exec_config or ExecToolConfig()
self.cron_service = cron_service
@@ -86,7 +87,7 @@ class AgentLoop:
workspace=workspace,
bus=bus,
model=self.model,
- brave_api_key=brave_api_key,
+ web_search_config=self.web_search_config,
web_proxy=web_proxy,
exec_config=self.exec_config,
restrict_to_workspace=restrict_to_workspace,
@@ -121,7 +122,7 @@ class AgentLoop:
restrict_to_workspace=self.restrict_to_workspace,
path_append=self.exec_config.path_append,
))
- self.tools.register(WebSearchTool(api_key=self.brave_api_key, proxy=self.web_proxy))
+ self.tools.register(WebSearchTool(config=self.web_search_config, proxy=self.web_proxy))
self.tools.register(WebFetchTool(proxy=self.web_proxy))
self.tools.register(MessageTool(send_callback=self.bus.publish_outbound))
self.tools.register(SpawnTool(manager=self.subagents))
diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py
index eb3b3b0..b6bef68 100644
--- a/nanobot/agent/subagent.py
+++ b/nanobot/agent/subagent.py
@@ -28,17 +28,18 @@ class SubagentManager:
workspace: Path,
bus: MessageBus,
model: str | None = None,
- brave_api_key: str | None = None,
+ web_search_config: "WebSearchConfig | None" = None,
web_proxy: str | None = None,
exec_config: "ExecToolConfig | None" = None,
restrict_to_workspace: bool = False,
):
- from nanobot.config.schema import ExecToolConfig
+ from nanobot.config.schema import ExecToolConfig, WebSearchConfig
+
self.provider = provider
self.workspace = workspace
self.bus = bus
self.model = model or provider.get_default_model()
- self.brave_api_key = brave_api_key
+ self.web_search_config = web_search_config or WebSearchConfig()
self.web_proxy = web_proxy
self.exec_config = exec_config or ExecToolConfig()
self.restrict_to_workspace = restrict_to_workspace
@@ -101,7 +102,7 @@ class SubagentManager:
restrict_to_workspace=self.restrict_to_workspace,
path_append=self.exec_config.path_append,
))
- tools.register(WebSearchTool(api_key=self.brave_api_key, proxy=self.web_proxy))
+ tools.register(WebSearchTool(config=self.web_search_config, proxy=self.web_proxy))
tools.register(WebFetchTool(proxy=self.web_proxy))
system_prompt = self._build_subagent_prompt()
diff --git a/nanobot/agent/tools/web.py b/nanobot/agent/tools/web.py
index 0d8f4d1..f1363e6 100644
--- a/nanobot/agent/tools/web.py
+++ b/nanobot/agent/tools/web.py
@@ -1,10 +1,13 @@
"""Web tools: web_search and web_fetch."""
+from __future__ import annotations
+
+import asyncio
import html
import json
import os
import re
-from typing import Any
+from typing import TYPE_CHECKING, Any
from urllib.parse import urlparse
import httpx
@@ -12,6 +15,9 @@ from loguru import logger
from nanobot.agent.tools.base import Tool
+if TYPE_CHECKING:
+ from nanobot.config.schema import WebSearchConfig
+
# Shared constants
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_2) AppleWebKit/537.36"
MAX_REDIRECTS = 5 # Limit redirects to prevent DoS attacks
@@ -44,8 +50,22 @@ def _validate_url(url: str) -> tuple[bool, str]:
return False, str(e)
+def _format_results(query: str, items: list[dict[str, Any]], n: int) -> str:
+ """Format provider results into shared plaintext output."""
+ if not items:
+ return f"No results for: {query}"
+ lines = [f"Results for: {query}\n"]
+ for i, item in enumerate(items[:n], 1):
+ title = _normalize(_strip_tags(item.get("title", "")))
+ snippet = _normalize(_strip_tags(item.get("content", "")))
+ lines.append(f"{i}. {title}\n {item.get('url', '')}")
+ if snippet:
+ lines.append(f" {snippet}")
+ return "\n".join(lines)
+
+
class WebSearchTool(Tool):
- """Search the web using Brave Search API."""
+ """Search the web using configured provider."""
name = "web_search"
description = "Search the web. Returns titles, URLs, and snippets."
@@ -53,61 +73,140 @@ class WebSearchTool(Tool):
"type": "object",
"properties": {
"query": {"type": "string", "description": "Search query"},
- "count": {"type": "integer", "description": "Results (1-10)", "minimum": 1, "maximum": 10}
+ "count": {"type": "integer", "description": "Results (1-10)", "minimum": 1, "maximum": 10},
},
- "required": ["query"]
+ "required": ["query"],
}
- def __init__(self, api_key: str | None = None, max_results: int = 5, proxy: str | None = None):
- self._init_api_key = api_key
- self.max_results = max_results
+ def __init__(self, config: WebSearchConfig | None = None, proxy: str | None = None):
+ from nanobot.config.schema import WebSearchConfig
+
+ self.config = config if config is not None else WebSearchConfig()
self.proxy = proxy
- @property
- def api_key(self) -> str:
- """Resolve API key at call time so env/config changes are picked up."""
- return self._init_api_key or os.environ.get("BRAVE_API_KEY", "")
-
async def execute(self, query: str, count: int | None = None, **kwargs: Any) -> str:
- if not self.api_key:
- return (
- "Error: Brave Search API key not configured. Set it in "
- "~/.nanobot/config.json under tools.web.search.apiKey "
- "(or export BRAVE_API_KEY), then restart the gateway."
- )
+ provider = self.config.provider.strip().lower() or "brave"
+ n = min(max(count or self.config.max_results, 1), 10)
+ if provider == "duckduckgo":
+ return await self._search_duckduckgo(query, n)
+ elif provider == "tavily":
+ return await self._search_tavily(query, n)
+ elif provider == "searxng":
+ return await self._search_searxng(query, n)
+ elif provider == "jina":
+ return await self._search_jina(query, n)
+ elif provider == "brave":
+ return await self._search_brave(query, n)
+ else:
+ return f"Error: unknown search provider '{provider}'"
+
+ async def _search_brave(self, query: str, n: int) -> str:
+ api_key = self.config.api_key or os.environ.get("BRAVE_API_KEY", "")
+ if not api_key:
+ logger.warning("BRAVE_API_KEY not set, falling back to DuckDuckGo")
+ return await self._search_duckduckgo(query, n)
try:
- n = min(max(count or self.max_results, 1), 10)
- logger.debug("WebSearch: {}", "proxy enabled" if self.proxy else "direct connection")
async with httpx.AsyncClient(proxy=self.proxy) as client:
r = await client.get(
"https://api.search.brave.com/res/v1/web/search",
params={"q": query, "count": n},
- headers={"Accept": "application/json", "X-Subscription-Token": self.api_key},
- timeout=10.0
+ headers={"Accept": "application/json", "X-Subscription-Token": api_key},
+ timeout=10.0,
)
r.raise_for_status()
-
- results = r.json().get("web", {}).get("results", [])[:n]
- if not results:
- return f"No results for: {query}"
-
- lines = [f"Results for: {query}\n"]
- for i, item in enumerate(results, 1):
- lines.append(f"{i}. {item.get('title', '')}\n {item.get('url', '')}")
- if desc := item.get("description"):
- lines.append(f" {desc}")
- return "\n".join(lines)
- except httpx.ProxyError as e:
- logger.error("WebSearch proxy error: {}", e)
- return f"Proxy error: {e}"
+ items = [
+ {"title": x.get("title", ""), "url": x.get("url", ""), "content": x.get("description", "")}
+ for x in r.json().get("web", {}).get("results", [])
+ ]
+ return _format_results(query, items, n)
except Exception as e:
- logger.error("WebSearch error: {}", e)
return f"Error: {e}"
+ async def _search_tavily(self, query: str, n: int) -> str:
+ api_key = self.config.api_key or os.environ.get("TAVILY_API_KEY", "")
+ if not api_key:
+ logger.warning("TAVILY_API_KEY not set, falling back to DuckDuckGo")
+ return await self._search_duckduckgo(query, n)
+ try:
+ async with httpx.AsyncClient(proxy=self.proxy) as client:
+ r = await client.post(
+ "https://api.tavily.com/search",
+ headers={"Authorization": f"Bearer {api_key}"},
+ json={"query": query, "max_results": n},
+ timeout=15.0,
+ )
+ r.raise_for_status()
+ return _format_results(query, r.json().get("results", []), n)
+ except Exception as e:
+ return f"Error: {e}"
+
+ async def _search_searxng(self, query: str, n: int) -> str:
+ base_url = (self.config.base_url or os.environ.get("SEARXNG_BASE_URL", "")).strip()
+ if not base_url:
+ logger.warning("SEARXNG_BASE_URL not set, falling back to DuckDuckGo")
+ return await self._search_duckduckgo(query, n)
+ endpoint = f"{base_url.rstrip('/')}/search"
+ is_valid, error_msg = _validate_url(endpoint)
+ if not is_valid:
+ return f"Error: invalid SearXNG URL: {error_msg}"
+ try:
+ async with httpx.AsyncClient(proxy=self.proxy) as client:
+ r = await client.get(
+ endpoint,
+ params={"q": query, "format": "json"},
+ headers={"User-Agent": USER_AGENT},
+ timeout=10.0,
+ )
+ r.raise_for_status()
+ return _format_results(query, r.json().get("results", []), n)
+ except Exception as e:
+ return f"Error: {e}"
+
+    async def _search_jina(self, query: str, n: int) -> str:
+        api_key = self.config.api_key or os.environ.get("JINA_API_KEY", "")
+        if not api_key:
+            logger.warning("JINA_API_KEY not set, falling back to DuckDuckGo")
+            return await self._search_duckduckgo(query, n)
+        try:
+            headers = {"Accept": "application/json", "Authorization": f"Bearer {api_key}"}
+            async with httpx.AsyncClient(proxy=self.proxy) as client:
+                r = await client.get(
+                    "https://s.jina.ai/",
+                    params={"q": query},
+                    headers=headers,
+                    timeout=15.0,
+                )
+                r.raise_for_status()
+                data = r.json().get("data", [])[:n]
+                items = [
+                    {"title": d.get("title", ""), "url": d.get("url", ""), "content": d.get("content", "")[:500]}
+                    for d in data
+                ]
+                return _format_results(query, items, n)
+        except Exception as e:
+            return f"Error: {e}"
+
+ async def _search_duckduckgo(self, query: str, n: int) -> str:
+ try:
+ from ddgs import DDGS
+
+ ddgs = DDGS(timeout=10)
+ raw = await asyncio.to_thread(ddgs.text, query, max_results=n)
+ if not raw:
+ return f"No results for: {query}"
+ items = [
+ {"title": r.get("title", ""), "url": r.get("href", ""), "content": r.get("body", "")}
+ for r in raw
+ ]
+ return _format_results(query, items, n)
+ except Exception as e:
+ logger.warning("DuckDuckGo search failed: {}", e)
+ return f"Error: DuckDuckGo search failed ({e})"
+
class WebFetchTool(Tool):
- """Fetch and extract content from a URL using Readability."""
+ """Fetch and extract content from a URL."""
name = "web_fetch"
description = "Fetch URL and extract readable content (HTML → markdown/text)."
@@ -116,9 +215,9 @@ class WebFetchTool(Tool):
"properties": {
"url": {"type": "string", "description": "URL to fetch"},
"extractMode": {"type": "string", "enum": ["markdown", "text"], "default": "markdown"},
- "maxChars": {"type": "integer", "minimum": 100}
+ "maxChars": {"type": "integer", "minimum": 100},
},
- "required": ["url"]
+ "required": ["url"],
}
def __init__(self, max_chars: int = 50000, proxy: str | None = None):
@@ -126,15 +225,55 @@ class WebFetchTool(Tool):
self.proxy = proxy
async def execute(self, url: str, extractMode: str = "markdown", maxChars: int | None = None, **kwargs: Any) -> str:
- from readability import Document
-
max_chars = maxChars or self.max_chars
is_valid, error_msg = _validate_url(url)
if not is_valid:
return json.dumps({"error": f"URL validation failed: {error_msg}", "url": url}, ensure_ascii=False)
+ result = await self._fetch_jina(url, max_chars)
+ if result is None:
+ result = await self._fetch_readability(url, extractMode, max_chars)
+ return result
+
+ async def _fetch_jina(self, url: str, max_chars: int) -> str | None:
+ """Try fetching via Jina Reader API. Returns None on failure."""
+ try:
+ headers = {"Accept": "application/json", "User-Agent": USER_AGENT}
+ jina_key = os.environ.get("JINA_API_KEY", "")
+ if jina_key:
+ headers["Authorization"] = f"Bearer {jina_key}"
+ async with httpx.AsyncClient(proxy=self.proxy, timeout=20.0) as client:
+ r = await client.get(f"https://r.jina.ai/{url}", headers=headers)
+ if r.status_code == 429:
+ logger.debug("Jina Reader rate limited, falling back to readability")
+ return None
+ r.raise_for_status()
+
+ data = r.json().get("data", {})
+ title = data.get("title", "")
+ text = data.get("content", "")
+ if not text:
+ return None
+
+ if title:
+ text = f"# {title}\n\n{text}"
+ truncated = len(text) > max_chars
+ if truncated:
+ text = text[:max_chars]
+
+ return json.dumps({
+ "url": url, "finalUrl": data.get("url", url), "status": r.status_code,
+ "extractor": "jina", "truncated": truncated, "length": len(text), "text": text,
+ }, ensure_ascii=False)
+ except Exception as e:
+ logger.debug("Jina Reader failed for {}, falling back to readability: {}", url, e)
+ return None
+
+ async def _fetch_readability(self, url: str, extract_mode: str, max_chars: int) -> str:
+ """Local fallback using readability-lxml."""
+ from readability import Document
+
try:
- logger.debug("WebFetch: {}", "proxy enabled" if self.proxy else "direct connection")
async with httpx.AsyncClient(
follow_redirects=True,
max_redirects=MAX_REDIRECTS,
@@ -150,17 +289,20 @@ class WebFetchTool(Tool):
text, extractor = json.dumps(r.json(), indent=2, ensure_ascii=False), "json"
elif "text/html" in ctype or r.text[:256].lower().startswith((" max_chars
- if truncated: text = text[:max_chars]
+ if truncated:
+ text = text[:max_chars]
- return json.dumps({"url": url, "finalUrl": str(r.url), "status": r.status_code,
- "extractor": extractor, "truncated": truncated, "length": len(text), "text": text}, ensure_ascii=False)
+ return json.dumps({
+ "url": url, "finalUrl": str(r.url), "status": r.status_code,
+ "extractor": extractor, "truncated": truncated, "length": len(text), "text": text,
+ }, ensure_ascii=False)
except httpx.ProxyError as e:
logger.error("WebFetch proxy error for {}: {}", url, e)
return json.dumps({"error": f"Proxy error: {e}", "url": url}, ensure_ascii=False)
@@ -168,11 +310,10 @@ class WebFetchTool(Tool):
logger.error("WebFetch error for {}: {}", url, e)
return json.dumps({"error": str(e), "url": url}, ensure_ascii=False)
- def _to_markdown(self, html: str) -> str:
+ def _to_markdown(self, html_content: str) -> str:
"""Convert HTML to markdown."""
- # Convert links, headings, lists before stripping tags
text = re.sub(r']*href=["\']([^"\']+)["\'][^>]*>([\s\S]*?)',
- lambda m: f'[{_strip_tags(m[2])}]({m[1]})', html, flags=re.I)
+ lambda m: f'[{_strip_tags(m[2])}]({m[1]})', html_content, flags=re.I)
text = re.sub(r'