feat(web): multi-provider web search + Jina Reader fetch
This commit is contained in:
100
README.md
100
README.md
@@ -169,7 +169,9 @@ nanobot channels login
|
||||
|
||||
> [!TIP]
|
||||
> Set your API key in `~/.nanobot/config.json`.
|
||||
> Get API keys: [OpenRouter](https://openrouter.ai/keys) (Global) · [Brave Search](https://brave.com/search/api/) (optional, for web search)
|
||||
> Get API keys: [OpenRouter](https://openrouter.ai/keys) (Global)
|
||||
>
|
||||
> For web search capability setup, please see [Web Search](#web-search).
|
||||
|
||||
**1. Initialize**
|
||||
|
||||
@@ -960,6 +962,102 @@ That's it! Environment variables, model prefixing, config matching, and `nanobot
|
||||
</details>
|
||||
|
||||
|
||||
### Web Search
|
||||
|
||||
nanobot supports multiple web search providers. Configure in `~/.nanobot/config.json` under `tools.web.search`.
|
||||
|
||||
| Provider | Config fields | Env var fallback | Free |
|
||||
|----------|--------------|------------------|------|
|
||||
| `brave` (default) | `apiKey` | `BRAVE_API_KEY` | No |
|
||||
| `tavily` | `apiKey` | `TAVILY_API_KEY` | No |
|
||||
| `jina` | `apiKey` | `JINA_API_KEY` | Free tier (10M tokens) |
|
||||
| `searxng` | `baseUrl` | `SEARXNG_BASE_URL` | Yes (self-hosted) |
|
||||
| `duckduckgo` | — | — | Yes |
|
||||
|
||||
When credentials are missing, nanobot automatically falls back to DuckDuckGo.
|
||||
|
||||
**Brave** (default):
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"web": {
|
||||
"search": {
|
||||
"provider": "brave",
|
||||
"apiKey": "BSA..."
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Tavily:**
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"web": {
|
||||
"search": {
|
||||
"provider": "tavily",
|
||||
"apiKey": "tvly-..."
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Jina** (free tier with 10M tokens):
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"web": {
|
||||
"search": {
|
||||
"provider": "jina",
|
||||
"apiKey": "jina_..."
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**SearXNG** (self-hosted, no API key needed):
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"web": {
|
||||
"search": {
|
||||
"provider": "searxng",
|
||||
"baseUrl": "https://searx.example"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**DuckDuckGo** (zero config):
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"web": {
|
||||
"search": {
|
||||
"provider": "duckduckgo"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| Option | Type | Default | Description |
|
||||
|--------|------|---------|-------------|
|
||||
| `provider` | string | `"brave"` | Search backend: `brave`, `tavily`, `jina`, `searxng`, `duckduckgo` |
|
||||
| `apiKey` | string | `""` | API key for Brave, Tavily, or Jina |
|
||||
| `baseUrl` | string | `""` | Base URL for SearXNG |
|
||||
| `maxResults` | integer | `5` | Results per search (1–10) |
|
||||
|
||||
> [!TIP]
|
||||
> Use `proxy` in `tools.web` to route all web requests (search + fetch) through a proxy:
|
||||
> ```json
|
||||
> { "tools": { "web": { "proxy": "http://127.0.0.1:7890" } } }
|
||||
> ```
|
||||
|
||||
### MCP (Model Context Protocol)
|
||||
|
||||
> [!TIP]
|
||||
|
||||
@@ -29,7 +29,7 @@ from nanobot.providers.base import LLMProvider
|
||||
from nanobot.session.manager import Session, SessionManager
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from nanobot.config.schema import ChannelsConfig, ExecToolConfig
|
||||
from nanobot.config.schema import ChannelsConfig, ExecToolConfig, WebSearchConfig
|
||||
from nanobot.cron.service import CronService
|
||||
|
||||
|
||||
@@ -55,7 +55,7 @@ class AgentLoop:
|
||||
model: str | None = None,
|
||||
max_iterations: int = 40,
|
||||
context_window_tokens: int = 65_536,
|
||||
brave_api_key: str | None = None,
|
||||
web_search_config: WebSearchConfig | None = None,
|
||||
web_proxy: str | None = None,
|
||||
exec_config: ExecToolConfig | None = None,
|
||||
cron_service: CronService | None = None,
|
||||
@@ -64,7 +64,8 @@ class AgentLoop:
|
||||
mcp_servers: dict | None = None,
|
||||
channels_config: ChannelsConfig | None = None,
|
||||
):
|
||||
from nanobot.config.schema import ExecToolConfig
|
||||
from nanobot.config.schema import ExecToolConfig, WebSearchConfig
|
||||
|
||||
self.bus = bus
|
||||
self.channels_config = channels_config
|
||||
self.provider = provider
|
||||
@@ -72,7 +73,7 @@ class AgentLoop:
|
||||
self.model = model or provider.get_default_model()
|
||||
self.max_iterations = max_iterations
|
||||
self.context_window_tokens = context_window_tokens
|
||||
self.brave_api_key = brave_api_key
|
||||
self.web_search_config = web_search_config or WebSearchConfig()
|
||||
self.web_proxy = web_proxy
|
||||
self.exec_config = exec_config or ExecToolConfig()
|
||||
self.cron_service = cron_service
|
||||
@@ -86,7 +87,7 @@ class AgentLoop:
|
||||
workspace=workspace,
|
||||
bus=bus,
|
||||
model=self.model,
|
||||
brave_api_key=brave_api_key,
|
||||
web_search_config=self.web_search_config,
|
||||
web_proxy=web_proxy,
|
||||
exec_config=self.exec_config,
|
||||
restrict_to_workspace=restrict_to_workspace,
|
||||
@@ -121,7 +122,7 @@ class AgentLoop:
|
||||
restrict_to_workspace=self.restrict_to_workspace,
|
||||
path_append=self.exec_config.path_append,
|
||||
))
|
||||
self.tools.register(WebSearchTool(api_key=self.brave_api_key, proxy=self.web_proxy))
|
||||
self.tools.register(WebSearchTool(config=self.web_search_config, proxy=self.web_proxy))
|
||||
self.tools.register(WebFetchTool(proxy=self.web_proxy))
|
||||
self.tools.register(MessageTool(send_callback=self.bus.publish_outbound))
|
||||
self.tools.register(SpawnTool(manager=self.subagents))
|
||||
|
||||
@@ -28,17 +28,18 @@ class SubagentManager:
|
||||
workspace: Path,
|
||||
bus: MessageBus,
|
||||
model: str | None = None,
|
||||
brave_api_key: str | None = None,
|
||||
web_search_config: "WebSearchConfig | None" = None,
|
||||
web_proxy: str | None = None,
|
||||
exec_config: "ExecToolConfig | None" = None,
|
||||
restrict_to_workspace: bool = False,
|
||||
):
|
||||
from nanobot.config.schema import ExecToolConfig
|
||||
from nanobot.config.schema import ExecToolConfig, WebSearchConfig
|
||||
|
||||
self.provider = provider
|
||||
self.workspace = workspace
|
||||
self.bus = bus
|
||||
self.model = model or provider.get_default_model()
|
||||
self.brave_api_key = brave_api_key
|
||||
self.web_search_config = web_search_config or WebSearchConfig()
|
||||
self.web_proxy = web_proxy
|
||||
self.exec_config = exec_config or ExecToolConfig()
|
||||
self.restrict_to_workspace = restrict_to_workspace
|
||||
@@ -101,7 +102,7 @@ class SubagentManager:
|
||||
restrict_to_workspace=self.restrict_to_workspace,
|
||||
path_append=self.exec_config.path_append,
|
||||
))
|
||||
tools.register(WebSearchTool(api_key=self.brave_api_key, proxy=self.web_proxy))
|
||||
tools.register(WebSearchTool(config=self.web_search_config, proxy=self.web_proxy))
|
||||
tools.register(WebFetchTool(proxy=self.web_proxy))
|
||||
|
||||
system_prompt = self._build_subagent_prompt()
|
||||
|
||||
@@ -1,10 +1,13 @@
|
||||
"""Web tools: web_search and web_fetch."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import html
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from typing import Any
|
||||
from typing import TYPE_CHECKING, Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import httpx
|
||||
@@ -12,6 +15,9 @@ from loguru import logger
|
||||
|
||||
from nanobot.agent.tools.base import Tool
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from nanobot.config.schema import WebSearchConfig
|
||||
|
||||
# Shared constants
|
||||
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_2) AppleWebKit/537.36"
|
||||
MAX_REDIRECTS = 5 # Limit redirects to prevent DoS attacks
|
||||
@@ -44,8 +50,22 @@ def _validate_url(url: str) -> tuple[bool, str]:
|
||||
return False, str(e)
|
||||
|
||||
|
||||
def _format_results(query: str, items: list[dict[str, Any]], n: int) -> str:
    """Render provider search hits as the shared plaintext listing.

    Each item is a dict with ``title``/``url``/``content`` keys; at most *n*
    hits are emitted. HTML is stripped and whitespace normalized so every
    provider produces the same output shape.
    """
    if not items:
        return f"No results for: {query}"
    out = [f"Results for: {query}\n"]
    for idx, hit in enumerate(items[:n], start=1):
        heading = _normalize(_strip_tags(hit.get("title", "")))
        summary = _normalize(_strip_tags(hit.get("content", "")))
        out.append(f"{idx}. {heading}\n {hit.get('url', '')}")
        if summary:
            out.append(f" {summary}")
    return "\n".join(out)
||||
|
||||
|
||||
class WebSearchTool(Tool):
|
||||
"""Search the web using Brave Search API."""
|
||||
"""Search the web using configured provider."""
|
||||
|
||||
name = "web_search"
|
||||
description = "Search the web. Returns titles, URLs, and snippets."
|
||||
@@ -53,61 +73,140 @@ class WebSearchTool(Tool):
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {"type": "string", "description": "Search query"},
|
||||
"count": {"type": "integer", "description": "Results (1-10)", "minimum": 1, "maximum": 10}
|
||||
"count": {"type": "integer", "description": "Results (1-10)", "minimum": 1, "maximum": 10},
|
||||
},
|
||||
"required": ["query"]
|
||||
"required": ["query"],
|
||||
}
|
||||
|
||||
def __init__(self, api_key: str | None = None, max_results: int = 5, proxy: str | None = None):
|
||||
self._init_api_key = api_key
|
||||
self.max_results = max_results
|
||||
def __init__(self, config: WebSearchConfig | None = None, proxy: str | None = None):
|
||||
from nanobot.config.schema import WebSearchConfig
|
||||
|
||||
self.config = config if config is not None else WebSearchConfig()
|
||||
self.proxy = proxy
|
||||
|
||||
@property
|
||||
def api_key(self) -> str:
|
||||
"""Resolve API key at call time so env/config changes are picked up."""
|
||||
return self._init_api_key or os.environ.get("BRAVE_API_KEY", "")
|
||||
|
||||
async def execute(self, query: str, count: int | None = None, **kwargs: Any) -> str:
|
||||
if not self.api_key:
|
||||
return (
|
||||
"Error: Brave Search API key not configured. Set it in "
|
||||
"~/.nanobot/config.json under tools.web.search.apiKey "
|
||||
"(or export BRAVE_API_KEY), then restart the gateway."
|
||||
)
|
||||
provider = self.config.provider.strip().lower() or "brave"
|
||||
n = min(max(count or self.config.max_results, 1), 10)
|
||||
|
||||
if provider == "duckduckgo":
|
||||
return await self._search_duckduckgo(query, n)
|
||||
elif provider == "tavily":
|
||||
return await self._search_tavily(query, n)
|
||||
elif provider == "searxng":
|
||||
return await self._search_searxng(query, n)
|
||||
elif provider == "jina":
|
||||
return await self._search_jina(query, n)
|
||||
elif provider == "brave":
|
||||
return await self._search_brave(query, n)
|
||||
else:
|
||||
return f"Error: unknown search provider '{provider}'"
|
||||
|
||||
async def _search_brave(self, query: str, n: int) -> str:
|
||||
api_key = self.config.api_key or os.environ.get("BRAVE_API_KEY", "")
|
||||
if not api_key:
|
||||
logger.warning("BRAVE_API_KEY not set, falling back to DuckDuckGo")
|
||||
return await self._search_duckduckgo(query, n)
|
||||
try:
|
||||
n = min(max(count or self.max_results, 1), 10)
|
||||
logger.debug("WebSearch: {}", "proxy enabled" if self.proxy else "direct connection")
|
||||
async with httpx.AsyncClient(proxy=self.proxy) as client:
|
||||
r = await client.get(
|
||||
"https://api.search.brave.com/res/v1/web/search",
|
||||
params={"q": query, "count": n},
|
||||
headers={"Accept": "application/json", "X-Subscription-Token": self.api_key},
|
||||
timeout=10.0
|
||||
headers={"Accept": "application/json", "X-Subscription-Token": api_key},
|
||||
timeout=10.0,
|
||||
)
|
||||
r.raise_for_status()
|
||||
|
||||
results = r.json().get("web", {}).get("results", [])[:n]
|
||||
if not results:
|
||||
return f"No results for: {query}"
|
||||
|
||||
lines = [f"Results for: {query}\n"]
|
||||
for i, item in enumerate(results, 1):
|
||||
lines.append(f"{i}. {item.get('title', '')}\n {item.get('url', '')}")
|
||||
if desc := item.get("description"):
|
||||
lines.append(f" {desc}")
|
||||
return "\n".join(lines)
|
||||
except httpx.ProxyError as e:
|
||||
logger.error("WebSearch proxy error: {}", e)
|
||||
return f"Proxy error: {e}"
|
||||
items = [
|
||||
{"title": x.get("title", ""), "url": x.get("url", ""), "content": x.get("description", "")}
|
||||
for x in r.json().get("web", {}).get("results", [])
|
||||
]
|
||||
return _format_results(query, items, n)
|
||||
except Exception as e:
|
||||
logger.error("WebSearch error: {}", e)
|
||||
return f"Error: {e}"
|
||||
|
||||
async def _search_tavily(self, query: str, n: int) -> str:
    """Search via the Tavily API.

    Resolves the key from config first, then the TAVILY_API_KEY env var;
    without a key it falls back to DuckDuckGo instead of erroring out.
    """
    api_key = self.config.api_key or os.environ.get("TAVILY_API_KEY", "")
    if not api_key:
        logger.warning("TAVILY_API_KEY not set, falling back to DuckDuckGo")
        return await self._search_duckduckgo(query, n)
    try:
        async with httpx.AsyncClient(proxy=self.proxy) as client:
            r = await client.post(
                "https://api.tavily.com/search",
                headers={"Authorization": f"Bearer {api_key}"},
                json={"query": query, "max_results": n},
                timeout=15.0,
            )
            r.raise_for_status()
            return _format_results(query, r.json().get("results", []), n)
    except Exception as e:
        # Log before returning, matching _search_brave, so provider
        # failures are visible in the gateway logs and not just to the LLM.
        logger.error("WebSearch (tavily) error: {}", e)
        return f"Error: {e}"
|
||||
|
||||
async def _search_searxng(self, query: str, n: int) -> str:
    """Search via a self-hosted SearXNG instance.

    Resolves the base URL from config first, then SEARXNG_BASE_URL; without
    one it falls back to DuckDuckGo. The derived endpoint is validated
    before any request is made.
    """
    base_url = (self.config.base_url or os.environ.get("SEARXNG_BASE_URL", "")).strip()
    if not base_url:
        logger.warning("SEARXNG_BASE_URL not set, falling back to DuckDuckGo")
        return await self._search_duckduckgo(query, n)
    endpoint = f"{base_url.rstrip('/')}/search"
    is_valid, error_msg = _validate_url(endpoint)
    if not is_valid:
        return f"Error: invalid SearXNG URL: {error_msg}"
    try:
        async with httpx.AsyncClient(proxy=self.proxy) as client:
            r = await client.get(
                endpoint,
                params={"q": query, "format": "json"},
                headers={"User-Agent": USER_AGENT},
                timeout=10.0,
            )
            r.raise_for_status()
            return _format_results(query, r.json().get("results", []), n)
    except Exception as e:
        # Log before returning, matching _search_brave, so provider
        # failures are visible in the gateway logs and not just to the LLM.
        logger.error("WebSearch (searxng) error: {}", e)
        return f"Error: {e}"
|
||||
|
||||
async def _search_jina(self, query: str, n: int) -> str:
    """Search via the Jina s.jina.ai search API.

    Resolves the key from config first, then JINA_API_KEY; without a key it
    falls back to DuckDuckGo. Result snippets are capped at 500 chars since
    Jina returns full page content.
    """
    api_key = self.config.api_key or os.environ.get("JINA_API_KEY", "")
    if not api_key:
        logger.warning("JINA_API_KEY not set, falling back to DuckDuckGo")
        return await self._search_duckduckgo(query, n)
    try:
        headers = {"Accept": "application/json", "Authorization": f"Bearer {api_key}"}
        async with httpx.AsyncClient(proxy=self.proxy) as client:
            r = await client.get(
                # Plain literal: the previous f-string had no placeholders.
                "https://s.jina.ai/",
                params={"q": query},
                headers=headers,
                timeout=15.0,
            )
            r.raise_for_status()
            data = r.json().get("data", [])[:n]
            items = [
                {"title": d.get("title", ""), "url": d.get("url", ""), "content": d.get("content", "")[:500]}
                for d in data
            ]
            return _format_results(query, items, n)
    except Exception as e:
        # Log before returning, matching _search_brave, so provider
        # failures are visible in the gateway logs and not just to the LLM.
        logger.error("WebSearch (jina) error: {}", e)
        return f"Error: {e}"
|
||||
|
||||
async def _search_duckduckgo(self, query: str, n: int) -> str:
    """Query DuckDuckGo through the ddgs package (no credentials required).

    The blocking ddgs call runs in a worker thread so the event loop stays
    responsive; results are normalized into the shared item shape.
    """
    try:
        from ddgs import DDGS

        ddg_client = DDGS(timeout=10)
        hits = await asyncio.to_thread(ddg_client.text, query, max_results=n)
        if not hits:
            return f"No results for: {query}"
        normalized = [
            {"title": h.get("title", ""), "url": h.get("href", ""), "content": h.get("body", "")}
            for h in hits
        ]
        return _format_results(query, normalized, n)
    except Exception as e:
        logger.warning("DuckDuckGo search failed: {}", e)
        return f"Error: DuckDuckGo search failed ({e})"
|
||||
|
||||
|
||||
class WebFetchTool(Tool):
|
||||
"""Fetch and extract content from a URL using Readability."""
|
||||
"""Fetch and extract content from a URL."""
|
||||
|
||||
name = "web_fetch"
|
||||
description = "Fetch URL and extract readable content (HTML → markdown/text)."
|
||||
@@ -116,9 +215,9 @@ class WebFetchTool(Tool):
|
||||
"properties": {
|
||||
"url": {"type": "string", "description": "URL to fetch"},
|
||||
"extractMode": {"type": "string", "enum": ["markdown", "text"], "default": "markdown"},
|
||||
"maxChars": {"type": "integer", "minimum": 100}
|
||||
"maxChars": {"type": "integer", "minimum": 100},
|
||||
},
|
||||
"required": ["url"]
|
||||
"required": ["url"],
|
||||
}
|
||||
|
||||
def __init__(self, max_chars: int = 50000, proxy: str | None = None):
|
||||
@@ -126,15 +225,55 @@ class WebFetchTool(Tool):
|
||||
self.proxy = proxy
|
||||
|
||||
async def execute(self, url: str, extractMode: str = "markdown", maxChars: int | None = None, **kwargs: Any) -> str:
|
||||
from readability import Document
|
||||
|
||||
max_chars = maxChars or self.max_chars
|
||||
is_valid, error_msg = _validate_url(url)
|
||||
if not is_valid:
|
||||
return json.dumps({"error": f"URL validation failed: {error_msg}", "url": url}, ensure_ascii=False)
|
||||
|
||||
result = await self._fetch_jina(url, max_chars)
|
||||
if result is None:
|
||||
result = await self._fetch_readability(url, extractMode, max_chars)
|
||||
return result
|
||||
|
||||
async def _fetch_jina(self, url: str, max_chars: int) -> str | None:
    """Try fetching via Jina Reader API. Returns None on failure.

    Uses JINA_API_KEY from the environment when present (higher rate
    limits); a 429 or any exception yields None so the caller can fall
    back to local readability extraction.
    """
    try:
        request_headers = {"Accept": "application/json", "User-Agent": USER_AGENT}
        token = os.environ.get("JINA_API_KEY", "")
        if token:
            request_headers["Authorization"] = f"Bearer {token}"
        async with httpx.AsyncClient(proxy=self.proxy, timeout=20.0) as client:
            resp = await client.get(f"https://r.jina.ai/{url}", headers=request_headers)
        if resp.status_code == 429:
            logger.debug("Jina Reader rate limited, falling back to readability")
            return None
        resp.raise_for_status()

        payload = resp.json().get("data", {})
        page_title = payload.get("title", "")
        body = payload.get("content", "")
        if not body:
            return None

        if page_title:
            body = f"# {page_title}\n\n{body}"
        was_cut = len(body) > max_chars
        if was_cut:
            body = body[:max_chars]

        return json.dumps({
            "url": url, "finalUrl": payload.get("url", url), "status": resp.status_code,
            "extractor": "jina", "truncated": was_cut, "length": len(body), "text": body,
        }, ensure_ascii=False)
    except Exception as e:
        logger.debug("Jina Reader failed for {}, falling back to readability: {}", url, e)
        return None
|
||||
|
||||
async def _fetch_readability(self, url: str, extract_mode: str, max_chars: int) -> str:
|
||||
"""Local fallback using readability-lxml."""
|
||||
from readability import Document
|
||||
|
||||
try:
|
||||
logger.debug("WebFetch: {}", "proxy enabled" if self.proxy else "direct connection")
|
||||
async with httpx.AsyncClient(
|
||||
follow_redirects=True,
|
||||
max_redirects=MAX_REDIRECTS,
|
||||
@@ -150,17 +289,20 @@ class WebFetchTool(Tool):
|
||||
text, extractor = json.dumps(r.json(), indent=2, ensure_ascii=False), "json"
|
||||
elif "text/html" in ctype or r.text[:256].lower().startswith(("<!doctype", "<html")):
|
||||
doc = Document(r.text)
|
||||
content = self._to_markdown(doc.summary()) if extractMode == "markdown" else _strip_tags(doc.summary())
|
||||
content = self._to_markdown(doc.summary()) if extract_mode == "markdown" else _strip_tags(doc.summary())
|
||||
text = f"# {doc.title()}\n\n{content}" if doc.title() else content
|
||||
extractor = "readability"
|
||||
else:
|
||||
text, extractor = r.text, "raw"
|
||||
|
||||
truncated = len(text) > max_chars
|
||||
if truncated: text = text[:max_chars]
|
||||
if truncated:
|
||||
text = text[:max_chars]
|
||||
|
||||
return json.dumps({"url": url, "finalUrl": str(r.url), "status": r.status_code,
|
||||
"extractor": extractor, "truncated": truncated, "length": len(text), "text": text}, ensure_ascii=False)
|
||||
return json.dumps({
|
||||
"url": url, "finalUrl": str(r.url), "status": r.status_code,
|
||||
"extractor": extractor, "truncated": truncated, "length": len(text), "text": text,
|
||||
}, ensure_ascii=False)
|
||||
except httpx.ProxyError as e:
|
||||
logger.error("WebFetch proxy error for {}: {}", url, e)
|
||||
return json.dumps({"error": f"Proxy error: {e}", "url": url}, ensure_ascii=False)
|
||||
@@ -168,11 +310,10 @@ class WebFetchTool(Tool):
|
||||
logger.error("WebFetch error for {}: {}", url, e)
|
||||
return json.dumps({"error": str(e), "url": url}, ensure_ascii=False)
|
||||
|
||||
def _to_markdown(self, html: str) -> str:
|
||||
def _to_markdown(self, html_content: str) -> str:
|
||||
"""Convert HTML to markdown."""
|
||||
# Convert links, headings, lists before stripping tags
|
||||
text = re.sub(r'<a\s+[^>]*href=["\']([^"\']+)["\'][^>]*>([\s\S]*?)</a>',
|
||||
lambda m: f'[{_strip_tags(m[2])}]({m[1]})', html, flags=re.I)
|
||||
lambda m: f'[{_strip_tags(m[2])}]({m[1]})', html_content, flags=re.I)
|
||||
text = re.sub(r'<h([1-6])[^>]*>([\s\S]*?)</h\1>',
|
||||
lambda m: f'\n{"#" * int(m[1])} {_strip_tags(m[2])}\n', text, flags=re.I)
|
||||
text = re.sub(r'<li[^>]*>([\s\S]*?)</li>', lambda m: f'\n- {_strip_tags(m[1])}', text, flags=re.I)
|
||||
|
||||
@@ -395,7 +395,7 @@ def gateway(
|
||||
model=config.agents.defaults.model,
|
||||
max_iterations=config.agents.defaults.max_tool_iterations,
|
||||
context_window_tokens=config.agents.defaults.context_window_tokens,
|
||||
brave_api_key=config.tools.web.search.api_key or None,
|
||||
web_search_config=config.tools.web.search,
|
||||
web_proxy=config.tools.web.proxy or None,
|
||||
exec_config=config.tools.exec,
|
||||
cron_service=cron,
|
||||
@@ -578,7 +578,7 @@ def agent(
|
||||
model=config.agents.defaults.model,
|
||||
max_iterations=config.agents.defaults.max_tool_iterations,
|
||||
context_window_tokens=config.agents.defaults.context_window_tokens,
|
||||
brave_api_key=config.tools.web.search.api_key or None,
|
||||
web_search_config=config.tools.web.search,
|
||||
web_proxy=config.tools.web.proxy or None,
|
||||
exec_config=config.tools.exec,
|
||||
cron_service=cron,
|
||||
|
||||
@@ -310,7 +310,9 @@ class GatewayConfig(Base):
|
||||
class WebSearchConfig(Base):
|
||||
"""Web search tool configuration."""
|
||||
|
||||
api_key: str = "" # Brave Search API key
|
||||
provider: str = "brave" # brave, tavily, duckduckgo, searxng, jina
|
||||
api_key: str = ""
|
||||
base_url: str = "" # SearXNG base URL
|
||||
max_results: int = 5
|
||||
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ dependencies = [
|
||||
"websockets>=16.0,<17.0",
|
||||
"websocket-client>=1.9.0,<2.0.0",
|
||||
"httpx>=0.28.0,<1.0.0",
|
||||
"ddgs>=9.5.5,<10.0.0",
|
||||
"oauth-cli-kit>=0.1.3,<1.0.0",
|
||||
"loguru>=0.7.3,<1.0.0",
|
||||
"readability-lxml>=0.8.4,<1.0.0",
|
||||
|
||||
162
tests/test_web_search_tool.py
Normal file
162
tests/test_web_search_tool.py
Normal file
@@ -0,0 +1,162 @@
|
||||
"""Tests for multi-provider web search."""
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from nanobot.agent.tools.web import WebSearchTool
|
||||
from nanobot.config.schema import WebSearchConfig
|
||||
|
||||
|
||||
def _tool(provider: str = "brave", api_key: str = "", base_url: str = "") -> WebSearchTool:
    """Construct a WebSearchTool wired to an in-memory WebSearchConfig."""
    cfg = WebSearchConfig(provider=provider, api_key=api_key, base_url=base_url)
    return WebSearchTool(config=cfg)
|
||||
|
||||
|
||||
def _response(status: int = 200, json: dict | None = None) -> httpx.Response:
    """Create an httpx.Response carrying a stub request.

    raise_for_status() needs a request object attached, which a bare
    Response constructed outside a client does not have.
    """
    resp = httpx.Response(status, json=json)
    resp._request = httpx.Request("GET", "https://mock")
    return resp
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_brave_search(monkeypatch):
    """Brave provider sends the subscription token and formats results."""

    async def fake_get(self, url, **kw):
        assert "brave" in url
        assert kw["headers"]["X-Subscription-Token"] == "brave-key"
        payload = {
            "web": {"results": [{"title": "NanoBot", "url": "https://example.com", "description": "AI assistant"}]}
        }
        return _response(json=payload)

    monkeypatch.setattr(httpx.AsyncClient, "get", fake_get)
    out = await _tool(provider="brave", api_key="brave-key").execute(query="nanobot", count=1)
    assert "NanoBot" in out
    assert "https://example.com" in out
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_tavily_search(monkeypatch):
    """Tavily provider posts with a bearer token and formats results."""

    async def fake_post(self, url, **kw):
        assert "tavily" in url
        assert kw["headers"]["Authorization"] == "Bearer tavily-key"
        payload = {
            "results": [{"title": "OpenClaw", "url": "https://openclaw.io", "content": "Framework"}]
        }
        return _response(json=payload)

    monkeypatch.setattr(httpx.AsyncClient, "post", fake_post)
    out = await _tool(provider="tavily", api_key="tavily-key").execute(query="openclaw")
    assert "OpenClaw" in out
    assert "https://openclaw.io" in out
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_searxng_search(monkeypatch):
    """SearXNG provider hits the configured base URL and formats results."""

    async def fake_get(self, url, **kw):
        assert "searx.example" in url
        payload = {
            "results": [{"title": "Result", "url": "https://example.com", "content": "SearXNG result"}]
        }
        return _response(json=payload)

    monkeypatch.setattr(httpx.AsyncClient, "get", fake_get)
    out = await _tool(provider="searxng", base_url="https://searx.example").execute(query="test")
    assert "Result" in out
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_duckduckgo_search(monkeypatch):
    """DuckDuckGo provider returns formatted results from ddgs."""

    class MockDDGS:
        def __init__(self, **kw):
            pass

        def text(self, query, max_results=5):
            return [{"title": "DDG Result", "href": "https://ddg.example", "body": "From DuckDuckGo"}]

    # One patch point is enough: the tool imports DDGS from the ddgs module
    # at call time, so patching "ddgs.DDGS" (as every sibling test does)
    # intercepts it. The extra setattr variants were redundant.
    monkeypatch.setattr("ddgs.DDGS", MockDDGS)

    tool = _tool(provider="duckduckgo")
    result = await tool.execute(query="hello")
    assert "DDG Result" in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_brave_fallback_to_duckduckgo_when_no_key(monkeypatch):
    """Without a Brave key the tool silently falls back to DuckDuckGo."""

    class FakeDDGS:
        def __init__(self, **kw):
            pass

        def text(self, query, max_results=5):
            return [{"title": "Fallback", "href": "https://ddg.example", "body": "DuckDuckGo fallback"}]

    monkeypatch.setattr("ddgs.DDGS", FakeDDGS)
    monkeypatch.delenv("BRAVE_API_KEY", raising=False)

    out = await _tool(provider="brave", api_key="").execute(query="test")
    assert "Fallback" in out
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_jina_search(monkeypatch):
    """Jina provider sends a bearer token to s.jina.ai and formats results."""

    async def fake_get(self, url, **kw):
        assert "s.jina.ai" in str(url)
        assert kw["headers"]["Authorization"] == "Bearer jina-key"
        payload = {
            "data": [{"title": "Jina Result", "url": "https://jina.ai", "content": "AI search"}]
        }
        return _response(json=payload)

    monkeypatch.setattr(httpx.AsyncClient, "get", fake_get)
    out = await _tool(provider="jina", api_key="jina-key").execute(query="test")
    assert "Jina Result" in out
    assert "https://jina.ai" in out
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_unknown_provider():
    """An unrecognized provider name yields an error mentioning it."""
    out = await _tool(provider="unknown").execute(query="test")
    assert "unknown" in out
    assert "Error" in out
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_default_provider_is_brave(monkeypatch):
    """An empty provider string resolves to the Brave backend."""

    async def fake_get(self, url, **kw):
        assert "brave" in url
        return _response(json={"web": {"results": []}})

    monkeypatch.setattr(httpx.AsyncClient, "get", fake_get)
    out = await _tool(provider="", api_key="test-key").execute(query="test")
    assert "No results" in out
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_searxng_no_base_url_falls_back(monkeypatch):
    """SearXNG without a base URL falls back to DuckDuckGo."""

    class FakeDDGS:
        def __init__(self, **kw):
            pass

        def text(self, query, max_results=5):
            return [{"title": "Fallback", "href": "https://ddg.example", "body": "fallback"}]

    monkeypatch.setattr("ddgs.DDGS", FakeDDGS)
    monkeypatch.delenv("SEARXNG_BASE_URL", raising=False)

    out = await _tool(provider="searxng", base_url="").execute(query="test")
    assert "Fallback" in out
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_searxng_invalid_url():
    """A malformed SearXNG base URL is rejected with an error, no request made."""
    out = await _tool(provider="searxng", base_url="not-a-url").execute(query="test")
    assert "Error" in out
|
||||
Reference in New Issue
Block a user