From ca5047b602f6de926e052e0f391fb822c667fb8d Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Fri, 13 Mar 2026 05:44:16 +0000 Subject: [PATCH] feat(web): multi-provider web search + Jina Reader fetch --- README.md | 100 +++++++++++++- nanobot/agent/loop.py | 13 +- nanobot/agent/subagent.py | 9 +- nanobot/agent/tools/web.py | 241 +++++++++++++++++++++++++++------- nanobot/cli/commands.py | 4 +- nanobot/config/schema.py | 4 +- pyproject.toml | 1 + tests/test_web_search_tool.py | 162 +++++++++++++++++++++++ 8 files changed, 470 insertions(+), 64 deletions(-) create mode 100644 tests/test_web_search_tool.py diff --git a/README.md b/README.md index 634222d..a9bad54 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,9 @@ nanobot channels login > [!TIP] > Set your API key in `~/.nanobot/config.json`. -> Get API keys: [OpenRouter](https://openrouter.ai/keys) (Global) · [Brave Search](https://brave.com/search/api/) (optional, for web search) +> Get API keys: [OpenRouter](https://openrouter.ai/keys) (Global) +> +> For web search capability setup, please see [Web Search](#web-search). **1. Initialize** @@ -960,6 +962,102 @@ That's it! Environment variables, model prefixing, config matching, and `nanobot +### Web Search + +nanobot supports multiple web search providers. Configure in `~/.nanobot/config.json` under `tools.web.search`. + +| Provider | Config fields | Env var fallback | Free | +|----------|--------------|------------------|------| +| `brave` (default) | `apiKey` | `BRAVE_API_KEY` | No | +| `tavily` | `apiKey` | `TAVILY_API_KEY` | No | +| `jina` | `apiKey` | `JINA_API_KEY` | Free tier (10M tokens) | +| `searxng` | `baseUrl` | `SEARXNG_BASE_URL` | Yes (self-hosted) | +| `duckduckgo` | — | — | Yes | + +When credentials are missing, nanobot automatically falls back to DuckDuckGo. + +**Brave** (default): +```json +{ + "tools": { + "web": { + "search": { + "provider": "brave", + "apiKey": "BSA..." 
+ } + } + } +} +``` + +**Tavily:** +```json +{ + "tools": { + "web": { + "search": { + "provider": "tavily", + "apiKey": "tvly-..." + } + } + } +} +``` + +**Jina** (free tier with 10M tokens): +```json +{ + "tools": { + "web": { + "search": { + "provider": "jina", + "apiKey": "jina_..." + } + } + } +} +``` + +**SearXNG** (self-hosted, no API key needed): +```json +{ + "tools": { + "web": { + "search": { + "provider": "searxng", + "baseUrl": "https://searx.example" + } + } + } +} +``` + +**DuckDuckGo** (zero config): +```json +{ + "tools": { + "web": { + "search": { + "provider": "duckduckgo" + } + } + } +} +``` + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `provider` | string | `"brave"` | Search backend: `brave`, `tavily`, `jina`, `searxng`, `duckduckgo` | +| `apiKey` | string | `""` | API key for Brave, Tavily, or Jina | +| `baseUrl` | string | `""` | Base URL for SearXNG | +| `maxResults` | integer | `5` | Results per search (1–10) | + +> [!TIP] +> Use `proxy` in `tools.web` to route all web requests (search + fetch) through a proxy: +> ```json +> { "tools": { "web": { "proxy": "http://127.0.0.1:7890" } } } +> ``` + ### MCP (Model Context Protocol) > [!TIP] diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index b56017a..e05a73e 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -29,7 +29,7 @@ from nanobot.providers.base import LLMProvider from nanobot.session.manager import Session, SessionManager if TYPE_CHECKING: - from nanobot.config.schema import ChannelsConfig, ExecToolConfig + from nanobot.config.schema import ChannelsConfig, ExecToolConfig, WebSearchConfig from nanobot.cron.service import CronService @@ -55,7 +55,7 @@ class AgentLoop: model: str | None = None, max_iterations: int = 40, context_window_tokens: int = 65_536, - brave_api_key: str | None = None, + web_search_config: WebSearchConfig | None = None, web_proxy: str | None = None, exec_config: ExecToolConfig | None = None, cron_service: 
CronService | None = None, @@ -64,7 +64,8 @@ class AgentLoop: mcp_servers: dict | None = None, channels_config: ChannelsConfig | None = None, ): - from nanobot.config.schema import ExecToolConfig + from nanobot.config.schema import ExecToolConfig, WebSearchConfig + self.bus = bus self.channels_config = channels_config self.provider = provider @@ -72,7 +73,7 @@ class AgentLoop: self.model = model or provider.get_default_model() self.max_iterations = max_iterations self.context_window_tokens = context_window_tokens - self.brave_api_key = brave_api_key + self.web_search_config = web_search_config or WebSearchConfig() self.web_proxy = web_proxy self.exec_config = exec_config or ExecToolConfig() self.cron_service = cron_service @@ -86,7 +87,7 @@ class AgentLoop: workspace=workspace, bus=bus, model=self.model, - brave_api_key=brave_api_key, + web_search_config=self.web_search_config, web_proxy=web_proxy, exec_config=self.exec_config, restrict_to_workspace=restrict_to_workspace, @@ -121,7 +122,7 @@ class AgentLoop: restrict_to_workspace=self.restrict_to_workspace, path_append=self.exec_config.path_append, )) - self.tools.register(WebSearchTool(api_key=self.brave_api_key, proxy=self.web_proxy)) + self.tools.register(WebSearchTool(config=self.web_search_config, proxy=self.web_proxy)) self.tools.register(WebFetchTool(proxy=self.web_proxy)) self.tools.register(MessageTool(send_callback=self.bus.publish_outbound)) self.tools.register(SpawnTool(manager=self.subagents)) diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py index eb3b3b0..b6bef68 100644 --- a/nanobot/agent/subagent.py +++ b/nanobot/agent/subagent.py @@ -28,17 +28,18 @@ class SubagentManager: workspace: Path, bus: MessageBus, model: str | None = None, - brave_api_key: str | None = None, + web_search_config: "WebSearchConfig | None" = None, web_proxy: str | None = None, exec_config: "ExecToolConfig | None" = None, restrict_to_workspace: bool = False, ): - from nanobot.config.schema import 
ExecToolConfig + from nanobot.config.schema import ExecToolConfig, WebSearchConfig + self.provider = provider self.workspace = workspace self.bus = bus self.model = model or provider.get_default_model() - self.brave_api_key = brave_api_key + self.web_search_config = web_search_config or WebSearchConfig() self.web_proxy = web_proxy self.exec_config = exec_config or ExecToolConfig() self.restrict_to_workspace = restrict_to_workspace @@ -101,7 +102,7 @@ class SubagentManager: restrict_to_workspace=self.restrict_to_workspace, path_append=self.exec_config.path_append, )) - tools.register(WebSearchTool(api_key=self.brave_api_key, proxy=self.web_proxy)) + tools.register(WebSearchTool(config=self.web_search_config, proxy=self.web_proxy)) tools.register(WebFetchTool(proxy=self.web_proxy)) system_prompt = self._build_subagent_prompt() diff --git a/nanobot/agent/tools/web.py b/nanobot/agent/tools/web.py index 0d8f4d1..f1363e6 100644 --- a/nanobot/agent/tools/web.py +++ b/nanobot/agent/tools/web.py @@ -1,10 +1,13 @@ """Web tools: web_search and web_fetch.""" +from __future__ import annotations + +import asyncio import html import json import os import re -from typing import Any +from typing import TYPE_CHECKING, Any from urllib.parse import urlparse import httpx @@ -12,6 +15,9 @@ from loguru import logger from nanobot.agent.tools.base import Tool +if TYPE_CHECKING: + from nanobot.config.schema import WebSearchConfig + # Shared constants USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_2) AppleWebKit/537.36" MAX_REDIRECTS = 5 # Limit redirects to prevent DoS attacks @@ -44,8 +50,22 @@ def _validate_url(url: str) -> tuple[bool, str]: return False, str(e) +def _format_results(query: str, items: list[dict[str, Any]], n: int) -> str: + """Format provider results into shared plaintext output.""" + if not items: + return f"No results for: {query}" + lines = [f"Results for: {query}\n"] + for i, item in enumerate(items[:n], 1): + title = 
_normalize(_strip_tags(item.get("title", ""))) + snippet = _normalize(_strip_tags(item.get("content", ""))) + lines.append(f"{i}. {title}\n {item.get('url', '')}") + if snippet: + lines.append(f" {snippet}") + return "\n".join(lines) + + class WebSearchTool(Tool): - """Search the web using Brave Search API.""" + """Search the web using configured provider.""" name = "web_search" description = "Search the web. Returns titles, URLs, and snippets." @@ -53,61 +73,140 @@ class WebSearchTool(Tool): "type": "object", "properties": { "query": {"type": "string", "description": "Search query"}, - "count": {"type": "integer", "description": "Results (1-10)", "minimum": 1, "maximum": 10} + "count": {"type": "integer", "description": "Results (1-10)", "minimum": 1, "maximum": 10}, }, - "required": ["query"] + "required": ["query"], } - def __init__(self, api_key: str | None = None, max_results: int = 5, proxy: str | None = None): - self._init_api_key = api_key - self.max_results = max_results + def __init__(self, config: WebSearchConfig | None = None, proxy: str | None = None): + from nanobot.config.schema import WebSearchConfig + + self.config = config if config is not None else WebSearchConfig() self.proxy = proxy - @property - def api_key(self) -> str: - """Resolve API key at call time so env/config changes are picked up.""" - return self._init_api_key or os.environ.get("BRAVE_API_KEY", "") - async def execute(self, query: str, count: int | None = None, **kwargs: Any) -> str: - if not self.api_key: - return ( - "Error: Brave Search API key not configured. Set it in " - "~/.nanobot/config.json under tools.web.search.apiKey " - "(or export BRAVE_API_KEY), then restart the gateway." 
- ) + provider = self.config.provider.strip().lower() or "brave" + n = min(max(count or self.config.max_results, 1), 10) + if provider == "duckduckgo": + return await self._search_duckduckgo(query, n) + elif provider == "tavily": + return await self._search_tavily(query, n) + elif provider == "searxng": + return await self._search_searxng(query, n) + elif provider == "jina": + return await self._search_jina(query, n) + elif provider == "brave": + return await self._search_brave(query, n) + else: + return f"Error: unknown search provider '{provider}'" + + async def _search_brave(self, query: str, n: int) -> str: + api_key = self.config.api_key or os.environ.get("BRAVE_API_KEY", "") + if not api_key: + logger.warning("BRAVE_API_KEY not set, falling back to DuckDuckGo") + return await self._search_duckduckgo(query, n) try: - n = min(max(count or self.max_results, 1), 10) - logger.debug("WebSearch: {}", "proxy enabled" if self.proxy else "direct connection") async with httpx.AsyncClient(proxy=self.proxy) as client: r = await client.get( "https://api.search.brave.com/res/v1/web/search", params={"q": query, "count": n}, - headers={"Accept": "application/json", "X-Subscription-Token": self.api_key}, - timeout=10.0 + headers={"Accept": "application/json", "X-Subscription-Token": api_key}, + timeout=10.0, ) r.raise_for_status() - - results = r.json().get("web", {}).get("results", [])[:n] - if not results: - return f"No results for: {query}" - - lines = [f"Results for: {query}\n"] - for i, item in enumerate(results, 1): - lines.append(f"{i}. 
{item.get('title', '')}\n {item.get('url', '')}") - if desc := item.get("description"): - lines.append(f" {desc}") - return "\n".join(lines) - except httpx.ProxyError as e: - logger.error("WebSearch proxy error: {}", e) - return f"Proxy error: {e}" + items = [ + {"title": x.get("title", ""), "url": x.get("url", ""), "content": x.get("description", "")} + for x in r.json().get("web", {}).get("results", []) + ] + return _format_results(query, items, n) except Exception as e: - logger.error("WebSearch error: {}", e) return f"Error: {e}" + async def _search_tavily(self, query: str, n: int) -> str: + api_key = self.config.api_key or os.environ.get("TAVILY_API_KEY", "") + if not api_key: + logger.warning("TAVILY_API_KEY not set, falling back to DuckDuckGo") + return await self._search_duckduckgo(query, n) + try: + async with httpx.AsyncClient(proxy=self.proxy) as client: + r = await client.post( + "https://api.tavily.com/search", + headers={"Authorization": f"Bearer {api_key}"}, + json={"query": query, "max_results": n}, + timeout=15.0, + ) + r.raise_for_status() + return _format_results(query, r.json().get("results", []), n) + except Exception as e: + return f"Error: {e}" + + async def _search_searxng(self, query: str, n: int) -> str: + base_url = (self.config.base_url or os.environ.get("SEARXNG_BASE_URL", "")).strip() + if not base_url: + logger.warning("SEARXNG_BASE_URL not set, falling back to DuckDuckGo") + return await self._search_duckduckgo(query, n) + endpoint = f"{base_url.rstrip('/')}/search" + is_valid, error_msg = _validate_url(endpoint) + if not is_valid: + return f"Error: invalid SearXNG URL: {error_msg}" + try: + async with httpx.AsyncClient(proxy=self.proxy) as client: + r = await client.get( + endpoint, + params={"q": query, "format": "json"}, + headers={"User-Agent": USER_AGENT}, + timeout=10.0, + ) + r.raise_for_status() + return _format_results(query, r.json().get("results", []), n) + except Exception as e: + return f"Error: {e}" + + async def 
_search_jina(self, query: str, n: int) -> str: + api_key = self.config.api_key or os.environ.get("JINA_API_KEY", "") + if not api_key: + logger.warning("JINA_API_KEY not set, falling back to DuckDuckGo") + return await self._search_duckduckgo(query, n) + try: + headers = {"Accept": "application/json", "Authorization": f"Bearer {api_key}"} + async with httpx.AsyncClient(proxy=self.proxy) as client: + r = await client.get( + f"https://s.jina.ai/", + params={"q": query}, + headers=headers, + timeout=15.0, + ) + r.raise_for_status() + data = r.json().get("data", [])[:n] + items = [ + {"title": d.get("title", ""), "url": d.get("url", ""), "content": d.get("content", "")[:500]} + for d in data + ] + return _format_results(query, items, n) + except Exception as e: + return f"Error: {e}" + + async def _search_duckduckgo(self, query: str, n: int) -> str: + try: + from ddgs import DDGS + + ddgs = DDGS(timeout=10) + raw = await asyncio.to_thread(ddgs.text, query, max_results=n) + if not raw: + return f"No results for: {query}" + items = [ + {"title": r.get("title", ""), "url": r.get("href", ""), "content": r.get("body", "")} + for r in raw + ] + return _format_results(query, items, n) + except Exception as e: + logger.warning("DuckDuckGo search failed: {}", e) + return f"Error: DuckDuckGo search failed ({e})" + class WebFetchTool(Tool): - """Fetch and extract content from a URL using Readability.""" + """Fetch and extract content from a URL.""" name = "web_fetch" description = "Fetch URL and extract readable content (HTML → markdown/text)." 
@@ -116,9 +215,9 @@ class WebFetchTool(Tool): "properties": { "url": {"type": "string", "description": "URL to fetch"}, "extractMode": {"type": "string", "enum": ["markdown", "text"], "default": "markdown"}, - "maxChars": {"type": "integer", "minimum": 100} + "maxChars": {"type": "integer", "minimum": 100}, }, - "required": ["url"] + "required": ["url"], } def __init__(self, max_chars: int = 50000, proxy: str | None = None): @@ -126,15 +225,55 @@ class WebFetchTool(Tool): self.proxy = proxy async def execute(self, url: str, extractMode: str = "markdown", maxChars: int | None = None, **kwargs: Any) -> str: - from readability import Document - max_chars = maxChars or self.max_chars is_valid, error_msg = _validate_url(url) if not is_valid: return json.dumps({"error": f"URL validation failed: {error_msg}", "url": url}, ensure_ascii=False) + result = await self._fetch_jina(url, max_chars) + if result is None: + result = await self._fetch_readability(url, extractMode, max_chars) + return result + + async def _fetch_jina(self, url: str, max_chars: int) -> str | None: + """Try fetching via Jina Reader API. 
Returns None on failure.""" + try: + headers = {"Accept": "application/json", "User-Agent": USER_AGENT} + jina_key = os.environ.get("JINA_API_KEY", "") + if jina_key: + headers["Authorization"] = f"Bearer {jina_key}" + async with httpx.AsyncClient(proxy=self.proxy, timeout=20.0) as client: + r = await client.get(f"https://r.jina.ai/{url}", headers=headers) + if r.status_code == 429: + logger.debug("Jina Reader rate limited, falling back to readability") + return None + r.raise_for_status() + + data = r.json().get("data", {}) + title = data.get("title", "") + text = data.get("content", "") + if not text: + return None + + if title: + text = f"# {title}\n\n{text}" + truncated = len(text) > max_chars + if truncated: + text = text[:max_chars] + + return json.dumps({ + "url": url, "finalUrl": data.get("url", url), "status": r.status_code, + "extractor": "jina", "truncated": truncated, "length": len(text), "text": text, + }, ensure_ascii=False) + except Exception as e: + logger.debug("Jina Reader failed for {}, falling back to readability: {}", url, e) + return None + + async def _fetch_readability(self, url: str, extract_mode: str, max_chars: int) -> str: + """Local fallback using readability-lxml.""" + from readability import Document + try: - logger.debug("WebFetch: {}", "proxy enabled" if self.proxy else "direct connection") async with httpx.AsyncClient( follow_redirects=True, max_redirects=MAX_REDIRECTS, @@ -150,17 +289,20 @@ class WebFetchTool(Tool): text, extractor = json.dumps(r.json(), indent=2, ensure_ascii=False), "json" elif "text/html" in ctype or r.text[:256].lower().startswith((" max_chars - if truncated: text = text[:max_chars] + if truncated: + text = text[:max_chars] - return json.dumps({"url": url, "finalUrl": str(r.url), "status": r.status_code, - "extractor": extractor, "truncated": truncated, "length": len(text), "text": text}, ensure_ascii=False) + return json.dumps({ + "url": url, "finalUrl": str(r.url), "status": r.status_code, + "extractor": 
extractor, "truncated": truncated, "length": len(text), "text": text, + }, ensure_ascii=False) except httpx.ProxyError as e: logger.error("WebFetch proxy error for {}: {}", url, e) return json.dumps({"error": f"Proxy error: {e}", "url": url}, ensure_ascii=False) @@ -168,11 +310,10 @@ class WebFetchTool(Tool): logger.error("WebFetch error for {}: {}", url, e) return json.dumps({"error": str(e), "url": url}, ensure_ascii=False) - def _to_markdown(self, html: str) -> str: + def _to_markdown(self, html_content: str) -> str: """Convert HTML to markdown.""" - # Convert links, headings, lists before stripping tags text = re.sub(r']*href=["\']([^"\']+)["\'][^>]*>([\s\S]*?)', - lambda m: f'[{_strip_tags(m[2])}]({m[1]})', html, flags=re.I) + lambda m: f'[{_strip_tags(m[2])}]({m[1]})', html_content, flags=re.I) text = re.sub(r']*>([\s\S]*?)', lambda m: f'\n{"#" * int(m[1])} {_strip_tags(m[2])}\n', text, flags=re.I) text = re.sub(r']*>([\s\S]*?)', lambda m: f'\n- {_strip_tags(m[1])}', text, flags=re.I) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 7cc4fd5..06315bf 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -395,7 +395,7 @@ def gateway( model=config.agents.defaults.model, max_iterations=config.agents.defaults.max_tool_iterations, context_window_tokens=config.agents.defaults.context_window_tokens, - brave_api_key=config.tools.web.search.api_key or None, + web_search_config=config.tools.web.search, web_proxy=config.tools.web.proxy or None, exec_config=config.tools.exec, cron_service=cron, @@ -578,7 +578,7 @@ def agent( model=config.agents.defaults.model, max_iterations=config.agents.defaults.max_tool_iterations, context_window_tokens=config.agents.defaults.context_window_tokens, - brave_api_key=config.tools.web.search.api_key or None, + web_search_config=config.tools.web.search, web_proxy=config.tools.web.proxy or None, exec_config=config.tools.exec, cron_service=cron, diff --git a/nanobot/config/schema.py 
b/nanobot/config/schema.py index 4092eeb..2f70e05 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -310,7 +310,9 @@ class GatewayConfig(Base): class WebSearchConfig(Base): """Web search tool configuration.""" - api_key: str = "" # Brave Search API key + provider: str = "brave" # brave, tavily, duckduckgo, searxng, jina + api_key: str = "" + base_url: str = "" # SearXNG base URL max_results: int = 5 diff --git a/pyproject.toml b/pyproject.toml index dce9e26..0a81746 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "websockets>=16.0,<17.0", "websocket-client>=1.9.0,<2.0.0", "httpx>=0.28.0,<1.0.0", + "ddgs>=9.5.5,<10.0.0", "oauth-cli-kit>=0.1.3,<1.0.0", "loguru>=0.7.3,<1.0.0", "readability-lxml>=0.8.4,<1.0.0", diff --git a/tests/test_web_search_tool.py b/tests/test_web_search_tool.py new file mode 100644 index 0000000..02bf443 --- /dev/null +++ b/tests/test_web_search_tool.py @@ -0,0 +1,162 @@ +"""Tests for multi-provider web search.""" + +import httpx +import pytest + +from nanobot.agent.tools.web import WebSearchTool +from nanobot.config.schema import WebSearchConfig + + +def _tool(provider: str = "brave", api_key: str = "", base_url: str = "") -> WebSearchTool: + return WebSearchTool(config=WebSearchConfig(provider=provider, api_key=api_key, base_url=base_url)) + + +def _response(status: int = 200, json: dict | None = None) -> httpx.Response: + """Build a mock httpx.Response with a dummy request attached.""" + r = httpx.Response(status, json=json) + r._request = httpx.Request("GET", "https://mock") + return r + + +@pytest.mark.asyncio +async def test_brave_search(monkeypatch): + async def mock_get(self, url, **kw): + assert "brave" in url + assert kw["headers"]["X-Subscription-Token"] == "brave-key" + return _response(json={ + "web": {"results": [{"title": "NanoBot", "url": "https://example.com", "description": "AI assistant"}]} + }) + + monkeypatch.setattr(httpx.AsyncClient, "get", mock_get) + tool = 
_tool(provider="brave", api_key="brave-key") + result = await tool.execute(query="nanobot", count=1) + assert "NanoBot" in result + assert "https://example.com" in result + + +@pytest.mark.asyncio +async def test_tavily_search(monkeypatch): + async def mock_post(self, url, **kw): + assert "tavily" in url + assert kw["headers"]["Authorization"] == "Bearer tavily-key" + return _response(json={ + "results": [{"title": "OpenClaw", "url": "https://openclaw.io", "content": "Framework"}] + }) + + monkeypatch.setattr(httpx.AsyncClient, "post", mock_post) + tool = _tool(provider="tavily", api_key="tavily-key") + result = await tool.execute(query="openclaw") + assert "OpenClaw" in result + assert "https://openclaw.io" in result + + +@pytest.mark.asyncio +async def test_searxng_search(monkeypatch): + async def mock_get(self, url, **kw): + assert "searx.example" in url + return _response(json={ + "results": [{"title": "Result", "url": "https://example.com", "content": "SearXNG result"}] + }) + + monkeypatch.setattr(httpx.AsyncClient, "get", mock_get) + tool = _tool(provider="searxng", base_url="https://searx.example") + result = await tool.execute(query="test") + assert "Result" in result + + +@pytest.mark.asyncio +async def test_duckduckgo_search(monkeypatch): + class MockDDGS: + def __init__(self, **kw): + pass + + def text(self, query, max_results=5): + return [{"title": "DDG Result", "href": "https://ddg.example", "body": "From DuckDuckGo"}] + + monkeypatch.setattr("nanobot.agent.tools.web.DDGS", MockDDGS, raising=False) + import nanobot.agent.tools.web as web_mod + monkeypatch.setattr(web_mod, "DDGS", MockDDGS, raising=False) + + from ddgs import DDGS + monkeypatch.setattr("ddgs.DDGS", MockDDGS) + + tool = _tool(provider="duckduckgo") + result = await tool.execute(query="hello") + assert "DDG Result" in result + + +@pytest.mark.asyncio +async def test_brave_fallback_to_duckduckgo_when_no_key(monkeypatch): + class MockDDGS: + def __init__(self, **kw): + pass + + def 
text(self, query, max_results=5): + return [{"title": "Fallback", "href": "https://ddg.example", "body": "DuckDuckGo fallback"}] + + monkeypatch.setattr("ddgs.DDGS", MockDDGS) + monkeypatch.delenv("BRAVE_API_KEY", raising=False) + + tool = _tool(provider="brave", api_key="") + result = await tool.execute(query="test") + assert "Fallback" in result + + +@pytest.mark.asyncio +async def test_jina_search(monkeypatch): + async def mock_get(self, url, **kw): + assert "s.jina.ai" in str(url) + assert kw["headers"]["Authorization"] == "Bearer jina-key" + return _response(json={ + "data": [{"title": "Jina Result", "url": "https://jina.ai", "content": "AI search"}] + }) + + monkeypatch.setattr(httpx.AsyncClient, "get", mock_get) + tool = _tool(provider="jina", api_key="jina-key") + result = await tool.execute(query="test") + assert "Jina Result" in result + assert "https://jina.ai" in result + + +@pytest.mark.asyncio +async def test_unknown_provider(): + tool = _tool(provider="unknown") + result = await tool.execute(query="test") + assert "unknown" in result + assert "Error" in result + + +@pytest.mark.asyncio +async def test_default_provider_is_brave(monkeypatch): + async def mock_get(self, url, **kw): + assert "brave" in url + return _response(json={"web": {"results": []}}) + + monkeypatch.setattr(httpx.AsyncClient, "get", mock_get) + tool = _tool(provider="", api_key="test-key") + result = await tool.execute(query="test") + assert "No results" in result + + +@pytest.mark.asyncio +async def test_searxng_no_base_url_falls_back(monkeypatch): + class MockDDGS: + def __init__(self, **kw): + pass + + def text(self, query, max_results=5): + return [{"title": "Fallback", "href": "https://ddg.example", "body": "fallback"}] + + monkeypatch.setattr("ddgs.DDGS", MockDDGS) + monkeypatch.delenv("SEARXNG_BASE_URL", raising=False) + + tool = _tool(provider="searxng", base_url="") + result = await tool.execute(query="test") + assert "Fallback" in result + + +@pytest.mark.asyncio +async 
def test_searxng_invalid_url(): + tool = _tool(provider="searxng", base_url="not-a-url") + result = await tool.execute(query="test") + assert "Error" in result