diff --git a/README.md b/README.md index 8dba2d7..efc2f7d 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,7 @@ nanobot channels login > [!TIP] > Set your API key in `~/.nanobot/config.json`. -> Get API keys: [OpenRouter](https://openrouter.ai/keys) (Global) · [Brave Search](https://brave.com/search/api/) (optional, for web search) +> Get API keys: [OpenRouter](https://openrouter.ai/keys) (Global) · [Brave Search](https://brave.com/search/api/) or a self-hosted SearXNG instance (optional, for web search) **1. Initialize** @@ -212,6 +212,42 @@ nanobot agent That's it! You have a working AI assistant in 2 minutes. +### Optional: Web Search + +`web_search` supports both Brave Search and SearXNG. + +**Brave Search** + +```json +{ + "tools": { + "web": { + "search": { + "provider": "brave", + "apiKey": "your-brave-api-key" + } + } + } +} +``` + +**SearXNG** + +```json +{ + "tools": { + "web": { + "search": { + "provider": "searxng", + "baseUrl": "http://localhost:8080" + } + } + } +} +``` + +`baseUrl` can point either to the SearXNG root (for example `http://localhost:8080`) or directly to `/search`. + ## 💬 Chat Apps Connect nanobot to your favorite chat platform. diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index ac8700c..89ff963 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -55,6 +55,9 @@ class AgentLoop: context_window_tokens: int = 65_536, brave_api_key: str | None = None, web_proxy: str | None = None, + web_search_provider: str = "brave", + web_search_base_url: str | None = None, + web_search_max_results: int = 5, exec_config: ExecToolConfig | None = None, cron_service: CronService | None = None, restrict_to_workspace: bool = False, @@ -72,6 +75,9 @@ class AgentLoop: self.context_window_tokens = context_window_tokens self.brave_api_key = brave_api_key self.web_proxy = web_proxy + self.web_search_provider = web_search_provider + self.web_search_base_url = web_search_base_url + self.web_search_max_results = web_search_max_results self.exec_config = exec_config or ExecToolConfig() self.cron_service = cron_service self.restrict_to_workspace = restrict_to_workspace @@ -86,6 +92,9 @@ class AgentLoop: model=self.model, brave_api_key=brave_api_key, web_proxy=web_proxy, + web_search_provider=web_search_provider, + web_search_base_url=web_search_base_url, + web_search_max_results=web_search_max_results, exec_config=self.exec_config, restrict_to_workspace=restrict_to_workspace, ) @@ -119,7 +128,15 @@ class AgentLoop: restrict_to_workspace=self.restrict_to_workspace, path_append=self.exec_config.path_append, )) - self.tools.register(WebSearchTool(api_key=self.brave_api_key, proxy=self.web_proxy)) + self.tools.register( + WebSearchTool( + provider=self.web_search_provider, + api_key=self.brave_api_key, + base_url=self.web_search_base_url, + max_results=self.web_search_max_results, + proxy=self.web_proxy, + ) + ) self.tools.register(WebFetchTool(proxy=self.web_proxy)) self.tools.register(MessageTool(send_callback=self.bus.publish_outbound)) self.tools.register(SpawnTool(manager=self.subagents)) diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py index eb3b3b0..bee90a4 100644 --- a/nanobot/agent/subagent.py +++ b/nanobot/agent/subagent.py @@ -30,6 +30,9 @@ class SubagentManager: model: str | None = None, brave_api_key: str | None = None, web_proxy: str | None = None, + web_search_provider: str = "brave", + web_search_base_url: str | None = None, + web_search_max_results: int = 5, exec_config: "ExecToolConfig | None" = None, restrict_to_workspace: bool = False, ): @@ -40,6 +43,9 @@ class SubagentManager: self.model = model or provider.get_default_model() self.brave_api_key = brave_api_key self.web_proxy = web_proxy + self.web_search_provider = web_search_provider + self.web_search_base_url = web_search_base_url + self.web_search_max_results = web_search_max_results self.exec_config = exec_config or ExecToolConfig() self.restrict_to_workspace = restrict_to_workspace self._running_tasks: dict[str, asyncio.Task[None]] = {} @@ -101,9 +107,17 @@ class SubagentManager: restrict_to_workspace=self.restrict_to_workspace, path_append=self.exec_config.path_append, )) - tools.register(WebSearchTool(api_key=self.brave_api_key, proxy=self.web_proxy)) + tools.register( + WebSearchTool( + provider=self.web_search_provider, + api_key=self.brave_api_key, + base_url=self.web_search_base_url, + max_results=self.web_search_max_results, + proxy=self.web_proxy, + ) + ) tools.register(WebFetchTool(proxy=self.web_proxy)) - + system_prompt = self._build_subagent_prompt() messages: list[dict[str, Any]] = [ {"role": "system", "content": system_prompt}, diff --git a/nanobot/agent/tools/web.py b/nanobot/agent/tools/web.py index 0d8f4d1..35c5c3b 100644 --- a/nanobot/agent/tools/web.py +++ b/nanobot/agent/tools/web.py @@ -45,7 +45,7 @@ def _validate_url(url: str) -> tuple[bool, str]: class WebSearchTool(Tool): - """Search the web using Brave Search API.""" + """Search the web using Brave Search or SearXNG.""" name = "web_search" description = "Search the web. Returns titles, URLs, and snippets." @@ -58,8 +58,17 @@ class WebSearchTool(Tool): "required": ["query"] } - def __init__(self, api_key: str | None = None, max_results: int = 5, proxy: str | None = None): + def __init__( + self, + provider: str = "brave", + api_key: str | None = None, + base_url: str | None = None, + max_results: int = 5, + proxy: str | None = None, + ): + self._init_provider = provider self._init_api_key = api_key + self._init_base_url = base_url self.max_results = max_results self.proxy = proxy @@ -68,7 +77,32 @@ class WebSearchTool(Tool): """Resolve API key at call time so env/config changes are picked up.""" return self._init_api_key or os.environ.get("BRAVE_API_KEY", "") + @property + def provider(self) -> str: + """Resolve search provider at call time so env/config changes are picked up.""" + return ( + self._init_provider or os.environ.get("WEB_SEARCH_PROVIDER", "brave") + ).strip().lower() + + @property + def base_url(self) -> str: + """Resolve SearXNG base URL at call time so env/config changes are picked up.""" + return (self._init_base_url or os.environ.get("SEARXNG_BASE_URL", "")).strip() + async def execute(self, query: str, count: int | None = None, **kwargs: Any) -> str: + provider = self.provider + n = min(max(count or self.max_results, 1), 10) + + if provider == "brave": + return await self._search_brave(query=query, count=n) + if provider == "searxng": + return await self._search_searxng(query=query, count=n) + return ( + f"Error: Unsupported web search provider '{provider}'. " + "Supported values: brave, searxng." + ) + + async def _search_brave(self, query: str, count: int) -> str: if not self.api_key: return ( "Error: Brave Search API key not configured. Set it in " @@ -77,27 +111,18 @@ class WebSearchTool(Tool): ) try: - n = min(max(count or self.max_results, 1), 10) logger.debug("WebSearch: {}", "proxy enabled" if self.proxy else "direct connection") async with httpx.AsyncClient(proxy=self.proxy) as client: r = await client.get( "https://api.search.brave.com/res/v1/web/search", - params={"q": query, "count": n}, + params={"q": query, "count": count}, headers={"Accept": "application/json", "X-Subscription-Token": self.api_key}, - timeout=10.0 + timeout=10.0, ) r.raise_for_status() - results = r.json().get("web", {}).get("results", [])[:n] - if not results: - return f"No results for: {query}" - - lines = [f"Results for: {query}\n"] - for i, item in enumerate(results, 1): - lines.append(f"{i}. {item.get('title', '')}\n {item.get('url', '')}") - if desc := item.get("description"): - lines.append(f" {desc}") - return "\n".join(lines) + results = r.json().get("web", {}).get("results", [])[:count] + return self._format_results(query, results, snippet_keys=("description",)) except httpx.ProxyError as e: logger.error("WebSearch proxy error: {}", e) return f"Proxy error: {e}" @@ -105,6 +130,62 @@ class WebSearchTool(Tool): logger.error("WebSearch error: {}", e) return f"Error: {e}" + async def _search_searxng(self, query: str, count: int) -> str: + if not self.base_url: + return ( + "Error: SearXNG base URL not configured. Set tools.web.search.baseUrl " + 'in ~/.nanobot/config.json (or export SEARXNG_BASE_URL), e.g. "http://localhost:8080".' + ) + + is_valid, error_msg = _validate_url(self.base_url) + if not is_valid: + return f"Error: Invalid SearXNG base URL: {error_msg}" + + try: + logger.debug("WebSearch: {}", "proxy enabled" if self.proxy else "direct connection") + async with httpx.AsyncClient(proxy=self.proxy) as client: + r = await client.get( + self._build_searxng_search_url(), + params={"q": query, "format": "json"}, + headers={"Accept": "application/json"}, + timeout=10.0, + ) + r.raise_for_status() + + results = r.json().get("results", [])[:count] + return self._format_results( + query, + results, + snippet_keys=("content", "snippet", "description"), + ) + except httpx.ProxyError as e: + logger.error("WebSearch proxy error: {}", e) + return f"Proxy error: {e}" + except Exception as e: + logger.error("WebSearch error: {}", e) + return f"Error: {e}" + + def _build_searxng_search_url(self) -> str: + base_url = self.base_url.rstrip("/") + return base_url if base_url.endswith("/search") else f"{base_url}/search" + + @staticmethod + def _format_results( + query: str, + results: list[dict[str, Any]], + snippet_keys: tuple[str, ...], + ) -> str: + if not results: + return f"No results for: {query}" + + lines = [f"Results for: {query}\n"] + for i, item in enumerate(results, 1): + lines.append(f"{i}. {item.get('title', '')}\n {item.get('url', '')}") + snippet = next((item.get(key) for key in snippet_keys if item.get(key)), None) + if snippet: + lines.append(f" {snippet}") + return "\n".join(lines) + class WebFetchTool(Tool): """Fetch and extract content from a URL using Readability.""" diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index dd5e60c..762e875 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -350,6 +350,9 @@ def gateway( context_window_tokens=config.agents.defaults.context_window_tokens, brave_api_key=config.tools.web.search.api_key or None, web_proxy=config.tools.web.proxy or None, + web_search_provider=config.tools.web.search.provider, + web_search_base_url=config.tools.web.search.base_url or None, + web_search_max_results=config.tools.web.search.max_results, exec_config=config.tools.exec, cron_service=cron, restrict_to_workspace=config.tools.restrict_to_workspace, @@ -533,6 +536,9 @@ def agent( context_window_tokens=config.agents.defaults.context_window_tokens, brave_api_key=config.tools.web.search.api_key or None, web_proxy=config.tools.web.proxy or None, + web_search_provider=config.tools.web.search.provider, + web_search_base_url=config.tools.web.search.base_url or None, + web_search_max_results=config.tools.web.search.max_results, exec_config=config.tools.exec, cron_service=cron, restrict_to_workspace=config.tools.restrict_to_workspace, diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index 1b26dd7..12e89a3 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -306,7 +306,9 @@ class GatewayConfig(Base): class WebSearchConfig(Base): """Web search tool configuration.""" - api_key: str = "" # Brave Search API key + provider: Literal["brave", "searxng"] = "brave" + api_key: str = "" # Brave Search API key (ignored by SearXNG) + base_url: str = "" # Required for SearXNG, e.g. "http://localhost:8080" max_results: int = 5 diff --git a/tests/test_commands.py b/tests/test_commands.py index 583ef6f..a31eeca 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -306,6 +306,20 @@ def test_agent_warns_about_deprecated_memory_window(mock_agent_runtime): assert "contextWindowTokens" in result.stdout +def test_agent_passes_web_search_config_to_agent_loop(mock_agent_runtime) -> None: + mock_agent_runtime["config"].tools.web.search.provider = "searxng" + mock_agent_runtime["config"].tools.web.search.base_url = "http://localhost:8080" + mock_agent_runtime["config"].tools.web.search.max_results = 7 + + result = runner.invoke(app, ["agent", "-m", "hello"]) + + assert result.exit_code == 0 + kwargs = mock_agent_runtime["agent_loop_cls"].call_args.kwargs + assert kwargs["web_search_provider"] == "searxng" + assert kwargs["web_search_base_url"] == "http://localhost:8080" + assert kwargs["web_search_max_results"] == 7 + + def test_gateway_uses_workspace_from_config_by_default(monkeypatch, tmp_path: Path) -> None: config_file = tmp_path / "instance" / "config.json" config_file.parent.mkdir(parents=True) diff --git a/tests/test_web_tools.py b/tests/test_web_tools.py new file mode 100644 index 0000000..2d7c8cd --- /dev/null +++ b/tests/test_web_tools.py @@ -0,0 +1,150 @@ +from typing import Any + +import pytest + +from nanobot.agent.tools import web as web_module +from nanobot.agent.tools.web import WebSearchTool +from nanobot.config.schema import Config + + +class _FakeResponse: + def __init__(self, payload: dict[str, Any]) -> None: + self._payload = payload + + def raise_for_status(self) -> None: + return None + + def json(self) -> dict[str, Any]: + return self._payload + + +@pytest.mark.asyncio +async def test_web_search_tool_brave_formats_results(monkeypatch: pytest.MonkeyPatch) -> None: + calls: list[dict[str, Any]] = [] + payload = { + "web": { + "results": [ + { + "title": "Nanobot", + "url": "https://example.com/nanobot", + "description": "A lightweight personal AI assistant.", + } + ] + } + } + + class _FakeAsyncClient: + def __init__(self, *args: Any, **kwargs: Any) -> None: + self.proxy = kwargs.get("proxy") + + async def __aenter__(self) -> "_FakeAsyncClient": + return self + + async def __aexit__(self, exc_type, exc, tb) -> None: + return None + + async def get( + self, + url: str, + *, + params: dict[str, Any] | None = None, + headers: dict[str, str] | None = None, + timeout: float | None = None, + ) -> _FakeResponse: + calls.append({"url": url, "params": params, "headers": headers, "timeout": timeout}) + return _FakeResponse(payload) + + monkeypatch.setattr(web_module.httpx, "AsyncClient", _FakeAsyncClient) + + tool = WebSearchTool(provider="brave", api_key="test-key") + result = await tool.execute(query="nanobot", count=3) + + assert "Nanobot" in result + assert "https://example.com/nanobot" in result + assert "A lightweight personal AI assistant." in result + assert calls == [ + { + "url": "https://api.search.brave.com/res/v1/web/search", + "params": {"q": "nanobot", "count": 3}, + "headers": {"Accept": "application/json", "X-Subscription-Token": "test-key"}, + "timeout": 10.0, + } + ] + + +@pytest.mark.asyncio +async def test_web_search_tool_searxng_formats_results(monkeypatch: pytest.MonkeyPatch) -> None: + calls: list[dict[str, Any]] = [] + payload = { + "results": [ + { + "title": "Nanobot Docs", + "url": "https://example.com/docs", + "content": "Self-hosted search works.", + } + ] + } + + class _FakeAsyncClient: + def __init__(self, *args: Any, **kwargs: Any) -> None: + self.proxy = kwargs.get("proxy") + + async def __aenter__(self) -> "_FakeAsyncClient": + return self + + async def __aexit__(self, exc_type, exc, tb) -> None: + return None + + async def get( + self, + url: str, + *, + params: dict[str, Any] | None = None, + headers: dict[str, str] | None = None, + timeout: float | None = None, + ) -> _FakeResponse: + calls.append({"url": url, "params": params, "headers": headers, "timeout": timeout}) + return _FakeResponse(payload) + + monkeypatch.setattr(web_module.httpx, "AsyncClient", _FakeAsyncClient) + + tool = WebSearchTool(provider="searxng", base_url="http://localhost:8080") + result = await tool.execute(query="nanobot", count=4) + + assert "Nanobot Docs" in result + assert "https://example.com/docs" in result + assert "Self-hosted search works." in result + assert calls == [ + { + "url": "http://localhost:8080/search", + "params": {"q": "nanobot", "format": "json"}, + "headers": {"Accept": "application/json"}, + "timeout": 10.0, + } + ] + + +def test_web_search_tool_searxng_keeps_explicit_search_path() -> None: + tool = WebSearchTool(provider="searxng", base_url="https://search.example.com/search/") + + assert tool._build_searxng_search_url() == "https://search.example.com/search" + + +def test_web_search_config_accepts_searxng_fields() -> None: + config = Config.model_validate( + { + "tools": { + "web": { + "search": { + "provider": "searxng", + "baseUrl": "http://localhost:8080", + "maxResults": 7, + } + } + } + } + ) + + assert config.tools.web.search.provider == "searxng" + assert config.tools.web.search.base_url == "http://localhost:8080" + assert config.tools.web.search.max_results == 7