fix: SearXNG search engine support

This commit is contained in:
Hua
2026-03-12 12:38:01 +08:00
parent fc4f7cca21
commit 7f1e42c3fd
8 changed files with 340 additions and 20 deletions

View File

@@ -169,7 +169,7 @@ nanobot channels login
> [!TIP]
> Set your API key in `~/.nanobot/config.json`.
> Get API keys: [OpenRouter](https://openrouter.ai/keys) (Global) · [Brave Search](https://brave.com/search/api/) (optional, for web search)
> Get API keys: [OpenRouter](https://openrouter.ai/keys) (Global) · [Brave Search](https://brave.com/search/api/) or a self-hosted SearXNG instance (optional, for web search)
**1. Initialize**
@@ -212,6 +212,42 @@ nanobot agent
That's it! You have a working AI assistant in 2 minutes.
### Optional: Web Search
`web_search` supports both Brave Search and SearXNG.
**Brave Search**
```json
{
"tools": {
"web": {
"search": {
"provider": "brave",
"apiKey": "your-brave-api-key"
}
}
}
}
```
**SearXNG**
```json
{
"tools": {
"web": {
"search": {
"provider": "searxng",
"baseUrl": "http://localhost:8080"
}
}
}
}
```
`baseUrl` can point either to the SearXNG root (for example `http://localhost:8080`) or directly to `/search`.
## 💬 Chat Apps
Connect nanobot to your favorite chat platform.

View File

@@ -55,6 +55,9 @@ class AgentLoop:
context_window_tokens: int = 65_536,
brave_api_key: str | None = None,
web_proxy: str | None = None,
web_search_provider: str = "brave",
web_search_base_url: str | None = None,
web_search_max_results: int = 5,
exec_config: ExecToolConfig | None = None,
cron_service: CronService | None = None,
restrict_to_workspace: bool = False,
@@ -72,6 +75,9 @@ class AgentLoop:
self.context_window_tokens = context_window_tokens
self.brave_api_key = brave_api_key
self.web_proxy = web_proxy
self.web_search_provider = web_search_provider
self.web_search_base_url = web_search_base_url
self.web_search_max_results = web_search_max_results
self.exec_config = exec_config or ExecToolConfig()
self.cron_service = cron_service
self.restrict_to_workspace = restrict_to_workspace
@@ -86,6 +92,9 @@ class AgentLoop:
model=self.model,
brave_api_key=brave_api_key,
web_proxy=web_proxy,
web_search_provider=web_search_provider,
web_search_base_url=web_search_base_url,
web_search_max_results=web_search_max_results,
exec_config=self.exec_config,
restrict_to_workspace=restrict_to_workspace,
)
@@ -119,7 +128,15 @@ class AgentLoop:
restrict_to_workspace=self.restrict_to_workspace,
path_append=self.exec_config.path_append,
))
self.tools.register(WebSearchTool(api_key=self.brave_api_key, proxy=self.web_proxy))
self.tools.register(
WebSearchTool(
provider=self.web_search_provider,
api_key=self.brave_api_key,
base_url=self.web_search_base_url,
max_results=self.web_search_max_results,
proxy=self.web_proxy,
)
)
self.tools.register(WebFetchTool(proxy=self.web_proxy))
self.tools.register(MessageTool(send_callback=self.bus.publish_outbound))
self.tools.register(SpawnTool(manager=self.subagents))

View File

@@ -30,6 +30,9 @@ class SubagentManager:
model: str | None = None,
brave_api_key: str | None = None,
web_proxy: str | None = None,
web_search_provider: str = "brave",
web_search_base_url: str | None = None,
web_search_max_results: int = 5,
exec_config: "ExecToolConfig | None" = None,
restrict_to_workspace: bool = False,
):
@@ -40,6 +43,9 @@ class SubagentManager:
self.model = model or provider.get_default_model()
self.brave_api_key = brave_api_key
self.web_proxy = web_proxy
self.web_search_provider = web_search_provider
self.web_search_base_url = web_search_base_url
self.web_search_max_results = web_search_max_results
self.exec_config = exec_config or ExecToolConfig()
self.restrict_to_workspace = restrict_to_workspace
self._running_tasks: dict[str, asyncio.Task[None]] = {}
@@ -101,9 +107,17 @@ class SubagentManager:
restrict_to_workspace=self.restrict_to_workspace,
path_append=self.exec_config.path_append,
))
tools.register(WebSearchTool(api_key=self.brave_api_key, proxy=self.web_proxy))
tools.register(
WebSearchTool(
provider=self.web_search_provider,
api_key=self.brave_api_key,
base_url=self.web_search_base_url,
max_results=self.web_search_max_results,
proxy=self.web_proxy,
)
)
tools.register(WebFetchTool(proxy=self.web_proxy))
system_prompt = self._build_subagent_prompt()
messages: list[dict[str, Any]] = [
{"role": "system", "content": system_prompt},

View File

@@ -45,7 +45,7 @@ def _validate_url(url: str) -> tuple[bool, str]:
class WebSearchTool(Tool):
"""Search the web using Brave Search API."""
"""Search the web using Brave Search or SearXNG."""
name = "web_search"
description = "Search the web. Returns titles, URLs, and snippets."
@@ -58,8 +58,17 @@ class WebSearchTool(Tool):
"required": ["query"]
}
def __init__(self, api_key: str | None = None, max_results: int = 5, proxy: str | None = None):
def __init__(
self,
provider: str = "brave",
api_key: str | None = None,
base_url: str | None = None,
max_results: int = 5,
proxy: str | None = None,
):
self._init_provider = provider
self._init_api_key = api_key
self._init_base_url = base_url
self.max_results = max_results
self.proxy = proxy
@@ -68,7 +77,32 @@ class WebSearchTool(Tool):
"""Resolve API key at call time so env/config changes are picked up."""
return self._init_api_key or os.environ.get("BRAVE_API_KEY", "")
@property
def provider(self) -> str:
    """Resolve the search provider at call time so env/config changes are picked up."""
    # Constructor value wins; otherwise fall back to the environment, then "brave".
    raw = self._init_provider or os.environ.get("WEB_SEARCH_PROVIDER", "brave")
    return raw.strip().lower()
@property
def base_url(self) -> str:
    """Resolve the SearXNG base URL at call time so env/config changes are picked up."""
    # Constructor value wins; otherwise consult the environment. Empty string means unset.
    configured = self._init_base_url or os.environ.get("SEARXNG_BASE_URL", "")
    return configured.strip()
async def execute(self, query: str, count: int | None = None, **kwargs: Any) -> str:
provider = self.provider
n = min(max(count or self.max_results, 1), 10)
if provider == "brave":
return await self._search_brave(query=query, count=n)
if provider == "searxng":
return await self._search_searxng(query=query, count=n)
return (
f"Error: Unsupported web search provider '{provider}'. "
"Supported values: brave, searxng."
)
async def _search_brave(self, query: str, count: int) -> str:
if not self.api_key:
return (
"Error: Brave Search API key not configured. Set it in "
@@ -77,27 +111,18 @@ class WebSearchTool(Tool):
)
try:
n = min(max(count or self.max_results, 1), 10)
logger.debug("WebSearch: {}", "proxy enabled" if self.proxy else "direct connection")
async with httpx.AsyncClient(proxy=self.proxy) as client:
r = await client.get(
"https://api.search.brave.com/res/v1/web/search",
params={"q": query, "count": n},
params={"q": query, "count": count},
headers={"Accept": "application/json", "X-Subscription-Token": self.api_key},
timeout=10.0
timeout=10.0,
)
r.raise_for_status()
results = r.json().get("web", {}).get("results", [])[:n]
if not results:
return f"No results for: {query}"
lines = [f"Results for: {query}\n"]
for i, item in enumerate(results, 1):
lines.append(f"{i}. {item.get('title', '')}\n {item.get('url', '')}")
if desc := item.get("description"):
lines.append(f" {desc}")
return "\n".join(lines)
results = r.json().get("web", {}).get("results", [])[:count]
return self._format_results(query, results, snippet_keys=("description",))
except httpx.ProxyError as e:
logger.error("WebSearch proxy error: {}", e)
return f"Proxy error: {e}"
@@ -105,6 +130,62 @@ class WebSearchTool(Tool):
logger.error("WebSearch error: {}", e)
return f"Error: {e}"
async def _search_searxng(self, query: str, count: int) -> str:
    """Query a SearXNG instance's JSON API and return formatted results.

    Requires ``base_url`` to be configured and valid; network and proxy
    failures are caught and reported as error strings, never raised.
    """
    # Guard: without a base URL there is nothing to call.
    if not self.base_url:
        return (
            "Error: SearXNG base URL not configured. Set tools.web.search.baseUrl "
            'in ~/.nanobot/config.json (or export SEARXNG_BASE_URL), e.g. "http://localhost:8080".'
        )
    ok, reason = _validate_url(self.base_url)
    if not ok:
        return f"Error: Invalid SearXNG base URL: {reason}"
    try:
        logger.debug("WebSearch: {}", "proxy enabled" if self.proxy else "direct connection")
        async with httpx.AsyncClient(proxy=self.proxy) as client:
            response = await client.get(
                self._build_searxng_search_url(),
                params={"q": query, "format": "json"},
                headers={"Accept": "application/json"},
                timeout=10.0,
            )
            response.raise_for_status()
            # SearXNG returns a flat "results" list; trim to the requested count.
            hits = response.json().get("results", [])[:count]
            return self._format_results(
                query,
                hits,
                snippet_keys=("content", "snippet", "description"),
            )
    except httpx.ProxyError as exc:
        logger.error("WebSearch proxy error: {}", exc)
        return f"Proxy error: {exc}"
    except Exception as exc:
        logger.error("WebSearch error: {}", exc)
        return f"Error: {exc}"
def _build_searxng_search_url(self) -> str:
base_url = self.base_url.rstrip("/")
return base_url if base_url.endswith("/search") else f"{base_url}/search"
@staticmethod
def _format_results(
query: str,
results: list[dict[str, Any]],
snippet_keys: tuple[str, ...],
) -> str:
if not results:
return f"No results for: {query}"
lines = [f"Results for: {query}\n"]
for i, item in enumerate(results, 1):
lines.append(f"{i}. {item.get('title', '')}\n {item.get('url', '')}")
snippet = next((item.get(key) for key in snippet_keys if item.get(key)), None)
if snippet:
lines.append(f" {snippet}")
return "\n".join(lines)
class WebFetchTool(Tool):
"""Fetch and extract content from a URL using Readability."""

View File

@@ -350,6 +350,9 @@ def gateway(
context_window_tokens=config.agents.defaults.context_window_tokens,
brave_api_key=config.tools.web.search.api_key or None,
web_proxy=config.tools.web.proxy or None,
web_search_provider=config.tools.web.search.provider,
web_search_base_url=config.tools.web.search.base_url or None,
web_search_max_results=config.tools.web.search.max_results,
exec_config=config.tools.exec,
cron_service=cron,
restrict_to_workspace=config.tools.restrict_to_workspace,
@@ -533,6 +536,9 @@ def agent(
context_window_tokens=config.agents.defaults.context_window_tokens,
brave_api_key=config.tools.web.search.api_key or None,
web_proxy=config.tools.web.proxy or None,
web_search_provider=config.tools.web.search.provider,
web_search_base_url=config.tools.web.search.base_url or None,
web_search_max_results=config.tools.web.search.max_results,
exec_config=config.tools.exec,
cron_service=cron,
restrict_to_workspace=config.tools.restrict_to_workspace,

View File

@@ -306,7 +306,9 @@ class GatewayConfig(Base):
class WebSearchConfig(Base):
"""Web search tool configuration."""
api_key: str = "" # Brave Search API key
provider: Literal["brave", "searxng"] = "brave"
api_key: str = "" # Brave Search API key (ignored by SearXNG)
base_url: str = "" # Required for SearXNG, e.g. "http://localhost:8080"
max_results: int = 5

View File

@@ -306,6 +306,20 @@ def test_agent_warns_about_deprecated_memory_window(mock_agent_runtime):
assert "contextWindowTokens" in result.stdout
def test_agent_passes_web_search_config_to_agent_loop(mock_agent_runtime) -> None:
    """The CLI wires web-search config values through to AgentLoop kwargs."""
    cfg = mock_agent_runtime["config"]
    cfg.tools.web.search.provider = "searxng"
    cfg.tools.web.search.base_url = "http://localhost:8080"
    cfg.tools.web.search.max_results = 7

    result = runner.invoke(app, ["agent", "-m", "hello"])
    assert result.exit_code == 0

    kwargs = mock_agent_runtime["agent_loop_cls"].call_args.kwargs
    assert kwargs["web_search_provider"] == "searxng"
    assert kwargs["web_search_base_url"] == "http://localhost:8080"
    assert kwargs["web_search_max_results"] == 7
def test_gateway_uses_workspace_from_config_by_default(monkeypatch, tmp_path: Path) -> None:
config_file = tmp_path / "instance" / "config.json"
config_file.parent.mkdir(parents=True)

150
tests/test_web_tools.py Normal file
View File

@@ -0,0 +1,150 @@
from typing import Any
import pytest
from nanobot.agent.tools import web as web_module
from nanobot.agent.tools.web import WebSearchTool
from nanobot.config.schema import Config
class _FakeResponse:
def __init__(self, payload: dict[str, Any]) -> None:
self._payload = payload
def raise_for_status(self) -> None:
return None
def json(self) -> dict[str, Any]:
return self._payload
@pytest.mark.asyncio
async def test_web_search_tool_brave_formats_results(monkeypatch: pytest.MonkeyPatch) -> None:
    """Brave provider: the Brave API endpoint is called and results are rendered."""
    recorded: list[dict[str, Any]] = []
    payload = {
        "web": {
            "results": [
                {
                    "title": "Nanobot",
                    "url": "https://example.com/nanobot",
                    "description": "A lightweight personal AI assistant.",
                }
            ]
        }
    }

    class _StubClient:
        def __init__(self, *args: Any, **kwargs: Any) -> None:
            self.proxy = kwargs.get("proxy")

        async def __aenter__(self) -> "_StubClient":
            return self

        async def __aexit__(self, exc_type, exc, tb) -> None:
            return None

        async def get(
            self,
            url: str,
            *,
            params: dict[str, Any] | None = None,
            headers: dict[str, str] | None = None,
            timeout: float | None = None,
        ) -> _FakeResponse:
            # Capture the outgoing request so we can assert on it afterwards.
            recorded.append({"url": url, "params": params, "headers": headers, "timeout": timeout})
            return _FakeResponse(payload)

    monkeypatch.setattr(web_module.httpx, "AsyncClient", _StubClient)

    tool = WebSearchTool(provider="brave", api_key="test-key")
    output = await tool.execute(query="nanobot", count=3)

    assert "Nanobot" in output
    assert "https://example.com/nanobot" in output
    assert "A lightweight personal AI assistant." in output
    assert recorded == [
        {
            "url": "https://api.search.brave.com/res/v1/web/search",
            "params": {"q": "nanobot", "count": 3},
            "headers": {"Accept": "application/json", "X-Subscription-Token": "test-key"},
            "timeout": 10.0,
        }
    ]
@pytest.mark.asyncio
async def test_web_search_tool_searxng_formats_results(monkeypatch: pytest.MonkeyPatch) -> None:
    """SearXNG provider: <base>/search is called with format=json and results rendered."""
    recorded: list[dict[str, Any]] = []
    payload = {
        "results": [
            {
                "title": "Nanobot Docs",
                "url": "https://example.com/docs",
                "content": "Self-hosted search works.",
            }
        ]
    }

    class _StubClient:
        def __init__(self, *args: Any, **kwargs: Any) -> None:
            self.proxy = kwargs.get("proxy")

        async def __aenter__(self) -> "_StubClient":
            return self

        async def __aexit__(self, exc_type, exc, tb) -> None:
            return None

        async def get(
            self,
            url: str,
            *,
            params: dict[str, Any] | None = None,
            headers: dict[str, str] | None = None,
            timeout: float | None = None,
        ) -> _FakeResponse:
            # Capture the outgoing request so we can assert on it afterwards.
            recorded.append({"url": url, "params": params, "headers": headers, "timeout": timeout})
            return _FakeResponse(payload)

    monkeypatch.setattr(web_module.httpx, "AsyncClient", _StubClient)

    tool = WebSearchTool(provider="searxng", base_url="http://localhost:8080")
    output = await tool.execute(query="nanobot", count=4)

    assert "Nanobot Docs" in output
    assert "https://example.com/docs" in output
    assert "Self-hosted search works." in output
    assert recorded == [
        {
            "url": "http://localhost:8080/search",
            "params": {"q": "nanobot", "format": "json"},
            "headers": {"Accept": "application/json"},
            "timeout": 10.0,
        }
    ]
def test_web_search_tool_searxng_keeps_explicit_search_path() -> None:
    """A base URL already ending in /search must not get the suffix doubled."""
    tool = WebSearchTool(provider="searxng", base_url="https://search.example.com/search/")
    endpoint = tool._build_searxng_search_url()
    assert endpoint == "https://search.example.com/search"
def test_web_search_config_accepts_searxng_fields() -> None:
    """The config schema parses provider/baseUrl/maxResults for SearXNG."""
    raw = {
        "tools": {
            "web": {
                "search": {
                    "provider": "searxng",
                    "baseUrl": "http://localhost:8080",
                    "maxResults": 7,
                }
            }
        }
    }
    config = Config.model_validate(raw)
    search = config.tools.web.search
    assert search.provider == "searxng"
    assert search.base_url == "http://localhost:8080"
    assert search.max_results == 7