fix: searxng搜索引擎支持

This commit is contained in:
Hua
2026-03-12 12:38:01 +08:00
parent fc4f7cca21
commit 7f1e42c3fd
8 changed files with 340 additions and 20 deletions

View File

@@ -55,6 +55,9 @@ class AgentLoop:
context_window_tokens: int = 65_536,
brave_api_key: str | None = None,
web_proxy: str | None = None,
web_search_provider: str = "brave",
web_search_base_url: str | None = None,
web_search_max_results: int = 5,
exec_config: ExecToolConfig | None = None,
cron_service: CronService | None = None,
restrict_to_workspace: bool = False,
@@ -72,6 +75,9 @@ class AgentLoop:
self.context_window_tokens = context_window_tokens
self.brave_api_key = brave_api_key
self.web_proxy = web_proxy
self.web_search_provider = web_search_provider
self.web_search_base_url = web_search_base_url
self.web_search_max_results = web_search_max_results
self.exec_config = exec_config or ExecToolConfig()
self.cron_service = cron_service
self.restrict_to_workspace = restrict_to_workspace
@@ -86,6 +92,9 @@ class AgentLoop:
model=self.model,
brave_api_key=brave_api_key,
web_proxy=web_proxy,
web_search_provider=web_search_provider,
web_search_base_url=web_search_base_url,
web_search_max_results=web_search_max_results,
exec_config=self.exec_config,
restrict_to_workspace=restrict_to_workspace,
)
@@ -119,7 +128,15 @@ class AgentLoop:
restrict_to_workspace=self.restrict_to_workspace,
path_append=self.exec_config.path_append,
))
self.tools.register(WebSearchTool(api_key=self.brave_api_key, proxy=self.web_proxy))
self.tools.register(
WebSearchTool(
provider=self.web_search_provider,
api_key=self.brave_api_key,
base_url=self.web_search_base_url,
max_results=self.web_search_max_results,
proxy=self.web_proxy,
)
)
self.tools.register(WebFetchTool(proxy=self.web_proxy))
self.tools.register(MessageTool(send_callback=self.bus.publish_outbound))
self.tools.register(SpawnTool(manager=self.subagents))

View File

@@ -30,6 +30,9 @@ class SubagentManager:
model: str | None = None,
brave_api_key: str | None = None,
web_proxy: str | None = None,
web_search_provider: str = "brave",
web_search_base_url: str | None = None,
web_search_max_results: int = 5,
exec_config: "ExecToolConfig | None" = None,
restrict_to_workspace: bool = False,
):
@@ -40,6 +43,9 @@ class SubagentManager:
self.model = model or provider.get_default_model()
self.brave_api_key = brave_api_key
self.web_proxy = web_proxy
self.web_search_provider = web_search_provider
self.web_search_base_url = web_search_base_url
self.web_search_max_results = web_search_max_results
self.exec_config = exec_config or ExecToolConfig()
self.restrict_to_workspace = restrict_to_workspace
self._running_tasks: dict[str, asyncio.Task[None]] = {}
@@ -101,9 +107,17 @@ class SubagentManager:
restrict_to_workspace=self.restrict_to_workspace,
path_append=self.exec_config.path_append,
))
tools.register(WebSearchTool(api_key=self.brave_api_key, proxy=self.web_proxy))
tools.register(
WebSearchTool(
provider=self.web_search_provider,
api_key=self.brave_api_key,
base_url=self.web_search_base_url,
max_results=self.web_search_max_results,
proxy=self.web_proxy,
)
)
tools.register(WebFetchTool(proxy=self.web_proxy))
system_prompt = self._build_subagent_prompt()
messages: list[dict[str, Any]] = [
{"role": "system", "content": system_prompt},

View File

@@ -45,7 +45,7 @@ def _validate_url(url: str) -> tuple[bool, str]:
class WebSearchTool(Tool):
"""Search the web using Brave Search API."""
"""Search the web using Brave Search or SearXNG."""
name = "web_search"
description = "Search the web. Returns titles, URLs, and snippets."
@@ -58,8 +58,17 @@ class WebSearchTool(Tool):
"required": ["query"]
}
def __init__(self, api_key: str | None = None, max_results: int = 5, proxy: str | None = None):
def __init__(
self,
provider: str = "brave",
api_key: str | None = None,
base_url: str | None = None,
max_results: int = 5,
proxy: str | None = None,
):
self._init_provider = provider
self._init_api_key = api_key
self._init_base_url = base_url
self.max_results = max_results
self.proxy = proxy
@@ -68,7 +77,32 @@ class WebSearchTool(Tool):
"""Resolve API key at call time so env/config changes are picked up."""
return self._init_api_key or os.environ.get("BRAVE_API_KEY", "")
@property
def provider(self) -> str:
    """Search backend name, resolved lazily so env/config changes apply per call."""
    # Constructor value wins; fall back to the WEB_SEARCH_PROVIDER env var.
    raw = self._init_provider or os.environ.get("WEB_SEARCH_PROVIDER", "brave")
    return raw.strip().lower()
@property
def base_url(self) -> str:
    """SearXNG base URL, resolved lazily so env/config changes apply per call."""
    # Constructor value wins; fall back to the SEARXNG_BASE_URL env var.
    configured = self._init_base_url or os.environ.get("SEARXNG_BASE_URL", "")
    return configured.strip()
async def execute(self, query: str, count: int | None = None, **kwargs: Any) -> str:
provider = self.provider
n = min(max(count or self.max_results, 1), 10)
if provider == "brave":
return await self._search_brave(query=query, count=n)
if provider == "searxng":
return await self._search_searxng(query=query, count=n)
return (
f"Error: Unsupported web search provider '{provider}'. "
"Supported values: brave, searxng."
)
async def _search_brave(self, query: str, count: int) -> str:
    """Query the Brave Search API and return up to `count` formatted results.

    Returns a numbered result list, or an error string on failure (missing
    key, proxy failure, HTTP error).
    """
    if not self.api_key:
        # NOTE(review): the original message text is partially hidden by the
        # diff hunk boundary — wording reconstructed; confirm against the repo.
        return (
            "Error: Brave Search API key not configured. Set tools.web.search.apiKey "
            "in ~/.nanobot/config.json (or export BRAVE_API_KEY)."
        )
    try:
        logger.debug("WebSearch: {}", "proxy enabled" if self.proxy else "direct connection")
        async with httpx.AsyncClient(proxy=self.proxy) as client:
            r = await client.get(
                "https://api.search.brave.com/res/v1/web/search",
                params={"q": query, "count": count},
                headers={"Accept": "application/json", "X-Subscription-Token": self.api_key},
                timeout=10.0,
            )
            r.raise_for_status()
            results = r.json().get("web", {}).get("results", [])[:count]
            return self._format_results(query, results, snippet_keys=("description",))
    except httpx.ProxyError as e:
        logger.error("WebSearch proxy error: {}", e)
        return f"Proxy error: {e}"
    except Exception as e:
        logger.error("WebSearch error: {}", e)
        return f"Error: {e}"
async def _search_searxng(self, query: str, count: int) -> str:
    """Query a SearXNG instance's JSON API and return up to `count` formatted
    results, or a human-readable error string."""
    if not self.base_url:
        return (
            "Error: SearXNG base URL not configured. Set tools.web.search.baseUrl "
            'in ~/.nanobot/config.json (or export SEARXNG_BASE_URL), e.g. "http://localhost:8080".'
        )
    ok, reason = _validate_url(self.base_url)
    if not ok:
        return f"Error: Invalid SearXNG base URL: {reason}"
    try:
        logger.debug("WebSearch: {}", "proxy enabled" if self.proxy else "direct connection")
        async with httpx.AsyncClient(proxy=self.proxy) as client:
            resp = await client.get(
                self._build_searxng_search_url(),
                params={"q": query, "format": "json"},
                headers={"Accept": "application/json"},
                timeout=10.0,
            )
            resp.raise_for_status()
            # SearXNG puts the snippet under "content"; fall back to other keys.
            hits = resp.json().get("results", [])[:count]
            return self._format_results(
                query,
                hits,
                snippet_keys=("content", "snippet", "description"),
            )
    except httpx.ProxyError as e:
        logger.error("WebSearch proxy error: {}", e)
        return f"Proxy error: {e}"
    except Exception as e:
        logger.error("WebSearch error: {}", e)
        return f"Error: {e}"
def _build_searxng_search_url(self) -> str:
base_url = self.base_url.rstrip("/")
return base_url if base_url.endswith("/search") else f"{base_url}/search"
@staticmethod
def _format_results(
query: str,
results: list[dict[str, Any]],
snippet_keys: tuple[str, ...],
) -> str:
if not results:
return f"No results for: {query}"
lines = [f"Results for: {query}\n"]
for i, item in enumerate(results, 1):
lines.append(f"{i}. {item.get('title', '')}\n {item.get('url', '')}")
snippet = next((item.get(key) for key in snippet_keys if item.get(key)), None)
if snippet:
lines.append(f" {snippet}")
return "\n".join(lines)
class WebFetchTool(Tool):
"""Fetch and extract content from a URL using Readability."""

View File

@@ -350,6 +350,9 @@ def gateway(
context_window_tokens=config.agents.defaults.context_window_tokens,
brave_api_key=config.tools.web.search.api_key or None,
web_proxy=config.tools.web.proxy or None,
web_search_provider=config.tools.web.search.provider,
web_search_base_url=config.tools.web.search.base_url or None,
web_search_max_results=config.tools.web.search.max_results,
exec_config=config.tools.exec,
cron_service=cron,
restrict_to_workspace=config.tools.restrict_to_workspace,
@@ -533,6 +536,9 @@ def agent(
context_window_tokens=config.agents.defaults.context_window_tokens,
brave_api_key=config.tools.web.search.api_key or None,
web_proxy=config.tools.web.proxy or None,
web_search_provider=config.tools.web.search.provider,
web_search_base_url=config.tools.web.search.base_url or None,
web_search_max_results=config.tools.web.search.max_results,
exec_config=config.tools.exec,
cron_service=cron,
restrict_to_workspace=config.tools.restrict_to_workspace,

View File

@@ -306,7 +306,9 @@ class GatewayConfig(Base):
class WebSearchConfig(Base):
    """Web search tool configuration."""

    # Which backend the web_search tool queries.
    provider: Literal["brave", "searxng"] = "brave"
    api_key: str = ""  # Brave Search API key (ignored by SearXNG)
    base_url: str = ""  # Required for SearXNG, e.g. "http://localhost:8080"
    max_results: int = 5  # Default result count when a query gives no explicit count