feat(web): configurable web search providers with fallback
Add multi-provider web search support: Brave (default), Tavily, DuckDuckGo, and SearXNG. Falls back to DuckDuckGo when provider credentials are missing. Providers are dispatched via a map with register_provider() for plugin extensibility.

- WebSearchConfig with env-var resolution and from_legacy() bridge
- Config migration for legacy flat keys (tavilyApiKey, searxngBaseUrl)
- SearXNG URL validation, explicit error for unknown providers
- ddgs package (replaces deprecated duckduckgo-search)
- 16 tests covering all providers, fallback, env resolution, edge cases
- docs/web-search.md with full config reference

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,8 +1,10 @@
|
||||
from typing import Any
|
||||
|
||||
from nanobot.agent.tools.web import WebSearchTool
|
||||
from nanobot.agent.tools.base import Tool
|
||||
from nanobot.agent.tools.registry import ToolRegistry
|
||||
from nanobot.agent.tools.shell import ExecTool
|
||||
from nanobot.config.schema import WebSearchConfig
|
||||
|
||||
|
||||
class SampleTool(Tool):
|
||||
@@ -337,3 +339,16 @@ def test_cast_params_single_value_not_auto_wrapped_to_array() -> None:
|
||||
assert result["items"] == 5 # Not wrapped to [5]
|
||||
result = tool.cast_params({"items": "text"})
|
||||
assert result["items"] == "text" # Not wrapped to ["text"]
|
||||
|
||||
|
||||
async def test_web_search_no_fallback_returns_provider_error(monkeypatch) -> None:
    """Brave with no API key and fallback disabled reports the missing key.

    ``monkeypatch`` is pytest's built-in fixture. It is left unannotated so this
    block does not require ``pytest`` to be imported by the module.
    """
    # The missing-key path can only be reached when the key is absent from the
    # environment as well, so clear it to keep the test hermetic.
    monkeypatch.delenv("BRAVE_API_KEY", raising=False)

    tool = WebSearchTool(
        config=WebSearchConfig(
            provider="brave",
            api_key="",
            fallback_to_duckduckgo=False,
        )
    )

    result = await tool.execute(query="fallback", count=1)
    assert result == "Error: BRAVE_API_KEY not configured"
|
||||
|
||||
327
tests/test_web_search_tool.py
Normal file
327
tests/test_web_search_tool.py
Normal file
@@ -0,0 +1,327 @@
|
||||
import httpx
|
||||
import pytest
|
||||
from collections.abc import Callable
|
||||
from typing import Literal
|
||||
|
||||
from nanobot.agent.tools.web import WebSearchTool
|
||||
from nanobot.config.schema import WebSearchConfig
|
||||
|
||||
|
||||
def _tool(config: WebSearchConfig, handler) -> WebSearchTool:
    """Build a ``WebSearchTool`` whose HTTP requests are answered by *handler*.

    *handler* is wrapped in an ``httpx.MockTransport`` and passed to the tool
    as its ``transport``.
    """
    mock_transport = httpx.MockTransport(handler)
    return WebSearchTool(config=config, transport=mock_transport)
|
||||
|
||||
|
||||
def _assert_tavily_request(request: httpx.Request) -> bool:
    """Return True when *request* is the expected Tavily search call.

    The request must be a POST to ``https://api.tavily.com/search`` carrying
    ``Authorization: Bearer tavily-key`` and a JSON object body whose
    ``query`` field equals ``"openclaw"``.

    The body is parsed as JSON rather than matched as a substring, so the
    check does not depend on how the client serializes whitespace between
    keys and values.
    """
    # Local import keeps this helper self-contained without touching the
    # module's import block.
    import json

    if request.method != "POST":
        return False
    if str(request.url) != "https://api.tavily.com/search":
        return False
    if request.headers.get("authorization") != "Bearer tavily-key":
        return False

    try:
        payload = json.loads(request.read())
    except ValueError:
        # Covers malformed JSON and undecodable bytes (UnicodeDecodeError is
        # a ValueError subclass).
        return False

    return isinstance(payload, dict) and payload.get("query") == "openclaw"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize(
    ("provider", "config_kwargs", "query", "count", "assert_request", "response", "assert_text"),
    [
        # Brave: GET with the key in the X-Subscription-Token header.
        (
            "brave",
            {"api_key": "brave-key"},
            "nanobot",
            1,
            lambda request: (
                request.method == "GET"
                and str(request.url)
                == "https://api.search.brave.com/res/v1/web/search?q=nanobot&count=1"
                and request.headers["X-Subscription-Token"] == "brave-key"
            ),
            httpx.Response(
                200,
                json={
                    "web": {
                        "results": [
                            {
                                "title": "NanoBot",
                                "url": "https://example.com/nanobot",
                                "description": "Ultra-lightweight assistant",
                            }
                        ]
                    }
                },
            ),
            ["Results for: nanobot", "1. NanoBot", "https://example.com/nanobot"],
        ),
        # Tavily: request shape is checked by the shared helper.
        (
            "tavily",
            {"api_key": "tavily-key"},
            "openclaw",
            2,
            _assert_tavily_request,
            httpx.Response(
                200,
                json={
                    "results": [
                        {
                            "title": "OpenClaw",
                            "url": "https://example.com/openclaw",
                            "content": "Plugin-based assistant framework",
                        }
                    ]
                },
            ),
            ["Results for: openclaw", "1. OpenClaw", "https://example.com/openclaw"],
        ),
        # SearXNG: GET against the configured base URL with format=json.
        (
            "searxng",
            {"base_url": "https://searx.example"},
            "nanobot",
            1,
            lambda request: (
                request.method == "GET"
                and str(request.url) == "https://searx.example/search?q=nanobot&format=json"
            ),
            httpx.Response(
                200,
                json={
                    "results": [
                        {
                            "title": "nanobot docs",
                            "url": "https://example.com/nanobot",
                            "content": "Lightweight assistant docs",
                        }
                    ]
                },
            ),
            ["Results for: nanobot", "1. nanobot docs", "https://example.com/nanobot"],
        ),
    ],
)
async def test_web_search_provider_formats_results(
    provider: Literal["brave", "tavily", "searxng"],
    config_kwargs: dict,
    query: str,
    count: int,
    assert_request: Callable[[httpx.Request], bool],
    response: httpx.Response,
    assert_text: list[str],
) -> None:
    """Each HTTP-backed provider sends the expected request and formats results.

    For every parametrized case the mock transport validates the outgoing
    request with *assert_request*, replies with *response*, and the tool's
    output must contain every fragment listed in *assert_text*.
    """

    def _respond(incoming: httpx.Request) -> httpx.Response:
        # Check the outgoing call before handing back the canned payload.
        assert assert_request(incoming)
        return response

    search_config = WebSearchConfig(provider=provider, max_results=5, **config_kwargs)
    output = await _tool(search_config, _respond).execute(query=query, count=count)

    for expected in assert_text:
        assert expected in output
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_search_from_legacy_config_works() -> None:
    """A config built with only ``api_key`` and ``max_results`` yields results.

    NOTE(review): despite the test name, the config is created through the
    plain ``WebSearchConfig`` constructor, not a legacy-conversion helper.
    Confirm this is the intended coverage.
    """

    def _respond(incoming: httpx.Request) -> httpx.Response:
        # Payload uses the nested "web" -> "results" layout.
        legacy_hit = {"title": "Legacy", "url": "https://example.com", "description": "ok"}
        return httpx.Response(200, json={"web": {"results": [legacy_hit]}})

    legacy_config = WebSearchConfig(api_key="legacy-key", max_results=3)
    search_tool = WebSearchTool(
        config=legacy_config,
        transport=httpx.MockTransport(_respond),
    )

    output = await search_tool.execute(query="constructor", count=1)
    assert "1. Legacy" in output
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize(
    ("provider", "config", "missing_env", "expected_title"),
    [
        (
            "brave",
            WebSearchConfig(provider="brave", api_key="", max_results=5),
            "BRAVE_API_KEY",
            "Fallback Result",
        ),
        (
            "tavily",
            WebSearchConfig(provider="tavily", api_key="", max_results=5),
            "TAVILY_API_KEY",
            "Tavily Fallback",
        ),
    ],
)
async def test_web_search_missing_key_falls_back_to_duckduckgo(
    monkeypatch: pytest.MonkeyPatch,
    provider: str,
    config: WebSearchConfig,
    missing_env: str,
    expected_title: str,
) -> None:
    """With no key in config or environment, the DuckDuckGo stub is used.

    The test removes *missing_env*, swaps ``nanobot.agent.tools.web.DDGS`` for
    a stub, and expects the output to announce the fallback and list the
    stub's single hit.
    """
    monkeypatch.delenv(missing_env, raising=False)

    # Records each stub search; a non-empty list proves the fallback ran.
    stub_calls: list[str] = []

    class _StubDDGS:
        def __init__(self, *args, **kwargs):
            pass

        def text(self, keywords: str, max_results: int):
            stub_calls.append(keywords)
            hit = {
                "title": expected_title,
                "href": f"https://example.com/{provider}-fallback",
                "body": "Fallback snippet",
            }
            return [hit]

    monkeypatch.setattr("nanobot.agent.tools.web.DDGS", _StubDDGS, raising=False)

    search_tool = WebSearchTool(config=config)
    output = await search_tool.execute(query="fallback", count=1)

    assert stub_calls
    assert "Using DuckDuckGo fallback" in output
    assert f"1. {expected_title}" in output
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_search_brave_missing_key_without_fallback_returns_error(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Brave with no key and fallback disabled returns the missing-key error."""
    # Make sure the key is absent from the environment as well as the config.
    monkeypatch.delenv("BRAVE_API_KEY", raising=False)

    keyless_config = WebSearchConfig(
        provider="brave",
        api_key="",
        fallback_to_duckduckgo=False,
    )
    search_tool = WebSearchTool(config=keyless_config)

    output = await search_tool.execute(query="fallback", count=1)
    assert output == "Error: BRAVE_API_KEY not configured"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_search_searxng_missing_base_url_falls_back_to_duckduckgo(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """SearXNG with no base URL falls back to DuckDuckGo and names the variable.

    The test is kept hermetic in two ways: ``SEARXNG_BASE_URL`` is removed
    from the environment so the missing-URL path is actually taken, and the
    module-level ``DDGS`` is replaced with a stub so the fallback does not
    perform a live search.
    """
    monkeypatch.delenv("SEARXNG_BASE_URL", raising=False)

    class FakeDDGS:
        def __init__(self, *args, **kwargs):
            pass

        def text(self, keywords: str, max_results: int):
            return [
                {
                    "title": "SearXNG Fallback",
                    "href": "https://example.com/searxng-fallback",
                    "body": "Fallback snippet",
                }
            ]

    # raising=False: install the stub even if the module has no DDGS attribute.
    monkeypatch.setattr("nanobot.agent.tools.web.DDGS", FakeDDGS, raising=False)

    tool = WebSearchTool(
        config=WebSearchConfig(provider="searxng", base_url="", max_results=5)
    )

    result = await tool.execute(query="nanobot", count=1)
    assert "DuckDuckGo fallback" in result
    assert "SEARXNG_BASE_URL" in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_search_searxng_missing_base_url_no_fallback_returns_error(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """SearXNG with no base URL and fallback disabled returns a config error."""
    # An exported SEARXNG_BASE_URL would supply the URL and bypass the error
    # path under test, so remove it first.
    monkeypatch.delenv("SEARXNG_BASE_URL", raising=False)

    tool = WebSearchTool(
        config=WebSearchConfig(
            provider="searxng",
            base_url="",
            fallback_to_duckduckgo=False,
            max_results=5,
        )
    )

    result = await tool.execute(query="nanobot", count=1)
    assert result == "Error: SEARXNG_BASE_URL not configured"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_search_searxng_uses_env_base_url(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With an empty ``base_url``, SearXNG targets the URL in ``SEARXNG_BASE_URL``."""
    monkeypatch.setenv("SEARXNG_BASE_URL", "https://searx.env")

    env_hit = {
        "title": "env result",
        "url": "https://example.com/env",
        "content": "from env",
    }

    def _respond(incoming: httpx.Request) -> httpx.Response:
        # The request must go to the host taken from the environment.
        assert incoming.method == "GET"
        assert str(incoming.url) == "https://searx.env/search?q=nanobot&format=json"
        return httpx.Response(200, json={"results": [env_hit]})

    env_config = WebSearchConfig(provider="searxng", base_url="", max_results=5)
    search_tool = _tool(env_config, _respond)

    output = await search_tool.execute(query="nanobot", count=1)
    assert "1. env result" in output
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_search_register_custom_provider() -> None:
    """A coroutine installed under a new provider name handles the search.

    NOTE(review): the provider is installed by writing to the private
    ``_provider_dispatch`` map directly. If a public registration API exists,
    consider exercising it here instead. Confirm.
    """

    async def _echo_provider(query: str, n: int) -> str:
        return f"custom:{query}:{n}"

    search_tool = WebSearchTool(config=WebSearchConfig(provider="custom", max_results=5))
    search_tool._provider_dispatch["custom"] = _echo_provider

    output = await search_tool.execute(query="nanobot", count=2)
    assert output == "custom:nanobot:2"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_search_duckduckgo_uses_injected_ddgs_factory() -> None:
    """The DuckDuckGo provider searches through the client from ``ddgs_factory``."""

    class _StubDDGS:
        def text(self, keywords: str, max_results: int):
            # The stub checks that the query and result count arrive unchanged.
            assert keywords == "nanobot"
            assert max_results == 1
            hit = {
                "title": "NanoBot result",
                "href": "https://example.com/nanobot",
                "body": "Search content",
            }
            return [hit]

    def _make_ddgs():
        return _StubDDGS()

    ddg_config = WebSearchConfig(provider="duckduckgo", max_results=5)
    search_tool = WebSearchTool(config=ddg_config, ddgs_factory=_make_ddgs)

    output = await search_tool.execute(query="nanobot", count=1)
    assert "1. NanoBot result" in output
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_search_unknown_provider_returns_error() -> None:
    """An unrecognized provider name yields an explicit error string."""
    unknown_config = WebSearchConfig(provider="google", max_results=5)
    search_tool = WebSearchTool(config=unknown_config)

    output = await search_tool.execute(query="nanobot", count=1)
    assert output == "Error: unknown search provider 'google'"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_search_dispatch_dict_overwrites_builtin() -> None:
    """Replacing the ``"brave"`` dispatch entry routes searches to the override."""

    async def _brave_override(query: str, n: int) -> str:
        return f"custom-brave:{query}:{n}"

    brave_config = WebSearchConfig(provider="brave", api_key="key", max_results=5)
    search_tool = WebSearchTool(config=brave_config)
    # Overwrite the entry registered under "brave" with the override.
    search_tool._provider_dispatch["brave"] = _brave_override

    output = await search_tool.execute(query="nanobot", count=2)
    assert output == "custom-brave:nanobot:2"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_search_searxng_rejects_invalid_url() -> None:
    """A SearXNG ``base_url`` of ``ftp://internal.host`` is reported as invalid."""
    bad_url_config = WebSearchConfig(
        provider="searxng",
        base_url="ftp://internal.host",
        max_results=5,
    )
    search_tool = WebSearchTool(config=bad_url_config)

    output = await search_tool.execute(query="nanobot", count=1)
    assert "Error: invalid SearXNG URL" in output
|
||||
Reference in New Issue
Block a user