Merge branch 'main' into pr-30

This commit is contained in:
Re-bin
2026-02-04 03:24:31 +00:00
14 changed files with 328 additions and 40 deletions

View File

@@ -5,6 +5,7 @@ import json
import os
import re
from typing import Any
from urllib.parse import urlparse
import httpx
@@ -12,6 +13,7 @@ from nanobot.agent.tools.base import Tool
# Shared constants
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_2) AppleWebKit/537.36"
MAX_REDIRECTS = 5 # Limit redirects to prevent DoS attacks
def _strip_tags(text: str) -> str:
@@ -28,6 +30,19 @@ def _normalize(text: str) -> str:
return re.sub(r'\n{3,}', '\n\n', text).strip()
def _validate_url(url: str) -> tuple[bool, str]:
"""Validate URL: must be http(s) with valid domain."""
try:
p = urlparse(url)
if p.scheme not in ('http', 'https'):
return False, f"Only http/https allowed, got '{p.scheme or 'none'}'"
if not p.netloc:
return False, "Missing domain"
return True, ""
except Exception as e:
return False, str(e)
class WebSearchTool(Tool):
"""Search the web using Brave Search API."""
@@ -95,12 +110,21 @@ class WebFetchTool(Tool):
async def execute(self, url: str, extractMode: str = "markdown", maxChars: int | None = None, **kwargs: Any) -> str:
from readability import Document
max_chars = maxChars or self.max_chars
# Validate URL before fetching
is_valid, error_msg = _validate_url(url)
if not is_valid:
return json.dumps({"error": f"URL validation failed: {error_msg}", "url": url})
try:
async with httpx.AsyncClient() as client:
r = await client.get(url, headers={"User-Agent": USER_AGENT}, follow_redirects=True, timeout=30.0)
async with httpx.AsyncClient(
follow_redirects=True,
max_redirects=MAX_REDIRECTS,
timeout=30.0
) as client:
r = await client.get(url, headers={"User-Agent": USER_AGENT})
r.raise_for_status()
ctype = r.headers.get("content-type", "")

View File

@@ -37,7 +37,9 @@ class ChannelManager:
try:
from nanobot.channels.telegram import TelegramChannel
self.channels["telegram"] = TelegramChannel(
self.config.channels.telegram, self.bus
self.config.channels.telegram,
self.bus,
groq_api_key=self.config.providers.groq.api_key,
)
logger.info("Telegram channel enabled")
except ImportError as e:

View File

@@ -85,9 +85,10 @@ class TelegramChannel(BaseChannel):
name = "telegram"
def __init__(self, config: TelegramConfig, bus: MessageBus):
def __init__(self, config: TelegramConfig, bus: MessageBus, groq_api_key: str = ""):
super().__init__(config, bus)
self.config: TelegramConfig = config
self.groq_api_key = groq_api_key
self._app: Application | None = None
self._chat_ids: dict[str, int] = {} # Map sender_id to chat_id for replies
@@ -249,7 +250,20 @@ class TelegramChannel(BaseChannel):
await file.download_to_drive(str(file_path))
media_paths.append(str(file_path))
content_parts.append(f"[{media_type}: {file_path}]")
# Handle voice transcription
if media_type == "voice" or media_type == "audio":
from nanobot.providers.transcription import GroqTranscriptionProvider
transcriber = GroqTranscriptionProvider(api_key=self.groq_api_key)
transcription = await transcriber.transcribe(file_path)
if transcription:
logger.info(f"Transcribed {media_type}: {transcription[:50]}...")
content_parts.append(f"[transcription: {transcription}]")
else:
content_parts.append(f"[{media_type}: {file_path}]")
else:
content_parts.append(f"[{media_type}: {file_path}]")
logger.debug(f"Downloaded {media_type} to {file_path}")
except Exception as e:
logger.error(f"Failed to download media: {e}")

View File

@@ -107,6 +107,11 @@ class WhatsAppChannel(BaseChannel):
# Extract just the phone number as chat_id
chat_id = sender.split("@")[0] if "@" in sender else sender
# Handle voice transcription if it's a voice message
if content == "[Voice Message]":
logger.info(f"Voice message received from {chat_id}, but direct download from bridge is not yet supported.")
content = "[Voice Message: Transcription not available for WhatsApp yet]"
await self._handle_message(
sender_id=chat_id,
chat_id=sender, # Use full JID for replies

View File

@@ -178,11 +178,13 @@ def gateway(
# Create components
bus = MessageBus()
# Create provider (supports OpenRouter, Anthropic, OpenAI)
# Create provider (supports OpenRouter, Anthropic, OpenAI, Bedrock)
api_key = config.get_api_key()
api_base = config.get_api_base()
if not api_key:
model = config.agents.defaults.model
is_bedrock = model.startswith("bedrock/")
if not api_key and not is_bedrock:
console.print("[red]Error: No API key configured.[/red]")
console.print("Set one in ~/.nanobot/config.json under providers.openrouter.apiKey")
raise typer.Exit(1)
@@ -289,11 +291,13 @@ def agent(
api_key = config.get_api_key()
api_base = config.get_api_base()
if not api_key:
model = config.agents.defaults.model
is_bedrock = model.startswith("bedrock/")
if not api_key and not is_bedrock:
console.print("[red]Error: No API key configured.[/red]")
raise typer.Exit(1)
bus = MessageBus()
provider = LiteLLMProvider(
api_key=api_key,
@@ -348,21 +352,31 @@ app.add_typer(channels_app, name="channels")
def channels_status():
"""Show channel status."""
from nanobot.config.loader import load_config
config = load_config()
table = Table(title="Channel Status")
table.add_column("Channel", style="cyan")
table.add_column("Enabled", style="green")
table.add_column("Bridge URL", style="yellow")
table.add_column("Configuration", style="yellow")
# WhatsApp
wa = config.channels.whatsapp
table.add_row(
"WhatsApp",
"" if wa.enabled else "",
wa.bridge_url
)
# Telegram
tg = config.channels.telegram
tg_config = f"token: {tg.token[:10]}..." if tg.token else "[dim]not configured[/dim]"
table.add_row(
"Telegram",
"" if tg.enabled else "",
tg_config
)
console.print(table)
@@ -608,18 +622,17 @@ def cron_run(
def status():
"""Show nanobot status."""
from nanobot.config.loader import load_config, get_config_path
from nanobot.utils.helpers import get_workspace_path
config_path = get_config_path()
workspace = get_workspace_path()
config = load_config()
workspace = config.workspace_path
console.print(f"{__logo__} nanobot Status\n")
console.print(f"Config: {config_path} {'[green]✓[/green]' if config_path.exists() else '[red]✗[/red]'}")
console.print(f"Workspace: {workspace} {'[green]✓[/green]' if workspace.exists() else '[red]✗[/red]'}")
if config_path.exists():
config = load_config()
console.print(f"Model: {config.agents.defaults.model}")
# Check API keys

View File

@@ -50,6 +50,7 @@ class ProvidersConfig(BaseModel):
anthropic: ProviderConfig = Field(default_factory=ProviderConfig)
openai: ProviderConfig = Field(default_factory=ProviderConfig)
openrouter: ProviderConfig = Field(default_factory=ProviderConfig)
groq: ProviderConfig = Field(default_factory=ProviderConfig)
zhipu: ProviderConfig = Field(default_factory=ProviderConfig)
vllm: ProviderConfig = Field(default_factory=ProviderConfig)
gemini: ProviderConfig = Field(default_factory=ProviderConfig)
@@ -91,13 +92,14 @@ class Config(BaseSettings):
return Path(self.agents.defaults.workspace).expanduser()
def get_api_key(self) -> str | None:
"""Get API key in priority order: OpenRouter > Anthropic > OpenAI > Gemini > Zhipu > vLLM."""
"""Get API key in priority order: OpenRouter > Anthropic > OpenAI > Gemini > Zhipu > Groq > vLLM."""
return (
self.providers.openrouter.api_key or
self.providers.anthropic.api_key or
self.providers.openai.api_key or
self.providers.gemini.api_key or
self.providers.zhipu.api_key or
self.providers.groq.api_key or
self.providers.vllm.api_key or
None
)

View File

@@ -115,7 +115,7 @@ class HeartbeatService:
response = await self.on_heartbeat(HEARTBEAT_PROMPT)
# Check if agent said "nothing to do"
if HEARTBEAT_OK_TOKEN in response.upper().replace("_", ""):
if HEARTBEAT_OK_TOKEN.replace("_", "") in response.upper().replace("_", ""):
logger.info("Heartbeat: OK (no action needed)")
else:
logger.info(f"Heartbeat: completed task")

View File

@@ -51,6 +51,8 @@ class LiteLLMProvider(LLMProvider):
os.environ.setdefault("GEMINI_API_KEY", api_key)
elif "zhipu" in default_model or "glm" in default_model or "zai" in default_model:
os.environ.setdefault("ZHIPUAI_API_KEY", api_key)
elif "groq" in default_model:
os.environ.setdefault("GROQ_API_KEY", api_key)
if api_base:
litellm.api_base = api_base

View File

@@ -0,0 +1,65 @@
"""Voice transcription provider using Groq."""
import os
from pathlib import Path
from typing import Any
import httpx
from loguru import logger
class GroqTranscriptionProvider:
"""
Voice transcription provider using Groq's Whisper API.
Groq offers extremely fast transcription with a generous free tier.
"""
def __init__(self, api_key: str | None = None):
self.api_key = api_key or os.environ.get("GROQ_API_KEY")
self.api_url = "https://api.groq.com/openai/v1/audio/transcriptions"
async def transcribe(self, file_path: str | Path) -> str:
"""
Transcribe an audio file using Groq.
Args:
file_path: Path to the audio file.
Returns:
Transcribed text.
"""
if not self.api_key:
logger.warning("Groq API key not configured for transcription")
return ""
path = Path(file_path)
if not path.exists():
logger.error(f"Audio file not found: {file_path}")
return ""
try:
async with httpx.AsyncClient() as client:
with open(path, "rb") as f:
files = {
"file": (path.name, f),
"model": (None, "whisper-large-v3"),
}
headers = {
"Authorization": f"Bearer {self.api_key}",
}
response = await client.post(
self.api_url,
headers=headers,
files=files,
timeout=60.0
)
response.raise_for_status()
data = response.json()
return data.get("text", "")
except Exception as e:
logger.error(f"Groq transcription error: {e}")
return ""