Merge branch 'main' into feat/discord-support
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
"""Context builder for assembling agent prompts."""
|
||||
|
||||
import base64
|
||||
import mimetypes
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
@@ -114,32 +116,53 @@ When remembering something, write to {workspace_path}/memory/MEMORY.md"""
|
||||
self,
|
||||
history: list[dict[str, Any]],
|
||||
current_message: str,
|
||||
skill_names: list[str] | None = None
|
||||
skill_names: list[str] | None = None,
|
||||
media: list[str] | None = None,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""
|
||||
Build the complete message list for an LLM call.
|
||||
|
||||
|
||||
Args:
|
||||
history: Previous conversation messages.
|
||||
current_message: The new user message.
|
||||
skill_names: Optional skills to include.
|
||||
|
||||
media: Optional list of local file paths for images/media.
|
||||
|
||||
Returns:
|
||||
List of messages including system prompt.
|
||||
"""
|
||||
messages = []
|
||||
|
||||
|
||||
# System prompt
|
||||
system_prompt = self.build_system_prompt(skill_names)
|
||||
messages.append({"role": "system", "content": system_prompt})
|
||||
|
||||
|
||||
# History
|
||||
messages.extend(history)
|
||||
|
||||
# Current message
|
||||
messages.append({"role": "user", "content": current_message})
|
||||
|
||||
|
||||
# Current message (with optional image attachments)
|
||||
user_content = self._build_user_content(current_message, media)
|
||||
messages.append({"role": "user", "content": user_content})
|
||||
|
||||
return messages
|
||||
|
||||
def _build_user_content(self, text: str, media: list[str] | None) -> str | list[dict[str, Any]]:
|
||||
"""Build user message content with optional base64-encoded images."""
|
||||
if not media:
|
||||
return text
|
||||
|
||||
images = []
|
||||
for path in media:
|
||||
p = Path(path)
|
||||
mime, _ = mimetypes.guess_type(path)
|
||||
if not p.is_file() or not mime or not mime.startswith("image/"):
|
||||
continue
|
||||
b64 = base64.b64encode(p.read_bytes()).decode()
|
||||
images.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}})
|
||||
|
||||
if not images:
|
||||
return text
|
||||
return images + [{"type": "text", "text": text}]
|
||||
|
||||
def add_tool_result(
|
||||
self,
|
||||
|
||||
@@ -152,7 +152,8 @@ class AgentLoop:
|
||||
# Build initial messages (use get_history for LLM-formatted messages)
|
||||
messages = self.context.build_messages(
|
||||
history=session.get_history(),
|
||||
current_message=msg.content
|
||||
current_message=msg.content,
|
||||
media=msg.media if msg.media else None,
|
||||
)
|
||||
|
||||
# Agent loop
|
||||
|
||||
@@ -37,7 +37,9 @@ class ChannelManager:
|
||||
try:
|
||||
from nanobot.channels.telegram import TelegramChannel
|
||||
self.channels["telegram"] = TelegramChannel(
|
||||
self.config.channels.telegram, self.bus
|
||||
self.config.channels.telegram,
|
||||
self.bus,
|
||||
groq_api_key=self.config.providers.groq.api_key,
|
||||
)
|
||||
logger.info("Telegram channel enabled")
|
||||
except ImportError as e:
|
||||
|
||||
@@ -85,9 +85,10 @@ class TelegramChannel(BaseChannel):
|
||||
|
||||
name = "telegram"
|
||||
|
||||
def __init__(self, config: TelegramConfig, bus: MessageBus):
|
||||
def __init__(self, config: TelegramConfig, bus: MessageBus, groq_api_key: str = ""):
|
||||
super().__init__(config, bus)
|
||||
self.config: TelegramConfig = config
|
||||
self.groq_api_key = groq_api_key
|
||||
self._app: Application | None = None
|
||||
self._chat_ids: dict[str, int] = {} # Map sender_id to chat_id for replies
|
||||
|
||||
@@ -249,7 +250,20 @@ class TelegramChannel(BaseChannel):
|
||||
await file.download_to_drive(str(file_path))
|
||||
|
||||
media_paths.append(str(file_path))
|
||||
content_parts.append(f"[{media_type}: {file_path}]")
|
||||
|
||||
# Handle voice transcription
|
||||
if media_type == "voice" or media_type == "audio":
|
||||
from nanobot.providers.transcription import GroqTranscriptionProvider
|
||||
transcriber = GroqTranscriptionProvider(api_key=self.groq_api_key)
|
||||
transcription = await transcriber.transcribe(file_path)
|
||||
if transcription:
|
||||
logger.info(f"Transcribed {media_type}: {transcription[:50]}...")
|
||||
content_parts.append(f"[transcription: {transcription}]")
|
||||
else:
|
||||
content_parts.append(f"[{media_type}: {file_path}]")
|
||||
else:
|
||||
content_parts.append(f"[{media_type}: {file_path}]")
|
||||
|
||||
logger.debug(f"Downloaded {media_type} to {file_path}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to download media: {e}")
|
||||
|
||||
@@ -107,6 +107,11 @@ class WhatsAppChannel(BaseChannel):
|
||||
# Extract just the phone number as chat_id
|
||||
chat_id = sender.split("@")[0] if "@" in sender else sender
|
||||
|
||||
# Handle voice transcription if it's a voice message
|
||||
if content == "[Voice Message]":
|
||||
logger.info(f"Voice message received from {chat_id}, but direct download from bridge is not yet supported.")
|
||||
content = "[Voice Message: Transcription not available for WhatsApp yet]"
|
||||
|
||||
await self._handle_message(
|
||||
sender_id=chat_id,
|
||||
chat_id=sender, # Use full JID for replies
|
||||
|
||||
@@ -178,11 +178,13 @@ def gateway(
|
||||
# Create components
|
||||
bus = MessageBus()
|
||||
|
||||
# Create provider (supports OpenRouter, Anthropic, OpenAI)
|
||||
# Create provider (supports OpenRouter, Anthropic, OpenAI, Bedrock)
|
||||
api_key = config.get_api_key()
|
||||
api_base = config.get_api_base()
|
||||
|
||||
if not api_key:
|
||||
model = config.agents.defaults.model
|
||||
is_bedrock = model.startswith("bedrock/")
|
||||
|
||||
if not api_key and not is_bedrock:
|
||||
console.print("[red]Error: No API key configured.[/red]")
|
||||
console.print("Set one in ~/.nanobot/config.json under providers.openrouter.apiKey")
|
||||
raise typer.Exit(1)
|
||||
@@ -289,11 +291,13 @@ def agent(
|
||||
|
||||
api_key = config.get_api_key()
|
||||
api_base = config.get_api_base()
|
||||
|
||||
if not api_key:
|
||||
model = config.agents.defaults.model
|
||||
is_bedrock = model.startswith("bedrock/")
|
||||
|
||||
if not api_key and not is_bedrock:
|
||||
console.print("[red]Error: No API key configured.[/red]")
|
||||
raise typer.Exit(1)
|
||||
|
||||
|
||||
bus = MessageBus()
|
||||
provider = LiteLLMProvider(
|
||||
api_key=api_key,
|
||||
@@ -348,14 +352,15 @@ app.add_typer(channels_app, name="channels")
|
||||
def channels_status():
|
||||
"""Show channel status."""
|
||||
from nanobot.config.loader import load_config
|
||||
|
||||
|
||||
config = load_config()
|
||||
|
||||
|
||||
table = Table(title="Channel Status")
|
||||
table.add_column("Channel", style="cyan")
|
||||
table.add_column("Enabled", style="green")
|
||||
table.add_column("Bridge URL", style="yellow")
|
||||
|
||||
table.add_column("Configuration", style="yellow")
|
||||
|
||||
# WhatsApp
|
||||
wa = config.channels.whatsapp
|
||||
table.add_row(
|
||||
"WhatsApp",
|
||||
@@ -363,13 +368,6 @@ def channels_status():
|
||||
wa.bridge_url
|
||||
)
|
||||
|
||||
tg = config.channels.telegram
|
||||
table.add_row(
|
||||
"Telegram",
|
||||
"✓" if tg.enabled else "✗",
|
||||
"polling"
|
||||
)
|
||||
|
||||
dc = config.channels.discord
|
||||
table.add_row(
|
||||
"Discord",
|
||||
@@ -377,6 +375,15 @@ def channels_status():
|
||||
dc.gateway_url
|
||||
)
|
||||
|
||||
# Telegram
|
||||
tg = config.channels.telegram
|
||||
tg_config = f"token: {tg.token[:10]}..." if tg.token else "[dim]not configured[/dim]"
|
||||
table.add_row(
|
||||
"Telegram",
|
||||
"✓" if tg.enabled else "✗",
|
||||
tg_config
|
||||
)
|
||||
|
||||
console.print(table)
|
||||
|
||||
|
||||
@@ -520,6 +527,7 @@ def cron_add(
|
||||
at: str = typer.Option(None, "--at", help="Run once at time (ISO format)"),
|
||||
deliver: bool = typer.Option(False, "--deliver", "-d", help="Deliver response to channel"),
|
||||
to: str = typer.Option(None, "--to", help="Recipient for delivery"),
|
||||
channel: str = typer.Option(None, "--channel", help="Channel for delivery (e.g. 'telegram', 'whatsapp')"),
|
||||
):
|
||||
"""Add a scheduled job."""
|
||||
from nanobot.config.loader import get_data_dir
|
||||
@@ -548,6 +556,7 @@ def cron_add(
|
||||
message=message,
|
||||
deliver=deliver,
|
||||
to=to,
|
||||
channel=channel,
|
||||
)
|
||||
|
||||
console.print(f"[green]✓[/green] Added job '{job.name}' ({job.id})")
|
||||
|
||||
@@ -60,6 +60,7 @@ class ProvidersConfig(BaseModel):
|
||||
anthropic: ProviderConfig = Field(default_factory=ProviderConfig)
|
||||
openai: ProviderConfig = Field(default_factory=ProviderConfig)
|
||||
openrouter: ProviderConfig = Field(default_factory=ProviderConfig)
|
||||
groq: ProviderConfig = Field(default_factory=ProviderConfig)
|
||||
zhipu: ProviderConfig = Field(default_factory=ProviderConfig)
|
||||
vllm: ProviderConfig = Field(default_factory=ProviderConfig)
|
||||
gemini: ProviderConfig = Field(default_factory=ProviderConfig)
|
||||
@@ -101,14 +102,14 @@ class Config(BaseSettings):
|
||||
return Path(self.agents.defaults.workspace).expanduser()
|
||||
|
||||
def get_api_key(self) -> str | None:
|
||||
"""Get API key in priority order: OpenRouter > Anthropic > OpenAI > Gemini > vLLM."""
|
||||
"""Get API key in priority order: OpenRouter > Anthropic > OpenAI > Gemini > Zhipu > vLLM."""
|
||||
"""Get API key in priority order: OpenRouter > Anthropic > OpenAI > Gemini > Zhipu > Groq > vLLM."""
|
||||
return (
|
||||
self.providers.openrouter.api_key or
|
||||
self.providers.anthropic.api_key or
|
||||
self.providers.openai.api_key or
|
||||
self.providers.gemini.api_key or
|
||||
self.providers.zhipu.api_key or
|
||||
self.providers.groq.api_key or
|
||||
self.providers.vllm.api_key or
|
||||
None
|
||||
)
|
||||
|
||||
@@ -51,6 +51,8 @@ class LiteLLMProvider(LLMProvider):
|
||||
os.environ.setdefault("GEMINI_API_KEY", api_key)
|
||||
elif "zhipu" in default_model or "glm" in default_model or "zai" in default_model:
|
||||
os.environ.setdefault("ZHIPUAI_API_KEY", api_key)
|
||||
elif "groq" in default_model:
|
||||
os.environ.setdefault("GROQ_API_KEY", api_key)
|
||||
|
||||
if api_base:
|
||||
litellm.api_base = api_base
|
||||
|
||||
65
nanobot/providers/transcription.py
Normal file
65
nanobot/providers/transcription.py
Normal file
@@ -0,0 +1,65 @@
|
||||
"""Voice transcription provider using Groq."""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class GroqTranscriptionProvider:
|
||||
"""
|
||||
Voice transcription provider using Groq's Whisper API.
|
||||
|
||||
Groq offers extremely fast transcription with a generous free tier.
|
||||
"""
|
||||
|
||||
def __init__(self, api_key: str | None = None):
|
||||
self.api_key = api_key or os.environ.get("GROQ_API_KEY")
|
||||
self.api_url = "https://api.groq.com/openai/v1/audio/transcriptions"
|
||||
|
||||
async def transcribe(self, file_path: str | Path) -> str:
|
||||
"""
|
||||
Transcribe an audio file using Groq.
|
||||
|
||||
Args:
|
||||
file_path: Path to the audio file.
|
||||
|
||||
Returns:
|
||||
Transcribed text.
|
||||
"""
|
||||
if not self.api_key:
|
||||
logger.warning("Groq API key not configured for transcription")
|
||||
return ""
|
||||
|
||||
path = Path(file_path)
|
||||
if not path.exists():
|
||||
logger.error(f"Audio file not found: {file_path}")
|
||||
return ""
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient() as client:
|
||||
with open(path, "rb") as f:
|
||||
files = {
|
||||
"file": (path.name, f),
|
||||
"model": (None, "whisper-large-v3"),
|
||||
}
|
||||
headers = {
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
}
|
||||
|
||||
response = await client.post(
|
||||
self.api_url,
|
||||
headers=headers,
|
||||
files=files,
|
||||
timeout=60.0
|
||||
)
|
||||
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
return data.get("text", "")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Groq transcription error: {e}")
|
||||
return ""
|
||||
Reference in New Issue
Block a user