Merge remote-tracking branch 'origin/main' into pr-1618

2026-03-07 03:42:02 +00:00
parent 73be53d4bd 7b491ed4b3
commit 7c074e4684
4 changed files with 121 additions and 18 deletions
--- a/README.md
+++ b/README.md
@@ -420,6 +420,10 @@ nanobot channels login
 nanobot gateway
 ```

+> WhatsApp bridge updates are not applied automatically for existing installations.
+> If you upgrade nanobot and need the latest WhatsApp bridge, run:
+> `rm -rf ~/.nanobot/bridge && nanobot channels login`
+
 </details>

 <details>
--- a/bridge/src/whatsapp.ts
+++ b/bridge/src/whatsapp.ts
@@ -9,11 +9,17 @@ import makeWASocket, {
  useMultiFileAuthState,
  fetchLatestBaileysVersion,
  makeCacheableSignalKeyStore,
+  downloadMediaMessage,
+  extractMessageContent as baileysExtractMessageContent,
 } from '@whiskeysockets/baileys';

 import { Boom } from '@hapi/boom';
 import qrcode from 'qrcode-terminal';
 import pino from 'pino';
+import { writeFile, mkdir } from 'fs/promises';
+import { join } from 'path';
+import { homedir } from 'os';
+import { randomBytes } from 'crypto';

 const VERSION = '0.1.0';

@@ -24,6 +30,7 @@ export interface InboundMessage {
  content: string;
  timestamp: number;
  isGroup: boolean;
+  media?: string[];
 }

 export interface WhatsAppClientOptions {
@@ -110,14 +117,33 @@ export class WhatsAppClient {
      if (type !== 'notify') return;

      for (const msg of messages) {
-        // Skip own messages
        if (msg.key.fromMe) continue;
-
-        // Skip status updates
        if (msg.key.remoteJid === 'status@broadcast') continue;

-        const content = this.extractMessageContent(msg);
-        if (!content) continue;
+        const unwrapped = baileysExtractMessageContent(msg.message);
+        if (!unwrapped) continue;
+
+        const content = this.getTextContent(unwrapped);
+        let fallbackContent: string | null = null;
+        const mediaPaths: string[] = [];
+
+        if (unwrapped.imageMessage) {
+          fallbackContent = '[Image]';
+          const path = await this.downloadMedia(msg, unwrapped.imageMessage.mimetype ?? undefined);
+          if (path) mediaPaths.push(path);
+        } else if (unwrapped.documentMessage) {
+          fallbackContent = '[Document]';
+          const path = await this.downloadMedia(msg, unwrapped.documentMessage.mimetype ?? undefined,
+            unwrapped.documentMessage.fileName ?? undefined);
+          if (path) mediaPaths.push(path);
+        } else if (unwrapped.videoMessage) {
+          fallbackContent = '[Video]';
+          const path = await this.downloadMedia(msg, unwrapped.videoMessage.mimetype ?? undefined);
+          if (path) mediaPaths.push(path);
+        }
+
+        const finalContent = content || (mediaPaths.length === 0 ? fallbackContent : '') || '';
+        if (!finalContent && mediaPaths.length === 0) continue;

        const isGroup = msg.key.remoteJid?.endsWith('@g.us') || false;

@@ -125,18 +151,45 @@ export class WhatsAppClient {
          id: msg.key.id || '',
          sender: msg.key.remoteJid || '',
          pn: msg.key.remoteJidAlt || '',
-          content,
+          content: finalContent,
          timestamp: msg.messageTimestamp as number,
          isGroup,
+          ...(mediaPaths.length > 0 ? { media: mediaPaths } : {}),
        });
      }
    });
  }

-  private extractMessageContent(msg: any): string | null {
-    const message = msg.message;
-    if (!message) return null;
+  /**
+   * Download a message's media into ~/.nanobot/media; resolves to the saved path, or null on any failure.
+   * `fileName` and `mimetype` are supplied by the sender, so neither may contribute a path separator.
+   */
+  private async downloadMedia(msg: any, mimetype?: string, fileName?: string): Promise<string | null> {
+    try {
+      const mediaDir = join(homedir(), '.nanobot', 'media');
+      await mkdir(mediaDir, { recursive: true });

+      const buffer = await downloadMediaMessage(msg, 'buffer', {}) as Buffer;
+      const prefix = `wa_${Date.now()}_${randomBytes(4).toString('hex')}`; // unique per file, avoids collisions
+      let outFilename: string;
+      if (fileName) {
+        // Keep only the last path segment so a name like "../../x" cannot escape mediaDir.
+        outFilename = `${prefix}_${fileName.split(/[\\/]/).pop() ?? ''}`;
+      } else {
+        // Extension from the mimetype subtype ("image/png" → ".png"); parameters and unsafe characters are dropped.
+        const subtype = (mimetype || 'application/octet-stream').split('/').pop()?.split(';')[0] ?? '';
+        outFilename = `${prefix}.${subtype.replace(/[^A-Za-z0-9.+-]/g, '') || 'bin'}`;
+      }
+      const filepath = join(mediaDir, outFilename);
+      await writeFile(filepath, buffer);
+      return filepath;
+    } catch (err) {
+      console.error('Failed to download media:', err);
+      return null;
+    }
+  }
+
+  private getTextContent(message: any): string | null {
    // Text message
    if (message.conversation) {
      return message.conversation;
@@ -147,19 +200,19 @@ export class WhatsAppClient {
      return message.extendedTextMessage.text;
    }

-    // Image with caption
-    if (message.imageMessage?.caption) {
-      return `[Image] ${message.imageMessage.caption}`;
+    // Image with optional caption
+    if (message.imageMessage) {
+      return message.imageMessage.caption || '';
    }

-    // Video with caption
-    if (message.videoMessage?.caption) {
-      return `[Video] ${message.videoMessage.caption}`;
+    // Video with optional caption
+    if (message.videoMessage) {
+      return message.videoMessage.caption || '';
    }

-    // Document with caption
-    if (message.documentMessage?.caption) {
-      return `[Document] ${message.documentMessage.caption}`;
+    // Document with optional caption
+    if (message.documentMessage) {
+      return message.documentMessage.caption || '';
    }

    // Voice/Audio message
--- a/nanobot/channels/whatsapp.py
+++ b/nanobot/channels/whatsapp.py
@@ -2,6 +2,7 @@

 import asyncio
 import json
+import mimetypes
 from collections import OrderedDict

 from loguru import logger
@@ -128,10 +129,22 @@ class WhatsAppChannel(BaseChannel):
                logger.info("Voice message received from {}, but direct download from bridge is not yet supported.", sender_id)
                content = "[Voice Message: Transcription not available for WhatsApp yet]"

+            # Extract media paths (images/documents/videos downloaded by the bridge)
+            media_paths = data.get("media") or []
+
+            # Build content tags matching Telegram's pattern: [image: /path] or [file: /path]
+            if media_paths:
+                for p in media_paths:
+                    mime, _ = mimetypes.guess_type(p)
+                    media_type = "image" if mime and mime.startswith("image/") else "file"
+                    media_tag = f"[{media_type}: {p}]"
+                    content = f"{content}\n{media_tag}" if content else media_tag
+
            await self._handle_message(
                sender_id=sender_id,
                chat_id=sender,  # Use full LID for replies
                content=content,
+                media=media_paths,
                metadata={
                    "message_id": message_id,
                    "timestamp": data.get("timestamp"),
--- a/nanobot/providers/litellm_provider.py
+++ b/nanobot/providers/litellm_provider.py
@@ -1,5 +1,6 @@
 """LiteLLM provider implementation for multi-provider support."""

+import hashlib
 import os
 import secrets
 import string
@@ -166,16 +167,48 @@ class LiteLLMProvider(LLMProvider):
            return _ANTHROPIC_EXTRA_KEYS
        return frozenset()

+    @staticmethod
+    def _normalize_tool_call_id(tool_call_id: Any) -> Any:
+        """Normalize tool_call_id to a provider-safe 9-char alphanumeric form."""
+        if not isinstance(tool_call_id, str):
+            return tool_call_id
+        if len(tool_call_id) == 9 and tool_call_id.isalnum():
+            return tool_call_id
+        return hashlib.sha1(tool_call_id.encode()).hexdigest()[:9]
+
    @staticmethod
    def _sanitize_messages(messages: list[dict[str, Any]], extra_keys: frozenset[str] = frozenset()) -> list[dict[str, Any]]:
        """Strip non-standard keys and ensure assistant messages have a content key."""
        allowed = _ALLOWED_MSG_KEYS | extra_keys
        sanitized = []
+        id_map: dict[str, str] = {}  # original id -> normalized id, shared by every message in this call
+
+        def map_id(value: Any) -> Any:  # non-strings pass through; the first mapping stored for an id is reused
+            if not isinstance(value, str):
+                return value
+            return id_map.setdefault(value, LiteLLMProvider._normalize_tool_call_id(value))
+
        for msg in messages:
            clean = {k: v for k, v in msg.items() if k in allowed}
            # Strict providers require "content" even when assistant only has tool_calls
            if clean.get("role") == "assistant" and "content" not in clean:
                clean["content"] = None
+
+            # Normalize each tool_calls[].id through map_id so that it stays equal to the
+            # tool_call_id of the answering tool message; strict providers reject a broken link.
+            if isinstance(clean.get("tool_calls"), list):
+                normalized_tool_calls = []
+                for tc in clean["tool_calls"]:
+                    if not isinstance(tc, dict):  # leave non-dict entries untouched
+                        normalized_tool_calls.append(tc)
+                        continue
+                    tc_clean = dict(tc)  # shallow copy, so the caller's tool-call dict is not modified
+                    tc_clean["id"] = map_id(tc_clean.get("id"))  # note: sets "id" to None when the entry had no id
+                    normalized_tool_calls.append(tc_clean)
+                clean["tool_calls"] = normalized_tool_calls
+            # Tool-result side of the link; a missing or falsy tool_call_id is left as it is.
+            if "tool_call_id" in clean and clean["tool_call_id"]:
+                clean["tool_call_id"] = map_id(clean["tool_call_id"])
            sanitized.append(clean)
        return sanitized