Refactored from image support to generic media

2026-03-07 00:13:38 +00:00
parent 8c25897532
commit 067965da50
3 changed files with 37 additions and 1385 deletions
--- a/bridge/package-lock.json
+++ b/bridge/package-lock.json
--- a/bridge/src/whatsapp.ts
+++ b/bridge/src/whatsapp.ts
@@ -127,7 +127,14 @@ export class WhatsAppClient {
        const mediaPaths: string[] = [];
        if (unwrapped.imageMessage) {
-          const path = await this.downloadImage(msg, unwrapped.imageMessage.mimetype ?? undefined);
+          const path = await this.downloadMedia(msg, unwrapped.imageMessage.mimetype ?? undefined);
          if (path) mediaPaths.push(path);
+        } else if (unwrapped.documentMessage) {
+          const path = await this.downloadMedia(msg, unwrapped.documentMessage.mimetype ?? undefined,
+            unwrapped.documentMessage.fileName ?? undefined);
+          if (path) mediaPaths.push(path);
+        } else if (unwrapped.videoMessage) {
+          const path = await this.downloadMedia(msg, unwrapped.videoMessage.mimetype ?? undefined);
+          if (path) mediaPaths.push(path);
        }
@@ -148,29 +155,31 @@ export class WhatsAppClient {
    });
  }
-  private async downloadImage(msg: any, mimetype?: string): Promise<string | null> {
+  private async downloadMedia(msg: any, mimetype?: string, fileName?: string): Promise<string | null> {
    try {
      const mediaDir = join(homedir(), '.nanobot', 'media');
      await mkdir(mediaDir, { recursive: true });
      const buffer = await downloadMediaMessage(msg, 'buffer', {}) as Buffer;
-      const mime = mimetype || 'image/jpeg';
+      let outFilename: string;
-      const extMap: Record<string, string> = {
+      if (fileName) {
-        'image/jpeg': '.jpg',
+        // Documents have a filename — use it with a unique prefix to avoid collisions
-        'image/png': '.png',
+        const prefix = `wa_${Date.now()}_${randomBytes(4).toString('hex')}_`;
-        'image/gif': '.gif',
+        outFilename = prefix + fileName;
-        'image/webp': '.webp',
+      } else {
-      };
+        const mime = mimetype || 'application/octet-stream';
-      const ext = extMap[mime] || '.jpg';
+        // Derive extension from mimetype subtype (e.g. "image/png" → ".png", "application/pdf" → ".pdf")
+        const ext = '.' + (mime.split('/').pop()?.split(';')[0] || 'bin');
+        outFilename = `wa_${Date.now()}_${randomBytes(4).toString('hex')}${ext}`;
+      }
-      const filename = `wa_${Date.now()}_${randomBytes(4).toString('hex')}${ext}`;
+      const filepath = join(mediaDir, outFilename);
-      const filepath = join(mediaDir, filename);
      await writeFile(filepath, buffer);
      return filepath;
    } catch (err) {
-      console.error('Failed to download image:', err);
+      console.error('Failed to download media:', err);
      return null;
    }
  }
@@ -191,14 +200,14 @@ export class WhatsAppClient {
      return message.imageMessage.caption || '';
    }
-    // Video with caption
+    // Video with optional caption
-    if (message.videoMessage?.caption) {
+    if (message.videoMessage) {
-      return `[Video] ${message.videoMessage.caption}`;
+      return message.videoMessage.caption || '';
    }
-    // Document with caption
+    // Document with optional caption
-    if (message.documentMessage?.caption) {
+    if (message.documentMessage) {
-      return `[Document] ${message.documentMessage.caption}`;
+      return message.documentMessage.caption || '';
    }
    // Voice/Audio message
--- a/nanobot/channels/whatsapp.py
+++ b/nanobot/channels/whatsapp.py
@@ -2,6 +2,7 @@
 import asyncio
 import json
+import mimetypes
 from collections import OrderedDict
 from loguru import logger
@@ -128,12 +129,16 @@ class WhatsAppChannel(BaseChannel):
                logger.info("Voice message received from {}, but direct download from bridge is not yet supported.", sender_id)
                content = "[Voice Message: Transcription not available for WhatsApp yet]"
-            # Extract media paths (images downloaded by the bridge)
+            # Extract media paths (images/documents/videos downloaded by the bridge)
            media_paths = data.get("media") or []
-            # For image messages without caption, provide descriptive content
+            # Build content tags matching Telegram's pattern: [image: /path] or [file: /path]
-            if not content and media_paths:
+            if media_paths:
-                content = "[image]"
+                for p in media_paths:
+                    mime, _ = mimetypes.guess_type(p)
+                    media_type = "image" if mime and mime.startswith("image/") else "file"
+                    media_tag = f"[{media_type}: {p}]"
+                    content = f"{content}\n{media_tag}" if content else media_tag
            await self._handle_message(
                sender_id=sender_id,