Merge PR #1090: feat(feishu): extract and download images from post messages

2026-02-24 11:32:04 +00:00
parent cd5a8ac03d 04218276ab
commit f514ba02e9
1 changed files with 40 additions and 14 deletions
--- a/nanobot/channels/feishu.py
+++ b/nanobot/channels/feishu.py
@@ -180,21 +180,25 @@ def _extract_element_content(element: dict) -> list[str]:
    return parts


-def _extract_post_text(content_json: dict) -> str:
-    """Extract plain text from Feishu post (rich text) message content.
+def _extract_post_content(content_json: dict) -> tuple[str, list[str]]:
+    """Extract text and image keys from Feishu post (rich text) message content.
    
    Supports two formats:
    1. Direct format: {"title": "...", "content": [...]}
    2. Localized format: {"zh_cn": {"title": "...", "content": [...]}}
+    
+    Returns:
+        (text, image_keys) - extracted text and list of image keys
    """
-    def extract_from_lang(lang_content: dict) -> str | None:
+    def extract_from_lang(lang_content: dict) -> tuple[str | None, list[str]]:
        if not isinstance(lang_content, dict):
-            return None
+            return None, []
        title = lang_content.get("title", "")
        content_blocks = lang_content.get("content", [])
        if not isinstance(content_blocks, list):
-            return None
+            return None, []
        text_parts = []
+        image_keys = []
        if title:
            text_parts.append(title)
        for block in content_blocks:
@@ -209,22 +213,36 @@ def _extract_post_text(content_json: dict) -> str:
                        text_parts.append(element.get("text", ""))
                    elif tag == "at":
                        text_parts.append(f"@{element.get('user_name', 'user')}")
-        return " ".join(text_parts).strip() if text_parts else None
+                    elif tag == "img":
+                        img_key = element.get("image_key")
+                        if img_key:
+                            image_keys.append(img_key)
+        text = " ".join(text_parts).strip() if text_parts else None
+        return text, image_keys
    
    # Try direct format first
    if "content" in content_json:
-        result = extract_from_lang(content_json)
-        if result:
-            return result
+        text, images = extract_from_lang(content_json)
+        if text or images:
+            return text or "", images
    
    # Try localized format
    for lang_key in ("zh_cn", "en_us", "ja_jp"):
        lang_content = content_json.get(lang_key)
-        result = extract_from_lang(lang_content)
-        if result:
-            return result
+        text, images = extract_from_lang(lang_content)
+        if text or images:
+            return text or "", images
    
-    return ""
+    return "", []
+
+
+def _extract_post_text(content_json: dict) -> str:
+    """Extract plain text from Feishu post (rich text) message content.
+    
+    Legacy wrapper for _extract_post_content, returns only text.
+    """
+    text, _ = _extract_post_content(content_json)
+    return text


 class FeishuChannel(BaseChannel):
@@ -691,9 +709,17 @@ class FeishuChannel(BaseChannel):
                    content_parts.append(text)

            elif msg_type == "post":
-                text = _extract_post_text(content_json)
+                text, image_keys = _extract_post_content(content_json)
                if text:
                    content_parts.append(text)
+                # Download images embedded in post
+                for img_key in image_keys:
+                    file_path, content_text = await self._download_and_save_media(
+                        "image", {"image_key": img_key}, message_id
+                    )
+                    if file_path:
+                        media_paths.append(file_path)
+                    content_parts.append(content_text)

            elif msg_type in ("image", "audio", "file", "media"):
                file_path, content_text = await self._download_and_save_media(msg_type, content_json, message_id)