Merge PR #1090: feat(feishu): extract and download images from post messages

This commit is contained in:
Re-bin
2026-02-24 11:32:04 +00:00

View File

@@ -180,21 +180,25 @@ def _extract_element_content(element: dict) -> list[str]:
return parts
def _extract_post_text(content_json: dict) -> str:
"""Extract plain text from Feishu post (rich text) message content.
def _extract_post_content(content_json: dict) -> tuple[str, list[str]]:
"""Extract text and image keys from Feishu post (rich text) message content.
Supports two formats:
1. Direct format: {"title": "...", "content": [...]}
2. Localized format: {"zh_cn": {"title": "...", "content": [...]}}
Returns:
(text, image_keys) - extracted text and list of image keys
"""
def extract_from_lang(lang_content: dict) -> str | None:
def extract_from_lang(lang_content: dict) -> tuple[str | None, list[str]]:
if not isinstance(lang_content, dict):
return None
return None, []
title = lang_content.get("title", "")
content_blocks = lang_content.get("content", [])
if not isinstance(content_blocks, list):
return None
return None, []
text_parts = []
image_keys = []
if title:
text_parts.append(title)
for block in content_blocks:
@@ -209,22 +213,36 @@ def _extract_post_text(content_json: dict) -> str:
text_parts.append(element.get("text", ""))
elif tag == "at":
text_parts.append(f"@{element.get('user_name', 'user')}")
return " ".join(text_parts).strip() if text_parts else None
elif tag == "img":
img_key = element.get("image_key")
if img_key:
image_keys.append(img_key)
text = " ".join(text_parts).strip() if text_parts else None
return text, image_keys
# Try direct format first
if "content" in content_json:
result = extract_from_lang(content_json)
if result:
return result
text, images = extract_from_lang(content_json)
if text or images:
return text or "", images
# Try localized format
for lang_key in ("zh_cn", "en_us", "ja_jp"):
lang_content = content_json.get(lang_key)
result = extract_from_lang(lang_content)
if result:
return result
text, images = extract_from_lang(lang_content)
if text or images:
return text or "", images
return ""
return "", []
def _extract_post_text(content_json: dict) -> str:
"""Extract plain text from Feishu post (rich text) message content.
Legacy wrapper for _extract_post_content, returns only text.
"""
text, _ = _extract_post_content(content_json)
return text
class FeishuChannel(BaseChannel):
@@ -691,9 +709,17 @@ class FeishuChannel(BaseChannel):
content_parts.append(text)
elif msg_type == "post":
text = _extract_post_text(content_json)
text, image_keys = _extract_post_content(content_json)
if text:
content_parts.append(text)
# Download images embedded in post
for img_key in image_keys:
file_path, content_text = await self._download_and_save_media(
"image", {"image_key": img_key}, message_id
)
if file_path:
media_paths.append(file_path)
content_parts.append(content_text)
elif msg_type in ("image", "audio", "file", "media"):
file_path, content_text = await self._download_and_save_media(msg_type, content_json, message_id)