merge origin/main into pr-1361

This commit is contained in:
Re-bin
2026-03-01 06:36:29 +00:00
parent c2bbd6d20d
commit 4752e95a24

View File

@@ -181,71 +181,59 @@ def _extract_element_content(element: dict) -> list[str]:
def _extract_post_content(content_json: dict) -> tuple[str, list[str]]: def _extract_post_content(content_json: dict) -> tuple[str, list[str]]:
"""Extract text and image keys from Feishu post (rich text) message content. """Extract text and image keys from Feishu post (rich text) message.
Supports two formats: Handles three payload shapes:
1. Direct format: {"title": "...", "content": [...]} - Direct: {"title": "...", "content": [[...]]}
2. Localized format: {"zh_cn": {"title": "...", "content": [...]}} - Localized: {"zh_cn": {"title": "...", "content": [...]}}
- Wrapped: {"post": {"zh_cn": {"title": "...", "content": [...]}}}
Returns:
(text, image_keys) - extracted text and list of image keys
""" """
def extract_from_lang(lang_content: dict) -> tuple[str | None, list[str]]:
if not isinstance(lang_content, dict): def _parse_block(block: dict) -> tuple[str | None, list[str]]:
if not isinstance(block, dict) or not isinstance(block.get("content"), list):
return None, [] return None, []
title = lang_content.get("title", "") texts, images = [], []
content_blocks = lang_content.get("content", []) if title := block.get("title"):
if not isinstance(content_blocks, list): texts.append(title)
return None, [] for row in block["content"]:
text_parts = [] if not isinstance(row, list):
image_keys = []
if title:
text_parts.append(title)
for block in content_blocks:
if not isinstance(block, list):
continue continue
for element in block: for el in row:
if isinstance(element, dict): if not isinstance(el, dict):
tag = element.get("tag") continue
if tag == "text": tag = el.get("tag")
text_parts.append(element.get("text", "")) if tag in ("text", "a"):
elif tag == "a": texts.append(el.get("text", ""))
text_parts.append(element.get("text", "")) elif tag == "at":
elif tag == "at": texts.append(f"@{el.get('user_name', 'user')}")
text_parts.append(f"@{element.get('user_name', 'user')}") elif tag == "img" and (key := el.get("image_key")):
elif tag == "img": images.append(key)
img_key = element.get("image_key") return (" ".join(texts).strip() or None), images
if img_key:
image_keys.append(img_key)
text = " ".join(text_parts).strip() if text_parts else None
return text, image_keys
# Compatible with both shapes: # Unwrap optional {"post": ...} envelope
# 1) {"post": {"zh_cn": {...}}} root = content_json
# 2) {"zh_cn": {...}} or {"title": "...", "content": [...]} if isinstance(root, dict) and isinstance(root.get("post"), dict):
post_root = content_json.get("post") if isinstance(content_json, dict) else None root = root["post"]
if not isinstance(post_root, dict): if not isinstance(root, dict):
post_root = content_json if isinstance(content_json, dict) else {} return "", []
# Try direct format first # Direct format
if "content" in post_root: if "content" in root:
text, images = extract_from_lang(post_root) text, imgs = _parse_block(root)
if text or images: if text or imgs:
return text or "", images return text or "", imgs
# Try localized format # Localized: prefer known locales, then fall back to any dict child
for lang_key in ("zh_cn", "en_us", "ja_jp"): for key in ("zh_cn", "en_us", "ja_jp"):
lang_content = post_root.get(lang_key) if key in root:
text, images = extract_from_lang(lang_content) text, imgs = _parse_block(root[key])
if text or images: if text or imgs:
return text or "", images return text or "", imgs
for val in root.values():
# Fallback: first dict-shaped child if isinstance(val, dict):
for value in post_root.values(): text, imgs = _parse_block(val)
if isinstance(value, dict): if text or imgs:
text, images = extract_from_lang(value) return text or "", imgs
if text or images:
return text or "", images
return "", [] return "", []