tingwu_new

2026-03-11 20:41:49 +08:00
parent b0aa902f89
commit f41fd01367
3 changed files with 210 additions and 160 deletions
--- a/backend/ai_services/services.py
+++ b/backend/ai_services/services.py
@@ -150,3 +150,148 @@ class AliyunTingwuService:
        except (ClientException, ServerException) as e:
            logger.error(f"Tingwu GetTaskInfo failed: {e}")
            raise e
+
+    def parse_and_update_task(self, task, result):
+        """
+        Parse a Tingwu task result and persist it on ``task``.
+
+        Nothing is written while the task is still running. On ``FAILED`` only
+        the status and error message are saved. On success the transcription,
+        summarization and auto-chapter payloads are stored and the task saved.
+        Entries given as ``http`` URLs are downloaded; failures are only logged.
+
+        :param task: TranscriptionTask instance; mutated and saved in place.
+        :param result: Full JSON returned by get_task_info, or its ``Data`` part.
+        """
+        # 1. Unwrap the Data envelope when the full response was passed in.
+        data_obj = result.get('Data', result) if isinstance(result, dict) else result
+        if not isinstance(data_obj, dict):
+            logger.error(f"Unexpected data format: {type(data_obj)}")
+            return
+
+        # 2. Map the remote status onto the task.
+        task_status = data_obj.get('TaskStatus') or data_obj.get('Status')
+        if task_status == 'FAILED':
+            task.status = 'FAILED'
+            # "or" chain: a present-but-empty field must not blank the message.
+            task.error_message = (
+                data_obj.get('TaskStatusText') or data_obj.get('Message') or 'Unknown error'
+            )
+            task.save()
+            return
+        if task_status not in ('COMPLETE', 'COMPLETED', 'SUCCEEDED'):
+            # Still processing: leave the task untouched.
+            return
+        # Plain string rather than a model constant, to avoid a circular import.
+        task.status = 'SUCCEEDED'
+
+        # 3. Parse results. "Result" may be absent or null; normalise to a dict.
+        task_result = data_obj.get('Result')
+        if not isinstance(task_result, dict):
+            task_result = {}
+
+        # --- A. Transcription ---
+        transcription_data = task_result.get('Transcription', {})
+        if self._is_url(transcription_data):
+            transcription_data = self._download_json(transcription_data, 'transcription', {})
+        elif isinstance(transcription_data, dict) and 'TranscriptionUrl' in transcription_data:
+            # Fall back to the inline dict so it survives a failed download.
+            transcription_data = self._download_json(
+                transcription_data['TranscriptionUrl'], 'transcription url', transcription_data
+            )
+        task.transcription_data = transcription_data
+
+        # Text lives under {"Transcription": {"Paragraphs": [...]}} or directly
+        # under {"Paragraphs": [...]}; anything that is not a dict yields no text.
+        content_source = transcription_data if isinstance(transcription_data, dict) else {}
+        if isinstance(content_source.get('Transcription'), dict):
+            content_source = content_source['Transcription']
+        full_text_lines = self._paragraph_lines(content_source.get('Paragraphs'))
+        if full_text_lines:
+            task.transcription = "\n".join(full_text_lines)
+
+        # --- B. Summarization ---
+        summarization = task_result.get('Summarization', {})
+        if self._is_url(summarization):
+            summarization = self._download_json(summarization, 'summarization', {})
+        task.summary_data = summarization
+
+        summary_source = summarization if isinstance(summarization, dict) else {}
+        # NOTE(review): assumes a downloaded file may nest the payload under
+        # "Summarization", like the transcription file does - confirm schema.
+        if isinstance(summary_source.get('Summarization'), dict):
+            summary_source = summary_source['Summarization']
+        # Precedence by key presence: MindMapSummary, then Text, then Headline.
+        if 'MindMapSummary' in summary_source:
+            summary_text = self._mindmap_lines(summary_source['MindMapSummary'])
+        else:
+            key = 'Text' if 'Text' in summary_source else 'Headline'
+            value = summary_source.get(key)
+            summary_text = [value] if isinstance(value, str) else []
+        if summary_text:
+            task.summary = "\n".join(summary_text)
+
+        # --- C. AutoChapters ---
+        auto_chapters = task_result.get('AutoChapters', [])
+        if self._is_url(auto_chapters):
+            auto_chapters = self._download_json(auto_chapters, 'auto chapters', [])
+        # Only the raw chapter data is kept; it is not folded into the summary.
+        task.auto_chapters_data = auto_chapters
+
+        task.save()
+
+    @staticmethod
+    def _is_url(value):
+        """Return True when ``value`` is a string starting with ``http``."""
+        return isinstance(value, str) and value.startswith('http')
+
+    @staticmethod
+    def _download_json(url, label, fallback, timeout=30):
+        """
+        Download ``url`` and return its JSON body, or ``fallback`` on failure.
+
+        Errors and non-200 responses are logged with ``label``; ``timeout``
+        (seconds) keeps a stalled server from blocking the caller forever.
+        """
+        try:
+            import requests  # kept function-local, as in the original code
+            response = requests.get(url, timeout=timeout)
+            if response.status_code == 200:
+                return response.json()
+            logger.error(f"Download {label} failed: HTTP {response.status_code}")
+        except Exception as e:  # deliberate catch-all: parsing must go on
+            logger.error(f"Download {label} failed: {e}")
+        return fallback
+
+    @staticmethod
+    def _paragraph_lines(paragraphs):
+        """Return one text line per paragraph, from its Words or its Text."""
+        lines = []
+        if not isinstance(paragraphs, list):
+            return lines
+        for paragraph in paragraphs:
+            if not isinstance(paragraph, dict):
+                continue
+            words = paragraph.get('Words')
+            if words and isinstance(words, list):
+                lines.append("".join(str(w.get('Text', '')) for w in words if isinstance(w, dict)))
+            elif 'Text' in paragraph:
+                # Alternative shape with the text directly on the paragraph.
+                lines.append(str(paragraph['Text']))
+        return lines
+
+    @classmethod
+    def _mindmap_lines(cls, topics, level=0):
+        """Flatten a MindMapSummary tree into indented "- title" bullet lines."""
+        lines = []
+        if not isinstance(topics, list):
+            return lines
+        indent = "  " * level
+        for topic in topics:
+            if not isinstance(topic, dict):
+                continue
+            title = topic.get('Title', '')
+            if title:
+                lines.append(f"{indent}- {title}")
+            lines.extend(cls._mindmap_lines(topic.get('Topic'), level + 1))
+        return lines