import json
import logging
import os
from typing import Any, Optional

from django.conf import settings
from openai import OpenAI

from .models import AIEvaluation

logger = logging.getLogger(__name__)


class BailianService:
    """Client for the DashScope OpenAI-compatible chat endpoint.

    The API key is read from Django settings (``DASHSCOPE_API_KEY``) and, if
    absent or empty there, from the environment variable of the same name.
    When no key is available ``self.client`` is ``None`` and both public
    methods degrade gracefully instead of raising.
    """

    BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
    # Model used by summarize_task() when the caller does not pick one.
    DEFAULT_SUMMARY_MODEL = "qwen-plus"
    # Character (not token) budgets applied to the transcription before it is
    # sent to the model; a crude guard against exceeding the token limit.
    EVALUATION_MAX_CHARS = 10000
    SUMMARY_MAX_CHARS = 15000
    TRUNCATION_SUFFIX = "...(内容过长已截断)"

    SUMMARY_SYSTEM_PROMPT = (
        "你是一个专业的会议/内容总结助手。请根据提供的【转写文本】、【总结原始数据】和【章节原始数据】,"
        "生成一份结构清晰、内容详实的总结报告。\n"
        "\n"
        "请按照以下结构输出(Markdown格式):\n"
        "\n"
        "1. **标题**:基于内容生成一个合适的标题。\n"
        "2. **核心摘要**:简要概括主要内容。\n"
        "3. **主要观点/话题**:结合思维导图数据,列出关键话题和层级。\n"
        "4. **章节速览**:结合章节数据,列出时间点和主要内容。[HH:MM:SS]格式来把章节列出来\n"
        "5. **问答精选**(如果有):基于问答总结数据,列出重要问答。\n"
        "\n"
        "请确保语言通顺,重点突出,能够还原内容的逻辑结构。"
    )

    def __init__(self) -> None:
        """Resolve the API key and build the OpenAI client if possible."""
        self.api_key = (
            getattr(settings, 'DASHSCOPE_API_KEY', None)
            or os.environ.get("DASHSCOPE_API_KEY")
        )
        if self.api_key:
            self.client = OpenAI(api_key=self.api_key, base_url=self.BASE_URL)
        else:
            self.client = None
            logger.warning("DASHSCOPE_API_KEY not configured.")

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _ms_to_mmss(value: Any) -> str:
        """Format a millisecond offset as ``MM:SS``.

        Non-integer input (float, numeric string, ``None``) is coerced to an
        int; anything that cannot be coerced is treated as 0 so one bad
        timestamp does not discard the whole chapter listing.
        """
        try:
            total_seconds = int(value or 0) // 1000
        except (TypeError, ValueError, OverflowError):
            total_seconds = 0
        return f"{total_seconds // 60:02d}:{total_seconds % 60:02d}"

    @classmethod
    def _format_chapters(cls, chapters_data: Any) -> str:
        """Render an ``{"AutoChapters": [...]}`` payload as a bullet list.

        Expected entry shape:
        ``{"Id": 1, "Start": 740, "End": 203436, "Headline": "...", "Summary": "..."}``.
        Returns an empty string when the payload does not have that shape, so
        the caller can fall back to a generic dump.
        """
        if not isinstance(chapters_data, dict):
            return ""
        chapters = chapters_data.get('AutoChapters')
        if not isinstance(chapters, list):
            return ""

        chapter_lines = []
        for chapter in chapters:
            if not isinstance(chapter, dict):
                # Skip malformed entries instead of aborting the whole listing.
                continue
            start_str = cls._ms_to_mmss(chapter.get('Start', 0))
            end_str = cls._ms_to_mmss(chapter.get('End', 0))
            headline = chapter.get('Headline', '无标题')
            summary = chapter.get('Summary', '')
            line = f"- [{start_str} - {end_str}] {headline}"
            if summary:
                line += f"\n 摘要: {summary}"
            chapter_lines.append(line)
        return "\n".join(chapter_lines)

    def _build_chapter_context(self, chapters_data: Any) -> str:
        """Build the chapter/timestamp section appended to the evaluation prompt.

        Best-effort: any failure is logged and results in an empty string so
        the evaluation can still proceed without chapter information.
        """
        if not chapters_data:
            return ""
        try:
            chapters_str = self._format_chapters(chapters_data)
            if not chapters_str:
                # Structured parsing produced nothing (unknown format):
                # fall back to a generic dump of whatever we were given.
                if isinstance(chapters_data, (dict, list)):
                    chapters_str = json.dumps(chapters_data, ensure_ascii=False, indent=2)
                else:
                    chapters_str = str(chapters_data)
            return (
                f"\n\n【章节与时间戳信息】\n{chapters_str}\n\n"
                "(提示:请结合上述章节时间戳信息,分析发言者的语速、节奏变化及停顿情况。)"
            )
        except Exception as e:  # deliberate best-effort boundary
            logger.warning("Failed to process auto_chapters_data: %s", e)
            return ""

    @classmethod
    def _truncate(cls, text: str, limit: int) -> str:
        """Cut ``text`` to ``limit`` characters, appending a truncation marker."""
        if len(text) > limit:
            return text[:limit] + cls.TRUNCATION_SUFFIX
        return text

    @staticmethod
    def _mark_failed(evaluation: AIEvaluation, message: str) -> None:
        """Persist a FAILED status with the given error message."""
        evaluation.status = AIEvaluation.Status.FAILED
        evaluation.error_message = message
        evaluation.save()

    @staticmethod
    def _apply_evaluation_result(evaluation: AIEvaluation, response_content: Optional[str]) -> None:
        """Parse the model's reply and copy its fields onto ``evaluation``.

        Does not save. A reply that is missing, is not valid JSON, or is valid
        JSON but not an object marks the evaluation FAILED and stores the raw
        text in ``reasoning`` for inspection.
        """
        # The API may return no content at all; treat that as unparseable text.
        raw_text = response_content or ""
        try:
            result = json.loads(raw_text)
        except json.JSONDecodeError:
            result = None

        if not isinstance(result, dict):
            evaluation.status = AIEvaluation.Status.FAILED
            evaluation.error_message = f"无法解析JSON响应: {raw_text}"
            evaluation.reasoning = raw_text
            return

        evaluation.score = result.get('score')
        evaluation.evaluation = result.get('evaluation') or result.get('comment')
        # Use the model's own reasoning if it returned any; otherwise keep the
        # whole JSON object as the reasoning reference.
        evaluation.reasoning = (
            result.get('reasoning')
            or result.get('analysis')
            or json.dumps(result, ensure_ascii=False, indent=2)
        )
        evaluation.status = AIEvaluation.Status.COMPLETED

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def evaluate_task(self, evaluation: AIEvaluation) -> Optional[AIEvaluation]:
        """Run the AI evaluation for ``evaluation`` and persist the outcome.

        The evaluation's prompt, the related task's transcription (truncated
        to ``EVALUATION_MAX_CHARS``) and, when available, its chapter data are
        sent to the model named by ``evaluation.model_selection`` with a JSON
        response format.

        Returns:
            ``None`` when the service is not configured or the task has no
            transcription (the evaluation is saved as FAILED in both cases);
            otherwise the saved ``evaluation``, whose status is COMPLETED or
            FAILED. Exceptions from the API call are caught, logged, and
            recorded in ``error_message`` rather than propagated.
        """
        if not self.client:
            self._mark_failed(evaluation, "服务未配置 (DASHSCOPE_API_KEY missing)")
            return None

        task = evaluation.task
        if not task.transcription:
            self._mark_failed(evaluation, "关联任务无逐字稿内容")
            return None

        evaluation.status = AIEvaluation.Status.PROCESSING
        evaluation.save()

        try:
            prompt = evaluation.prompt
            # Chapter/timestamp data helps the model analyse speaking rhythm.
            chapter_context = self._build_chapter_context(task.auto_chapters_data)
            content = self._truncate(task.transcription, self.EVALUATION_MAX_CHARS)

            messages = [
                {'role': 'system', 'content': 'You are a helpful assistant designed to output JSON.'},
                {'role': 'user', 'content': f"{prompt}\n\n以下是需要评估的内容:\n{content}{chapter_context}"},
            ]

            completion = self.client.chat.completions.create(
                model=evaluation.model_selection,
                messages=messages,
                response_format={"type": "json_object"},
            )
            response_content = completion.choices[0].message.content

            # Keep the full response (as a plain dict) for storage.
            evaluation.raw_response = completion.model_dump()

            self._apply_evaluation_result(evaluation, response_content)
            evaluation.save()
            return evaluation
        except Exception as e:
            # Top-level boundary: record the failure on the model instance
            # and keep the traceback in the log.
            logger.exception("AI Evaluation failed: %s", e)
            evaluation.status = AIEvaluation.Status.FAILED
            evaluation.error_message = str(e)
            evaluation.save()
            return evaluation

    def summarize_task(self, task, model: Optional[str] = None) -> None:
        """Generate a Markdown summary for a transcription task.

        The transcription (truncated to ``SUMMARY_MAX_CHARS``) plus any
        ``summary_data`` / ``auto_chapters_data`` on the task are sent to the
        model, and the reply is stored in ``task.summary``.

        Args:
            task: Object exposing ``id``, ``transcription``, ``summary_data``,
                ``auto_chapters_data``, ``summary`` and ``save(update_fields=...)``.
            model: Model name to use; defaults to ``DEFAULT_SUMMARY_MODEL``.

        Returns:
            ``None``. Failures are logged and never raised to the caller.
        """
        if not self.client:
            logger.warning("BailianService not initialized, skipping summary.")
            return

        if not task.transcription:
            logger.warning("Task %s has no transcription, skipping summary.", task.id)
            return

        try:
            content = self._truncate(task.transcription, self.SUMMARY_MAX_CHARS)

            # Optional raw data that gives the model extra structure to work with.
            context_parts = []
            if task.summary_data:
                context_parts.append(
                    f"\n\n【总结原始数据】\n{json.dumps(task.summary_data, ensure_ascii=False, indent=2)}"
                )
            if task.auto_chapters_data:
                context_parts.append(
                    f"\n\n【章节原始数据】\n{json.dumps(task.auto_chapters_data, ensure_ascii=False, indent=2)}"
                )
            context_data = "".join(context_parts)

            user_content = f"以下是需要总结的内容:\n\n【转写文本】\n{content}{context_data}"
            messages = [
                {'role': 'system', 'content': self.SUMMARY_SYSTEM_PROMPT},
                {'role': 'user', 'content': user_content},
            ]

            completion = self.client.chat.completions.create(
                model=model or self.DEFAULT_SUMMARY_MODEL,
                messages=messages,
            )

            task.summary = completion.choices[0].message.content
            task.save(update_fields=['summary'])
            logger.info("Task %s summary generated successfully.", task.id)
        except Exception as e:
            # Summaries are best-effort; log with traceback and move on.
            logger.exception("Failed to generate summary for task %s: %s", task.id, e)