Files
market_page/backend/ai_services/bailian_service.py
jeremygan2021 809aab9e02
All checks were successful
Deploy to Server / deploy (push) Successful in 17s
tingwu_new
2026-03-11 21:58:09 +08:00

197 lines
8.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import logging
import json
import os
from django.conf import settings
from openai import OpenAI
from .models import AIEvaluation
logger = logging.getLogger(__name__)
class BailianService:
    """Wrapper around the DashScope OpenAI-compatible chat completions API.

    The API key is read from Django settings (``DASHSCOPE_API_KEY``) and, if
    absent there, from the environment variable of the same name. When no key
    is available ``self.client`` is ``None`` and both public methods degrade
    gracefully instead of raising.
    """

    # OpenAI-compatible endpoint every request is sent to.
    BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
    # Model used by summarize_task; evaluate_task takes its model from the
    # evaluation record instead.
    SUMMARY_MODEL = "qwen-plus"
    # Character (not token) budgets: a deliberately simple guard against
    # sending oversized prompts to the model.
    EVALUATION_MAX_CHARS = 10000
    SUMMARY_MAX_CHARS = 15000
    # Marker appended to a transcript that was cut at the budget.
    TRUNCATION_SUFFIX = "...(内容过长已截断)"
    # System prompt for summarize_task (sent to the model verbatim).
    SUMMARY_SYSTEM_PROMPT = "\n".join([
        "你是一个专业的会议/内容总结助手。请根据提供的【转写文本】、【总结原始数据】和【章节原始数据】,生成一份结构清晰、内容详实的总结报告。",
        "请按照以下结构输出Markdown格式",
        "1. **标题**:基于内容生成一个合适的标题。",
        "2. **核心摘要**:简要概括主要内容。",
        "3. **主要观点/话题**:结合思维导图数据,列出关键话题和层级。",
        "4. **章节速览**:结合章节数据,列出时间点和主要内容。[HH:MM:SS]格式来把章节列出来",
        "5. **问答精选**(如果有):基于问答总结数据,列出重要问答。",
        "请确保语言通顺,重点突出,能够还原内容的逻辑结构。",
    ])

    def __init__(self):
        """Resolve the API key and build the client (or leave it as ``None``)."""
        self.api_key = (
            getattr(settings, 'DASHSCOPE_API_KEY', None)
            or os.environ.get("DASHSCOPE_API_KEY")
        )
        if self.api_key:
            self.client = OpenAI(api_key=self.api_key, base_url=self.BASE_URL)
        else:
            self.client = None
            logger.warning("DASHSCOPE_API_KEY not configured.")

    # ------------------------------------------------------------------
    # Pure helpers (no I/O, no model access)
    # ------------------------------------------------------------------

    @staticmethod
    def _format_timestamp(ms):
        """Render a millisecond offset as ``MM:SS``.

        Values that cannot be interpreted as a number (``None``, garbage
        strings, NaN/inf) are rendered as ``00:00`` rather than raising, so a
        single malformed chapter cannot discard the whole chapter outline.
        """
        try:
            total_seconds = int(ms) // 1000
        except (TypeError, ValueError, OverflowError):
            total_seconds = 0
        minutes, seconds = divmod(total_seconds, 60)
        return f"{minutes:02d}:{seconds:02d}"

    @classmethod
    def _format_chapters(cls, auto_chapters_data):
        """Format ``{"AutoChapters": [...]}`` data as a bullet outline.

        Each chapter dict is read for ``Start``/``End`` (milliseconds),
        ``Headline`` and ``Summary``. Entries that are not dicts are skipped.

        Returns:
            The outline text, or ``""`` when the data does not have the
            ``AutoChapters`` list shape or yields no lines.
        """
        if not isinstance(auto_chapters_data, dict):
            return ""
        chapters = auto_chapters_data.get('AutoChapters')
        if not isinstance(chapters, list):
            return ""

        outline = []
        for chapter in chapters:
            if not isinstance(chapter, dict):
                continue
            start_str = cls._format_timestamp(chapter.get('Start', 0))
            end_str = cls._format_timestamp(chapter.get('End', 0))
            headline = chapter.get('Headline', '无标题')
            summary = chapter.get('Summary', '')
            line = f"- [{start_str} - {end_str}] {headline}"
            if summary:
                line += f"\n 摘要: {summary}"
            outline.append(line)
        return "\n".join(outline)

    @classmethod
    def _build_chapter_context(cls, auto_chapters_data):
        """Build the chapter/timestamp section appended to the evaluation prompt.

        Uses the formatted outline when the data has the ``AutoChapters``
        shape; otherwise falls back to a JSON dump (dict/list) or ``str()``.

        Returns:
            The prompt section, or ``""`` when there is no chapter data.

        Raises:
            TypeError, ValueError: if the JSON fallback cannot serialize the data.
        """
        if not auto_chapters_data:
            return ""
        chapters_str = cls._format_chapters(auto_chapters_data)
        if not chapters_str:
            if isinstance(auto_chapters_data, (dict, list)):
                chapters_str = json.dumps(auto_chapters_data, ensure_ascii=False, indent=2)
            else:
                chapters_str = str(auto_chapters_data)
        return (
            f"\n\n【章节与时间戳信息】\n{chapters_str}"
            "\n\n(提示:请结合上述章节时间戳信息,分析发言者的语速、节奏变化及停顿情况。)"
        )

    @classmethod
    def _truncate(cls, text, limit):
        """Cut ``text`` to ``limit`` characters, appending the truncation marker."""
        if len(text) > limit:
            return text[:limit] + cls.TRUNCATION_SUFFIX
        return text

    @staticmethod
    def _parse_json_object(text):
        """Parse ``text`` as a JSON object.

        Returns:
            The decoded dict, or ``None`` when ``text`` is not valid JSON or
            decodes to something other than an object (list, number, ...).
        """
        try:
            parsed = json.loads(text)
        except (TypeError, ValueError):  # JSONDecodeError is a ValueError
            return None
        return parsed if isinstance(parsed, dict) else None

    @staticmethod
    def _mark_failed(evaluation, message):
        """Persist a FAILED status with ``message`` and return the evaluation."""
        evaluation.status = AIEvaluation.Status.FAILED
        evaluation.error_message = message
        evaluation.save()
        return evaluation

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def evaluate_task(self, evaluation: "AIEvaluation"):
        """Run the AI evaluation for ``evaluation`` and persist the outcome.

        The prompt is built from ``evaluation.prompt``, the (truncated)
        transcription of ``evaluation.task`` and, when available, the task's
        chapter data. The model named by ``evaluation.model_selection`` is
        asked for a JSON object; ``score``, ``evaluation``/``comment`` and
        ``reasoning``/``analysis`` are copied onto the record.

        Args:
            evaluation: The record to evaluate; it is mutated and saved.

        Returns:
            The same ``evaluation`` instance on every path. Its ``status`` is
            COMPLETED on success and FAILED otherwise, with ``error_message``
            describing the failure.
        """
        if not self.client:
            return self._mark_failed(evaluation, "服务未配置 (DASHSCOPE_API_KEY missing)")

        task = evaluation.task
        if not task.transcription:
            return self._mark_failed(evaluation, "关联任务无逐字稿内容")

        evaluation.status = AIEvaluation.Status.PROCESSING
        evaluation.save()

        try:
            prompt = evaluation.prompt
            # Simple character-based cut to stay within the token limit.
            content = self._truncate(task.transcription, self.EVALUATION_MAX_CHARS)

            # Chapter/timestamp data helps the model analyse speaking pace.
            # It is optional context, so a failure here must not fail the
            # evaluation itself: log and continue without it.
            chapter_context = ""
            try:
                chapter_context = self._build_chapter_context(task.auto_chapters_data)
            except Exception as e:
                logger.warning("Failed to process auto_chapters_data: %s", e)

            messages = [
                {'role': 'system', 'content': 'You are a helpful assistant designed to output JSON.'},
                {'role': 'user', 'content': f"{prompt}\n\n以下是需要评估的内容:\n{content}{chapter_context}"},
            ]
            completion = self.client.chat.completions.create(
                model=evaluation.model_selection,
                messages=messages,
                response_format={"type": "json_object"},
            )
            # The SDK types message.content as optional; normalise to "" so
            # an empty reply takes the explicit "cannot parse" path below.
            response_content = completion.choices[0].message.content or ""
            # Keep the full response (as a plain dict) for later inspection.
            evaluation.raw_response = completion.model_dump()

            result = self._parse_json_object(response_content)
            if result is None:
                evaluation.status = AIEvaluation.Status.FAILED
                evaluation.error_message = f"无法解析JSON响应: {response_content}"
                evaluation.reasoning = response_content
            else:
                evaluation.score = result.get('score')
                evaluation.evaluation = result.get('evaluation') or result.get('comment')
                # Prefer the model's own reasoning; otherwise keep the whole
                # JSON payload as the reasoning reference.
                evaluation.reasoning = (
                    result.get('reasoning')
                    or result.get('analysis')
                    or json.dumps(result, ensure_ascii=False, indent=2)
                )
                evaluation.status = AIEvaluation.Status.COMPLETED

            evaluation.save()
            return evaluation
        except Exception as e:
            # Boundary handler: record the failure on the evaluation instead
            # of propagating, and keep the traceback in the log.
            logger.exception("AI Evaluation failed: %s", e)
            return self._mark_failed(evaluation, str(e))

    def summarize_task(self, task):
        """Generate a Markdown summary for a transcription task.

        Sends the (truncated) transcription plus any ``summary_data`` and
        ``auto_chapters_data`` as JSON to the summary model and stores the
        reply in ``task.summary``. Failures are logged and swallowed; nothing
        is returned.

        Args:
            task: The transcription task; only its ``summary`` field is saved.
        """
        if not self.client:
            logger.warning("BailianService not initialized, skipping summary.")
            return
        if not task.transcription:
            logger.warning("Task %s has no transcription, skipping summary.", task.id)
            return

        try:
            # Simple character-based cut to keep the prompt bounded.
            content = self._truncate(task.transcription, self.SUMMARY_MAX_CHARS)

            # Optional raw context forwarded to the model alongside the text.
            context_data = ""
            if task.summary_data:
                context_data += f"\n\n【总结原始数据】\n{json.dumps(task.summary_data, ensure_ascii=False, indent=2)}"
            if task.auto_chapters_data:
                context_data += f"\n\n【章节原始数据】\n{json.dumps(task.auto_chapters_data, ensure_ascii=False, indent=2)}"

            messages = [
                {'role': 'system', 'content': self.SUMMARY_SYSTEM_PROMPT},
                {'role': 'user', 'content': f"以下是需要总结的内容:\n\n【转写文本】\n{content}{context_data}"},
            ]
            completion = self.client.chat.completions.create(
                model=self.SUMMARY_MODEL,
                messages=messages,
            )
            task.summary = completion.choices[0].message.content
            task.save(update_fields=['summary'])
            logger.info("Task %s summary generated successfully.", task.id)
        except Exception as e:
            # Best-effort feature: log with traceback, never raise to caller.
            logger.exception("Failed to generate summary for task %s: %s", task.id, e)