创赢未来评分系统 - 初始化提交(移除大文件)
All checks were successful
Deploy to Server / deploy (push) Successful in 18s

This commit is contained in:
爽哒哒
2026-03-18 22:28:45 +08:00
commit f26d35da66
315 changed files with 36043 additions and 0 deletions

View File

View File

@@ -0,0 +1,47 @@
from django.contrib import admin
from unfold.admin import ModelAdmin as UnfoldModelAdmin
from unfold.admin import StackedInline as UnfoldStackedInline
from .models import TranscriptionTask, AIEvaluation, AIEvaluationTemplate
class AIEvaluationInline(UnfoldStackedInline):
    """Stacked inline that lists AI evaluation results on the TranscriptionTask admin page."""
    model = AIEvaluation
    extra = 0  # no blank extra forms
    can_delete = True
    verbose_name = "AI评估结果"
    verbose_name_plural = "AI评估结果"
    # Pipeline-produced snapshot/debug fields are read-only in the admin.
    readonly_fields = ['created_at', 'updated_at', 'raw_response', 'reasoning', 'template']
    fields = ('template', 'score', 'evaluation', 'model_selection', 'prompt', 'reasoning', 'status', 'error_message')
@admin.register(TranscriptionTask)
class TranscriptionTaskAdmin(UnfoldModelAdmin):
    """Admin for transcription tasks, with AI evaluation results shown inline."""
    list_display = ['id', 'status', 'task_id', 'created_at']
    list_filter = ['status', 'created_at']
    search_fields = ['id', 'task_id', 'transcription', 'summary']
    # Identifiers and timestamps are system-managed, never hand-edited.
    readonly_fields = ['id', 'created_at', 'updated_at', 'task_id']
    inlines = [AIEvaluationInline]
@admin.register(AIEvaluationTemplate)
class AIEvaluationTemplateAdmin(UnfoldModelAdmin):
    """Admin for reusable AI evaluation templates (model + prompt presets)."""
    list_display = ['name', 'model_selection', 'score_dimension', 'is_default', 'is_active', 'created_at']
    list_filter = ['is_active', 'is_default', 'model_selection', 'created_at']
    search_fields = ['name', 'prompt']
@admin.register(AIEvaluation)
class AIEvaluationAdmin(UnfoldModelAdmin):
    """Admin for individual AI evaluation results."""
    list_display = ['id', 'task', 'template', 'score', 'status', 'model_selection', 'created_at']
    list_filter = ['status', 'model_selection', 'created_at', 'template']
    search_fields = ['task__id', 'evaluation', 'reasoning']
    readonly_fields = ['id', 'created_at', 'updated_at', 'raw_response']
    fieldsets = (
        (None, {
            'fields': ('task', 'template', 'status', 'score', 'evaluation')
        }),
        # Snapshot of the model/prompt configuration used at evaluation time.
        ('配置快照', {
            'fields': ('model_selection', 'prompt'),
            'classes': ('collapse',),
        }),
        # Raw response / reasoning / error message, for debugging.
        ('调试信息', {
            'fields': ('raw_response', 'reasoning', 'error_message'),
            'classes': ('collapse',),
        }),
    )

View File

@@ -0,0 +1,5 @@
from django.apps import AppConfig
class AiServicesConfig(AppConfig):
name = 'ai_services'

View File

@@ -0,0 +1,323 @@
import logging
import json
import os
from django.conf import settings
from openai import OpenAI
from .models import AIEvaluation
logger = logging.getLogger(__name__)
class BailianService:
    """Client for Aliyun Bailian (DashScope) via its OpenAI-compatible endpoint.

    Responsibilities:
      * ``evaluate_task``  — score a transcript with an LLM and persist the result;
      * ``_sync_evaluation_to_project`` — mirror the result into the competition
        app as a virtual "AI judge" score/comment;
      * ``summarize_task`` — generate a Markdown summary for a transcript.

    The API key is read from Django settings, falling back to the
    ``DASHSCOPE_API_KEY`` environment variable.
    """

    def __init__(self):
        # Prefer the Django setting; fall back to the environment variable.
        self.api_key = getattr(settings, 'DASHSCOPE_API_KEY', None)
        if not self.api_key:
            self.api_key = os.environ.get("DASHSCOPE_API_KEY")
        if self.api_key:
            self.client = OpenAI(
                api_key=self.api_key,
                base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"
            )
        else:
            # No key configured: methods below detect this and degrade gracefully.
            self.client = None
            logger.warning("DASHSCOPE_API_KEY not configured.")

    def evaluate_task(self, evaluation: AIEvaluation):
        """Run the AI evaluation for *evaluation* and persist the outcome.

        Marks the evaluation FAILED early when the service is unconfigured or
        the task has no transcript. Otherwise calls the chat model (with up to
        3 attempts), parses the JSON reply into score/evaluation/reasoning,
        saves it, and syncs the result to the linked competition project.
        Returns the saved evaluation, or None on the early-failure paths.
        """
        if not self.client:
            evaluation.status = AIEvaluation.Status.FAILED
            evaluation.error_message = "服务未配置 (DASHSCOPE_API_KEY missing)"
            evaluation.save()
            return
        task = evaluation.task
        if not task.transcription:
            evaluation.status = AIEvaluation.Status.FAILED
            evaluation.error_message = "关联任务无逐字稿内容"
            evaluation.save()
            return
        evaluation.status = AIEvaluation.Status.PROCESSING
        evaluation.save()
        try:
            prompt = evaluation.prompt
            content = task.transcription
            # Prepare chapter/timestamp data to help the model analyse pacing.
            chapter_context = ""
            if task.auto_chapters_data:
                try:
                    chapters_str = ""
                    # Handle the specific AutoChapters structure:
                    # {"AutoChapters": [{"Id": 1, "Start": 740, "End": 203436, "Headline": "...", "Summary": "..."}, ...]}
                    if isinstance(task.auto_chapters_data, dict) and 'AutoChapters' in task.auto_chapters_data:
                        chapters = task.auto_chapters_data['AutoChapters']
                        if isinstance(chapters, list):
                            chapter_lines = []
                            for ch in chapters:
                                # Milliseconds -> MM:SS
                                start_ms = ch.get('Start', 0)
                                end_ms = ch.get('End', 0)
                                start_str = f"{start_ms // 60000:02d}:{(start_ms // 1000) % 60:02d}"
                                end_str = f"{end_ms // 60000:02d}:{(end_ms // 1000) % 60:02d}"
                                headline = ch.get('Headline', '无标题')
                                summary = ch.get('Summary', '')
                                line = f"- [{start_str} - {end_str}] {headline}"
                                if summary:
                                    line += f"\n 摘要: {summary}"
                                chapter_lines.append(line)
                            chapters_str = "\n".join(chapter_lines)
                    # If the parse above produced nothing (or the shape did not
                    # match), fall back to a generic JSON dump.
                    if not chapters_str:
                        if isinstance(task.auto_chapters_data, (dict, list)):
                            chapters_str = json.dumps(task.auto_chapters_data, ensure_ascii=False, indent=2)
                        else:
                            chapters_str = str(task.auto_chapters_data)
                    chapter_context = f"\n\n【章节与时间戳信息】\n{chapters_str}\n\n(提示:请结合上述章节时间戳信息,分析发言者的语速、节奏变化及停顿情况。)"
                except Exception as e:
                    # Chapter context is best-effort; never block the evaluation on it.
                    logger.warning(f"Failed to process auto_chapters_data: {e}")
            # Truncate long transcripts to avoid exceeding the token limit
            # (simple approach: keep the first 10000 characters).
            if len(content) > 10000:
                content = content[:10000] + "...(内容过长已截断)"
            # Construct messages
            messages = [
                {'role': 'system', 'content': 'You are a helpful assistant designed to output JSON.'},
                {'role': 'user', 'content': f"{prompt}\n\n以下是需要评估的内容:\n{content}{chapter_context}"}
            ]
            # Retry mechanism (up to 3 attempts).
            completion = None
            last_error = None
            import time
            for attempt in range(3):
                try:
                    completion = self.client.chat.completions.create(
                        model=evaluation.model_selection,
                        messages=messages,
                        response_format={"type": "json_object"}
                    )
                    break  # success: leave the retry loop
                except Exception as e:
                    last_error = e
                    logger.warning(f"AI Evaluation attempt {attempt+1}/3 failed for eval {evaluation.id}: {e}")
                    if attempt < 2:
                        time.sleep(2 * (attempt + 1))  # simple increasing back-off (2s, 4s)
            if not completion:
                raise last_error or Exception("AI Service call failed after retries")
            response_content = completion.choices[0].message.content
            # Convert to dict for storage
            raw_response = completion.model_dump()
            evaluation.raw_response = raw_response
            # Parse the JSON reply.
            try:
                result = json.loads(response_content)
                evaluation.score = result.get('score')
                evaluation.evaluation = result.get('evaluation') or result.get('comment')
                # Pick up the model's reasoning if it returned one.
                evaluation.reasoning = result.get('reasoning') or result.get('analysis')
                if not evaluation.reasoning:
                    # No explicit reasoning field: keep the whole JSON for reference.
                    evaluation.reasoning = json.dumps(result, ensure_ascii=False, indent=2)
                evaluation.status = AIEvaluation.Status.COMPLETED
            except json.JSONDecodeError:
                evaluation.status = AIEvaluation.Status.FAILED
                evaluation.error_message = f"无法解析JSON响应: {response_content}"
                evaluation.reasoning = response_content
            evaluation.save()
            # Sync the result to the linked competition project (if any).
            self._sync_evaluation_to_project(evaluation)
            return evaluation
        except Exception as e:
            logger.error(f"AI Evaluation failed: {e}")
            evaluation.status = AIEvaluation.Status.FAILED
            evaluation.error_message = str(e)
            evaluation.save()
            return evaluation

    def _sync_evaluation_to_project(self, evaluation: AIEvaluation):
        """Mirror the AI evaluation into the linked project's scores/comments.

        Creates (or reuses) a virtual "AI judge" WeChatUser keyed on the
        template id, enrolls it in the competition, then writes the score to
        the best-matching score dimension and upserts the judge's comment.
        Best-effort: any error is logged and swallowed.
        """
        try:
            task = evaluation.task
            if not task.project:
                return
            project = task.project
            competition = project.competition
            # 1. Determine the judge identity (based on the template).
            #    Requirement: the judge is displayed under the template name.
            template_name = evaluation.template.name if evaluation.template else "AI智能评委"
            # Fixed prefix + template_id keeps the openid unique, so the same
            # template maps to the same judge across all projects.
            openid = f"ai_judge_{evaluation.template.id}" if evaluation.template else "ai_judge_default"
            # Deferred imports to avoid circular dependencies.
            from shop.models import WeChatUser
            from competition.models import CompetitionEnrollment, Score, Comment, ScoreDimension
            # Get or create the virtual judge user.
            user, created = WeChatUser.objects.get_or_create(
                openid=openid,
                defaults={
                    'nickname': template_name,
                    'avatar_url': 'https://ui-avatars.com/api/?name=AI&background=random&color=fff'
                }
            )
            # If the nickname no longer matches (template was renamed), update it.
            if user.nickname != template_name:
                user.nickname = template_name
                user.save(update_fields=['nickname'])
            # 2. Make sure the judge is enrolled in the competition.
            enrollment, _ = CompetitionEnrollment.objects.get_or_create(
                competition=competition,
                user=user,
                defaults={
                    'role': 'judge',
                    'status': 'approved'
                }
            )
            # 3. Sync the score.
            if evaluation.score is not None:
                # Pick a target dimension, in priority order below.
                dimensions = competition.score_dimensions.all()
                target_dimension = None
                # 0. Prefer the dimension configured on the template.
                if evaluation.template and evaluation.template.score_dimension:
                    # Check the configured dimension belongs to this competition.
                    if evaluation.template.score_dimension.competition_id == competition.id:
                        target_dimension = evaluation.template.score_dimension
                    else:
                        # Template reused across competitions: look for a
                        # same-named dimension in this competition instead.
                        target_dimension = dimensions.filter(name=evaluation.template.score_dimension.name).first()
                # 1. Otherwise try the "AI Rating" dimension (requested default).
                if not target_dimension:
                    target_dimension = dimensions.filter(name__iexact="AI Rating").first()
                # 2. Then any dimension whose name contains "AI".
                if not target_dimension:
                    for dim in dimensions:
                        if "AI" in dim.name.upper():
                            target_dimension = dim
                            break
                # 3. Then a dimension named after the template.
                if not target_dimension:
                    target_dimension = dimensions.filter(name=template_name).first()
                # 4. Last resort: the competition's first dimension.
                if not target_dimension and dimensions.exists():
                    target_dimension = dimensions.first()
                if target_dimension:
                    Score.objects.update_or_create(
                        project=project,
                        judge=enrollment,
                        dimension=target_dimension,
                        defaults={'score': evaluation.score}
                    )
                    logger.info(f"Synced AI score {evaluation.score} to project {project.id} dimension {target_dimension.name}")
            # 4. Sync the comment.
            if evaluation.evaluation:
                # Reuse this judge's existing comment to avoid duplicates.
                comment = Comment.objects.filter(project=project, judge=enrollment).first()
                if comment:
                    comment.content = evaluation.evaluation
                    comment.save()
                else:
                    Comment.objects.create(
                        project=project,
                        judge=enrollment,
                        content=evaluation.evaluation
                    )
                logger.info(f"Synced AI comment to project {project.id}")
        except Exception as e:
            logger.error(f"Failed to sync evaluation to project: {e}")

    def summarize_task(self, task):
        """Generate a Markdown summary for *task* and store it on ``task.summary``.

        No-op (with a warning) when the service is unconfigured or the task
        has no transcript. Uses the fixed ``qwen-plus`` model. Errors are
        logged and swallowed.
        """
        if not self.client:
            logger.warning("BailianService not initialized, skipping summary.")
            return
        if not task.transcription:
            logger.warning(f"Task {task.id} has no transcription, skipping summary.")
            return
        try:
            content = task.transcription
            # Simple truncation to keep the prompt within limits.
            if len(content) > 15000:
                content = content[:15000] + "...(内容过长已截断)"
            # Prepare extra context from the raw Aliyun payloads.
            context_data = ""
            if task.summary_data:
                context_data += f"\n\n【总结原始数据】\n{json.dumps(task.summary_data, ensure_ascii=False, indent=2)}"
            if task.auto_chapters_data:
                context_data += f"\n\n【章节原始数据】\n{json.dumps(task.auto_chapters_data, ensure_ascii=False, indent=2)}"
            system_prompt = f"""你是一个专业的会议/内容总结助手。请根据提供的【转写文本】、【总结原始数据】和【章节原始数据】,生成一份结构清晰、内容详实的总结报告。
请按照以下结构输出Markdown格式
1. **标题**:基于内容生成一个合适的标题。
2. **核心摘要**:简要概括主要内容。
3. **主要观点/话题**:结合思维导图数据,列出关键话题和层级。
4. **章节速览**:结合章节数据,列出时间点和主要内容。[HH:MM:SS]格式来把章节列出来
5. **问答精选**(如果有):基于问答总结数据,列出重要问答。
请确保语言通顺,重点突出,能够还原内容的逻辑结构。"""
            user_content = f"以下是需要总结的内容:\n\n【转写文本】\n{content}{context_data}"
            messages = [
                {'role': 'system', 'content': system_prompt},
                {'role': 'user', 'content': user_content}
            ]
            # Use qwen-plus as the summarisation model.
            completion = self.client.chat.completions.create(
                model="qwen-plus",
                messages=messages
            )
            summary_content = completion.choices[0].message.content
            task.summary = summary_content
            task.save(update_fields=['summary'])
            logger.info(f"Task {task.id} summary generated successfully.")
        except Exception as e:
            logger.error(f"Failed to generate summary for task {task.id}: {e}")

View File

@@ -0,0 +1,54 @@
from django.core.management.base import BaseCommand
from django.conf import settings
import oss2
from aliyunsdkcore.client import AcsClient
class Command(BaseCommand):
    """Management command that sanity-checks the Aliyun configuration.

    Verifies the required settings are present, then performs live checks:
    an OSS bucket lookup and a Tingwu (AcsClient) initialisation.
    """
    help = 'Check Aliyun configuration status'

    # Settings keys this command requires.
    REQUIRED_SETTINGS = (
        'ALIYUN_ACCESS_KEY_ID',
        'ALIYUN_ACCESS_KEY_SECRET',
        'ALIYUN_OSS_BUCKET_NAME',
        'ALIYUN_OSS_ENDPOINT',
        'ALIYUN_TINGWU_APP_KEY',
    )

    def handle(self, *args, **options):
        self.stdout.write("Checking Aliyun Configuration...")
        # Use getattr with a default so a setting that is absent entirely is
        # reported as [MISSING] instead of raising AttributeError here.
        configs = {key: getattr(settings, key, None) for key in self.REQUIRED_SETTINGS}
        all_valid = True
        for key, value in configs.items():
            if not value:
                self.stdout.write(self.style.ERROR(f"[MISSING] {key} is not set or empty"))
                all_valid = False
            else:
                # Mask the middle of the value so secrets are safe to print.
                masked_value = value[:4] + "****" + value[-4:] if len(value) > 8 else "****"
                self.stdout.write(self.style.SUCCESS(f"[OK] {key}: {masked_value}"))
        if not all_valid:
            self.stdout.write(self.style.ERROR("\nConfiguration check FAILED. Some required settings are missing."))
            return
        # Test OSS Connection
        self.stdout.write("\nTesting OSS Connection...")
        try:
            auth = oss2.Auth(configs['ALIYUN_ACCESS_KEY_ID'], configs['ALIYUN_ACCESS_KEY_SECRET'])
            bucket = oss2.Bucket(auth, configs['ALIYUN_OSS_ENDPOINT'], configs['ALIYUN_OSS_BUCKET_NAME'])
            bucket.get_bucket_info()
            self.stdout.write(self.style.SUCCESS("[OK] OSS Connection successful"))
        except Exception as e:
            self.stdout.write(self.style.ERROR(f"[FAILED] OSS Connection failed: {e}"))
        # Test Tingwu Client Initialization
        self.stdout.write("\nTesting Tingwu Client Initialization...")
        try:
            client = AcsClient(
                configs['ALIYUN_ACCESS_KEY_ID'],
                configs['ALIYUN_ACCESS_KEY_SECRET'],
                "cn-beijing"
            )
            self.stdout.write(self.style.SUCCESS("[OK] Tingwu Client initialized"))
        except Exception as e:
            self.stdout.write(self.style.ERROR(f"[FAILED] Tingwu Client init failed: {e}"))

View File

@@ -0,0 +1,63 @@
import time
import logging
from django.core.management.base import BaseCommand
from ai_services.models import TranscriptionTask
from ai_services.services import AliyunTingwuService
logger = logging.getLogger(__name__)
class Command(BaseCommand):
    """Long-running poller for Aliyun Tingwu transcription results.

    Every 10 seconds, fetches all PENDING/PROCESSING tasks that already have
    a Tingwu task id and lets ``AliyunTingwuService.parse_and_update_task``
    persist any status/result changes. Stop with Ctrl-C.
    """
    help = 'Polls Aliyun Tingwu for transcription results every 10 seconds'

    def handle(self, *args, **options):
        self.stdout.write(self.style.SUCCESS('Starting polling service...'))
        service = AliyunTingwuService()
        while True:
            try:
                # Find tasks that are PENDING or PROCESSING.
                # PENDING is included because create() might leave the task in
                # PENDING initially, though usually it becomes PROCESSING once
                # a task_id is obtained. Tasks without a task_id are skipped.
                tasks = TranscriptionTask.objects.filter(
                    status__in=[TranscriptionTask.Status.PENDING, TranscriptionTask.Status.PROCESSING]
                ).exclude(task_id__isnull=True).exclude(task_id='')
                count = tasks.count()
                if count > 0:
                    self.stdout.write(f'Found {count} pending/processing tasks.')
                for task in tasks:
                    self.stdout.write(f'Checking task {task.task_id} (Status: {task.status})...')
                    try:
                        result = service.get_task_info(task.task_id)
                        # Remember the old status to detect transitions.
                        old_status = task.status
                        service.parse_and_update_task(task, result)
                        # parse_and_update_task mutates `task`; report changes.
                        if task.status != old_status:
                            if task.status == TranscriptionTask.Status.SUCCEEDED:
                                self.stdout.write(self.style.SUCCESS(f'Task {task.task_id} SUCCEEDED'))
                            elif task.status == TranscriptionTask.Status.FAILED:
                                self.stdout.write(self.style.ERROR(f'Task {task.task_id} FAILED: {task.error_message}'))
                            else:
                                # Still processing; nothing to report.
                                pass
                    except Exception as e:
                        # A single bad task must not kill the whole poll loop.
                        logger.error(f"Error checking task {task.task_id}: {e}")
                        self.stdout.write(self.style.ERROR(f"Error checking task {task.task_id}: {e}"))
                # Wait for 10 seconds before the next round.
                time.sleep(10)
            except KeyboardInterrupt:
                self.stdout.write(self.style.SUCCESS('Stopping polling service...'))
                break
            except Exception as e:
                # Unexpected loop-level error: log, back off, keep polling.
                logger.error(f"Polling loop error: {e}")
                self.stdout.write(self.style.ERROR(f"Polling loop error: {e}"))
                time.sleep(10)

View File

@@ -0,0 +1,102 @@
import os
import sys
import django
import json
import logging
from django.conf import settings

# Bootstrap a standalone Django environment for this script.
# Add the project root to sys.path so project packages import cleanly.
# NOTE(review): machine-specific absolute path — confirm before running elsewhere.
sys.path.append('/Volumes/data/Quant-Speed/market_page/backend')
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')  # corrected to the proper settings module path
django.setup()
from ai_services.services import AliyunTingwuService
from ai_services.models import TranscriptionTask
# Configure logging for the script run.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def test_tingwu_transcription():
    """Manual end-to-end check of the Tingwu transcription pipeline.

    Creates a transcription task for a fixed sample video URL, then polls the
    task status (up to 60 attempts, 5 s apart) and prints the outcome.
    Intended to be run as a standalone script against live Aliyun services,
    not as an automated test.
    """
    file_url = "https://tangledup-ai-staging.oss-cn-shanghai.aliyuncs.com/Video/%E6%95%99%E5%AD%A6.mp4"
    print(f"Testing transcription for: {file_url}")
    service = AliyunTingwuService()
    # 1. Create the task.
    try:
        print("Creating task...")
        response = service.create_transcription_task(file_url)
        print(f"Create task response: {json.dumps(response, indent=2, ensure_ascii=False)}")
        # The TaskId may live under a 'Data' envelope or at the top level.
        if 'Data' in response and isinstance(response['Data'], dict):
            task_id = response['Data'].get('TaskId')
        else:
            task_id = response.get('TaskId')
        if not task_id:
            print("Failed to get TaskId")
            return
        print(f"Task created with ID: {task_id}")
        # 2. Poll the task status.
        import time
        max_retries = 60  # 60 attempts * 5 s = 5 minutes
        for i in range(max_retries):
            print(f"Checking status (attempt {i+1}/{max_retries})...")
            result = service.get_task_info(task_id)
            # Normalise the result: it may arrive as a JSON string.
            if isinstance(result, str):
                try:
                    result = json.loads(result)
                except json.JSONDecodeError:
                    # Not JSON after all; keep the raw string and fall through.
                    pass
            if isinstance(result, dict):
                data_obj = result.get('Data', result)
            else:
                data_obj = result
            task_status = data_obj.get('TaskStatus')
            if not task_status:
                task_status = data_obj.get('Status')
            print(f"Current status: {task_status}")
            if task_status in ['COMPLETE', 'COMPLETED', 'SUCCEEDED']:
                print("Task succeeded!")
                print(f"Full Result: {json.dumps(data_obj, indent=2, ensure_ascii=False)}")
                # The Transcription payload may be an http(s) URL to fetch.
                task_result = data_obj.get('Result', {})
                transcription_data = task_result.get('Transcription', {})
                if isinstance(transcription_data, str) and transcription_data.startswith('http'):
                    import requests
                    print(f"Downloading transcription from {transcription_data}")
                    t_resp = requests.get(transcription_data)
                    if t_resp.status_code == 200:
                        content = t_resp.json()
                        print(f"Downloaded content structure keys: {content.keys()}")
                    else:
                        print(f"Failed to download: {t_resp.status_code}")
                break
            elif task_status == 'FAILED':
                print(f"Task failed: {data_obj}")
                break
            time.sleep(5)
    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    test_tingwu_transcription()

View File

@@ -0,0 +1,34 @@
# Generated by Django 6.0.1 on 2026-03-11 05:11
import uuid
from django.db import migrations, models
class Migration(migrations.Migration):
    """Initial migration: creates the TranscriptionTask table."""

    initial = True

    dependencies = [
    ]

    operations = [
        migrations.CreateModel(
            name='TranscriptionTask',
            fields=[
                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
                ('file_url', models.URLField(max_length=1024, verbose_name='文件链接')),
                ('task_id', models.CharField(blank=True, max_length=100, null=True, verbose_name='听悟任务ID')),
                ('status', models.CharField(choices=[('PENDING', '等待中'), ('PROCESSING', '处理中'), ('SUCCEEDED', '成功'), ('FAILED', '失败')], default='PENDING', max_length=20, verbose_name='状态')),
                ('transcription', models.TextField(blank=True, null=True, verbose_name='逐字稿')),
                ('summary', models.TextField(blank=True, null=True, verbose_name='AI总结')),
                ('error_message', models.TextField(blank=True, null=True, verbose_name='错误信息')),
                ('created_at', models.DateTimeField(auto_now_add=True, verbose_name='创建时间')),
                ('updated_at', models.DateTimeField(auto_now=True, verbose_name='更新时间')),
            ],
            options={
                'verbose_name': '转写任务',
                'verbose_name_plural': '转写任务',
                'ordering': ['-created_at'],
            },
        ),
    ]

View File

@@ -0,0 +1,23 @@
# Generated by Django 6.0.1 on 2026-03-11 05:12
from django.db import migrations, models
class Migration(migrations.Migration):
    """Adds evaluation/score fields to TranscriptionTask (later removed in 0004)."""

    dependencies = [
        ('ai_services', '0001_initial'),
    ]

    operations = [
        migrations.AddField(
            model_name='transcriptiontask',
            name='evaluation',
            field=models.TextField(blank=True, null=True, verbose_name='AI评语'),
        ),
        migrations.AddField(
            model_name='transcriptiontask',
            name='score',
            field=models.IntegerField(blank=True, help_text='基于转写内容的评分', null=True, verbose_name='AI评分'),
        ),
    ]

View File

@@ -0,0 +1,28 @@
# Generated by Django 6.0.1 on 2026-03-11 12:30
from django.db import migrations, models
class Migration(migrations.Migration):
    """Adds raw Aliyun JSON payload fields (chapters, summary, transcription)."""

    dependencies = [
        ('ai_services', '0002_transcriptiontask_evaluation_transcriptiontask_score'),
    ]

    operations = [
        migrations.AddField(
            model_name='transcriptiontask',
            name='auto_chapters_data',
            field=models.JSONField(blank=True, help_text='阿里云返回的AutoChapters完整JSON', null=True, verbose_name='章节原始数据'),
        ),
        migrations.AddField(
            model_name='transcriptiontask',
            name='summary_data',
            field=models.JSONField(blank=True, help_text='阿里云返回的Summarization完整JSON', null=True, verbose_name='总结原始数据'),
        ),
        migrations.AddField(
            model_name='transcriptiontask',
            name='transcription_data',
            field=models.JSONField(blank=True, help_text='阿里云返回的Transcription完整JSON', null=True, verbose_name='转写原始数据'),
        ),
    ]

View File

@@ -0,0 +1,44 @@
# Generated by Django 6.0.1 on 2026-03-11 12:44
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """Moves evaluation data off TranscriptionTask into a new AIEvaluation model."""

    dependencies = [
        ('ai_services', '0003_transcriptiontask_auto_chapters_data_and_more'),
    ]

    operations = [
        migrations.RemoveField(
            model_name='transcriptiontask',
            name='evaluation',
        ),
        migrations.RemoveField(
            model_name='transcriptiontask',
            name='score',
        ),
        migrations.CreateModel(
            name='AIEvaluation',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('score', models.IntegerField(blank=True, help_text='0-100分', null=True, verbose_name='AI评分')),
                ('evaluation', models.TextField(blank=True, null=True, verbose_name='AI评语')),
                ('model_selection', models.CharField(default='qwen-plus', help_text='例如: qwen-plus, qwen-turbo, qwen-max', max_length=50, verbose_name='模型选择')),
                ('prompt', models.TextField(default='你是一个专业的评分助手。请根据提供的转写内容对内容质量、逻辑清晰度、语言表达等方面进行综合评分0-100分并给出详细的评语。请以JSON格式返回包含"score""evaluation"字段。', help_text='用于指导AI评分的提示词', verbose_name='评分提示词')),
                ('raw_response', models.JSONField(blank=True, help_text='大模型返回的完整JSON', null=True, verbose_name='原始响应')),
                ('reasoning', models.TextField(blank=True, help_text='AI的推理过程如果有', null=True, verbose_name='推理过程')),
                ('status', models.CharField(choices=[('PENDING', '等待中'), ('PROCESSING', '生成中'), ('COMPLETED', '已完成'), ('FAILED', '失败')], default='PENDING', max_length=20, verbose_name='评估状态')),
                ('error_message', models.TextField(blank=True, null=True, verbose_name='错误信息')),
                ('created_at', models.DateTimeField(auto_now_add=True, verbose_name='创建时间')),
                ('updated_at', models.DateTimeField(auto_now=True, verbose_name='更新时间')),
                ('task', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='ai_evaluation', to='ai_services.transcriptiontask', verbose_name='关联任务')),
            ],
            options={
                'verbose_name': 'AI智能评估',
                'verbose_name_plural': 'AI智能评估',
                'ordering': ['-created_at'],
            },
        ),
    ]

View File

@@ -0,0 +1,55 @@
# Generated by Django 6.0.1 on 2026-03-11 13:00
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """Introduces AIEvaluationTemplate and relaxes AIEvaluation to many-per-task."""

    dependencies = [
        ('ai_services', '0004_remove_transcriptiontask_evaluation_and_more'),
    ]

    operations = [
        migrations.CreateModel(
            name='AIEvaluationTemplate',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(help_text='例如:销售话术评分、逻辑性分析', max_length=100, verbose_name='模板名称')),
                ('model_selection', models.CharField(default='qwen-plus', help_text='例如: qwen-plus, qwen-turbo, qwen-max', max_length=50, verbose_name='模型选择')),
                ('prompt', models.TextField(default='你是一个专业的评分助手。请根据提供的转写内容对内容质量、逻辑清晰度、语言表达等方面进行综合评分0-100分并给出详细的评语。请以JSON格式返回包含"score""evaluation"字段。', help_text='用于指导AI评分的提示词', verbose_name='评分提示词')),
                ('is_active', models.BooleanField(default=True, help_text='启用后,新的转写任务完成后将自动使用此模板进行评估', verbose_name='是否启用')),
                ('created_at', models.DateTimeField(auto_now_add=True, verbose_name='创建时间')),
                ('updated_at', models.DateTimeField(auto_now=True, verbose_name='更新时间')),
            ],
            options={
                'verbose_name': 'AI评估模板',
                'verbose_name_plural': 'AI评估模板',
                'ordering': ['-created_at'],
            },
        ),
        migrations.AlterModelOptions(
            name='aievaluation',
            options={'ordering': ['-created_at'], 'verbose_name': 'AI评估结果', 'verbose_name_plural': 'AI评估结果'},
        ),
        migrations.AlterField(
            model_name='aievaluation',
            name='model_selection',
            field=models.CharField(default='qwen-plus', max_length=50, verbose_name='模型选择'),
        ),
        migrations.AlterField(
            model_name='aievaluation',
            name='prompt',
            field=models.TextField(verbose_name='评分提示词'),
        ),
        migrations.AlterField(
            model_name='aievaluation',
            name='task',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='ai_evaluations', to='ai_services.transcriptiontask', verbose_name='关联任务'),
        ),
        migrations.AddField(
            model_name='aievaluation',
            name='template',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='evaluations', to='ai_services.aievaluationtemplate', verbose_name='使用的模板'),
        ),
    ]

View File

@@ -0,0 +1,20 @@
# Generated by Django 6.0.1 on 2026-03-11 14:10
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """Links TranscriptionTask to a competition Project."""

    dependencies = [
        ('ai_services', '0005_aievaluationtemplate_alter_aievaluation_options_and_more'),
        ('competition', '0003_competition_project_visibility'),
    ]

    operations = [
        migrations.AddField(
            model_name='transcriptiontask',
            name='project',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='transcription_tasks', to='competition.project', verbose_name='关联参赛项目'),
        ),
    ]

View File

@@ -0,0 +1,20 @@
# Generated by Django 6.0.1 on 2026-03-11 15:03
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """Links AIEvaluationTemplate to a competition ScoreDimension."""

    dependencies = [
        ('ai_services', '0006_transcriptiontask_project'),
        ('competition', '0003_competition_project_visibility'),
    ]

    operations = [
        migrations.AddField(
            model_name='aievaluationtemplate',
            name='score_dimension',
            field=models.ForeignKey(blank=True, help_text='如果同步到比赛评分,优先使用此维度。未填写则默认使用"AI Rating"或包含"AI"的维度', null=True, on_delete=django.db.models.deletion.SET_NULL, to='competition.scoredimension', verbose_name='关联评分维度'),
        ),
    ]

View File

@@ -0,0 +1,18 @@
# Generated by Django 6.0.1 on 2026-03-17 15:55
from django.db import migrations, models
class Migration(migrations.Migration):
    """Adds the is_default flag to AIEvaluationTemplate."""

    dependencies = [
        ('ai_services', '0007_aievaluationtemplate_score_dimension'),
    ]

    operations = [
        migrations.AddField(
            model_name='aievaluationtemplate',
            name='is_default',
            field=models.BooleanField(default=False, help_text='默认模板会评价所有比赛,非默认模板且未关联评分维度时不会自动评价', verbose_name='是否为默认模板'),
        ),
    ]

View File

@@ -0,0 +1,23 @@
# Generated by Django 4.2.29 on 2026-03-18 12:00
from django.db import migrations, models
class Migration(migrations.Migration):
    """Downgrades the auto primary keys from BigAutoField to AutoField.

    NOTE(review): header says Django 4.2 while earlier migrations say 6.0 —
    presumably generated on a different environment; confirm intent.
    """

    dependencies = [
        ('ai_services', '0008_add_is_default_to_template'),
    ]

    operations = [
        migrations.AlterField(
            model_name='aievaluation',
            name='id',
            field=models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'),
        ),
        migrations.AlterField(
            model_name='aievaluationtemplate',
            name='id',
            field=models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'),
        ),
    ]

View File

@@ -0,0 +1,150 @@
import uuid
from django.db import models
from django.utils.translation import gettext_lazy as _
class TranscriptionTask(models.Model):
    """An Aliyun Tingwu audio/video transcription job and its results."""

    class Status(models.TextChoices):
        PENDING = 'PENDING', _('等待中')
        PROCESSING = 'PROCESSING', _('处理中')
        SUCCEEDED = 'SUCCEEDED', _('成功')
        FAILED = 'FAILED', _('失败')

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    file_url = models.URLField(verbose_name=_('文件链接'), max_length=1024)
    task_id = models.CharField(verbose_name=_('听悟任务ID'), max_length=100, blank=True, null=True)
    status = models.CharField(
        verbose_name=_('状态'),
        max_length=20,
        choices=Status.choices,
        default=Status.PENDING
    )
    # Raw JSON payloads exactly as returned by Aliyun Tingwu.
    transcription_data = models.JSONField(verbose_name=_('转写原始数据'), blank=True, null=True, help_text=_('阿里云返回的Transcription完整JSON'))
    summary_data = models.JSONField(verbose_name=_('总结原始数据'), blank=True, null=True, help_text=_('阿里云返回的Summarization完整JSON'))
    auto_chapters_data = models.JSONField(verbose_name=_('章节原始数据'), blank=True, null=True, help_text=_('阿里云返回的AutoChapters完整JSON'))
    project = models.ForeignKey(
        'competition.Project',
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name='transcription_tasks',
        verbose_name=_('关联参赛项目')
    )
    transcription = models.TextField(verbose_name=_('逐字稿'), blank=True, null=True)
    summary = models.TextField(verbose_name=_('AI总结'), blank=True, null=True)
    # Decoupled into the AIEvaluation model; kept here for historical reference.
    # score = models.IntegerField(verbose_name=_('AI评分'), blank=True, null=True, help_text=_('基于转写内容的评分'))
    # evaluation = models.TextField(verbose_name=_('AI评语'), blank=True, null=True)
    error_message = models.TextField(verbose_name=_('错误信息'), blank=True, null=True)
    created_at = models.DateTimeField(verbose_name=_('创建时间'), auto_now_add=True)
    updated_at = models.DateTimeField(verbose_name=_('更新时间'), auto_now=True)

    class Meta:
        verbose_name = _('转写任务')
        verbose_name_plural = _('转写任务')
        ordering = ['-created_at']

    def __str__(self):
        return f"{self.id} - {self.get_status_display()}"
class AIEvaluationTemplate(models.Model):
    """A reusable preset (model + prompt) for running AI evaluations."""

    name = models.CharField(verbose_name=_('模板名称'), max_length=100, help_text=_('例如:销售话术评分、逻辑性分析'))
    model_selection = models.CharField(
        verbose_name=_('模型选择'),
        max_length=50,
        default='qwen-plus',
        help_text=_('例如: qwen-plus, qwen-turbo, qwen-max')
    )
    prompt = models.TextField(
        verbose_name=_('评分提示词'),
        default='你是一个专业的评分助手。请根据提供的转写内容对内容质量、逻辑清晰度、语言表达等方面进行综合评分0-100分并给出详细的评语。请以JSON格式返回包含"score""evaluation"字段。',
        help_text=_('用于指导AI评分的提示词')
    )
    # Preferred competition score dimension when syncing results; fallback
    # matching ("AI Rating", names containing "AI") happens in the service.
    score_dimension = models.ForeignKey(
        'competition.ScoreDimension',
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        verbose_name=_('关联评分维度'),
        help_text=_('如果同步到比赛评分,优先使用此维度。未填写则默认使用"AI Rating"或包含"AI"的维度')
    )
    is_default = models.BooleanField(
        verbose_name=_('是否为默认模板'),
        default=False,
        help_text=_('默认模板会评价所有比赛,非默认模板且未关联评分维度时不会自动评价')
    )
    is_active = models.BooleanField(verbose_name=_('是否启用'), default=True, help_text=_('启用后,新的转写任务完成后将自动使用此模板进行评估'))
    created_at = models.DateTimeField(verbose_name=_('创建时间'), auto_now_add=True)
    updated_at = models.DateTimeField(verbose_name=_('更新时间'), auto_now=True)

    class Meta:
        verbose_name = _('AI评估模板')
        verbose_name_plural = _('AI评估模板')
        ordering = ['-created_at']

    def __str__(self):
        return self.name
class AIEvaluation(models.Model):
    """One AI evaluation run for a transcription task, with a config snapshot."""

    class Status(models.TextChoices):
        PENDING = 'PENDING', _('等待中')
        PROCESSING = 'PROCESSING', _('生成中')
        COMPLETED = 'COMPLETED', _('已完成')
        FAILED = 'FAILED', _('失败')

    task = models.ForeignKey(
        TranscriptionTask,
        on_delete=models.CASCADE,
        related_name='ai_evaluations',
        verbose_name=_('关联任务')
    )
    template = models.ForeignKey(
        AIEvaluationTemplate,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name='evaluations',
        verbose_name=_('使用的模板')
    )
    # Score and textual verdict produced by the model.
    score = models.IntegerField(verbose_name=_('AI评分'), blank=True, null=True, help_text=_('0-100分'))
    evaluation = models.TextField(verbose_name=_('AI评语'), blank=True, null=True)
    # Snapshot of the configuration in effect when this evaluation ran,
    # so later template edits do not rewrite history.
    model_selection = models.CharField(
        verbose_name=_('模型选择'),
        max_length=50,
        default='qwen-plus'
    )
    prompt = models.TextField(verbose_name=_('评分提示词'))
    # Raw model output and reasoning, kept for debugging.
    raw_response = models.JSONField(verbose_name=_('原始响应'), blank=True, null=True, help_text=_('大模型返回的完整JSON'))
    reasoning = models.TextField(verbose_name=_('推理过程'), blank=True, null=True, help_text=_('AI的推理过程如果有'))
    status = models.CharField(
        verbose_name=_('评估状态'),
        max_length=20,
        choices=Status.choices,
        default=Status.PENDING
    )
    error_message = models.TextField(verbose_name=_('错误信息'), blank=True, null=True)
    created_at = models.DateTimeField(verbose_name=_('创建时间'), auto_now_add=True)
    updated_at = models.DateTimeField(verbose_name=_('更新时间'), auto_now=True)

    class Meta:
        verbose_name = _('AI评估结果')
        verbose_name_plural = _('AI评估结果')
        ordering = ['-created_at']

    def __str__(self):
        return f"Evaluation for Task {self.task.id} ({self.template.name if self.template else 'Custom'})"

View File

@@ -0,0 +1,28 @@
from rest_framework import serializers
from .models import TranscriptionTask, AIEvaluation, AIEvaluationTemplate
class AIEvaluationTemplateSerializer(serializers.ModelSerializer):
    """Read serializer for AI evaluation templates."""
    class Meta:
        model = AIEvaluationTemplate
        fields = ['id', 'name', 'model_selection', 'prompt', 'is_active', 'created_at']
class AIEvaluationSerializer(serializers.ModelSerializer):
    """Read serializer for AI evaluation results, nesting the template used."""
    template = AIEvaluationTemplateSerializer(read_only=True)

    class Meta:
        model = AIEvaluation
        fields = ['id', 'template', 'score', 'evaluation', 'model_selection', 'prompt', 'reasoning', 'status', 'error_message', 'created_at', 'updated_at']
class TranscriptionTaskSerializer(serializers.ModelSerializer):
    """Full read serializer for a transcription task, including its evaluations."""
    ai_evaluations = AIEvaluationSerializer(many=True, read_only=True)
    # Convenience flattening of the related project's title.
    project_title = serializers.CharField(source='project.title', read_only=True)

    class Meta:
        model = TranscriptionTask
        fields = ['id', 'file_url', 'task_id', 'status', 'transcription', 'summary', 'error_message', 'created_at', 'updated_at', 'transcription_data', 'summary_data', 'auto_chapters_data', 'ai_evaluations', 'project', 'project_title']
        # Only `project` is writable; everything else is system-produced.
        read_only_fields = ['id', 'file_url', 'task_id', 'status', 'transcription', 'summary', 'error_message', 'created_at', 'updated_at', 'transcription_data', 'summary_data', 'auto_chapters_data', 'ai_evaluations', 'project_title']
class TranscriptionUploadSerializer(serializers.Serializer):
    """Input serializer for creating a transcription task.

    Callers must provide either an uploaded audio ``file`` or a remote
    ``file_url``; ``project_id`` optionally links the task to a contest project.
    """
    file = serializers.FileField(help_text="上传的音频文件", required=False)
    file_url = serializers.URLField(help_text="音频文件的URL地址", required=False)
    project_id = serializers.IntegerField(help_text="关联的参赛项目ID", required=False)

    def validate(self, attrs):
        # Both fields are individually optional, but at least one must be set;
        # message mirrors the view's own 400 response for consistency.
        if not attrs.get('file') and not attrs.get('file_url'):
            raise serializers.ValidationError('请提供文件或文件URL')
        return attrs

View File

@@ -0,0 +1,420 @@
import json
import logging
import time
import uuid
import oss2
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.acs_exception.exceptions import ClientException, ServerException
# 尝试导入最新的 API 版本,如果有问题可能需要调整
try:
from aliyunsdktingwu.request.v20230930 import CreateTaskRequest, GetTaskInfoRequest
except ImportError:
# Fallback or error handling if version differs
pass
from django.conf import settings
logger = logging.getLogger(__name__)
from .models import TranscriptionTask, AIEvaluation, AIEvaluationTemplate
class AliyunTingwuService:
    """Wrapper around Aliyun OSS uploads and the Tingwu speech-transcription API.

    Responsibilities: upload audio to OSS, create and poll Tingwu transcription
    tasks, parse the results back onto TranscriptionTask records, and trigger
    downstream AI evaluations on first success.
    """
    def __init__(self):
        # Credentials and endpoints come from Django settings.
        self.access_key_id = settings.ALIYUN_ACCESS_KEY_ID
        self.access_key_secret = settings.ALIYUN_ACCESS_KEY_SECRET
        self.oss_bucket_name = settings.ALIYUN_OSS_BUCKET_NAME
        self.oss_endpoint = settings.ALIYUN_OSS_ENDPOINT
        self.tingwu_app_key = settings.ALIYUN_TINGWU_APP_KEY
        self.region_id = "cn-shanghai" # Tingwu region; per docs it should match the OSS region, or use 'cn-beijing'
        # Initialize the OSS bucket client (None when configuration is missing).
        if self.access_key_id and self.access_key_secret and self.oss_endpoint:
            auth = oss2.Auth(self.access_key_id, self.access_key_secret)
            self.bucket = oss2.Bucket(auth, self.oss_endpoint, self.oss_bucket_name)
        else:
            self.bucket = None
            logger.warning("Aliyun OSS configuration missing.")
        # Initialize the Tingwu ACS client (None when credentials are missing).
        if self.access_key_id and self.access_key_secret:
            self.client = AcsClient(
                self.access_key_id,
                self.access_key_secret,
                self.region_id
            )
            # Explicitly register the Tingwu service endpoint to avoid
            # EndpointResolvingError. The Tingwu endpoint is usually
            # tingwu.cn-beijing.aliyuncs.com; newer Tingwu APIs
            # (tingwu.aliyuncs.com) may differ — adjust if needed.
            # A generic endpoint mapping is registered here.
            try:
                # Register the endpoint for the "tingwu" product.
                # NOTE: Tingwu is mainly deployed in Beijing, so the endpoint is
                # usually tingwu.cn-beijing.aliyuncs.com; even a Shanghai
                # deployment may need to connect to the Beijing access point.
                self.client.add_endpoint(self.region_id, "tingwu", "tingwu.cn-beijing.aliyuncs.com")
            except Exception as e:
                logger.warning(f"Failed to add endpoint: {e}")
        else:
            self.client = None
            logger.warning("Aliyun AccessKey configuration missing.")
    def upload_to_oss(self, file_obj, file_name, day=7):
        """
        Upload a file to OSS and return a signed URL.

        The signed URL is valid for ``day`` days (default 7, i.e. 3600 * 24 * 7
        seconds) so judges can play the audio for a while without re-signing.
        """
        if not self.bucket:
            raise Exception("OSS Client not initialized")
        try:
            # Upload the object.
            # file_obj should be an open file object or a byte stream.
            self.bucket.put_object(file_name, file_obj)
            # Generate a signed GET URL (default validity: 3600 * 24 * 7 = 604800 seconds).
            url = self.bucket.sign_url('GET', file_name, 3600 * 24 * day)
            return url
        except Exception as e:
            logger.error(f"OSS Upload failed: {e}")
            raise e
    def create_transcription_task(self, file_url, language="cn"):
        """
        Create a Tingwu offline transcription task for the given audio URL.
        """
        if not self.client:
            raise Exception("Tingwu Client not initialized")
        request = CreateTaskRequest.CreateTaskRequest()
        # Compatibility across Aliyun SDK versions:
        # "type" is a required query parameter of the Tingwu (ROA-style) API and
        # selects the task type; per the official docs, offline tasks use 'offline'.
        request.add_query_param('type', 'offline')
        # Build the request body.
        # Per the Tingwu API docs, AppKey, Input and Parameters belong in the
        # JSON body, not in query parameters.
        body = {
            "AppKey": self.tingwu_app_key,
            "Input": {
                "FileUrl": file_url,
                "SourceLanguage": language,
                "TaskKey": str(uuid.uuid4())
            },
            "Parameters": {
                "Transcoding": {
                    "TargetAudioFormat": "mp3"
                },
                "Transcription": {
                    "DiarizationEnabled": True,
                    "ChannelId": 0
                },
                "TranscriptionEnabled": True,
                "AutoChaptersEnabled": True,
                "SummarizationEnabled": True,
                "Summarization": {
                    "Types": ["Paragraph", "Conversational", "QuestionsAnswering", "MindMap"]
                }
            }
        }
        # Attach the JSON body.
        request.set_content(json.dumps(body))
        request.add_header('Content-Type', 'application/json')
        # Force the endpoint to avoid SDK.EndpointResolvingError;
        # Tingwu is currently served mainly from Beijing.
        request.set_endpoint("tingwu.cn-beijing.aliyuncs.com")
        # The create-task API uses the PUT method.
        request.set_method('PUT')
        try:
            response = self.client.do_action_with_exception(request)
            return json.loads(response)
        except (ClientException, ServerException) as e:
            logger.error(f"Tingwu CreateTask failed: {e}")
            raise e
    def get_task_info(self, task_id):
        """
        Query the status and result of a Tingwu task by its TaskId.
        """
        if not self.client:
            raise Exception("Tingwu Client not initialized")
        request = GetTaskInfoRequest.GetTaskInfoRequest()
        request.set_TaskId(task_id)
        try:
            response = self.client.do_action_with_exception(request)
            return json.loads(response)
        except (ClientException, ServerException) as e:
            logger.error(f"Tingwu GetTaskInfo failed: {e}")
            raise e
    def parse_and_update_task(self, task, result):
        """
        Parse a Tingwu result payload and update the task record in place.

        :param task: TranscriptionTask instance to update
        :param result: full JSON returned by get_task_info (or just its Data part)
        """
        # Remember the previous status to detect the first transition to success.
        previous_status = task.status
        # 1. Extract the Data object.
        if isinstance(result, dict):
            data_obj = result.get('Data', result)
        else:
            data_obj = result
        if not isinstance(data_obj, dict):
            logger.error(f"Unexpected data format: {type(data_obj)}")
            return
        # 2. Update the status.
        task_status = data_obj.get('TaskStatus') or data_obj.get('Status')
        if task_status in ['COMPLETE', 'COMPLETED', 'SUCCEEDED']:
            task.status = 'SUCCEEDED' # string literal avoids a circular model import
        elif task_status == 'FAILED':
            task.status = 'FAILED'
            task.error_message = data_obj.get('TaskStatusText', data_obj.get('Message', 'Unknown error'))
            task.save()
            return
        else:
            # Still processing; leave the content untouched.
            return
        # 3. Parse the result payload.
        task_result = data_obj.get('Result', {})
        # Compatibility: if Result is empty or missing, fall back to data_obj itself;
        # in some responses Summarization/AutoChapters sit directly under Data.
        if not task_result:
            task_result = data_obj
        # Helper: fetch a field from the source dict or its nested 'Result' dict.
        def get_data_field(source, key):
            # 1. Try the source dict directly (also works when source IS Data).
            if isinstance(source, dict) and key in source:
                return source[key]
            # 2. If source is Data, try source['Result'].
            if isinstance(source, dict) and 'Result' in source and isinstance(source['Result'], dict):
                if key in source['Result']:
                    return source['Result'][key]
            return None
        # --- A. Verbatim transcript (Transcription) ---
        transcription_data = get_data_field(task_result, 'Transcription') or get_data_field(data_obj, 'Transcription') or {}
        # The payload may be a URL that must be downloaded first.
        if isinstance(transcription_data, str) and transcription_data.startswith('http'):
            try:
                import requests
                t_resp = requests.get(transcription_data)
                if t_resp.status_code == 200:
                    transcription_data = t_resp.json()
            except Exception as e:
                logger.error(f"Download transcription failed: {e}")
                transcription_data = {}
        elif isinstance(transcription_data, dict) and 'TranscriptionUrl' in transcription_data:
            try:
                import requests
                t_resp = requests.get(transcription_data['TranscriptionUrl'])
                if t_resp.status_code == 200:
                    transcription_data = t_resp.json()
            except Exception as e:
                logger.error(f"Download transcription url failed: {e}")
        # Persist the raw payload.
        task.transcription_data = transcription_data
        # Extract the plain text.
        # Shape: {"Transcription": {"Paragraphs": [{"Words": [{"Text": "..."}]}]}}
        # or directly {"Paragraphs": ...}
        content_source = transcription_data
        if 'Transcription' in content_source and isinstance(content_source['Transcription'], dict):
            content_source = content_source['Transcription']
        paragraphs = content_source.get('Paragraphs', [])
        full_text_lines = []
        if paragraphs and isinstance(paragraphs, list):
            for p in paragraphs:
                # Prefer assembling each line from its Words entries.
                words = p.get('Words', [])
                if words:
                    line_text = "".join([str(w.get('Text', '')) for w in words])
                    full_text_lines.append(line_text)
                # Fall back to a direct Text field (older payload shape).
                elif 'Text' in p:
                    full_text_lines.append(p['Text'])
        if full_text_lines:
            task.transcription = "\n".join(full_text_lines)
        # --- B. AI summary (Summarization) ---
        summarization = get_data_field(task_result, 'Summarization') or get_data_field(data_obj, 'Summarization') or {}
        # May also arrive as a downloadable URL.
        if isinstance(summarization, str) and summarization.startswith('http'):
            try:
                import requests
                s_resp = requests.get(summarization)
                if s_resp.status_code == 200:
                    summarization = s_resp.json()
            except Exception as e:
                logger.error(f"Download summarization failed: {e}")
                summarization = {}
        # Persist the raw payload.
        task.summary_data = summarization
        # Text extraction (MindMapSummary)
        # Shape: {"MindMapSummary": [{"Title": "...", "Topic": [...]}]}
        # The former summary_text concatenation logic was removed.
        # --- C. Chapters (AutoChapters) ---
        auto_chapters = get_data_field(task_result, 'AutoChapters') or get_data_field(data_obj, 'AutoChapters') or []
        # May also arrive as a downloadable URL.
        if isinstance(auto_chapters, str) and auto_chapters.startswith('http'):
            try:
                import requests
                ac_resp = requests.get(auto_chapters)
                if ac_resp.status_code == 200:
                    auto_chapters = ac_resp.json()
            except Exception as e:
                logger.error(f"Download auto chapters failed: {e}")
                auto_chapters = []
        # Persist the raw payload.
        task.auto_chapters_data = auto_chapters
        # Save the task so the raw data is committed to the database.
        task.save()
        # Ask the LLM for a summary when summary/chapter data exists.
        if task.summary_data or task.auto_chapters_data:
            try:
                # Placeholder shown while the summary is being generated.
                task.summary = "AI总结生成当中..."
                task.save(update_fields=['summary'])
                # Run summarization asynchronously.
                import threading
                from .bailian_service import BailianService
                def async_summarize_in_service(task_id):
                    try:
                        # Re-fetch the task inside the thread to avoid sharing
                        # the model instance across threads.
                        from .models import TranscriptionTask
                        t = TranscriptionTask.objects.get(id=task_id)
                        bailian_service = BailianService()
                        bailian_service.summarize_task(t)
                    except Exception as e:
                        logger.error(f"Async summary generation failed in service: {e}")
                threading.Thread(target=async_summarize_in_service, args=(task.id,)).start()
                logger.info(f"Triggered async summary generation for task {task.id}")
            except Exception as e:
                logger.error(f"Failed to trigger AI summarization: {e}")
        # 4. Auto-trigger AI evaluations on the first successful completion
        #    (requires a transcript; templates are checked downstream).
        if previous_status != 'SUCCEEDED' and task.status == 'SUCCEEDED' and task.transcription:
            # Also asynchronous; pass task.id to avoid stale object state in the thread.
            import threading
            threading.Thread(target=self.trigger_ai_evaluations, args=(task.id,)).start()
    def trigger_ai_evaluations(self, task_id):
        """
        Automatically trigger AI evaluations based on the enabled templates.

        Logic:
        1. If a template is linked to a score dimension (score_dimension), only
           tasks whose competition owns that same dimension are evaluated.
        2. If a template has no score dimension:
           - a default template (is_default=True) evaluates every competition;
           - otherwise no automatic evaluation happens.
        """
        try:
            # Re-fetch the task inside the thread with project preloaded, to avoid
            # lazy-loading issues on this thread's database connection.
            from .models import TranscriptionTask
            task = TranscriptionTask.objects.select_related('project', 'project__competition').get(id=task_id)
        except Exception as e:
            # Compatibility: task_id might actually be a task object (callers
            # should pass an id now, but guard against older call sites).
            if hasattr(task_id, 'id'):
                try:
                    from .models import TranscriptionTask
                    task = TranscriptionTask.objects.select_related('project', 'project__competition').get(id=task_id.id)
                except:
                    task = task_id
            else:
                logger.error(f"Failed to retrieve task {task_id}: {e}")
                return
        active_templates = AIEvaluationTemplate.objects.filter(is_active=True)
        if not active_templates.exists():
            logger.info("No active AI evaluation templates found.")
            return
        from .bailian_service import BailianService
        service = BailianService()
        for template in active_templates:
            # Skip duplicates: one evaluation per (task, template) pair.
            if AIEvaluation.objects.filter(task=task, template=template).exists():
                logger.info(f"Evaluation for task {task.id} and template {template.name} already exists.")
                continue
            # Competition linked to this task via its project (if any).
            task_competition = None
            if task.project and task.project.competition:
                task_competition = task.project.competition
            # Decide whether this template applies to this task.
            should_evaluate = False
            if template.score_dimension:
                # Template is bound to a score dimension: evaluate only when the
                # task's competition is linked to that same dimension.
                if task_competition:
                    # Competition IDs owning this exact score dimension
                    # (the filter is by the dimension's own id).
                    from competition.models import ScoreDimension
                    related_competition_ids = ScoreDimension.objects.filter(
                        id=template.score_dimension.id
                    ).values_list('competition_id', flat=True)
                    if task_competition.id in related_competition_ids:
                        should_evaluate = True
                        logger.info(f"Template '{template.name}' is linked to score_dimension, task's competition matches.")
                    else:
                        logger.info(f"Template '{template.name}' is linked to score_dimension, but task's competition does not match. Skipping.")
                else:
                    logger.info(f"Task {task.id} has no associated competition. Skipping template '{template.name}'.")
            else:
                # No score dimension: only the default template evaluates everything.
                if template.is_default:
                    should_evaluate = True
                    logger.info(f"Template '{template.name}' is default template, evaluating all competitions.")
                else:
                    logger.info(f"Template '{template.name}' is not linked to score_dimension and is not default. Skipping.")
            if not should_evaluate:
                continue
            # Create the evaluation record, snapshotting the template's config.
            evaluation = AIEvaluation.objects.create(
                task=task,
                template=template,
                model_selection=template.model_selection,
                prompt=template.prompt,
                status=AIEvaluation.Status.PENDING
            )
            # Kick off the evaluation.
            try:
                service.evaluate_task(evaluation)
                logger.info(f"Triggered evaluation {evaluation.id} for template {template.name}")
            except Exception as e:
                logger.error(f"Failed to trigger evaluation {evaluation.id}: {e}")

View File

@@ -0,0 +1,3 @@
from django.test import TestCase
# Create your tests here.

View File

@@ -0,0 +1,11 @@
from django.urls import path, include
from rest_framework.routers import DefaultRouter
from .views import TranscriptionTaskViewSet, tingwu_callback
# Router exposes CRUD plus custom actions for transcription tasks under /transcriptions/.
router = DefaultRouter()
router.register(r'transcriptions', TranscriptionTaskViewSet)
urlpatterns = [
    # Aliyun Tingwu asynchronous callback (connectivity test + task status push).
    path('callback/', tingwu_callback, name='tingwu-callback'),
    path('', include(router.urls)),
]

View File

@@ -0,0 +1,364 @@
import logging
import uuid
from rest_framework import viewsets, status
from rest_framework.decorators import action, api_view, permission_classes, parser_classes
from rest_framework.response import Response
from rest_framework.parsers import MultiPartParser, FormParser, JSONParser
from rest_framework.permissions import AllowAny
from django.conf import settings
from drf_spectacular.utils import extend_schema, OpenApiParameter, OpenApiTypes
from .models import TranscriptionTask, AIEvaluation
from .serializers import TranscriptionTaskSerializer, TranscriptionUploadSerializer, AIEvaluationSerializer
from .services import AliyunTingwuService
logger = logging.getLogger(__name__)
@api_view(['POST'])
@permission_classes([AllowAny])
def tingwu_callback(request):
    """
    Handle callback notifications from Aliyun Tingwu.

    Two kinds of messages arrive here:
    1. Connectivity-test pings sent when the callback URL is registered.
    2. Task status pushes (TaskId + Status) for running transcription jobs.

    Always answers HTTP 200 so Aliyun does not keep retrying.
    """
    data = request.data
    logger.info(f"收到听悟回调: {data}")
    # Defensive: a non-dict payload (e.g. a JSON list) would crash `.get()`
    # below and return a 500, causing Aliyun to retry. Acknowledge and ignore.
    if not isinstance(data, dict):
        logger.warning(f"听悟回调负载格式异常: {type(data)}")
        return Response({'message': 'success'}, status=status.HTTP_200_OK)
    # 1. Connectivity-test message.
    # Shape: {"Code": "0", "Data": {"Test": "..."}, "Message": "success", "RequestId": "..."}
    if 'Data' in data and 'Test' in data['Data']:
        logger.info("收到听悟连通性测试请求")
        return Response({'message': 'success'}, status=status.HTTP_200_OK)
    # 2. Task status message (refine per official docs / later debugging).
    # Usually contains TaskId and Status.
    # NOTE: the callback structure may differ between header and body;
    # adjust based on what Aliyun actually sends. This is a generic handler.
    task_id = data.get('TaskId')
    task_status = data.get('Status')
    if task_id:
        try:
            task = TranscriptionTask.objects.filter(task_id=task_id).first()
            if task:
                if task_status == 'COMPLETE':
                    logger.info(f"任务 {task_id} 完成,等待下一次查询刷新")
                    # get_task_info could be called here to refresh immediately,
                    # but beware of callback timeouts.
                elif task_status == 'FAILED':
                    task.status = TranscriptionTask.Status.FAILED
                    task.error_message = data.get('StatusText', 'Callback reported failure')
                    task.save()
            else:
                logger.warning(f"回调收到未知任务ID: {task_id}")
        except Exception as e:
            logger.error(f"处理回调异常: {e}")
    return Response({'message': 'success'}, status=status.HTTP_200_OK)
class TranscriptionTaskViewSet(viewsets.ModelViewSet):
queryset = TranscriptionTask.objects.all()
serializer_class = TranscriptionTaskSerializer
parser_classes = (MultiPartParser, FormParser)
    @extend_schema(
        request={
            'multipart/form-data': {
                'type': 'object',
                'properties': {
                    'file': {
                        'type': 'string',
                        'format': 'binary'
                    },
                    'file_url': {
                        'type': 'string',
                        'description': '音频文件的URL地址'
                    },
                    'project_id': {
                        'type': 'integer',
                        'description': '关联的参赛项目ID'
                    }
                }
            }
        },
        responses={201: TranscriptionTaskSerializer}
    )
    def create(self, request, *args, **kwargs):
        """
        Upload an audio file (or accept a URL) and create a Tingwu transcription task.

        Flow: upload to OSS when a file was posted -> create a local task record
        -> submit the Tingwu task and store its TaskId. Any failure marks the
        record FAILED with an error message and returns an error response.
        """
        file_obj = request.FILES.get('file')
        file_url = request.data.get('file_url')
        project_id = request.data.get('project_id')
        if not file_obj and not file_url:
            return Response({'error': '请提供文件或文件URL'}, status=status.HTTP_400_BAD_REQUEST)
        service = AliyunTingwuService()
        if not service.bucket or not service.client:
            return Response({'error': '阿里云服务未配置'}, status=status.HTTP_503_SERVICE_UNAVAILABLE)
        try:
            oss_url = None
            if file_obj:
                # 1. Upload the file to OSS under a random name keeping the extension.
                file_extension = file_obj.name.split('.')[-1]
                file_name = f"transcription/{uuid.uuid4()}.{file_extension}"
                # Upload via the service (returns a signed URL).
                oss_url = service.upload_to_oss(file_obj, file_name)
            else:
                # Use the caller-provided URL directly.
                oss_url = file_url
            # 2. Create the database record.
            task_data = {
                'file_url': oss_url,
                'status': TranscriptionTask.Status.PENDING
            }
            if project_id:
                try:
                    p_id = int(project_id)
                    # Only IDs > 0 are treated as valid project references;
                    # this avoids FK violations caused by a frontend or Swagger
                    # default value of 0.
                    if p_id > 0:
                        task_data['project_id'] = p_id
                except (ValueError, TypeError):
                    pass # Ignore invalid project_id
            task_record = TranscriptionTask.objects.create(**task_data)
            logger.info(f"Created TranscriptionTask {task_record.id} with project_id={project_id}")
            # 3. Submit the Tingwu transcription task.
            try:
                tingwu_response = service.create_transcription_task(oss_url)
                # Response is usually shaped {"Data": {"TaskId": "...", ...}}.
                if 'Data' in tingwu_response and isinstance(tingwu_response['Data'], dict):
                    task_id = tingwu_response['Data'].get('TaskId')
                else:
                    task_id = tingwu_response.get('TaskId')
                if task_id:
                    task_record.task_id = task_id
                    task_record.status = TranscriptionTask.Status.PROCESSING
                    task_record.save()
                else:
                    task_record.status = TranscriptionTask.Status.FAILED
                    task_record.error_message = "未能获取 TaskId"
                    task_record.save()
                    return Response({'error': '未能获取 TaskId'}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
            except Exception as e:
                task_record.status = TranscriptionTask.Status.FAILED
                task_record.error_message = str(e)
                task_record.save()
                logger.error(f"创建听悟任务失败: {e}")
                return Response({'error': f"创建听悟任务失败: {str(e)}"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
            serializer = self.get_serializer(task_record)
            return Response(serializer.data, status=status.HTTP_201_CREATED)
        except Exception as e:
            logger.error(f"处理上传请求失败: {e}")
            return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
@action(detail=True, methods=['post'])
@extend_schema(
request={
'application/json': {
'type': 'object',
'properties': {
'model_selection': {'type': 'string', 'description': '模型选择'},
'prompt': {'type': 'string', 'description': '评分提示词'},
}
}
},
responses={200: AIEvaluationSerializer(many=True)}
)
def evaluate(self, request, pk=None):
"""
触发AI评估
"""
task = self.get_object()
# 1. 如果有 active template触发所有 active template
# 2. 如果请求体提供了 custom prompt则创建一个 custom evaluation (no template)
from .models import AIEvaluationTemplate
from .bailian_service import BailianService
service = BailianService()
evaluations_to_process = []
# A. 如果指定了 Prompt/Model视为手动单次评估
model_selection = request.data.get('model_selection')
prompt = request.data.get('prompt')
if prompt:
# 创建一个不关联 Template 的评估
eval, _ = AIEvaluation.objects.get_or_create(
task=task,
template=None,
defaults={
'model_selection': model_selection or 'qwen-plus',
'prompt': prompt
}
)
# 更新配置
eval.model_selection = model_selection or eval.model_selection
eval.prompt = prompt
eval.save()
evaluations_to_process.append(eval)
else:
# B. 否则触发所有 Active Templates
active_templates = AIEvaluationTemplate.objects.filter(is_active=True)
if not active_templates.exists():
return Response({'message': 'No active templates and no custom prompt provided'}, status=status.HTTP_400_BAD_REQUEST)
for t in active_templates:
eval, _ = AIEvaluation.objects.get_or_create(
task=task,
template=t,
defaults={
'model_selection': t.model_selection,
'prompt': t.prompt
}
)
# 始终更新为模板最新配置? 或者保留历史? 用户意图似乎是"模版搭好...启用...生成几份"
# 这里假设触发时应用模板当前配置
eval.model_selection = t.model_selection
eval.prompt = t.prompt
eval.save()
evaluations_to_process.append(eval)
# 执行评估 (改为异步并发执行)
# 提取ID列表避免传递模型对象导致可能的线程问题
eval_ids = [e.id for e in evaluations_to_process]
if eval_ids:
import threading
from concurrent.futures import ThreadPoolExecutor
def run_evaluations_background(ids):
# 在后台线程中重新引入依赖
from .models import AIEvaluation
from .bailian_service import BailianService
# 为该线程创建独立的服务实例
local_service = BailianService()
# 获取最新的对象
target_evals = AIEvaluation.objects.filter(id__in=ids)
# 使用线程池并发执行
# max_workers=4 可以同时处理4个评估请求
with ThreadPoolExecutor(max_workers=4) as executor:
executor.map(local_service.evaluate_task, target_evals)
# 启动后台线程,不阻塞当前 HTTP 请求
thread = threading.Thread(target=run_evaluations_background, args=(eval_ids,))
thread.daemon = True # 设置为守护线程
thread.start()
# 返回该任务的所有评估结果
all_evals = AIEvaluation.objects.filter(task=task)
serializer = AIEvaluationSerializer(all_evals, many=True)
return Response(serializer.data)
@action(detail=True, methods=['get'])
@extend_schema(
parameters=[
OpenApiParameter("id", OpenApiTypes.UUID, OpenApiParameter.PATH, description="Task ID"),
],
responses={200: TranscriptionTaskSerializer}
)
def refresh_status(self, request, pk=None):
"""
刷新任务状态并获取结果
"""
task = self.get_object()
# 允许刷新的条件:
# 1. 任务未完成 (PENDING, PROCESSING)
# 2. 任务已完成但逐字稿 (transcription) 为空
# 3. 任务已完成但 AI总结 (summary) 为空 (新增)
should_refresh = False
if task.status not in [TranscriptionTask.Status.SUCCEEDED, TranscriptionTask.Status.FAILED]:
should_refresh = True
elif task.status == TranscriptionTask.Status.SUCCEEDED:
if not task.transcription or not task.summary:
should_refresh = True
if not should_refresh:
serializer = self.get_serializer(task)
return Response(serializer.data)
if not task.task_id:
return Response({'error': '任务ID不存在'}, status=status.HTTP_400_BAD_REQUEST)
service = AliyunTingwuService()
try:
result = service.get_task_info(task.task_id)
# 兼容处理响应结构 {"Data": {"TaskStatus": "...", "Result": ...}}
# 有些情况下 SDK 返回的是 JSON 字符串,需要二次解析
if isinstance(result, str):
import json
try:
result = json.loads(result)
except:
pass
if isinstance(result, dict):
data_obj = result.get('Data', result)
else:
data_obj = result
if not isinstance(data_obj, dict):
# 如果 Data 不是字典,可能它本身就是字符串,或者 result 结构更平铺
data_obj = result
# 防御性编程:确保 data_obj 是字典
if not isinstance(data_obj, dict):
logger.error(f"Unexpected response format: {type(data_obj)} - {data_obj}")
return Response({'error': f"Unexpected response format: {type(data_obj)}"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
# 调用 Service 进行解析和更新
service.parse_and_update_task(task, result)
# 如果任务成功但 AI 总结仍为空 (可能之前解析没触发,或者大模型调用失败)
# 再次尝试强制触发 summarize_task (如果原始数据存在)
# 注意service.parse_and_update_task 内部已经尝试异步触发,这里作为补救措施
if task.status == TranscriptionTask.Status.SUCCEEDED and not task.summary:
if task.summary_data or task.auto_chapters_data:
try:
# 先设置状态为 "AI总结生成当中..."
task.summary = "AI总结生成当中..."
task.save(update_fields=['summary'])
# 异步触发总结生成
import threading
from .bailian_service import BailianService
def async_summarize(task_id):
try:
# 重新获取 task 对象以避免线程问题
from .models import TranscriptionTask
task_obj = TranscriptionTask.objects.get(id=task_id)
bailian_service = BailianService()
bailian_service.summarize_task(task_obj)
except Exception as e:
logger.error(f"Async summary generation failed: {e}")
threading.Thread(target=async_summarize, args=(task.id,)).start()
except Exception as e:
logger.error(f"Force trigger AI summarization failed: {e}")
# 重新获取 task 以包含更新后的关联字段
task.refresh_from_db()
serializer = self.get_serializer(task)
return Response(serializer.data)
except Exception as e:
logger.error(f"刷新任务状态失败: {e}")
return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)