This commit is contained in:
102
backend/ai_services/management/commands/test_tingwu_local.py
Normal file
102
backend/ai_services/management/commands/test_tingwu_local.py
Normal file
@@ -0,0 +1,102 @@
|
||||
import os
|
||||
import sys
|
||||
import django
|
||||
import json
|
||||
import logging
|
||||
from django.conf import settings
|
||||
|
||||
# Bootstrap Django so this script can run standalone (outside manage.py).
#
# The backend root is derived from this file's own location
# (backend/ai_services/management/commands/<this file>) rather than from a
# machine-specific absolute path, so the script works on any checkout.
_BACKEND_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', '..')
)
if _BACKEND_ROOT not in sys.path:
    sys.path.append(_BACKEND_ROOT)

# Settings module path of the project; setdefault keeps any value that is
# already present in the environment.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
django.setup()

# Project imports are intentionally placed after django.setup(): they pull in
# Django models, which need the configured app registry.
from ai_services.services import AliyunTingwuService
from ai_services.models import TranscriptionTask

# Logging configuration
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
||||
|
||||
def test_tingwu_transcription():
    """Run an end-to-end smoke test against the Aliyun Tingwu service.

    Creates a transcription task for a fixed sample video, then polls the
    task every 5 seconds for at most 60 attempts (about 5 minutes) and prints
    each status.  When the task completes and its result exposes the
    transcription as an HTTP(S) URL, that JSON document is downloaded and its
    top-level keys are printed.

    Returns:
        None.  Every outcome is reported on stdout.  Exceptions raised while
        creating or polling the task are caught and printed together with a
        traceback instead of propagating.
    """
    import time
    import traceback

    file_url = "https://tangledup-ai-staging.oss-cn-shanghai.aliyuncs.com/Video/%E6%95%99%E5%AD%A6.mp4"
    max_retries = 60
    poll_interval = 5  # seconds; 60 attempts * 5 s = 5 minutes overall

    print(f"Testing transcription for: {file_url}")

    service = AliyunTingwuService()

    try:
        # 1. Create the task.
        print("Creating task...")
        response = service.create_transcription_task(file_url)
        print(f"Create task response: {json.dumps(response, indent=2, ensure_ascii=False)}")

        created = _unwrap_data(response)
        task_id = created.get('TaskId') if created else None
        if not task_id:
            print("Failed to get TaskId")
            return

        print(f"Task created with ID: {task_id}")

        # 2. Poll the task status until it finishes, fails or we give up.
        for attempt in range(1, max_retries + 1):
            print(f"Checking status (attempt {attempt}/{max_retries})...")
            result = service.get_task_info(task_id)

            data_obj = _unwrap_data(result)
            if data_obj is None:
                # Neither a dict nor a JSON object: there is no status to
                # read, so stop with a clear message instead of failing
                # later with an AttributeError.
                print(f"Unexpected task info response: {result!r}")
                break

            task_status = data_obj.get('TaskStatus') or data_obj.get('Status')
            print(f"Current status: {task_status}")

            if task_status in ('COMPLETE', 'COMPLETED', 'SUCCEEDED'):
                print("Task succeeded!")
                print(f"Full Result: {json.dumps(data_obj, indent=2, ensure_ascii=False)}")
                _report_transcription(data_obj)
                break
            if task_status == 'FAILED':
                print(f"Task failed: {data_obj}")
                break

            # No point in waiting after the final attempt.
            if attempt < max_retries:
                time.sleep(poll_interval)
        else:
            # Loop ended without a break: the task never reached a final state.
            print(f"Timed out: task {task_id} did not finish after {max_retries} attempts")

    except Exception as e:
        # Top-level boundary of a manual test script: report and carry on.
        print(f"Error: {e}")
        traceback.print_exc()


def _unwrap_data(payload):
    """Return the dict that carries the task fields of a response, or None.

    ``payload`` may be a dict or a JSON string.  If the decoded dict holds
    another dict under ``'Data'``, that inner dict is returned; otherwise the
    decoded dict itself.  Anything that is not (or does not decode to) a dict
    yields ``None``.
    """
    if isinstance(payload, str):
        try:
            payload = json.loads(payload)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return None
    if not isinstance(payload, dict):
        return None
    data = payload.get('Data')
    return data if isinstance(data, dict) else payload


def _report_transcription(data_obj):
    """Download the transcription linked from a finished task and print its keys.

    Does nothing unless ``data_obj['Result']['Transcription']`` is a string
    starting with ``'http'``.
    """
    task_result = data_obj.get('Result') or {}
    if not isinstance(task_result, dict):
        return
    transcription = task_result.get('Transcription')
    if not (isinstance(transcription, str) and transcription.startswith('http')):
        return

    import requests

    print(f"Downloading transcription from {transcription}")
    # A timeout keeps a stalled download from hanging the script forever.
    t_resp = requests.get(transcription, timeout=30)
    if t_resp.status_code != 200:
        print(f"Failed to download: {t_resp.status_code}")
        return

    content = t_resp.json()
    keys = content.keys() if isinstance(content, dict) else 'Not a dict'
    print(f"Downloaded content structure keys: {keys}")
|
||||
|
||||
# Run the smoke test only when this file is executed directly as a script.
if __name__ == "__main__":
    test_tingwu_transcription()
|
||||
@@ -0,0 +1,28 @@
|
||||
# Generated by Django 6.0.1 on 2026-03-11 12:30
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add three nullable JSON fields to the ``transcriptiontask`` model.

    The fields are ``auto_chapters_data``, ``summary_data`` and
    ``transcription_data``, added in that order.
    """

    dependencies = [
        ('ai_services', '0002_transcriptiontask_evaluation_transcriptiontask_score'),
    ]

    # One AddField per row of (field name, help text, verbose name); the
    # three operations differ only in these values.
    operations = [
        migrations.AddField(
            model_name='transcriptiontask',
            name=field_name,
            field=models.JSONField(
                blank=True,
                help_text=help_text,
                null=True,
                verbose_name=verbose_name,
            ),
        )
        for field_name, help_text, verbose_name in (
            ('auto_chapters_data', '阿里云返回的AutoChapters完整JSON', '章节原始数据'),
            ('summary_data', '阿里云返回的Summarization完整JSON', '总结原始数据'),
            ('transcription_data', '阿里云返回的Transcription完整JSON', '转写原始数据'),
        )
    ]
|
||||
@@ -18,6 +18,11 @@ class TranscriptionTask(models.Model):
|
||||
choices=Status.choices,
|
||||
default=Status.PENDING
|
||||
)
|
||||
# 存储阿里云听悟返回的原始 JSON 结构
|
||||
transcription_data = models.JSONField(verbose_name=_('转写原始数据'), blank=True, null=True, help_text=_('阿里云返回的Transcription完整JSON'))
|
||||
summary_data = models.JSONField(verbose_name=_('总结原始数据'), blank=True, null=True, help_text=_('阿里云返回的Summarization完整JSON'))
|
||||
auto_chapters_data = models.JSONField(verbose_name=_('章节原始数据'), blank=True, null=True, help_text=_('阿里云返回的AutoChapters完整JSON'))
|
||||
|
||||
transcription = models.TextField(verbose_name=_('逐字稿'), blank=True, null=True)
|
||||
summary = models.TextField(verbose_name=_('AI总结'), blank=True, null=True)
|
||||
score = models.IntegerField(verbose_name=_('AI评分'), blank=True, null=True, help_text=_('基于转写内容的评分'))
|
||||
|
||||
@@ -4,8 +4,8 @@ from .models import TranscriptionTask
|
||||
class TranscriptionTaskSerializer(serializers.ModelSerializer):
    """Serializer for ``TranscriptionTask`` in which every exposed field is read-only.

    Besides the basic task fields it exposes the three raw JSON fields
    ``transcription_data``, ``summary_data`` and ``auto_chapters_data``.
    """

    class Meta:
        model = TranscriptionTask
        # Single definition of the exposed fields. Previously both lists were
        # assigned twice (the first pair was immediately overwritten) and had
        # to be kept in sync by hand.
        fields = [
            'id',
            'file_url',
            'task_id',
            'status',
            'transcription',
            'summary',
            'error_message',
            'created_at',
            'updated_at',
            'score',
            'evaluation',
            'transcription_data',
            'summary_data',
            'auto_chapters_data',
        ]
        # All exposed fields are read-only; copy so the two lists stay
        # independent objects, as they were originally.
        read_only_fields = list(fields)
|
||||
|
||||
class TranscriptionUploadSerializer(serializers.Serializer):
|
||||
file = serializers.FileField(help_text="上传的音频文件")
|
||||
|
||||
@@ -218,6 +218,8 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
||||
if t_resp.status_code == 200:
|
||||
transcription_data = t_resp.json()
|
||||
logger.info(f"Downloaded transcription keys: {transcription_data.keys() if isinstance(transcription_data, dict) else 'Not a dict'}")
|
||||
# 保存原始数据
|
||||
task.transcription_data = transcription_data
|
||||
else:
|
||||
logger.warning(f"Failed to download transcription: {t_resp.status_code}")
|
||||
transcription_data = {}
|
||||
@@ -234,6 +236,8 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
||||
if t_resp.status_code == 200:
|
||||
transcription_data = t_resp.json()
|
||||
logger.info(f"Downloaded transcription keys: {transcription_data.keys() if isinstance(transcription_data, dict) else 'Not a dict'}")
|
||||
# 保存原始数据
|
||||
task.transcription_data = transcription_data
|
||||
except Exception as e:
|
||||
logger.error(f"Error downloading transcription nested url: {e}")
|
||||
|
||||
@@ -300,6 +304,8 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
||||
s_resp = requests.get(summarization)
|
||||
if s_resp.status_code == 200:
|
||||
summarization = s_resp.json()
|
||||
# 保存原始数据
|
||||
task.summary_data = summarization
|
||||
else:
|
||||
logger.warning(f"Failed to download summarization: {s_resp.status_code}")
|
||||
summarization = {}
|
||||
@@ -317,6 +323,19 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
||||
else:
|
||||
# 尝试从章节摘要中提取
|
||||
chapters = task_result.get('Chapters', [])
|
||||
# 处理 AutoChapters
|
||||
auto_chapters = task_result.get('AutoChapters', {})
|
||||
if isinstance(auto_chapters, str) and auto_chapters.startswith('http'):
|
||||
try:
|
||||
import requests
|
||||
logger.info(f"Downloading auto chapters from {auto_chapters}")
|
||||
ac_resp = requests.get(auto_chapters)
|
||||
if ac_resp.status_code == 200:
|
||||
auto_chapters = ac_resp.json()
|
||||
task.auto_chapters_data = auto_chapters
|
||||
except Exception as e:
|
||||
logger.error(f"Error downloading auto chapters: {e}")
|
||||
|
||||
summary_parts = []
|
||||
for chapter in chapters:
|
||||
if 'Headline' in chapter:
|
||||
|
||||
Reference in New Issue
Block a user