tingwu_new
All checks were successful
Deploy to Server / deploy (push) Successful in 21s

This commit is contained in:
jeremygan2021
2026-03-11 20:31:09 +08:00
parent 44d90e643f
commit b0aa902f89
5 changed files with 156 additions and 2 deletions

View File

@@ -0,0 +1,102 @@
import os
import sys
import django
import json
import logging
from django.conf import settings
# Set up the Django environment so the ORM and app modules can be used
# from this standalone script.
# Add the project root directory to sys.path.
# NOTE(review): this is an absolute, machine-specific path; the script will
# only find the project on a host that has this exact directory.
sys.path.append('/Volumes/data/Quant-Speed/market_page/backend')
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings') # Corrected to the proper settings module path
django.setup()
# These app imports are placed after django.setup() on purpose; presumably
# they load Django models and would fail if imported earlier — keep the order.
from ai_services.services import AliyunTingwuService
# NOTE(review): TranscriptionTask is not referenced in the visible part of
# this script.
from ai_services.models import TranscriptionTask
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def _extract_task_id(response):
    """Return the ``TaskId`` from a create-task response, or ``None``.

    The id is looked up under ``response['Data']`` when that value is a dict,
    otherwise at the top level of ``response``.
    """
    if not isinstance(response, dict):
        return None
    data = response.get('Data')
    if isinstance(data, dict):
        return data.get('TaskId')
    return response.get('TaskId')


def _unwrap_task_payload(result):
    """Return the dict that carries the task status, or ``None`` if unusable.

    ``result`` may be a JSON string or a dict. When the dict has a ``Data``
    key its value is used, otherwise the dict itself.
    """
    if isinstance(result, str):
        try:
            result = json.loads(result)
        except json.JSONDecodeError:
            return None
    if not isinstance(result, dict):
        return None
    data = result.get('Data', result)
    return data if isinstance(data, dict) else None


def _report_transcription(transcription, timeout):
    """Download the transcription JSON when it is given as a URL and print its keys."""
    if not (isinstance(transcription, str) and transcription.startswith('http')):
        return
    # Imported lazily so the dependency is only needed when a download happens.
    import requests

    print(f"Downloading transcription from {transcription}")
    # A timeout keeps the script from hanging forever on a stalled connection.
    t_resp = requests.get(transcription, timeout=timeout)
    if t_resp.status_code != 200:
        print(f"Failed to download: {t_resp.status_code}")
        return
    content = t_resp.json()
    keys = content.keys() if isinstance(content, dict) else 'Not a dict'
    print(f"Downloaded content structure keys: {keys}")


def test_tingwu_transcription():
    """Create a Tingwu transcription task for a sample video and poll it.

    Manual integration check: it calls ``AliyunTingwuService`` to create a
    task, polls ``get_task_info`` until the task reports a terminal status or
    the attempts run out, and prints progress along the way. Nothing is
    returned; any exception is printed together with its traceback.
    """
    # Local imports: the module's import header does not provide these.
    import time
    import traceback

    file_url = "https://tangledup-ai-staging.oss-cn-shanghai.aliyuncs.com/Video/%E6%95%99%E5%AD%A6.mp4"
    max_retries = 60  # 60 attempts x 5 s = 5 minutes
    poll_interval = 5  # seconds between status checks
    download_timeout = 30  # seconds allowed for the transcription download

    print(f"Testing transcription for: {file_url}")
    service = AliyunTingwuService()
    try:
        # 1. Create the task
        print("Creating task...")
        response = service.create_transcription_task(file_url)
        print(f"Create task response: {json.dumps(response, indent=2, ensure_ascii=False)}")
        task_id = _extract_task_id(response)
        if not task_id:
            print("Failed to get TaskId")
            return
        print(f"Task created with ID: {task_id}")

        # 2. Poll the task status
        for attempt in range(1, max_retries + 1):
            print(f"Checking status (attempt {attempt}/{max_retries})...")
            data_obj = _unwrap_task_payload(service.get_task_info(task_id))
            if data_obj is None:
                # An unparsable reply is reported and retried rather than
                # aborting the whole run.
                print("Unexpected task info response; retrying...")
            else:
                task_status = data_obj.get('TaskStatus') or data_obj.get('Status')
                print(f"Current status: {task_status}")
                if task_status in ('COMPLETE', 'COMPLETED', 'SUCCEEDED'):
                    print("Task succeeded!")
                    print(f"Full Result: {json.dumps(data_obj, indent=2, ensure_ascii=False)}")
                    # Try to resolve the Transcription entry of the result
                    task_result = data_obj.get('Result')
                    if isinstance(task_result, dict):
                        _report_transcription(task_result.get('Transcription'), download_timeout)
                    break
                if task_status == 'FAILED':
                    print(f"Task failed: {data_obj}")
                    break
            # No point in sleeping once the last attempt has been made.
            if attempt < max_retries:
                time.sleep(poll_interval)
        else:
            print(f"Timed out waiting for task {task_id} after {max_retries} attempts")
    except Exception as e:
        # Top-level boundary of a manual script: report everything, never re-raise.
        print(f"Error: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    test_tingwu_transcription()

View File

@@ -0,0 +1,28 @@
# Generated by Django 6.0.1 on 2026-03-11 12:30
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add three nullable JSON fields to the ``transcriptiontask`` model.

    The fields are ``auto_chapters_data``, ``summary_data`` and
    ``transcription_data``; each allows blank and null values.
    """

    # Must be applied on top of the previous ai_services migration.
    dependencies = [
        (
            'ai_services',
            '0002_transcriptiontask_evaluation_transcriptiontask_score',
        ),
    ]

    operations = [
        migrations.AddField(
            model_name='transcriptiontask',
            name='auto_chapters_data',
            field=models.JSONField(
                verbose_name='章节原始数据',
                help_text='阿里云返回的AutoChapters完整JSON',
                null=True,
                blank=True,
            ),
        ),
        migrations.AddField(
            model_name='transcriptiontask',
            name='summary_data',
            field=models.JSONField(
                verbose_name='总结原始数据',
                help_text='阿里云返回的Summarization完整JSON',
                null=True,
                blank=True,
            ),
        ),
        migrations.AddField(
            model_name='transcriptiontask',
            name='transcription_data',
            field=models.JSONField(
                verbose_name='转写原始数据',
                help_text='阿里云返回的Transcription完整JSON',
                null=True,
                blank=True,
            ),
        ),
    ]

View File

@@ -18,6 +18,11 @@ class TranscriptionTask(models.Model):
choices=Status.choices,
default=Status.PENDING
)
# 存储阿里云听悟返回的原始 JSON 结构
transcription_data = models.JSONField(verbose_name=_('转写原始数据'), blank=True, null=True, help_text=_('阿里云返回的Transcription完整JSON'))
summary_data = models.JSONField(verbose_name=_('总结原始数据'), blank=True, null=True, help_text=_('阿里云返回的Summarization完整JSON'))
auto_chapters_data = models.JSONField(verbose_name=_('章节原始数据'), blank=True, null=True, help_text=_('阿里云返回的AutoChapters完整JSON'))
transcription = models.TextField(verbose_name=_('逐字稿'), blank=True, null=True)
summary = models.TextField(verbose_name=_('AI总结'), blank=True, null=True)
score = models.IntegerField(verbose_name=_('AI评分'), blank=True, null=True, help_text=_('基于转写内容的评分'))

View File

@@ -4,8 +4,8 @@ from .models import TranscriptionTask
class TranscriptionTaskSerializer(serializers.ModelSerializer):
    """Serialize ``TranscriptionTask`` records with every field read-only.

    Besides the task metadata and derived text fields, the output includes
    the three raw JSON fields ``transcription_data``, ``summary_data`` and
    ``auto_chapters_data``.
    """

    class Meta:
        model = TranscriptionTask
        fields = [
            'id',
            'file_url',
            'task_id',
            'status',
            'transcription',
            'summary',
            'error_message',
            'created_at',
            'updated_at',
            'score',
            'evaluation',
            'transcription_data',
            'summary_data',
            'auto_chapters_data',
        ]
        # Every exposed field is read-only, so reuse the list instead of
        # maintaining a second copy that can drift out of sync.
        read_only_fields = fields
class TranscriptionUploadSerializer(serializers.Serializer):
file = serializers.FileField(help_text="上传的音频文件")

View File

@@ -218,6 +218,8 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
if t_resp.status_code == 200:
transcription_data = t_resp.json()
logger.info(f"Downloaded transcription keys: {transcription_data.keys() if isinstance(transcription_data, dict) else 'Not a dict'}")
# 保存原始数据
task.transcription_data = transcription_data
else:
logger.warning(f"Failed to download transcription: {t_resp.status_code}")
transcription_data = {}
@@ -234,6 +236,8 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
if t_resp.status_code == 200:
transcription_data = t_resp.json()
logger.info(f"Downloaded transcription keys: {transcription_data.keys() if isinstance(transcription_data, dict) else 'Not a dict'}")
# 保存原始数据
task.transcription_data = transcription_data
except Exception as e:
logger.error(f"Error downloading transcription nested url: {e}")
@@ -300,6 +304,8 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
s_resp = requests.get(summarization)
if s_resp.status_code == 200:
summarization = s_resp.json()
# 保存原始数据
task.summary_data = summarization
else:
logger.warning(f"Failed to download summarization: {s_resp.status_code}")
summarization = {}
@@ -317,6 +323,19 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
else:
# 尝试从章节摘要中提取
chapters = task_result.get('Chapters', [])
# 处理 AutoChapters
auto_chapters = task_result.get('AutoChapters', {})
if isinstance(auto_chapters, str) and auto_chapters.startswith('http'):
try:
import requests
logger.info(f"Downloading auto chapters from {auto_chapters}")
ac_resp = requests.get(auto_chapters)
if ac_resp.status_code == 200:
auto_chapters = ac_resp.json()
task.auto_chapters_data = auto_chapters
except Exception as e:
logger.error(f"Error downloading auto chapters: {e}")
summary_parts = []
for chapter in chapters:
if 'Headline' in chapter: