This commit is contained in:
102
backend/ai_services/management/commands/test_tingwu_local.py
Normal file
102
backend/ai_services/management/commands/test_tingwu_local.py
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import django
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
# Bootstrap Django so this file can be executed as a standalone script.
#
# The backend root is derived from this file's own location
# (backend/ai_services/management/commands/<this file>) rather than from a
# machine-specific absolute path, so the script works on any checkout.
_BACKEND_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', '..')
)
if _BACKEND_ROOT not in sys.path:
    sys.path.append(_BACKEND_ROOT)

# Points at the project's settings module; setdefault keeps any value that is
# already exported in the environment.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
django.setup()

# These imports must come after django.setup(): they load Django models.
from ai_services.services import AliyunTingwuService
from ai_services.models import TranscriptionTask

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_DEFAULT_FILE_URL = "https://tangledup-ai-staging.oss-cn-shanghai.aliyuncs.com/Video/%E6%95%99%E5%AD%A6.mp4"
_SUCCESS_STATUSES = ('COMPLETE', 'COMPLETED', 'SUCCEEDED')


def _extract_task_id(response):
    """Return the TaskId from a create-task response, or None when absent.

    The id is read from the nested ``Data`` dict when that key holds a dict,
    otherwise from the top level of the response.
    """
    if not isinstance(response, dict):
        return None
    nested = response.get('Data')
    if isinstance(nested, dict):
        return nested.get('TaskId')
    return response.get('TaskId')


def _parse_task_payload(result):
    """Return the dict that carries the status fields, or None if unusable.

    Accepts either a dict or a JSON string. When the decoded dict has a
    ``Data`` entry, that entry is used; otherwise the dict itself is used.
    """
    if isinstance(result, str):
        try:
            result = json.loads(result)
        except json.JSONDecodeError:
            return None
    if not isinstance(result, dict):
        return None
    payload = result.get('Data', result)
    return payload if isinstance(payload, dict) else None


def _report_transcription(transcription_url, timeout):
    """Download the transcription JSON from a URL and print its top-level keys."""
    import requests

    print(f"Downloading transcription from {transcription_url}")
    # A timeout keeps the script from hanging forever on a stalled download.
    t_resp = requests.get(transcription_url, timeout=timeout)
    if t_resp.status_code != 200:
        print(f"Failed to download: {t_resp.status_code}")
        return
    content = t_resp.json()
    # Same guard the viewset uses: the payload is not guaranteed to be a dict.
    keys = content.keys() if isinstance(content, dict) else 'Not a dict'
    print(f"Downloaded content structure keys: {keys}")


def test_tingwu_transcription(file_url=_DEFAULT_FILE_URL, max_retries=60,
                              poll_interval=5, download_timeout=30):
    """Create a Tingwu transcription task and poll it until it finishes.

    This is a manual smoke-test script: progress is reported with ``print``
    and nothing is returned.

    Args:
        file_url: URL of the media file to transcribe.
        max_retries: Maximum number of status checks (60 checks at the
            default 5-second interval is roughly 5 minutes).
        poll_interval: Seconds to sleep between status checks.
        download_timeout: Timeout in seconds for downloading the
            transcription result.

    Any exception raised while creating or polling the task is caught,
    printed together with its traceback, and not re-raised.
    """
    import time
    import traceback

    print(f"Testing transcription for: {file_url}")

    service = AliyunTingwuService()

    try:
        # 1. Create the task
        print("Creating task...")
        response = service.create_transcription_task(file_url)
        print(f"Create task response: {json.dumps(response, indent=2, ensure_ascii=False)}")

        task_id = _extract_task_id(response)
        if not task_id:
            print("Failed to get TaskId")
            return

        print(f"Task created with ID: {task_id}")

        # 2. Poll the task status
        for attempt in range(1, max_retries + 1):
            print(f"Checking status (attempt {attempt}/{max_retries})...")
            result = service.get_task_info(task_id)

            data_obj = _parse_task_payload(result)
            if data_obj is None:
                # An unparsable payload is treated as "not ready yet" instead
                # of aborting the whole polling loop.
                print(f"Unexpected task info payload: {result!r}")
            else:
                task_status = data_obj.get('TaskStatus') or data_obj.get('Status')
                print(f"Current status: {task_status}")

                if task_status in _SUCCESS_STATUSES:
                    print("Task succeeded!")
                    print(f"Full Result: {json.dumps(data_obj, indent=2, ensure_ascii=False)}")

                    # Try to resolve the Transcription entry; "Result" may be
                    # missing or null, so fall back to an empty dict.
                    task_result = data_obj.get('Result') or {}
                    transcription_data = (
                        task_result.get('Transcription', {})
                        if isinstance(task_result, dict) else None
                    )
                    if isinstance(transcription_data, str) and transcription_data.startswith('http'):
                        _report_transcription(transcription_data, download_timeout)
                    break

                if task_status == 'FAILED':
                    print(f"Task failed: {data_obj}")
                    break

            time.sleep(poll_interval)
        else:
            # The loop ran out of attempts without reaching a terminal status.
            print(f"Timed out waiting for task {task_id} after {max_retries} attempts")

    except Exception as e:
        # Top-level boundary of a manual script: report and stop.
        print(f"Error: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    test_tingwu_transcription()
|
||||||
@@ -0,0 +1,28 @@
|
|||||||
|
# Generated by Django 6.0.1 on 2026-03-11 12:30
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Add three nullable JSON columns to ``transcriptiontask``.

    The columns hold the raw AutoChapters, Summarization and Transcription
    JSON payloads, as described by each field's help text.
    """

    dependencies = [
        ('ai_services', '0002_transcriptiontask_evaluation_transcriptiontask_score'),
    ]

    # One AddField per (column name, help text, verbose name) row; every
    # column shares the same blank/null options.
    operations = [
        migrations.AddField(
            model_name='transcriptiontask',
            name=column,
            field=models.JSONField(
                blank=True,
                help_text=help_text,
                null=True,
                verbose_name=verbose_name,
            ),
        )
        for column, help_text, verbose_name in (
            ('auto_chapters_data', '阿里云返回的AutoChapters完整JSON', '章节原始数据'),
            ('summary_data', '阿里云返回的Summarization完整JSON', '总结原始数据'),
            ('transcription_data', '阿里云返回的Transcription完整JSON', '转写原始数据'),
        )
    ]
|
||||||
@@ -18,6 +18,11 @@ class TranscriptionTask(models.Model):
|
|||||||
choices=Status.choices,
|
choices=Status.choices,
|
||||||
default=Status.PENDING
|
default=Status.PENDING
|
||||||
)
|
)
|
||||||
|
# 存储阿里云听悟返回的原始 JSON 结构
|
||||||
|
transcription_data = models.JSONField(verbose_name=_('转写原始数据'), blank=True, null=True, help_text=_('阿里云返回的Transcription完整JSON'))
|
||||||
|
summary_data = models.JSONField(verbose_name=_('总结原始数据'), blank=True, null=True, help_text=_('阿里云返回的Summarization完整JSON'))
|
||||||
|
auto_chapters_data = models.JSONField(verbose_name=_('章节原始数据'), blank=True, null=True, help_text=_('阿里云返回的AutoChapters完整JSON'))
|
||||||
|
|
||||||
transcription = models.TextField(verbose_name=_('逐字稿'), blank=True, null=True)
|
transcription = models.TextField(verbose_name=_('逐字稿'), blank=True, null=True)
|
||||||
summary = models.TextField(verbose_name=_('AI总结'), blank=True, null=True)
|
summary = models.TextField(verbose_name=_('AI总结'), blank=True, null=True)
|
||||||
score = models.IntegerField(verbose_name=_('AI评分'), blank=True, null=True, help_text=_('基于转写内容的评分'))
|
score = models.IntegerField(verbose_name=_('AI评分'), blank=True, null=True, help_text=_('基于转写内容的评分'))
|
||||||
|
|||||||
@@ -4,8 +4,8 @@ from .models import TranscriptionTask
|
|||||||
class TranscriptionTaskSerializer(serializers.ModelSerializer):
|
class TranscriptionTaskSerializer(serializers.ModelSerializer):
|
||||||
class Meta:
|
class Meta:
|
||||||
model = TranscriptionTask
|
model = TranscriptionTask
|
||||||
fields = ['id', 'file_url', 'task_id', 'status', 'transcription', 'summary', 'error_message', 'created_at', 'updated_at', 'score', 'evaluation']
|
fields = ['id', 'file_url', 'task_id', 'status', 'transcription', 'summary', 'error_message', 'created_at', 'updated_at', 'score', 'evaluation', 'transcription_data', 'summary_data', 'auto_chapters_data']
|
||||||
read_only_fields = ['id', 'file_url', 'task_id', 'status', 'transcription', 'summary', 'error_message', 'created_at', 'updated_at', 'score', 'evaluation']
|
read_only_fields = ['id', 'file_url', 'task_id', 'status', 'transcription', 'summary', 'error_message', 'created_at', 'updated_at', 'score', 'evaluation', 'transcription_data', 'summary_data', 'auto_chapters_data']
|
||||||
|
|
||||||
class TranscriptionUploadSerializer(serializers.Serializer):
|
class TranscriptionUploadSerializer(serializers.Serializer):
|
||||||
file = serializers.FileField(help_text="上传的音频文件")
|
file = serializers.FileField(help_text="上传的音频文件")
|
||||||
|
|||||||
@@ -218,6 +218,8 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
|||||||
if t_resp.status_code == 200:
|
if t_resp.status_code == 200:
|
||||||
transcription_data = t_resp.json()
|
transcription_data = t_resp.json()
|
||||||
logger.info(f"Downloaded transcription keys: {transcription_data.keys() if isinstance(transcription_data, dict) else 'Not a dict'}")
|
logger.info(f"Downloaded transcription keys: {transcription_data.keys() if isinstance(transcription_data, dict) else 'Not a dict'}")
|
||||||
|
# 保存原始数据
|
||||||
|
task.transcription_data = transcription_data
|
||||||
else:
|
else:
|
||||||
logger.warning(f"Failed to download transcription: {t_resp.status_code}")
|
logger.warning(f"Failed to download transcription: {t_resp.status_code}")
|
||||||
transcription_data = {}
|
transcription_data = {}
|
||||||
@@ -234,6 +236,8 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
|||||||
if t_resp.status_code == 200:
|
if t_resp.status_code == 200:
|
||||||
transcription_data = t_resp.json()
|
transcription_data = t_resp.json()
|
||||||
logger.info(f"Downloaded transcription keys: {transcription_data.keys() if isinstance(transcription_data, dict) else 'Not a dict'}")
|
logger.info(f"Downloaded transcription keys: {transcription_data.keys() if isinstance(transcription_data, dict) else 'Not a dict'}")
|
||||||
|
# 保存原始数据
|
||||||
|
task.transcription_data = transcription_data
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error downloading transcription nested url: {e}")
|
logger.error(f"Error downloading transcription nested url: {e}")
|
||||||
|
|
||||||
@@ -300,6 +304,8 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
|||||||
s_resp = requests.get(summarization)
|
s_resp = requests.get(summarization)
|
||||||
if s_resp.status_code == 200:
|
if s_resp.status_code == 200:
|
||||||
summarization = s_resp.json()
|
summarization = s_resp.json()
|
||||||
|
# 保存原始数据
|
||||||
|
task.summary_data = summarization
|
||||||
else:
|
else:
|
||||||
logger.warning(f"Failed to download summarization: {s_resp.status_code}")
|
logger.warning(f"Failed to download summarization: {s_resp.status_code}")
|
||||||
summarization = {}
|
summarization = {}
|
||||||
@@ -317,6 +323,19 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
|||||||
else:
|
else:
|
||||||
# 尝试从章节摘要中提取
|
# 尝试从章节摘要中提取
|
||||||
chapters = task_result.get('Chapters', [])
|
chapters = task_result.get('Chapters', [])
|
||||||
|
# 处理 AutoChapters
|
||||||
|
auto_chapters = task_result.get('AutoChapters', {})
|
||||||
|
if isinstance(auto_chapters, str) and auto_chapters.startswith('http'):
|
||||||
|
try:
|
||||||
|
import requests
|
||||||
|
logger.info(f"Downloading auto chapters from {auto_chapters}")
|
||||||
|
ac_resp = requests.get(auto_chapters)
|
||||||
|
if ac_resp.status_code == 200:
|
||||||
|
auto_chapters = ac_resp.json()
|
||||||
|
task.auto_chapters_data = auto_chapters
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error downloading auto chapters: {e}")
|
||||||
|
|
||||||
summary_parts = []
|
summary_parts = []
|
||||||
for chapter in chapters:
|
for chapter in chapters:
|
||||||
if 'Headline' in chapter:
|
if 'Headline' in chapter:
|
||||||
|
|||||||
Reference in New Issue
Block a user