diff --git a/backend/ai_services/management/commands/test_tingwu_local.py b/backend/ai_services/management/commands/test_tingwu_local.py
new file mode 100644
index 0000000..1c195f6
--- /dev/null
+++ b/backend/ai_services/management/commands/test_tingwu_local.py
@@ -0,0 +1,149 @@
+"""Manual smoke test for the Aliyun Tingwu transcription service.
+
+Run this file directly as a script: it configures Django, submits one sample
+media URL for transcription, then polls the task until it succeeds, fails, or
+the retry budget is exhausted. Progress is printed to stdout.
+
+NOTE(review): this module sits in ``management/commands`` but defines no
+``Command`` class, so it presumably cannot be run through ``manage.py``;
+confirm whether it should move or become a real management command.
+"""
+import json
+import logging
+import os
+import sys
+import time
+import traceback
+from pathlib import Path
+
+import django
+from django.conf import settings
+
+# Set up the Django environment.
+# This file lives at <backend>/ai_services/management/commands/, so the backend
+# root is parents[3]. Deriving it from __file__ (instead of one developer's
+# absolute path) keeps the script runnable from any checkout.
+BACKEND_ROOT = Path(__file__).resolve().parents[3]
+sys.path.append(str(BACKEND_ROOT))
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
+django.setup()
+
+# Model imports require a configured app registry, so they follow django.setup().
+from ai_services.services import AliyunTingwuService
+from ai_services.models import TranscriptionTask
+
+# Configure logging.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+SUCCESS_STATUSES = ('COMPLETE', 'COMPLETED', 'SUCCEEDED')
+FAILURE_STATUS = 'FAILED'
+MAX_RETRIES = 60  # 60 polls x 5 s = 5 minutes
+POLL_INTERVAL_SECONDS = 5
+DOWNLOAD_TIMEOUT_SECONDS = 30
+
+
+def _as_payload(raw):
+    """Return the dict holding the task fields of a service response.
+
+    *raw* may be a dict or a JSON string. If the response carries a ``Data``
+    dict, that inner dict is returned; otherwise the response itself. Returns
+    ``None`` when *raw* is neither a dict nor a JSON-encoded dict.
+    """
+    if isinstance(raw, str):
+        try:
+            raw = json.loads(raw)
+        except json.JSONDecodeError:
+            return None
+    if not isinstance(raw, dict):
+        return None
+    inner = raw.get('Data')
+    return inner if isinstance(inner, dict) else raw
+
+
+def _print_transcription_keys(url):
+    """Download the transcription JSON at *url* and print its top-level keys."""
+    import requests  # lazy: only needed when the result is delivered as a URL
+
+    print(f"Downloading transcription from {url}")
+    # Without a timeout a stalled download would hang the script forever.
+    t_resp = requests.get(url, timeout=DOWNLOAD_TIMEOUT_SECONDS)
+    if t_resp.status_code != 200:
+        print(f"Failed to download: {t_resp.status_code}")
+        return
+    content = t_resp.json()
+    if isinstance(content, dict):
+        print(f"Downloaded content structure keys: {content.keys()}")
+    else:
+        print(f"Downloaded content is not a JSON object: {type(content).__name__}")
+
+
+def test_tingwu_transcription():
+    """Create a transcription task for a sample file and poll it to completion.
+
+    Prints every step. Exceptions raised while creating or polling the task
+    are printed with a traceback rather than propagated; nothing is returned.
+    """
+    file_url = "https://tangledup-ai-staging.oss-cn-shanghai.aliyuncs.com/Video/%E6%95%99%E5%AD%A6.mp4"
+
+    print(f"Testing transcription for: {file_url}")
+
+    service = AliyunTingwuService()
+
+    try:
+        # 1. Create the task.
+        print("Creating task...")
+        response = service.create_transcription_task(file_url)
+        print(f"Create task response: {json.dumps(response, indent=2, ensure_ascii=False)}")
+
+        payload = _as_payload(response)
+        task_id = payload.get('TaskId') if payload else None
+        if not task_id:
+            print("Failed to get TaskId")
+            return
+
+        print(f"Task created with ID: {task_id}")
+
+        # 2. Poll the task status.
+        for attempt in range(1, MAX_RETRIES + 1):
+            print(f"Checking status (attempt {attempt}/{MAX_RETRIES})...")
+            raw = service.get_task_info(task_id)
+            data_obj = _as_payload(raw)
+
+            if data_obj is None:
+                # Not a dict or JSON object: report it and poll again
+                # instead of crashing on a missing .get().
+                print(f"Unexpected task info response: {raw!r}")
+            else:
+                task_status = data_obj.get('TaskStatus') or data_obj.get('Status')
+                print(f"Current status: {task_status}")
+
+                if task_status in SUCCESS_STATUSES:
+                    print("Task succeeded!")
+                    print(f"Full Result: {json.dumps(data_obj, indent=2, ensure_ascii=False)}")
+
+                    # The transcription may be delivered as a download URL.
+                    task_result = data_obj.get('Result')
+                    if not isinstance(task_result, dict):
+                        task_result = {}
+                    transcription_data = task_result.get('Transcription', {})
+                    if isinstance(transcription_data, str) and transcription_data.startswith('http'):
+                        _print_transcription_keys(transcription_data)
+                    break
+
+                if task_status == FAILURE_STATUS:
+                    print(f"Task failed: {data_obj}")
+                    break
+
+            time.sleep(POLL_INTERVAL_SECONDS)
+        else:
+            # Loop ended without a break: neither success nor failure seen.
+            print(f"Timed out after {MAX_RETRIES} attempts without a final status")
+
+    except Exception as e:
+        print(f"Error: {e}")
+        traceback.print_exc()
+
+
+if __name__ == "__main__":
+    test_tingwu_transcription()
diff --git a/backend/ai_services/migrations/0003_transcriptiontask_auto_chapters_data_and_more.py b/backend/ai_services/migrations/0003_transcriptiontask_auto_chapters_data_and_more.py
new file mode 100644
index 0000000..af3798d
--- /dev/null
+++ b/backend/ai_services/migrations/0003_transcriptiontask_auto_chapters_data_and_more.py
@@ -0,0 +1,28 @@
+# Generated by Django 6.0.1 on 2026-03-11 12:30
+
+from django.db import
migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('ai_services', '0002_transcriptiontask_evaluation_transcriptiontask_score'), + ] + + operations = [ + migrations.AddField( + model_name='transcriptiontask', + name='auto_chapters_data', + field=models.JSONField(blank=True, help_text='阿里云返回的AutoChapters完整JSON', null=True, verbose_name='章节原始数据'), + ), + migrations.AddField( + model_name='transcriptiontask', + name='summary_data', + field=models.JSONField(blank=True, help_text='阿里云返回的Summarization完整JSON', null=True, verbose_name='总结原始数据'), + ), + migrations.AddField( + model_name='transcriptiontask', + name='transcription_data', + field=models.JSONField(blank=True, help_text='阿里云返回的Transcription完整JSON', null=True, verbose_name='转写原始数据'), + ), + ] diff --git a/backend/ai_services/models.py b/backend/ai_services/models.py index 4db4766..5977247 100644 --- a/backend/ai_services/models.py +++ b/backend/ai_services/models.py @@ -18,6 +18,11 @@ class TranscriptionTask(models.Model): choices=Status.choices, default=Status.PENDING ) + # 存储阿里云听悟返回的原始 JSON 结构 + transcription_data = models.JSONField(verbose_name=_('转写原始数据'), blank=True, null=True, help_text=_('阿里云返回的Transcription完整JSON')) + summary_data = models.JSONField(verbose_name=_('总结原始数据'), blank=True, null=True, help_text=_('阿里云返回的Summarization完整JSON')) + auto_chapters_data = models.JSONField(verbose_name=_('章节原始数据'), blank=True, null=True, help_text=_('阿里云返回的AutoChapters完整JSON')) + transcription = models.TextField(verbose_name=_('逐字稿'), blank=True, null=True) summary = models.TextField(verbose_name=_('AI总结'), blank=True, null=True) score = models.IntegerField(verbose_name=_('AI评分'), blank=True, null=True, help_text=_('基于转写内容的评分')) diff --git a/backend/ai_services/serializers.py b/backend/ai_services/serializers.py index 336fae4..d279284 100644 --- a/backend/ai_services/serializers.py +++ b/backend/ai_services/serializers.py @@ -4,8 +4,8 @@ from .models import TranscriptionTask class 
TranscriptionTaskSerializer(serializers.ModelSerializer): class Meta: model = TranscriptionTask - fields = ['id', 'file_url', 'task_id', 'status', 'transcription', 'summary', 'error_message', 'created_at', 'updated_at', 'score', 'evaluation'] - read_only_fields = ['id', 'file_url', 'task_id', 'status', 'transcription', 'summary', 'error_message', 'created_at', 'updated_at', 'score', 'evaluation'] + fields = ['id', 'file_url', 'task_id', 'status', 'transcription', 'summary', 'error_message', 'created_at', 'updated_at', 'score', 'evaluation', 'transcription_data', 'summary_data', 'auto_chapters_data'] + read_only_fields = ['id', 'file_url', 'task_id', 'status', 'transcription', 'summary', 'error_message', 'created_at', 'updated_at', 'score', 'evaluation', 'transcription_data', 'summary_data', 'auto_chapters_data'] class TranscriptionUploadSerializer(serializers.Serializer): file = serializers.FileField(help_text="上传的音频文件") diff --git a/backend/ai_services/views.py b/backend/ai_services/views.py index 87ccae6..ee2b479 100644 --- a/backend/ai_services/views.py +++ b/backend/ai_services/views.py @@ -218,6 +218,8 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet): if t_resp.status_code == 200: transcription_data = t_resp.json() logger.info(f"Downloaded transcription keys: {transcription_data.keys() if isinstance(transcription_data, dict) else 'Not a dict'}") + # 保存原始数据 + task.transcription_data = transcription_data else: logger.warning(f"Failed to download transcription: {t_resp.status_code}") transcription_data = {} @@ -234,6 +236,8 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet): if t_resp.status_code == 200: transcription_data = t_resp.json() logger.info(f"Downloaded transcription keys: {transcription_data.keys() if isinstance(transcription_data, dict) else 'Not a dict'}") + # 保存原始数据 + task.transcription_data = transcription_data except Exception as e: logger.error(f"Error downloading transcription nested url: {e}") @@ -300,6 +304,8 @@ class 
TranscriptionTaskViewSet(viewsets.ModelViewSet): s_resp = requests.get(summarization) if s_resp.status_code == 200: summarization = s_resp.json() + # 保存原始数据 + task.summary_data = summarization else: logger.warning(f"Failed to download summarization: {s_resp.status_code}") summarization = {} @@ -317,6 +323,19 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet): else: # 尝试从章节摘要中提取 chapters = task_result.get('Chapters', []) + # 处理 AutoChapters + auto_chapters = task_result.get('AutoChapters', {}) + if isinstance(auto_chapters, str) and auto_chapters.startswith('http'): + try: + import requests + logger.info(f"Downloading auto chapters from {auto_chapters}") + ac_resp = requests.get(auto_chapters) + if ac_resp.status_code == 200: + auto_chapters = ac_resp.json() + task.auto_chapters_data = auto_chapters + except Exception as e: + logger.error(f"Error downloading auto chapters: {e}") + summary_parts = [] for chapter in chapters: if 'Headline' in chapter: