From 504db66b0bec07af7a6259acd61beda11051210d Mon Sep 17 00:00:00 2001 From: jeremygan2021 Date: Wed, 11 Mar 2026 20:12:14 +0800 Subject: [PATCH] tingwu_new --- backend/ai_services/views.py | 98 ++++++++++++++++++++++-------------- 1 file changed, 59 insertions(+), 39 deletions(-) diff --git a/backend/ai_services/views.py b/backend/ai_services/views.py index 5088ba7..695d593 100644 --- a/backend/ai_services/views.py +++ b/backend/ai_services/views.py @@ -66,9 +66,12 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet): 'file': { 'type': 'string', 'format': 'binary' + }, + 'file_url': { + 'type': 'string', + 'description': '音频文件的URL地址' } - }, - 'required': ['file'] + } } }, responses={201: TranscriptionTaskSerializer} @@ -78,20 +81,27 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet): 上传音频文件并创建听悟转写任务 """ file_obj = request.FILES.get('file') - if not file_obj: - return Response({'error': '未提供文件'}, status=status.HTTP_400_BAD_REQUEST) + file_url = request.data.get('file_url') + + if not file_obj and not file_url: + return Response({'error': '请提供文件或文件URL'}, status=status.HTTP_400_BAD_REQUEST) service = AliyunTingwuService() if not service.bucket or not service.client: return Response({'error': '阿里云服务未配置'}, status=status.HTTP_503_SERVICE_UNAVAILABLE) try: - # 1. 上传文件到 OSS - file_extension = file_obj.name.split('.')[-1] - file_name = f"transcription/{uuid.uuid4()}.{file_extension}" - - # 使用服务上传 - oss_url = service.upload_to_oss(file_obj, file_name) + oss_url = None + if file_obj: + # 1. 上传文件到 OSS + file_extension = file_obj.name.split('.')[-1] + file_name = f"transcription/{uuid.uuid4()}.{file_extension}" + + # 使用服务上传 + oss_url = service.upload_to_oss(file_obj, file_name) + else: + # 使用提供的 URL + oss_url = file_url # 2. 创建数据库记录 task_record = TranscriptionTask.objects.create( @@ -228,41 +238,51 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet): logger.error(f"Error downloading transcription nested url: {e}") if isinstance(transcription_data, dict): - # 尝试多种可能的路径提取句子 - # 1. 直接在根目录: {"Sentences": [...]} - # 2. 在 Transcription 字段下: {"Transcription": {"Sentences": [...]}} - # 3. 可能是 Paragraphs 结构 - sentences = transcription_data.get('Sentences', []) - if not sentences and 'Transcription' in transcription_data: - sentences = transcription_data['Transcription'].get('Sentences', []) + # 确定包含实际内容的字典源 + # 下载的 JSON 可能结构为 {"TaskId": "...", "Transcription": { "Sentences": [...] }} + # 或者 {"Sentences": [...]} + content_source = transcription_data + + # 如果存在嵌套的 Transcription 键,且它是字典,则优先使用它 + if 'Transcription' in transcription_data and isinstance(transcription_data['Transcription'], dict): + content_source = transcription_data['Transcription'] + logger.info(f"Using nested Transcription dict, keys: {content_source.keys()}") + + # 尝试提取 Sentences + sentences = content_source.get('Sentences', []) + + # 尝试提取 Paragraphs + # 注意:Paragraphs 字段本身可能是一个列表,或者是一个包含 Paragraphs 键的字典 + paragraphs_data = content_source.get('Paragraphs', []) if sentences: full_text = " ".join([s.get('Text', '') for s in sentences]) task.transcription = full_text - elif 'Paragraphs' in transcription_data: - # 尝试从段落中提取 - paragraphs = transcription_data.get('Paragraphs', {}) - if isinstance(paragraphs, dict): - # Paragraphs 可能是一个字典,包含 list - para_list = paragraphs.get('Paragraphs', []) # 有时候是嵌套的 - if not para_list and isinstance(paragraphs, list): - para_list = paragraphs - elif isinstance(paragraphs, list): - para_list = paragraphs + elif paragraphs_data: + # 处理 Paragraphs + para_list = [] + if isinstance(paragraphs_data, dict): + # 有时结构是 {"Paragraphs": {"Paragraphs": [...]}} 或者 {"Paragraphs": [...]} + para_list = paragraphs_data.get('Paragraphs', []) + # 如果取出来是空的,但 paragraphs_data 本身有内容,可能结构不同,暂不处理深层嵌套 + elif isinstance(paragraphs_data, list): + para_list = paragraphs_data + + if para_list: + texts = [] + for p in para_list: + if 'Text' in p: + texts.append(p['Text']) + elif 'Sentences' in p: + for s in p['Sentences']: + if 'Text' in s: + texts.append(s['Text']) + task.transcription = "\n".join(texts) + logger.info(f"Extracted {len(texts)} paragraphs") else: - para_list = [] - - texts = [] - for p in para_list: - if 'Text' in p: - texts.append(p['Text']) - elif 'Sentences' in p: - for s in p['Sentences']: - if 'Text' in s: - texts.append(s['Text']) - task.transcription = "\n".join(texts) + logger.warning(f"Paragraphs found but failed to extract list. Type: {type(paragraphs_data)}") else: - logger.warning(f"Could not find Sentences or Paragraphs in transcription data: {transcription_data.keys()}") + logger.warning(f"Could not find Sentences or Paragraphs in content source. Keys: {content_source.keys()}") # 提取总结 # 总结结果结构可能因配置不同而异,这里尝试获取摘要