This commit is contained in:
@@ -66,9 +66,12 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
||||
'file': {
|
||||
'type': 'string',
|
||||
'format': 'binary'
|
||||
}
|
||||
},
|
||||
'required': ['file']
|
||||
'file_url': {
|
||||
'type': 'string',
|
||||
'description': '音频文件的URL地址'
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
responses={201: TranscriptionTaskSerializer}
|
||||
@@ -78,20 +81,27 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
||||
上传音频文件并创建听悟转写任务
|
||||
"""
|
||||
file_obj = request.FILES.get('file')
|
||||
if not file_obj:
|
||||
return Response({'error': '未提供文件'}, status=status.HTTP_400_BAD_REQUEST)
|
||||
file_url = request.data.get('file_url')
|
||||
|
||||
if not file_obj and not file_url:
|
||||
return Response({'error': '请提供文件或文件URL'}, status=status.HTTP_400_BAD_REQUEST)
|
||||
|
||||
service = AliyunTingwuService()
|
||||
if not service.bucket or not service.client:
|
||||
return Response({'error': '阿里云服务未配置'}, status=status.HTTP_503_SERVICE_UNAVAILABLE)
|
||||
|
||||
try:
|
||||
oss_url = None
|
||||
if file_obj:
|
||||
# 1. 上传文件到 OSS
|
||||
file_extension = file_obj.name.split('.')[-1]
|
||||
file_name = f"transcription/{uuid.uuid4()}.{file_extension}"
|
||||
|
||||
# 使用服务上传
|
||||
oss_url = service.upload_to_oss(file_obj, file_name)
|
||||
else:
|
||||
# 使用提供的 URL
|
||||
oss_url = file_url
|
||||
|
||||
# 2. 创建数据库记录
|
||||
task_record = TranscriptionTask.objects.create(
|
||||
@@ -228,30 +238,37 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
||||
logger.error(f"Error downloading transcription nested url: {e}")
|
||||
|
||||
if isinstance(transcription_data, dict):
|
||||
# 尝试多种可能的路径提取句子
|
||||
# 1. 直接在根目录: {"Sentences": [...]}
|
||||
# 2. 在 Transcription 字段下: {"Transcription": {"Sentences": [...]}}
|
||||
# 3. 可能是 Paragraphs 结构
|
||||
sentences = transcription_data.get('Sentences', [])
|
||||
if not sentences and 'Transcription' in transcription_data:
|
||||
sentences = transcription_data['Transcription'].get('Sentences', [])
|
||||
# 确定包含实际内容的字典源
|
||||
# 下载的 JSON 可能结构为 {"TaskId": "...", "Transcription": { "Sentences": [...] }}
|
||||
# 或者 {"Sentences": [...]}
|
||||
content_source = transcription_data
|
||||
|
||||
# 如果存在嵌套的 Transcription 键,且它是字典,则优先使用它
|
||||
if 'Transcription' in transcription_data and isinstance(transcription_data['Transcription'], dict):
|
||||
content_source = transcription_data['Transcription']
|
||||
logger.info(f"Using nested Transcription dict, keys: {content_source.keys()}")
|
||||
|
||||
# 尝试提取 Sentences
|
||||
sentences = content_source.get('Sentences', [])
|
||||
|
||||
# 尝试提取 Paragraphs
|
||||
# 注意:Paragraphs 字段本身可能是一个列表,或者是一个包含 Paragraphs 键的字典
|
||||
paragraphs_data = content_source.get('Paragraphs', [])
|
||||
|
||||
if sentences:
|
||||
full_text = " ".join([s.get('Text', '') for s in sentences])
|
||||
task.transcription = full_text
|
||||
elif 'Paragraphs' in transcription_data:
|
||||
# 尝试从段落中提取
|
||||
paragraphs = transcription_data.get('Paragraphs', {})
|
||||
if isinstance(paragraphs, dict):
|
||||
# Paragraphs 可能是一个字典,包含 list
|
||||
para_list = paragraphs.get('Paragraphs', []) # 有时候是嵌套的
|
||||
if not para_list and isinstance(paragraphs, list):
|
||||
para_list = paragraphs
|
||||
elif isinstance(paragraphs, list):
|
||||
para_list = paragraphs
|
||||
else:
|
||||
elif paragraphs_data:
|
||||
# 处理 Paragraphs
|
||||
para_list = []
|
||||
if isinstance(paragraphs_data, dict):
|
||||
# 有时结构是 {"Paragraphs": {"Paragraphs": [...]}} 或者 {"Paragraphs": [...]}
|
||||
para_list = paragraphs_data.get('Paragraphs', [])
|
||||
# 如果取出来是空的,但 paragraphs_data 本身有内容,可能结构不同,暂不处理深层嵌套
|
||||
elif isinstance(paragraphs_data, list):
|
||||
para_list = paragraphs_data
|
||||
|
||||
if para_list:
|
||||
texts = []
|
||||
for p in para_list:
|
||||
if 'Text' in p:
|
||||
@@ -261,8 +278,11 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
||||
if 'Text' in s:
|
||||
texts.append(s['Text'])
|
||||
task.transcription = "\n".join(texts)
|
||||
logger.info(f"Extracted {len(texts)} paragraphs")
|
||||
else:
|
||||
logger.warning(f"Could not find Sentences or Paragraphs in transcription data: {transcription_data.keys()}")
|
||||
logger.warning(f"Paragraphs found but failed to extract list. Type: {type(paragraphs_data)}")
|
||||
else:
|
||||
logger.warning(f"Could not find Sentences or Paragraphs in content source. Keys: {content_source.keys()}")
|
||||
|
||||
# 提取总结
|
||||
# 总结结果结构可能因配置不同而异,这里尝试获取摘要
|
||||
|
||||
Reference in New Issue
Block a user