This commit is contained in:
@@ -103,6 +103,11 @@ class AliyunTingwuService:
|
||||
"Transcoding": {
|
||||
"TargetAudioFormat": "mp3"
|
||||
},
|
||||
"Transcription": {
|
||||
"DiarizationEnabled": True,
|
||||
"ChannelId": 0
|
||||
},
|
||||
"TranscriptionEnabled": True,
|
||||
"AutoChaptersEnabled": True,
|
||||
"SummarizationEnabled": True,
|
||||
"Summarization": {
|
||||
|
||||
@@ -146,8 +146,10 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
||||
"""
|
||||
task = self.get_object()
|
||||
|
||||
# 如果任务已经完成或失败,直接返回当前状态
|
||||
if task.status in [TranscriptionTask.Status.SUCCEEDED, TranscriptionTask.Status.FAILED]:
|
||||
# 如果任务已经完成或失败,但逐字稿为空,允许重新刷新
|
||||
if task.status == TranscriptionTask.Status.SUCCEEDED and not task.transcription:
|
||||
pass # 继续执行刷新逻辑
|
||||
elif task.status in [TranscriptionTask.Status.SUCCEEDED, TranscriptionTask.Status.FAILED]:
|
||||
serializer = self.get_serializer(task)
|
||||
return Response(serializer.data)
|
||||
|
||||
@@ -191,9 +193,11 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
||||
|
||||
# 解析结果
|
||||
task_result = data_obj.get('Result', {})
|
||||
logger.info(f"Task result keys: {task_result.keys()}")
|
||||
|
||||
# 提取逐字稿
|
||||
transcription_data = task_result.get('Transcription', {})
|
||||
logger.info(f"Raw transcription data type: {type(transcription_data)}")
|
||||
|
||||
# 如果是 URL (字符串),尝试下载内容
|
||||
if isinstance(transcription_data, str) and transcription_data.startswith('http'):
|
||||
@@ -203,6 +207,7 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
||||
t_resp = requests.get(transcription_data)
|
||||
if t_resp.status_code == 200:
|
||||
transcription_data = t_resp.json()
|
||||
logger.info(f"Downloaded transcription keys: {transcription_data.keys() if isinstance(transcription_data, dict) else 'Not a dict'}")
|
||||
else:
|
||||
logger.warning(f"Failed to download transcription: {t_resp.status_code}")
|
||||
transcription_data = {}
|
||||
@@ -218,6 +223,7 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
||||
t_resp = requests.get(url)
|
||||
if t_resp.status_code == 200:
|
||||
transcription_data = t_resp.json()
|
||||
logger.info(f"Downloaded transcription keys: {transcription_data.keys() if isinstance(transcription_data, dict) else 'Not a dict'}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error downloading transcription nested url: {e}")
|
||||
|
||||
@@ -225,12 +231,38 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
||||
# 尝试多种可能的路径提取句子
|
||||
# 1. 直接在根目录: {"Sentences": [...]}
|
||||
# 2. 在 Transcription 字段下: {"Transcription": {"Sentences": [...]}}
|
||||
# 3. 可能是 Paragraphs 结构
|
||||
sentences = transcription_data.get('Sentences', [])
|
||||
if not sentences and 'Transcription' in transcription_data:
|
||||
sentences = transcription_data['Transcription'].get('Sentences', [])
|
||||
|
||||
if sentences:
|
||||
full_text = " ".join([s.get('Text', '') for s in sentences])
|
||||
task.transcription = full_text
|
||||
elif 'Paragraphs' in transcription_data:
|
||||
# 尝试从段落中提取
|
||||
paragraphs = transcription_data.get('Paragraphs', {})
|
||||
if isinstance(paragraphs, dict):
|
||||
# Paragraphs 可能是一个字典,包含 list
|
||||
para_list = paragraphs.get('Paragraphs', []) # 有时候是嵌套的
|
||||
if not para_list and isinstance(paragraphs, list):
|
||||
para_list = paragraphs
|
||||
elif isinstance(paragraphs, list):
|
||||
para_list = paragraphs
|
||||
else:
|
||||
para_list = []
|
||||
|
||||
texts = []
|
||||
for p in para_list:
|
||||
if 'Text' in p:
|
||||
texts.append(p['Text'])
|
||||
elif 'Sentences' in p:
|
||||
for s in p['Sentences']:
|
||||
if 'Text' in s:
|
||||
texts.append(s['Text'])
|
||||
task.transcription = "\n".join(texts)
|
||||
else:
|
||||
logger.warning(f"Could not find Sentences or Paragraphs in transcription data: {transcription_data.keys()}")
|
||||
|
||||
# 提取总结
|
||||
# 总结结果结构可能因配置不同而异,这里尝试获取摘要
|
||||
|
||||
Reference in New Issue
Block a user