This commit is contained in:
@@ -103,6 +103,11 @@ class AliyunTingwuService:
|
|||||||
"Transcoding": {
|
"Transcoding": {
|
||||||
"TargetAudioFormat": "mp3"
|
"TargetAudioFormat": "mp3"
|
||||||
},
|
},
|
||||||
|
"Transcription": {
|
||||||
|
"DiarizationEnabled": True,
|
||||||
|
"ChannelId": 0
|
||||||
|
},
|
||||||
|
"TranscriptionEnabled": True,
|
||||||
"AutoChaptersEnabled": True,
|
"AutoChaptersEnabled": True,
|
||||||
"SummarizationEnabled": True,
|
"SummarizationEnabled": True,
|
||||||
"Summarization": {
|
"Summarization": {
|
||||||
|
|||||||
@@ -146,8 +146,10 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
|||||||
"""
|
"""
|
||||||
task = self.get_object()
|
task = self.get_object()
|
||||||
|
|
||||||
# 如果任务已经完成或失败,直接返回当前状态
|
# 如果任务已经完成或失败,但逐字稿为空,允许重新刷新
|
||||||
if task.status in [TranscriptionTask.Status.SUCCEEDED, TranscriptionTask.Status.FAILED]:
|
if task.status == TranscriptionTask.Status.SUCCEEDED and not task.transcription:
|
||||||
|
pass # 继续执行刷新逻辑
|
||||||
|
elif task.status in [TranscriptionTask.Status.SUCCEEDED, TranscriptionTask.Status.FAILED]:
|
||||||
serializer = self.get_serializer(task)
|
serializer = self.get_serializer(task)
|
||||||
return Response(serializer.data)
|
return Response(serializer.data)
|
||||||
|
|
||||||
@@ -191,9 +193,11 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
|||||||
|
|
||||||
# 解析结果
|
# 解析结果
|
||||||
task_result = data_obj.get('Result', {})
|
task_result = data_obj.get('Result', {})
|
||||||
|
logger.info(f"Task result keys: {task_result.keys()}")
|
||||||
|
|
||||||
# 提取逐字稿
|
# 提取逐字稿
|
||||||
transcription_data = task_result.get('Transcription', {})
|
transcription_data = task_result.get('Transcription', {})
|
||||||
|
logger.info(f"Raw transcription data type: {type(transcription_data)}")
|
||||||
|
|
||||||
# 如果是 URL (字符串),尝试下载内容
|
# 如果是 URL (字符串),尝试下载内容
|
||||||
if isinstance(transcription_data, str) and transcription_data.startswith('http'):
|
if isinstance(transcription_data, str) and transcription_data.startswith('http'):
|
||||||
@@ -203,6 +207,7 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
|||||||
t_resp = requests.get(transcription_data)
|
t_resp = requests.get(transcription_data)
|
||||||
if t_resp.status_code == 200:
|
if t_resp.status_code == 200:
|
||||||
transcription_data = t_resp.json()
|
transcription_data = t_resp.json()
|
||||||
|
logger.info(f"Downloaded transcription keys: {transcription_data.keys() if isinstance(transcription_data, dict) else 'Not a dict'}")
|
||||||
else:
|
else:
|
||||||
logger.warning(f"Failed to download transcription: {t_resp.status_code}")
|
logger.warning(f"Failed to download transcription: {t_resp.status_code}")
|
||||||
transcription_data = {}
|
transcription_data = {}
|
||||||
@@ -218,6 +223,7 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
|||||||
t_resp = requests.get(url)
|
t_resp = requests.get(url)
|
||||||
if t_resp.status_code == 200:
|
if t_resp.status_code == 200:
|
||||||
transcription_data = t_resp.json()
|
transcription_data = t_resp.json()
|
||||||
|
logger.info(f"Downloaded transcription keys: {transcription_data.keys() if isinstance(transcription_data, dict) else 'Not a dict'}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error downloading transcription nested url: {e}")
|
logger.error(f"Error downloading transcription nested url: {e}")
|
||||||
|
|
||||||
@@ -225,12 +231,38 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
|
|||||||
# 尝试多种可能的路径提取句子
|
# 尝试多种可能的路径提取句子
|
||||||
# 1. 直接在根目录: {"Sentences": [...]}
|
# 1. 直接在根目录: {"Sentences": [...]}
|
||||||
# 2. 在 Transcription 字段下: {"Transcription": {"Sentences": [...]}}
|
# 2. 在 Transcription 字段下: {"Transcription": {"Sentences": [...]}}
|
||||||
|
# 3. 可能是 Paragraphs 结构
|
||||||
sentences = transcription_data.get('Sentences', [])
|
sentences = transcription_data.get('Sentences', [])
|
||||||
if not sentences and 'Transcription' in transcription_data:
|
if not sentences and 'Transcription' in transcription_data:
|
||||||
sentences = transcription_data['Transcription'].get('Sentences', [])
|
sentences = transcription_data['Transcription'].get('Sentences', [])
|
||||||
|
|
||||||
full_text = " ".join([s.get('Text', '') for s in sentences])
|
if sentences:
|
||||||
task.transcription = full_text
|
full_text = " ".join([s.get('Text', '') for s in sentences])
|
||||||
|
task.transcription = full_text
|
||||||
|
elif 'Paragraphs' in transcription_data:
|
||||||
|
# 尝试从段落中提取
|
||||||
|
paragraphs = transcription_data.get('Paragraphs', {})
|
||||||
|
if isinstance(paragraphs, dict):
|
||||||
|
# Paragraphs 可能是一个字典,包含 list
|
||||||
|
para_list = paragraphs.get('Paragraphs', []) # 有时候是嵌套的
|
||||||
|
if not para_list and isinstance(paragraphs, list):
|
||||||
|
para_list = paragraphs
|
||||||
|
elif isinstance(paragraphs, list):
|
||||||
|
para_list = paragraphs
|
||||||
|
else:
|
||||||
|
para_list = []
|
||||||
|
|
||||||
|
texts = []
|
||||||
|
for p in para_list:
|
||||||
|
if 'Text' in p:
|
||||||
|
texts.append(p['Text'])
|
||||||
|
elif 'Sentences' in p:
|
||||||
|
for s in p['Sentences']:
|
||||||
|
if 'Text' in s:
|
||||||
|
texts.append(s['Text'])
|
||||||
|
task.transcription = "\n".join(texts)
|
||||||
|
else:
|
||||||
|
logger.warning(f"Could not find Sentences or Paragraphs in transcription data: {transcription_data.keys()}")
|
||||||
|
|
||||||
# 提取总结
|
# 提取总结
|
||||||
# 总结结果结构可能因配置不同而异,这里尝试获取摘要
|
# 总结结果结构可能因配置不同而异,这里尝试获取摘要
|
||||||
|
|||||||
Reference in New Issue
Block a user