tingwu_new
All checks were successful
Deploy to Server / deploy (push) Successful in 18s

This commit is contained in:
jeremygan2021
2026-03-11 20:07:46 +08:00
parent 59bd66459a
commit b0e97ed140
2 changed files with 41 additions and 4 deletions

View File

@@ -103,6 +103,11 @@ class AliyunTingwuService:
"Transcoding": { "Transcoding": {
"TargetAudioFormat": "mp3" "TargetAudioFormat": "mp3"
}, },
"Transcription": {
"DiarizationEnabled": True,
"ChannelId": 0
},
"TranscriptionEnabled": True,
"AutoChaptersEnabled": True, "AutoChaptersEnabled": True,
"SummarizationEnabled": True, "SummarizationEnabled": True,
"Summarization": { "Summarization": {

View File

@@ -146,8 +146,10 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
""" """
task = self.get_object() task = self.get_object()
# 如果任务已经完成或失败,直接返回当前状态 # 如果任务已经完成或失败,但逐字稿为空,允许重新刷新
if task.status in [TranscriptionTask.Status.SUCCEEDED, TranscriptionTask.Status.FAILED]: if task.status == TranscriptionTask.Status.SUCCEEDED and not task.transcription:
pass # 继续执行刷新逻辑
elif task.status in [TranscriptionTask.Status.SUCCEEDED, TranscriptionTask.Status.FAILED]:
serializer = self.get_serializer(task) serializer = self.get_serializer(task)
return Response(serializer.data) return Response(serializer.data)
@@ -191,9 +193,11 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
# 解析结果 # 解析结果
task_result = data_obj.get('Result', {}) task_result = data_obj.get('Result', {})
logger.info(f"Task result keys: {task_result.keys()}")
# 提取逐字稿 # 提取逐字稿
transcription_data = task_result.get('Transcription', {}) transcription_data = task_result.get('Transcription', {})
logger.info(f"Raw transcription data type: {type(transcription_data)}")
# 如果是 URL (字符串),尝试下载内容 # 如果是 URL (字符串),尝试下载内容
if isinstance(transcription_data, str) and transcription_data.startswith('http'): if isinstance(transcription_data, str) and transcription_data.startswith('http'):
@@ -203,6 +207,7 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
t_resp = requests.get(transcription_data) t_resp = requests.get(transcription_data)
if t_resp.status_code == 200: if t_resp.status_code == 200:
transcription_data = t_resp.json() transcription_data = t_resp.json()
logger.info(f"Downloaded transcription keys: {transcription_data.keys() if isinstance(transcription_data, dict) else 'Not a dict'}")
else: else:
logger.warning(f"Failed to download transcription: {t_resp.status_code}") logger.warning(f"Failed to download transcription: {t_resp.status_code}")
transcription_data = {} transcription_data = {}
@@ -218,6 +223,7 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
t_resp = requests.get(url) t_resp = requests.get(url)
if t_resp.status_code == 200: if t_resp.status_code == 200:
transcription_data = t_resp.json() transcription_data = t_resp.json()
logger.info(f"Downloaded transcription keys: {transcription_data.keys() if isinstance(transcription_data, dict) else 'Not a dict'}")
except Exception as e: except Exception as e:
logger.error(f"Error downloading transcription nested url: {e}") logger.error(f"Error downloading transcription nested url: {e}")
@@ -225,12 +231,38 @@ class TranscriptionTaskViewSet(viewsets.ModelViewSet):
# 尝试多种可能的路径提取句子 # 尝试多种可能的路径提取句子
# 1. 直接在根目录: {"Sentences": [...]} # 1. 直接在根目录: {"Sentences": [...]}
# 2. 在 Transcription 字段下: {"Transcription": {"Sentences": [...]}} # 2. 在 Transcription 字段下: {"Transcription": {"Sentences": [...]}}
# 3. 可能是 Paragraphs 结构
sentences = transcription_data.get('Sentences', []) sentences = transcription_data.get('Sentences', [])
if not sentences and 'Transcription' in transcription_data: if not sentences and 'Transcription' in transcription_data:
sentences = transcription_data['Transcription'].get('Sentences', []) sentences = transcription_data['Transcription'].get('Sentences', [])
full_text = " ".join([s.get('Text', '') for s in sentences]) if sentences:
task.transcription = full_text full_text = " ".join([s.get('Text', '') for s in sentences])
task.transcription = full_text
elif 'Paragraphs' in transcription_data:
# 尝试从段落中提取
paragraphs = transcription_data.get('Paragraphs', {})
if isinstance(paragraphs, dict):
# Paragraphs 可能是一个字典,包含 list
para_list = paragraphs.get('Paragraphs', []) # 有时候是嵌套的
if not para_list and isinstance(paragraphs, list):
para_list = paragraphs
elif isinstance(paragraphs, list):
para_list = paragraphs
else:
para_list = []
texts = []
for p in para_list:
if 'Text' in p:
texts.append(p['Text'])
elif 'Sentences' in p:
for s in p['Sentences']:
if 'Text' in s:
texts.append(s['Text'])
task.transcription = "\n".join(texts)
else:
logger.warning(f"Could not find Sentences or Paragraphs in transcription data: {transcription_data.keys()}")
# 提取总结 # 提取总结
# 总结结果结构可能因配置不同而异,这里尝试获取摘要 # 总结结果结构可能因配置不同而异,这里尝试获取摘要