This commit is contained in:
jeremygan2021
2026-03-11 14:31:17 +08:00
parent 6361b7a522
commit 1a30da74cf
16 changed files with 422 additions and 0 deletions

9
backend/.env.example Normal file
View File

@@ -0,0 +1,9 @@
# Aliyun OSS Configuration
# Placeholders only - never commit real credentials to an example file.
ALIYUN_ACCESS_KEY_ID=your-access-key-id
ALIYUN_ACCESS_KEY_SECRET=your-access-key-secret
ALIYUN_OSS_ENDPOINT=https://oss-cn-shanghai.aliyuncs.com
ALIYUN_OSS_BUCKET_NAME=tangledup-ai-staging
ALIYUN_OSS_INTERNAL_ENDPOINT=https://oss-cn-shanghai-internal.aliyuncs.com
# Aliyun Tingwu Configuration
ALIYUN_TINGWU_APP_KEY=

View File

View File

@@ -0,0 +1,11 @@
from django.contrib import admin
from django.contrib.admin import ModelAdmin
from unfold.admin import ModelAdmin as UnfoldModelAdmin
from .models import TranscriptionTask
@admin.register(TranscriptionTask)
class TranscriptionTaskAdmin(UnfoldModelAdmin):
    """Admin configuration for ``TranscriptionTask`` using the Unfold base class."""

    # Columns displayed on the changelist page.
    list_display = [
        'id',
        'status',
        'task_id',
        'created_at',
    ]

    # Sidebar filters offered on the changelist page.
    list_filter = [
        'status',
        'created_at',
    ]

    # Fields matched by the admin search box.
    search_fields = [
        'id',
        'task_id',
        'transcription',
        'summary',
    ]

    # Fields shown on the detail form but not editable there.
    readonly_fields = [
        'id',
        'created_at',
        'updated_at',
        'task_id',
    ]

View File

@@ -0,0 +1,5 @@
from django.apps import AppConfig
class AiServicesConfig(AppConfig):
    """Application configuration for the ``ai_services`` Django app."""

    # Dotted path of the application this configuration applies to.
    name = 'ai_services'

View File

@@ -0,0 +1,34 @@
# Generated by Django 6.0.1 on 2026-03-11 05:11
import uuid
from django.db import migrations, models
class Migration(migrations.Migration):
    """Initial migration: creates the ``TranscriptionTask`` model."""

    initial = True

    # No prerequisite migrations are declared.
    dependencies = []

    operations = [
        migrations.CreateModel(
            name='TranscriptionTask',
            fields=[
                (
                    'id',
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                (
                    'file_url',
                    models.URLField(
                        max_length=1024,
                        verbose_name='文件链接',
                    ),
                ),
                (
                    'task_id',
                    models.CharField(
                        blank=True,
                        max_length=100,
                        null=True,
                        verbose_name='听悟任务ID',
                    ),
                ),
                (
                    'status',
                    models.CharField(
                        choices=[
                            ('PENDING', '等待中'),
                            ('PROCESSING', '处理中'),
                            ('SUCCEEDED', '成功'),
                            ('FAILED', '失败'),
                        ],
                        default='PENDING',
                        max_length=20,
                        verbose_name='状态',
                    ),
                ),
                (
                    'transcription',
                    models.TextField(
                        blank=True,
                        null=True,
                        verbose_name='逐字稿',
                    ),
                ),
                (
                    'summary',
                    models.TextField(
                        blank=True,
                        null=True,
                        verbose_name='AI总结',
                    ),
                ),
                (
                    'error_message',
                    models.TextField(
                        blank=True,
                        null=True,
                        verbose_name='错误信息',
                    ),
                ),
                (
                    'created_at',
                    models.DateTimeField(
                        auto_now_add=True,
                        verbose_name='创建时间',
                    ),
                ),
                (
                    'updated_at',
                    models.DateTimeField(
                        auto_now=True,
                        verbose_name='更新时间',
                    ),
                ),
            ],
            options={
                'verbose_name': '转写任务',
                'verbose_name_plural': '转写任务',
                # Newest tasks first.
                'ordering': ['-created_at'],
            },
        ),
    ]

View File

@@ -0,0 +1,23 @@
# Generated by Django 6.0.1 on 2026-03-11 05:12
from django.db import migrations, models
class Migration(migrations.Migration):
    """Adds the nullable ``evaluation`` and ``score`` fields to ``TranscriptionTask``."""

    # Must run after the migration that creates the model.
    dependencies = [
        ('ai_services', '0001_initial'),
    ]

    operations = [
        migrations.AddField(
            model_name='transcriptiontask',
            name='evaluation',
            field=models.TextField(
                blank=True,
                null=True,
                verbose_name='AI评语',
            ),
        ),
        migrations.AddField(
            model_name='transcriptiontask',
            name='score',
            field=models.IntegerField(
                blank=True,
                help_text='基于转写内容的评分',
                null=True,
                verbose_name='AI评分',
            ),
        ),
    ]

View File

@@ -0,0 +1,35 @@
import uuid
from django.db import models
from django.utils.translation import gettext_lazy as _
class TranscriptionTask(models.Model):
    """One uploaded file together with the state and results of its transcription task."""

    class Status(models.TextChoices):
        """Lifecycle states stored in ``TranscriptionTask.status``."""

        PENDING = 'PENDING', _('等待中')
        PROCESSING = 'PROCESSING', _('处理中')
        SUCCEEDED = 'SUCCEEDED', _('成功')
        FAILED = 'FAILED', _('失败')

    # Identity: a random UUID primary key that cannot be edited.
    id = models.UUIDField(
        primary_key=True,
        default=uuid.uuid4,
        editable=False,
    )

    # Source file and the remote task identifier (empty until one is assigned).
    file_url = models.URLField(
        verbose_name=_('文件链接'),
        max_length=1024,
    )
    task_id = models.CharField(
        verbose_name=_('听悟任务ID'),
        max_length=100,
        blank=True,
        null=True,
    )
    status = models.CharField(
        verbose_name=_('状态'),
        max_length=20,
        choices=Status.choices,
        default=Status.PENDING,
    )

    # Result fields; all optional.
    transcription = models.TextField(
        verbose_name=_('逐字稿'),
        blank=True,
        null=True,
    )
    summary = models.TextField(
        verbose_name=_('AI总结'),
        blank=True,
        null=True,
    )
    score = models.IntegerField(
        verbose_name=_('AI评分'),
        blank=True,
        null=True,
        help_text=_('基于转写内容的评分'),
    )
    evaluation = models.TextField(
        verbose_name=_('AI评语'),
        blank=True,
        null=True,
    )
    error_message = models.TextField(
        verbose_name=_('错误信息'),
        blank=True,
        null=True,
    )

    # Bookkeeping timestamps maintained by Django.
    created_at = models.DateTimeField(
        verbose_name=_('创建时间'),
        auto_now_add=True,
    )
    updated_at = models.DateTimeField(
        verbose_name=_('更新时间'),
        auto_now=True,
    )

    class Meta:
        verbose_name = _('转写任务')
        verbose_name_plural = _('转写任务')
        # Newest tasks first.
        ordering = ['-created_at']

    def __str__(self):
        """Return the primary key followed by the human-readable status label."""
        status_label = self.get_status_display()
        return "{} - {}".format(self.id, status_label)

View File

@@ -0,0 +1,8 @@
from rest_framework import serializers
from .models import TranscriptionTask
class TranscriptionTaskSerializer(serializers.ModelSerializer):
    """Read-only representation of a ``TranscriptionTask``."""

    class Meta:
        model = TranscriptionTask
        fields = [
            'id',
            'file_url',
            'task_id',
            'status',
            'transcription',
            'summary',
            'error_message',
            'created_at',
            'updated_at',
        ]
        # Every exposed field is read-only, so derive the list instead of
        # repeating it.
        read_only_fields = list(fields)

View File

@@ -0,0 +1,115 @@
import json
import logging
import time
import oss2
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.acs_exception.exceptions import ClientException, ServerException
# Try to import the latest API version; the import path may need adjusting if this fails.
try:
from aliyunsdktingwu.request.v20230930 import CreateTaskRequest, GetTaskInfoRequest
except ImportError:
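# NOTE: the ImportError is swallowed here, so CreateTaskRequest / GetTaskInfoRequest stay undefined and the first call that uses them raises NameError.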
pass
from django.conf import settings
logger = logging.getLogger(__name__)
class AliyunTingwuService:
    """Wrapper around Aliyun OSS uploads and the Tingwu transcription API.

    Credentials and endpoints are read from Django settings. When part of the
    configuration is missing, the matching client attribute (``bucket`` or
    ``client``) is set to ``None`` and a warning is logged, so callers can
    check those attributes before using the service.
    """

    # Default lifetime, in seconds, of a signed download URL (3 hours).
    SIGNED_URL_TTL = 3600 * 3

    def __init__(self):
        """Read the Aliyun settings and build the OSS and Tingwu clients."""
        self.access_key_id = settings.ALIYUN_ACCESS_KEY_ID
        self.access_key_secret = settings.ALIYUN_ACCESS_KEY_SECRET
        self.oss_bucket_name = settings.ALIYUN_OSS_BUCKET_NAME
        self.oss_endpoint = settings.ALIYUN_OSS_ENDPOINT
        self.tingwu_app_key = settings.ALIYUN_TINGWU_APP_KEY
        # Per the original author's note, Tingwu is served mainly from Beijing.
        self.region_id = "cn-beijing"

        has_credentials = bool(self.access_key_id and self.access_key_secret)

        # The bucket name is required too: the settings default is an empty
        # string, which is not a usable bucket.
        if has_credentials and self.oss_endpoint and self.oss_bucket_name:
            auth = oss2.Auth(self.access_key_id, self.access_key_secret)
            self.bucket = oss2.Bucket(auth, self.oss_endpoint, self.oss_bucket_name)
        else:
            self.bucket = None
            logger.warning("Aliyun OSS configuration missing.")

        if has_credentials:
            self.client = AcsClient(
                self.access_key_id,
                self.access_key_secret,
                self.region_id,
            )
        else:
            self.client = None
            logger.warning("Aliyun AccessKey configuration missing.")

    def upload_to_oss(self, file_obj, file_name, expires=SIGNED_URL_TTL):
        """Upload a file to OSS and return a signed GET URL for it.

        Args:
            file_obj: Open file object or byte stream to upload.
            file_name: Object key to store the file under.
            expires: Lifetime of the signed URL in seconds (default 3 hours).

        Returns:
            The signed URL produced by the bucket.

        Raises:
            RuntimeError: If the OSS bucket was not initialized.
            Exception: Any error raised by the OSS client is logged and
                re-raised unchanged.
        """
        if not self.bucket:
            raise RuntimeError("OSS Client not initialized")

        try:
            self.bucket.put_object(file_name, file_obj)
            return self.bucket.sign_url('GET', file_name, expires)
        except Exception as e:
            logger.exception("OSS Upload failed: %s", e)
            # Bare raise keeps the original traceback.
            raise

    def create_transcription_task(self, file_url, language="cn"):
        """Create a Tingwu transcription task for ``file_url``.

        Auto-chapters and summarization are requested along with the
        transcription.

        Args:
            file_url: URL of the audio file to transcribe.
            language: Source language code sent to Tingwu.

        Returns:
            The decoded JSON response of the CreateTask call.

        Raises:
            RuntimeError: If the client was not initialized or no AppKey is
                configured.
            ClientException, ServerException: Logged and re-raised on API errors.
        """
        if not self.client:
            raise RuntimeError("Tingwu Client not initialized")
        if not self.tingwu_app_key:
            # Fail before the network round-trip; the request cannot succeed
            # without an AppKey.
            raise RuntimeError("Tingwu AppKey not configured")

        request = CreateTaskRequest.CreateTaskRequest()
        request.set_AppKey(self.tingwu_app_key)

        input_param = {
            "FileUrl": file_url,
            "SourceLanguage": language,
            "TaskKey": "transcription_task",
        }
        request.set_Input(json.dumps(input_param))

        # Enable auto-chapters and summarization.
        parameters = {
            "Transcoding": {
                "TargetAudioFormat": "mp3",
            },
            "AutoChaptersEnabled": True,
            "SummarizationEnabled": True,
        }
        request.set_Parameters(json.dumps(parameters))

        try:
            response = self.client.do_action_with_exception(request)
            return json.loads(response)
        except (ClientException, ServerException) as e:
            logger.exception("Tingwu CreateTask failed: %s", e)
            raise

    def get_task_info(self, task_id):
        """Fetch the status and results of a Tingwu task.

        Args:
            task_id: Identifier returned when the task was created.

        Returns:
            The decoded JSON response of the GetTaskInfo call.

        Raises:
            RuntimeError: If the Tingwu client was not initialized.
            ClientException, ServerException: Logged and re-raised on API errors.
        """
        if not self.client:
            raise RuntimeError("Tingwu Client not initialized")

        request = GetTaskInfoRequest.GetTaskInfoRequest()
        request.set_TaskId(task_id)

        try:
            response = self.client.do_action_with_exception(request)
            return json.loads(response)
        except (ClientException, ServerException) as e:
            logger.exception("Tingwu GetTaskInfo failed: %s", e)
            raise

View File

@@ -0,0 +1,3 @@
from django.test import TestCase
# Create your tests here.

View File

@@ -0,0 +1,10 @@
from django.urls import path, include
from rest_framework.routers import DefaultRouter
from .views import TranscriptionTaskViewSet
# Register the transcription viewset with a DRF router under "transcriptions".
router = DefaultRouter()
router.register(r'transcriptions', TranscriptionTaskViewSet)

# Mount every router-generated route at the root of this URLconf.
urlpatterns = [path('', include(router.urls))]

View File

@@ -0,0 +1,140 @@
import logging
import uuid
from rest_framework import viewsets, status
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.parsers import MultiPartParser, FormParser
from django.conf import settings
from .models import TranscriptionTask
from .serializers import TranscriptionTaskSerializer
from .services import AliyunTingwuService
logger = logging.getLogger(__name__)
class TranscriptionTaskViewSet(viewsets.ModelViewSet):
    """Upload audio files and track their Tingwu transcription tasks.

    ``create`` uploads the file to OSS and starts a Tingwu task;
    ``refresh_status`` polls Tingwu and stores the results once the task
    reaches a terminal state.
    """

    queryset = TranscriptionTask.objects.all()
    serializer_class = TranscriptionTaskSerializer
    parser_classes = (MultiPartParser, FormParser)

    # Remote status values treated as "finished successfully". The original
    # code checked 'COMPLETE'; Tingwu's documentation lists 'COMPLETED'.
    _COMPLETED_STATUSES = ('COMPLETE', 'COMPLETED')

    @staticmethod
    def _build_object_key(upload_name):
        """Return a unique OSS object key that keeps a sanitized file extension.

        The client controls the filename, so only alphanumeric characters of
        the extension are kept. A name without a dot yields a key with no
        extension rather than reusing the whole filename.
        """
        _, dot, suffix = (upload_name or '').rpartition('.')
        extension = ''.join(ch for ch in suffix if ch.isalnum()) if dot else ''
        key = f"transcription/{uuid.uuid4()}"
        return f"{key}.{extension}" if extension else key

    @staticmethod
    def _payload(response):
        """Return the ``Data`` envelope of a Tingwu response if it has one.

        Falls back to the response itself so a flat response keeps working.
        """
        data = response.get('Data') if isinstance(response, dict) else None
        return data if isinstance(data, dict) else response

    @staticmethod
    def _mark_failed(task_record, message):
        """Persist a FAILED status together with the error message."""
        task_record.status = TranscriptionTask.Status.FAILED
        task_record.error_message = message
        task_record.save()

    @staticmethod
    def _extract_transcription(task_result):
        """Join the sentence texts of the transcription section with spaces.

        Raises:
            ValueError: If the section is present but not inline JSON. The
                transcription is the main output, so the task is not marked
                successful without it.
        """
        # NOTE(review): Tingwu's GetTaskInfo documentation appears to describe
        # Result entries as download URLs rather than inline JSON; if so this
        # path must fetch them first - confirm against a live response.
        section = task_result.get('Transcription', {})
        if not isinstance(section, dict):
            raise ValueError("Tingwu 'Transcription' result is not inline JSON")
        sentences = section.get('Sentences', [])
        return " ".join(sentence.get('Text', '') for sentence in sentences)

    @staticmethod
    def _extract_summary(task_result):
        """Return the summary text, falling back to chapter headlines and summaries."""
        summarization = task_result.get('Summarization', {})
        if not isinstance(summarization, dict):
            # A string here would turn the key checks below into substring
            # tests, so treat it as "no inline summary".
            summarization = {}

        if 'Text' in summarization:
            return summarization['Text']
        if 'Headline' in summarization:
            return summarization['Headline']

        # No whole-document summary: collect what the chapters provide.
        summary_parts = []
        for chapter in task_result.get('Chapters', []):
            if 'Headline' in chapter:
                summary_parts.append(chapter['Headline'])
            if 'Summary' in chapter:
                summary_parts.append(chapter['Summary'])
        return "\n".join(summary_parts)

    def create(self, request, *args, **kwargs):
        """Upload an audio file and create a Tingwu transcription task.

        Expects a multipart field named ``file``. Responds with 201 and the
        serialized task on success, 400 when no file is supplied, 503 when
        the Aliyun clients are not configured, and 500 on any other failure.
        """
        file_obj = request.FILES.get('file')
        if not file_obj:
            return Response({'error': '未提供文件'}, status=status.HTTP_400_BAD_REQUEST)

        service = AliyunTingwuService()
        if not service.bucket or not service.client:
            return Response({'error': '阿里云服务未配置'}, status=status.HTTP_503_SERVICE_UNAVAILABLE)

        try:
            # 1. Upload the file to OSS.
            file_name = self._build_object_key(file_obj.name)
            oss_url = service.upload_to_oss(file_obj, file_name)

            # 2. Record the task before calling Tingwu so failures are stored.
            task_record = TranscriptionTask.objects.create(
                file_url=oss_url,
                status=TranscriptionTask.Status.PENDING,
            )

            # 3. Ask Tingwu to start the transcription.
            try:
                tingwu_response = service.create_transcription_task(oss_url)
                task_id = self._payload(tingwu_response).get('TaskId')
            except Exception as e:
                self._mark_failed(task_record, str(e))
                logger.exception("创建听悟任务失败: %s", e)
                return Response(
                    {'error': f"创建听悟任务失败: {str(e)}"},
                    status=status.HTTP_500_INTERNAL_SERVER_ERROR,
                )

            if not task_id:
                self._mark_failed(task_record, "未能获取 TaskId")
                return Response(
                    {'error': '未能获取 TaskId'},
                    status=status.HTTP_500_INTERNAL_SERVER_ERROR,
                )

            task_record.task_id = task_id
            task_record.status = TranscriptionTask.Status.PROCESSING
            task_record.save()

            serializer = self.get_serializer(task_record)
            return Response(serializer.data, status=status.HTTP_201_CREATED)
        except Exception as e:
            logger.exception("处理上传请求失败: %s", e)
            return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

    @action(detail=True, methods=['get'])
    def refresh_status(self, request, pk=None):
        """Refresh the task from Tingwu and return its current representation.

        Tasks already SUCCEEDED or FAILED are returned without a remote call.
        Responds with 400 when the task has no remote id and 500 when the
        remote query or result parsing fails.
        """
        task = self.get_object()

        terminal_states = (
            TranscriptionTask.Status.SUCCEEDED,
            TranscriptionTask.Status.FAILED,
        )
        if task.status in terminal_states:
            return Response(self.get_serializer(task).data)

        if not task.task_id:
            return Response({'error': '任务ID不存在'}, status=status.HTTP_400_BAD_REQUEST)

        service = AliyunTingwuService()
        try:
            result = self._payload(service.get_task_info(task.task_id))
            task_status = result.get('TaskStatus')

            if task_status in self._COMPLETED_STATUSES:
                task_result = result.get('Result', {})
                task.transcription = self._extract_transcription(task_result)
                task.summary = self._extract_summary(task_result)
                task.status = TranscriptionTask.Status.SUCCEEDED
                task.save()
            elif task_status == 'FAILED':
                task.status = TranscriptionTask.Status.FAILED
                task.error_message = (
                    result.get('TaskStatusText')
                    or result.get('ErrorMessage')
                    or 'Unknown error'
                )
                task.save()
            # Any other status leaves the record untouched.

            return Response(self.get_serializer(task).data)
        except Exception as e:
            logger.exception("刷新任务状态失败: %s", e)
            return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

View File

@@ -12,10 +12,14 @@ https://docs.djangoproject.com/en/6.0/ref/settings/
import os
from pathlib import Path
from dotenv import load_dotenv
# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent
# Load .env file
load_dotenv(BASE_DIR / '.env')
# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/6.0/howto/deployment/checklist/
@@ -48,6 +52,7 @@ INSTALLED_APPS = [
'shop',
'community',
'competition',
'ai_services',
]
MIDDLEWARE = [
@@ -348,6 +353,17 @@ UNFOLD = {
},
],
},
{
"title": "AI 听悟",
"separator": True,
"items": [
{
"title": "转写与总结任务",
"icon": "record_voice_over",
"link": reverse_lazy("admin:ai_services_transcriptiontask_changelist"),
},
],
},
{
"title": "系统配置",
"separator": True,
@@ -390,3 +406,11 @@ LOGGING = {
'level': 'INFO',
},
}
# Aliyun configuration, read from the environment (populated from .env above).
ALIYUN_ACCESS_KEY_ID = os.environ.get('ALIYUN_ACCESS_KEY_ID', '')
ALIYUN_ACCESS_KEY_SECRET = os.environ.get('ALIYUN_ACCESS_KEY_SECRET', '')
ALIYUN_OSS_BUCKET_NAME = os.environ.get('ALIYUN_OSS_BUCKET_NAME', '')
# The default carries an explicit https:// scheme: oss2 treats a scheme-less
# endpoint as plain HTTP, and .env.example already uses the https form.
ALIYUN_OSS_ENDPOINT = os.environ.get('ALIYUN_OSS_ENDPOINT', 'https://oss-cn-shanghai.aliyuncs.com')
ALIYUN_OSS_INTERNAL_ENDPOINT = os.environ.get('ALIYUN_OSS_INTERNAL_ENDPOINT', '')
ALIYUN_TINGWU_APP_KEY = os.environ.get('ALIYUN_TINGWU_APP_KEY', '')  # Tingwu AppKey

View File

@@ -9,6 +9,7 @@ urlpatterns = [
path('api/', include('shop.urls')),
path('api/community/', include('community.urls')),
path('api/competition/', include('competition.urls')),
path('api/ai/', include('ai_services.urls')),
# Swagger文档路由
path('api/schema/', SpectacularAPIView.as_view(), name='schema'),

View File

@@ -24,3 +24,7 @@ django-filter
django-admin-sortable2
openpyxl
aliyun-python-sdk-core==2.16.0
aliyun-python-sdk-tingwu==1.0.7
oss2==2.19.1
python-dotenv