from fastapi import FastAPI, WebSocket, WebSocketDisconnect import uvicorn import asyncio import os import subprocess import struct from dotenv import load_dotenv import dashscope from dashscope.audio.asr import Recognition, RecognitionCallback, RecognitionResult import json # 加载环境变量 load_dotenv() dashscope.api_key = os.getenv("DASHSCOPE_API_KEY") app = FastAPI() # 存储接收到的音频数据 audio_buffer = bytearray() RECORDING_RAW_FILE = "received_audio.raw" RECORDING_MP3_FILE = "received_audio.mp3" VOLUME_GAIN = 10.0 # 放大倍数 FONT_FILE = "GB2312-16.bin" class MyRecognitionCallback(RecognitionCallback): def __init__(self, websocket: WebSocket, loop: asyncio.AbstractEventLoop): self.websocket = websocket self.loop = loop def on_open(self) -> None: print("ASR Session started") def on_close(self) -> None: print("ASR Session closed") def on_event(self, result: RecognitionResult) -> None: if result.get_sentence(): text = result.get_sentence()['text'] print(f"ASR Result: {text}") # 将识别结果发送回客户端 try: asyncio.run_coroutine_threadsafe( self.websocket.send_text(f"ASR:{text}"), self.loop ) except Exception as e: print(f"Failed to send ASR result to client: {e}") def process_chunk_32_to_16(chunk_bytes, gain=1.0): processed_chunk = bytearray() # Iterate 4 bytes at a time for i in range(0, len(chunk_bytes), 4): if i+3 < len(chunk_bytes): # 取 chunk[i+2] 和 chunk[i+3] 组成 16-bit signed int sample = struct.unpack_from(' 32767: sample = 32767 elif sample < -32768: sample = -32768 # 重新打包为 16-bit little-endian processed_chunk.extend(struct.pack('> 8) - 0xA0 index = (code & 0xFF) - 0xA0 if area >= 1 and index >= 1: offset = ((area - 1) * 94 + (index - 1)) * 32 # 读取字体文件 # 注意:这里为了简单,每次都打开文件。如果并发高,应该缓存文件句柄或内容。 # 假设字体文件在当前目录或上级目录 # Prioritize finding the file in the script's directory script_dir = os.path.dirname(os.path.abspath(__file__)) font_path = os.path.join(script_dir, FONT_FILE) # Fallback: check one level up if not os.path.exists(font_path): font_path = os.path.join(script_dir, "..", FONT_FILE) # Fallback: check current working directory if not os.path.exists(font_path): font_path = FONT_FILE if os.path.exists(font_path): print(f"Reading font from: {font_path} (Offset: {offset})") with open(font_path, "rb") as f: f.seek(offset) font_data = f.read(32) if len(font_data) == 32: import binascii hex_data = binascii.hexlify(font_data).decode('utf-8') response = f"FONT_DATA:{hex_code}:{hex_data}" print(f"Sending Font Response: {response[:30]}...") await websocket.send_text(response) else: print(f"Error: Read {len(font_data)} bytes for font data (expected 32)") else: print(f"Font file not found: {font_path}") else: print(f"Invalid GB2312 code: {hex_code} (Area: {area}, Index: {index})") except Exception as e: print(f"Error handling GET_FONT: {e}") elif "bytes" in message: # 接收音频数据并追加到缓冲区 data = message["bytes"] audio_buffer.extend(data) # 实时处理并发送给 ASR pcm_chunk = process_chunk_32_to_16(data, VOLUME_GAIN) processed_buffer.extend(pcm_chunk) if recognition: try: recognition.send_audio_frame(pcm_chunk) except Exception as e: print(f"Error sending audio frame to ASR: {e}") except WebSocketDisconnect: print("Client disconnected") if recognition: try: recognition.stop() except: pass except Exception as e: print(f"Error: {e}") if recognition: try: recognition.stop() except: pass if __name__ == "__main__": # 获取本机IP,方便ESP32连接 import socket hostname = socket.gethostname() local_ip = socket.gethostbyname(hostname) print(f"Server running on ws://{local_ip}:8000/ws/audio") uvicorn.run(app, host="0.0.0.0", port=8000)