from fastapi import FastAPI, WebSocket, WebSocketDisconnect import uvicorn import asyncio import os import subprocess import struct import base64 import time import hashlib import json from dotenv import load_dotenv import dashscope from dashscope.audio.asr import Recognition, RecognitionCallback, RecognitionResult from dashscope import ImageSynthesis from dashscope import Generation # 加载环境变量 load_dotenv() dashscope.api_key = os.getenv("DASHSCOPE_API_KEY") app = FastAPI() # 字体文件配置 FONT_FILE = "GB2312-16.bin" FONT_CHUNK_SIZE = 512 HIGH_FREQ_CHARS = "的一是在不了有和人这中大为上个国我以要他时来用们生到作地于出就分对成会可主发年动同工也能下过子说产种面而方后多定行学法所民得经十三之进着等部度家电力里如水化高自二理起小物现实加量都两体制机当使点从业本去把性好应开它合还因由其些然前外天政四日那社义事平形相全表间样与关各重新线内数正心反你明看原又么利比或但质气第向道命此变条只没结解问意建月公无系军很情者最立代想已通并提直题党程展五果料象员革位入常文总次品式活设及管特件长求老头基资边流路级少图山统接知较将组见计别她手角期根论运农指几九区强放决西被干做必战先回则任取据处队南给色光门即保治北造百规热领七海口东导器压志世金增争济阶油思术极交受联什认六共权收证改清己美再采转更单风切打白教速花带安场身车例真务具万每目至达走积示议声报斗完类八离华名确才科张信马节话米整空元况今集温传土许步群广石记需段研界拉林律叫且究观越织装影算低持音众书布复容儿须际商非验连断深难近矿千周委素技备半办青省列习响约支般史感劳便团往酸历市克何除消构府称太准精值号率族维划选标写存候毛亲快效斯院查江型眼王按格养易置派层片始却专状育厂京识适属圆包火住调满县局照参红细引听该铁价严龙飞" # 高频字对应的Unicode码点列表 HIGH_FREQ_UNICODE = [ord(c) for c in HIGH_FREQ_CHARS] # 字体缓存 font_cache = {} font_md5 = {} def calculate_md5(filepath): """计算文件的MD5哈希值""" if not os.path.exists(filepath): return None hash_md5 = hashlib.md5() with open(filepath, "rb") as f: for chunk in iter(lambda: f.read(4096), b""): hash_md5.update(chunk) return hash_md5.hexdigest() def init_font_cache(): """初始化字体缓存和MD5""" global font_cache, font_md5 script_dir = os.path.dirname(os.path.abspath(__file__)) font_path = os.path.join(script_dir, FONT_FILE) if not os.path.exists(font_path): font_path = os.path.join(script_dir, "..", FONT_FILE) if os.path.exists(font_path): font_md5 = calculate_md5(font_path) print(f"Font MD5: {font_md5}") # 预加载高频字到缓存 for unicode_val in HIGH_FREQ_UNICODE: try: char = chr(unicode_val) gb_bytes = char.encode('gb2312') if len(gb_bytes) == 2: code = struct.unpack('>H', gb_bytes)[0] area = (code >> 8) - 0xA0 index = (code & 0xFF) - 0xA0 if area >= 1 and index >= 1: offset = ((area - 1) * 94 + (index - 1)) * 32 with open(font_path, "rb") as f: f.seek(offset) font_data = f.read(32) if len(font_data) == 32: font_cache[unicode_val] = font_data except: pass print(f"Preloaded {len(font_cache)} high-frequency characters") # 启动时初始化字体缓存 init_font_cache() # 存储接收到的音频数据 audio_buffer = bytearray() RECORDING_RAW_FILE = "received_audio.raw" RECORDING_MP3_FILE = "received_audio.mp3" VOLUME_GAIN = 10.0 GENERATED_IMAGE_FILE = "generated_image.png" GENERATED_THUMB_FILE = "generated_thumb.bin" OUTPUT_DIR = "output_images" if not os.path.exists(OUTPUT_DIR): os.makedirs(OUTPUT_DIR) image_counter = 0 def get_output_path(): global image_counter image_counter += 1 timestamp = time.strftime("%Y%m%d_%H%M%S") return os.path.join(OUTPUT_DIR, f"image_{timestamp}_{image_counter}.png") THUMB_SIZE = 245 # 字体请求队列(用于重试机制) font_request_queue = {} FONT_RETRY_MAX = 3 def get_font_data(unicode_val): """从字体文件获取单个字符数据(带缓存)""" if unicode_val in font_cache: return font_cache[unicode_val] try: char = chr(unicode_val) gb_bytes = char.encode('gb2312') if len(gb_bytes) == 2: code = struct.unpack('>H', gb_bytes)[0] area = (code >> 8) - 0xA0 index = (code & 0xFF) - 0xA0 if area >= 1 and index >= 1: offset = ((area - 1) * 94 + (index - 1)) * 32 script_dir = os.path.dirname(os.path.abspath(__file__)) font_path = os.path.join(script_dir, FONT_FILE) if not os.path.exists(font_path): font_path = os.path.join(script_dir, "..", FONT_FILE) if not os.path.exists(font_path): font_path = FONT_FILE if os.path.exists(font_path): with open(font_path, "rb") as f: f.seek(offset) font_data = f.read(32) if len(font_data) == 32: font_cache[unicode_val] = font_data return font_data except: pass return None def send_font_batch_with_retry(websocket, code_list, retry_count=0): """批量发送字体数据(带重试机制)""" global font_request_queue success_codes = set() failed_codes = [] for code_str in code_list: if not code_str: continue try: unicode_val = int(code_str) font_data = get_font_data(unicode_val) if font_data: import binascii hex_data = binascii.hexlify(font_data).decode('utf-8') response = f"FONT_DATA:{code_str}:{hex_data}" asyncio.run_coroutine_threadsafe( websocket.send_text(response), asyncio.get_event_loop() ) success_codes.add(unicode_val) else: failed_codes.append(code_str) except Exception as e: print(f"Error processing font {code_str}: {e}") failed_codes.append(code_str) # 记录失败的请求用于重试 if failed_codes and retry_count < FONT_RETRY_MAX: req_key = f"retry_{retry_count}_{time.time()}" font_request_queue[req_key] = { 'codes': failed_codes, 'retry': retry_count + 1, 'timestamp': time.time() } return len(success_codes), failed_codes async def send_font_with_fragment(websocket, unicode_val): """使用二进制分片方式发送字体数据""" font_data = get_font_data(unicode_val) if not font_data: return False # 分片发送 total_size = len(font_data) chunk_size = FONT_CHUNK_SIZE for i in range(0, total_size, chunk_size): chunk = font_data[i:i+chunk_size] seq_num = i // chunk_size # 构造二进制消息头: 2字节序列号 + 2字节总片数 + 数据 header = struct.pack('> 8) - 0xA0 index = (code & 0xFF) - 0xA0 if area >= 1 and index >= 1: offset = ((area - 1) * 94 + (index - 1)) * 32 script_dir = os.path.dirname(os.path.abspath(__file__)) font_path = os.path.join(script_dir, FONT_FILE) if not os.path.exists(font_path): font_path = os.path.join(script_dir, "..", FONT_FILE) if os.path.exists(font_path): with open(font_path, "rb") as f: f.seek(offset) font_data = f.read(32) else: font_data = None else: font_data = None if font_data: import binascii hex_data = binascii.hexlify(font_data).decode('utf-8') response = f"FONT_DATA:{code_str}:{hex_data}" await websocket.send_text(response) except Exception as e: print(f"Error handling font request: {e}") class MyRecognitionCallback(RecognitionCallback): def __init__(self, websocket: WebSocket, loop: asyncio.AbstractEventLoop): self.websocket = websocket self.loop = loop self.final_text = "" # 保存最终识别结果 def on_open(self) -> None: print("ASR Session started") def on_close(self) -> None: print("ASR Session closed") def on_event(self, result: RecognitionResult) -> None: if result.get_sentence(): text = result.get_sentence()['text'] print(f"ASR Result: {text}") self.final_text = text # 保存识别结果 # 将识别结果发送回客户端 try: asyncio.run_coroutine_threadsafe( self.websocket.send_text(f"ASR:{text}"), self.loop ) except Exception as e: print(f"Failed to send ASR result to client: {e}") def process_chunk_32_to_16(chunk_bytes, gain=1.0): processed_chunk = bytearray() # Iterate 4 bytes at a time for i in range(0, len(chunk_bytes), 4): if i+3 < len(chunk_bytes): # 取 chunk[i+2] 和 chunk[i+3] 组成 16-bit signed int sample = struct.unpack_from(' 32767: sample = 32767 elif sample < -32768: sample = -32768 # 重新打包为 16-bit little-endian processed_chunk.extend(struct.pack('> 3) & 0x1F g6 = (g >> 2) & 0x3F b5 = (b >> 3) & 0x1F # 小端模式:低字节在前 rgb565 = (r5 << 11) | (g6 << 5) | b5 rgb565_data.extend(struct.pack('H', gb_bytes)[0] else: print(f"Character {char} is not a valid 2-byte GB2312 char") # Send empty/dummy? Or just skip. # Better to send something so client doesn't wait forever if it counts responses. # But client probably uses a set of missing chars. continue # Calc offset area = (code >> 8) - 0xA0 index = (code & 0xFF) - 0xA0 if area >= 1 and index >= 1: offset = ((area - 1) * 94 + (index - 1)) * 32 # Read font file # Optimization: Open file once outside loop? # For simplicity, keep it here, OS caching helps. script_dir = os.path.dirname(os.path.abspath(__file__)) font_path = os.path.join(script_dir, FONT_FILE) if not os.path.exists(font_path): font_path = os.path.join(script_dir, "..", FONT_FILE) if not os.path.exists(font_path): font_path = FONT_FILE if os.path.exists(font_path): with open(font_path, "rb") as f: f.seek(offset) font_data = f.read(32) if len(font_data) == 32: import binascii hex_data = binascii.hexlify(font_data).decode('utf-8') response = f"FONT_DATA:{code_str}:{hex_data}" await websocket.send_text(response) # Small yield to let network flush? # await asyncio.sleep(0.001) except Exception as e: print(f"Error processing batch item {code_str}: {e}") # Send a completion marker await websocket.send_text("FONT_BATCH_END") except Exception as e: print(f"Error handling BATCH FONT request: {e}") await websocket.send_text("FONT_BATCH_END") # Ensure we unblock client elif text.startswith("GET_FONT_UNICODE:") or text.startswith("GET_FONT:"): # 格式: GET_FONT_UNICODE:12345 (decimal) or GET_FONT:0xA1A1 (hex) try: is_unicode = text.startswith("GET_FONT_UNICODE:") code_str = text.split(":")[1] target_code_str = code_str # Used for response if is_unicode: unicode_val = int(code_str) char = chr(unicode_val) try: gb_bytes = char.encode('gb2312') if len(gb_bytes) == 2: code = struct.unpack('>H', gb_bytes)[0] else: print(f"Character {char} is not a valid 2-byte GB2312 char") continue except Exception as e: print(f"Failed to encode {char} to gb2312: {e}") continue else: code = int(code_str, 16) # 计算偏移量 # GB2312 编码范围:0xA1A1 - 0xFEFE # 区码:高字节 - 0xA0 # 位码:低字节 - 0xA0 area = (code >> 8) - 0xA0 index = (code & 0xFF) - 0xA0 if area >= 1 and index >= 1: offset = ((area - 1) * 94 + (index - 1)) * 32 # 读取字体文件 # 注意:这里为了简单,每次都打开文件。如果并发高,应该缓存文件句柄或内容。 # 假设字体文件在当前目录或上级目录 # Prioritize finding the file in the script's directory script_dir = os.path.dirname(os.path.abspath(__file__)) font_path = os.path.join(script_dir, FONT_FILE) # Fallback: check one level up if not os.path.exists(font_path): font_path = os.path.join(script_dir, "..", FONT_FILE) # Fallback: check current working directory if not os.path.exists(font_path): font_path = FONT_FILE if os.path.exists(font_path): print(f"Reading font from: {font_path} (Offset: {offset})") with open(font_path, "rb") as f: f.seek(offset) font_data = f.read(32) if len(font_data) == 32: import binascii hex_data = binascii.hexlify(font_data).decode('utf-8') # Return the original requested code (unicode or hex) so client can map it back response = f"FONT_DATA:{target_code_str}:{hex_data}" # print(f"Sending Font Response: {response[:30]}...") await websocket.send_text(response) else: print(f"Error: Read {len(font_data)} bytes for font data (expected 32)") else: print(f"Font file not found: {font_path}") else: print(f"Invalid GB2312 code derived: {code:X} (Area: {area}, Index: {index})") except Exception as e: print(f"Error handling FONT request: {e}") elif "bytes" in message: # 接收音频数据并追加到缓冲区 data = message["bytes"] audio_buffer.extend(data) # 实时处理并发送给 ASR pcm_chunk = process_chunk_32_to_16(data, VOLUME_GAIN) processed_buffer.extend(pcm_chunk) if recognition: try: recognition.send_audio_frame(pcm_chunk) except Exception as e: print(f"Error sending audio frame to ASR: {e}") except WebSocketDisconnect: print("Client disconnected") if recognition: try: recognition.stop() except: pass except Exception as e: print(f"Error: {e}") if recognition: try: recognition.stop() except: pass if __name__ == "__main__": # 获取本机IP,方便ESP32连接 import socket hostname = socket.gethostname() local_ip = socket.gethostbyname(hostname) print(f"Server running on ws://{local_ip}:8000/ws/audio") uvicorn.run(app, host="0.0.0.0", port=8000)