diff --git a/font.py b/font.py index aa6e669..42e67fd 100644 --- a/font.py +++ b/font.py @@ -19,8 +19,47 @@ class Font: color_bytes = struct.pack(">H", color) bg_bytes = struct.pack(">H", bg) + # Create LUT for current color/bg + lut = [bytearray(16) for _ in range(256)] + for i in range(256): + for bit in range(8): + # bit 7 is first pixel (leftmost) + # target index: (7-bit)*2 + val = (i >> bit) & 1 + idx = (7 - bit) * 2 + if val: + lut[i][idx] = color_bytes[0] + lut[i][idx+1] = color_bytes[1] + else: + lut[i][idx] = bg_bytes[0] + lut[i][idx+1] = bg_bytes[1] + initial_x = x + # 1. Identify missing fonts + missing_codes = set() + for char in text: + if ord(char) > 127: + code = ord(char) + if code not in self.cache: + missing_codes.add(code) + + # 2. Batch request missing fonts + if missing_codes and self.ws: + # Convert to list for consistent order/string + missing_list = list(missing_codes) + # Limit batch size? Maybe 20 chars at a time? + # For short ASR result, usually < 20 chars. + + req_str = ",".join([str(c) for c in missing_list]) + print(f"Batch requesting fonts: {req_str}") + try: + self.ws.send(f"GET_FONTS_BATCH:{req_str}") + self._wait_for_fonts(missing_codes) + except Exception as e: + print(f"Batch font request failed: {e}") + + # 3. Draw text for char in text: # Handle newlines if char == '\n': @@ -38,56 +77,20 @@ class Font: is_chinese = False buf_data = None - # Check if it's Chinese + # Check if it's Chinese (or non-ASCII) if ord(char) > 127: - try: - gb = char.encode('gb2312') - if len(gb) == 2: - code = struct.unpack('>H', gb)[0] - # Try to get from cache - if code in self.cache: - buf_data = self.cache[code] - is_chinese = True - else: - # Need to fetch from server - # Since we can't block easily here (unless we use a blocking socket recv or a callback mechanism), - # we have to rely on the main loop to handle responses. - # But we want to draw *now*. - # - # Solution: - # 1. Send request - # 2. 
def _wait_for_fonts(self, target_codes, timeout_ms=2000, poll_ms=100):
    """Block until the requested font glyphs arrive or the wait times out.

    Messages are read through ``self.ws.recv()`` and handled as follows:

    * ``FONT_DATA:<code>:<hex>`` -- the hex payload is decoded and stored in
      ``self.cache[<code>]``.  ``<code>`` is parsed as hexadecimal when it
      starts with ``0x`` and as decimal otherwise.  Glyphs are cached even
      when their code is not in ``target_codes``.
    * ``FONT_BATCH_END`` -- the batch is over; every code still outstanding
      is cached as ``None`` and ``target_codes`` is cleared, which ends the
      wait.
    * anything else (other text messages, binary frames) -- set aside and,
      before returning, put back at the FRONT of ``self.ws.unread_messages``
      in arrival order so later ``recv()`` calls see them first.

    Args:
        target_codes: mutable set of integer codes being waited for.  It is
            modified in place: satisfied codes are removed and the set is
            cleared on ``FONT_BATCH_END``.
        timeout_ms: overall wait budget in milliseconds (default 2000, the
            previously hard-coded value).
        poll_ms: how long a single socket poll may block, in milliseconds
            (default 100, the previously hard-coded value).

    Returns:
        None.  Results are delivered through ``self.cache``.
    """
    if not self.ws or not target_codes:
        return

    # Kept as an instance attribute, as before; it only holds messages that
    # were read while waiting but are not font traffic.
    self.local_deferred = []
    poller = None
    start = time.ticks_ms()

    try:
        while target_codes and time.ticks_diff(time.ticks_ms(), start) < timeout_ms:
            # recv() serves queued messages first, so the socket only needs
            # polling when that queue is empty.
            wait_on_socket = not self.ws.unread_messages
            if wait_on_socket and poller is None:
                # Imported lazily and only once per call; the poll object is
                # reused for every iteration instead of being rebuilt.
                import uselect
                poller = uselect.poll()

            try:
                if wait_on_socket:
                    # Re-registering is cheap and follows self.ws.sock if the
                    # client swapped its socket.
                    poller.register(self.ws.sock, uselect.POLLIN)
                    if not poller.poll(poll_ms):
                        continue

                msg = self.ws.recv()
                if msg is None:
                    # Nothing usable was returned; keep waiting until timeout.
                    continue

                if not isinstance(msg, str):
                    # Binary frame: not ours, hand it back later.
                    self.local_deferred.append(msg)
                elif msg == "FONT_BATCH_END":
                    # Server is done: whatever is still outstanding failed.
                    for code in target_codes:
                        print(f"Batch missing/failed: {code}")
                        self.cache[code] = None  # Cache failure
                    target_codes.clear()
                elif msg.startswith("FONT_DATA:"):
                    parts = msg.split(":")
                    if len(parts) >= 3:
                        try:
                            key_str = parts[1]
                            if key_str.startswith("0x"):
                                code = int(key_str, 16)
                            else:
                                code = int(key_str)
                            glyph = binascii.unhexlify(parts[2])
                        except ValueError as e:
                            # Malformed code or payload: drop this message
                            # only, and say so instead of hiding it.
                            print(f"Bad FONT_DATA ignored: {e}")
                        else:
                            self.cache[code] = glyph
                            target_codes.discard(code)
                else:
                    # Other text message, e.g. START_PLAYBACK.
                    self.local_deferred.append(msg)
            except Exception as e:
                # Best effort: report and keep waiting until the timeout.
                print(f"Wait font error: {e}")
    finally:
        # Always hand deferred messages back, even if something propagated,
        # so that unrelated traffic is never lost.
        if self.local_deferred:
            self.ws.unread_messages = self.local_deferred + self.ws.unread_messages
        self.local_deferred = []


def _wait_for_font(self, target_code_str):
    """Deprecated single-glyph wait; does nothing and returns None.

    Retained only so the name still exists; the batch wait in
    ``_wait_for_fonts`` replaces it.
    """
    pass
def _draw_bitmap(self, tft, bitmap, x, y, w, h, lut):
    """Expand a 1bpp glyph bitmap through ``lut`` and blit it to the display.

    Every byte of ``bitmap`` selects one ``lut`` entry, and the entries are
    laid out back to back in a single buffer that is handed to
    ``tft.blit_buffer``.

    Args:
        tft: display object; only ``blit_buffer(buf, x, y, w, h)`` is used.
        bitmap: sequence of byte values, 1 bit per pixel (``w * h / 8``
            bytes, i.e. 32 for a 16x16 glyph).
        x, y, w, h: passed unchanged to ``tft.blit_buffer``.
        lut: sequence indexed by byte value (0-255).  Each entry holds the
            ready-made pixel bytes for the 8 pixels of that byte -- 16 bytes
            when built for 2-byte pixels, as the text-drawing code does.
            Entries may be ``bytes`` or ``bytearray``.

    Returns:
        None.
    """
    # 8 pixels per bitmap byte, 2 bytes per pixel.
    chunk_len = 16

    # Fill one pre-sized buffer by slice assignment rather than
    # b''.join(...): the LUT entries are bytearrays, and MicroPython's join
    # is understood to require items of the same type as the separator
    # (NOTE(review): confirm on the target firmware).  Slice assignment
    # accepts bytes and bytearray alike.
    rgb_buf = bytearray(len(bitmap) * chunk_len)
    pos = 0
    for value in bitmap:
        rgb_buf[pos:pos + chunk_len] = lut[value]
        pos += chunk_len

    tft.blit_buffer(rgb_buf, x, y, w, h)
playback.") - break - elif msg.startswith("ASR:"): - print_nice_asr(msg[4:], display) - - elif isinstance(msg, bytes): - # 播放接收到的音频数据 - if speaker.i2s: - # 使用 try-except 防止 write 阻塞导致的问题 - try: - speaker.i2s.write(msg) - except Exception as e: - print(f"I2S Write Error: {e}") - - elif msg is None: - print("WS Connection closed or error (recv returned None)") - try: - ws.close() - except: - pass - ws = None - break - else: - # No data received in this poll window - if time.ticks_diff(time.ticks_ms(), last_data_time) > playback_timeout: - print("Playback timeout - no data received for 5 seconds") - break - - # Feed watchdog or do other small tasks if needed - # time.sleep(0.01) - + if isinstance(msg, str) and msg.startswith("ASR:"): + print_nice_asr(msg[4:], display) + # 不需要处理其他类型的消息了 except Exception as e: - print(f"Playback loop error: {e}") + print(f"Stop recording error: {e}") try: ws.close() except: pass ws = None - - # 恢复 Speaker 原始配置 - if speaker.i2s: speaker.i2s.deinit() - speaker._init_audio() gc.collect() diff --git a/websocket_client.py b/websocket_client.py index 8c4cbed..cb6d08e 100644 --- a/websocket_client.py +++ b/websocket_client.py @@ -10,6 +10,7 @@ class WebSocketClient: self.sock = None self.uri = uri self.timeout = timeout + self.unread_messages = [] # Queue for buffered messages self.connect() def connect(self): @@ -109,6 +110,13 @@ class WebSocketClient: self.sock.write(masked_data) def recv(self): + # 1. 
Check if we have unread messages in the buffer + if self.unread_messages: + return self.unread_messages.pop(0) + + if not self.sock: + return None + # Read header try: # Read 2 bytes at once @@ -176,3 +184,4 @@ class WebSocketClient: if self.sock: self.sock.close() self.sock = None + self.unread_messages = [] diff --git a/websocket_server/__pycache__/server.cpython-312.pyc b/websocket_server/__pycache__/server.cpython-312.pyc index 14433d6..333045e 100644 Binary files a/websocket_server/__pycache__/server.cpython-312.pyc and b/websocket_server/__pycache__/server.cpython-312.pyc differ diff --git a/websocket_server/received_audio.mp3 b/websocket_server/received_audio.mp3 index 0115652..b0d9057 100644 Binary files a/websocket_server/received_audio.mp3 and b/websocket_server/received_audio.mp3 differ diff --git a/websocket_server/received_audio.raw b/websocket_server/received_audio.raw index 405bd41..a515f87 100644 Binary files a/websocket_server/received_audio.raw and b/websocket_server/received_audio.raw differ diff --git a/websocket_server/server.py b/websocket_server/server.py index 4424f28..0a14c76 100644 --- a/websocket_server/server.py +++ b/websocket_server/server.py @@ -167,30 +167,110 @@ async def websocket_endpoint(websocket: WebSocket): except Exception as e: print(f"Error converting to MP3: {e}") - # 4. 发送回客户端播放 - print("Sending audio back...") - await websocket.send_text("START_PLAYBACK") + # 4. 
不再发送回客户端播放,提升性能 + # print("Sending audio back...") + # await websocket.send_text("START_PLAYBACK") # 分块发送 - chunk_size = 4096 - for i in range(0, len(processed_audio), chunk_size): - chunk = processed_audio[i:i+chunk_size] - await websocket.send_bytes(chunk) - # 小延时,避免发送过快导致 ESP32 缓冲区溢出 - # 4096 bytes / 32000 bytes/s (16k*2) = ~0.128s - # 0.04s 约为 3 倍速发送,既保证缓冲又不至于拥塞 - await asyncio.sleep(0.04) + # chunk_size = 4096 + # for i in range(0, len(processed_audio), chunk_size): + # chunk = processed_audio[i:i+chunk_size] + # await websocket.send_bytes(chunk) + # # 小延时,避免发送过快导致 ESP32 缓冲区溢出 + # # 4096 bytes / 32000 bytes/s (16k*2) = ~0.128s + # # 0.04s 约为 3 倍速发送,既保证缓冲又不至于拥塞 + # await asyncio.sleep(0.04) - await websocket.send_text("STOP_PLAYBACK") - print("Audio sent back finished.") + # await websocket.send_text("STOP_PLAYBACK") + print("Server processing finished (No playback sent).") - elif text.startswith("GET_FONT:"): - # 格式: GET_FONT:0xA1A1 + elif text.startswith("GET_FONTS_BATCH:"): + # Format: GET_FONTS_BATCH:code1,code2,code3 (decimal unicode) try: - print(f"Font Request Received: {text}") - hex_code = text.split(":")[1] - code = int(hex_code, 16) + codes_str = text.split(":")[1] + code_list = codes_str.split(",") + print(f"Batch Font Request for {len(code_list)} chars: {code_list}") + for code_str in code_list: + if not code_str: continue + + try: + unicode_val = int(code_str) + char = chr(unicode_val) + + gb_bytes = char.encode('gb2312') + if len(gb_bytes) == 2: + code = struct.unpack('>H', gb_bytes)[0] + else: + print(f"Character {char} is not a valid 2-byte GB2312 char") + # Send empty/dummy? Or just skip. + # Better to send something so client doesn't wait forever if it counts responses. + # But client probably uses a set of missing chars. 
+ continue + + # Calc offset + area = (code >> 8) - 0xA0 + index = (code & 0xFF) - 0xA0 + + if area >= 1 and index >= 1: + offset = ((area - 1) * 94 + (index - 1)) * 32 + + # Read font file + # Optimization: Open file once outside loop? + # For simplicity, keep it here, OS caching helps. + + script_dir = os.path.dirname(os.path.abspath(__file__)) + font_path = os.path.join(script_dir, FONT_FILE) + if not os.path.exists(font_path): + font_path = os.path.join(script_dir, "..", FONT_FILE) + if not os.path.exists(font_path): + font_path = FONT_FILE + + if os.path.exists(font_path): + with open(font_path, "rb") as f: + f.seek(offset) + font_data = f.read(32) + if len(font_data) == 32: + import binascii + hex_data = binascii.hexlify(font_data).decode('utf-8') + response = f"FONT_DATA:{code_str}:{hex_data}" + await websocket.send_text(response) + # Small yield to let network flush? + # await asyncio.sleep(0.001) + except Exception as e: + print(f"Error processing batch item {code_str}: {e}") + + # Send a completion marker + await websocket.send_text("FONT_BATCH_END") + + except Exception as e: + print(f"Error handling BATCH FONT request: {e}") + await websocket.send_text("FONT_BATCH_END") # Ensure we unblock client + + elif text.startswith("GET_FONT_UNICODE:") or text.startswith("GET_FONT:"): + # 格式: GET_FONT_UNICODE:12345 (decimal) or GET_FONT:0xA1A1 (hex) + try: + is_unicode = text.startswith("GET_FONT_UNICODE:") + code_str = text.split(":")[1] + + target_code_str = code_str # Used for response + + if is_unicode: + unicode_val = int(code_str) + char = chr(unicode_val) + try: + gb_bytes = char.encode('gb2312') + if len(gb_bytes) == 2: + code = struct.unpack('>H', gb_bytes)[0] + else: + print(f"Character {char} is not a valid 2-byte GB2312 char") + continue + except Exception as e: + print(f"Failed to encode {char} to gb2312: {e}") + continue + else: + code = int(code_str, 16) + # 计算偏移量 # GB2312 编码范围:0xA1A1 - 0xFEFE # 区码:高字节 - 0xA0 @@ -225,17 +305,18 @@ async def 
websocket_endpoint(websocket: WebSocket): if len(font_data) == 32: import binascii hex_data = binascii.hexlify(font_data).decode('utf-8') - response = f"FONT_DATA:{hex_code}:{hex_data}" - print(f"Sending Font Response: {response[:30]}...") + # Return the original requested code (unicode or hex) so client can map it back + response = f"FONT_DATA:{target_code_str}:{hex_data}" + # print(f"Sending Font Response: {response[:30]}...") await websocket.send_text(response) else: print(f"Error: Read {len(font_data)} bytes for font data (expected 32)") else: print(f"Font file not found: {font_path}") else: - print(f"Invalid GB2312 code: {hex_code} (Area: {area}, Index: {index})") + print(f"Invalid GB2312 code derived: {code:X} (Area: {area}, Index: {index})") except Exception as e: - print(f"Error handling GET_FONT: {e}") + print(f"Error handling FONT request: {e}") elif "bytes" in message: # 接收音频数据并追加到缓冲区