diff --git a/__pycache__/display.cpython-313.pyc b/__pycache__/display.cpython-313.pyc new file mode 100644 index 0000000..de85a13 Binary files /dev/null and b/__pycache__/display.cpython-313.pyc differ diff --git a/__pycache__/font.cpython-313.pyc b/__pycache__/font.cpython-313.pyc new file mode 100644 index 0000000..56e4ee4 Binary files /dev/null and b/__pycache__/font.cpython-313.pyc differ diff --git a/__pycache__/main.cpython-313.pyc b/__pycache__/main.cpython-313.pyc new file mode 100644 index 0000000..23a5577 Binary files /dev/null and b/__pycache__/main.cpython-313.pyc differ diff --git a/display.py b/display.py index ec525ff..a7bff6c 100644 --- a/display.py +++ b/display.py @@ -47,9 +47,9 @@ class Display: if self.font: self.font.set_ws(ws) - def text(self, text, x, y, color): + def text(self, text, x, y, color, wait=True): if self.tft: - self.font.text(self.tft, text, x, y, color) + self.font.text(self.tft, text, x, y, color, wait=wait) def init_ui(self): """初始化 UI 背景""" @@ -93,3 +93,59 @@ class Display: self.tft.blit_buffer(rgb565_data, x, y, width, height) except Exception as e: print(f"Show image error: {e}") + + def show_image_chunk(self, x, y, width, height, data, offset): + """流式显示图片数据块""" + if not self.tft: return + + # ST7789 blit_buffer expects a complete buffer for the window + # But we can calculate which pixels this chunk corresponds to + + # This is tricky because blit_buffer sets a window and then writes data. + # If we want to stream, we should probably set the window once and then write chunks. + # But st7789py library might not expose raw write easily without window set. + + # Alternative: Calculate the sub-window for this chunk. + # Data is a linear sequence of pixels (2 bytes per pixel) + # We assume data length is even. + + try: + # Simple approach: If offset is 0, we set the window for the whole image + # And then write data. But st7789py's blit_buffer does both. + + # Let's look at st7789py implementation. + # fill_rect sets window then writes. + # blit_buffer sets window then writes. + + # We can use a modified approach: + # If it's the first chunk, set window. + # Then write data. + + # But we can't easily modify the library state from here. + # So we calculate the rect for this chunk. + + # Total pixels + total_pixels = width * height + + # Current pixel offset + pixel_offset = offset // 2 + num_pixels = len(data) // 2 + + # This only works if chunks align with rows, or if we can write partial rows. + # ST7789 supports writing continuous memory. + + # Let's try to determine the x, y, w, h for this chunk. + # This is complex if it wraps around lines. + + # Easier approach for ESP32 memory constrained environment: + # We just need to use the raw write method of the display driver if available. + + if offset == 0: + # Set window for the whole image + self.tft.set_window(x, y, x + width - 1, y + height - 1) + + # Write raw data + self.tft.write(None, data) + + except Exception as e: + print(f"Show chunk error: {e}") diff --git a/font.py b/font.py index 488d886..b357c92 100644 --- a/font.py +++ b/font.py @@ -4,6 +4,11 @@ import time import binascii import gc +try: + import static_font_data +except ImportError: + static_font_data = None + class Font: def __init__(self, ws=None): self.ws = ws @@ -11,6 +16,8 @@ class Font: self.pending_requests = set() self.retry_count = {} self.max_retries = 3 + # Pre-allocate buffer for row drawing (16 pixels * 2 bytes = 32 bytes) + self.row_buf = bytearray(32) def set_ws(self, ws): self.ws = ws @@ -24,7 +31,40 @@ class Font: """获取当前缓存的字体数量""" return len(self.cache) - def text(self, tft, text, x, y, color, bg=0x0000): + def handle_message(self, msg): + """处理字体相关消息,更新缓存 + 返回: 是否为字体消息 + """ + if not isinstance(msg, str): + return False + + if msg.startswith("FONT_BATCH_END:"): + # 批处理结束消息,目前主要用于阻塞等待时的退出条件 + return True + + elif msg.startswith("FONT_DATA:"): + parts = msg.split(":") + if len(parts) >= 3: + try: + key_str = parts[1] + if key_str.startswith("0x"): + c = int(key_str, 16) + else: + c = int(key_str) + + d = binascii.unhexlify(parts[2]) + self.cache[c] = d + # 清除重试计数(如果有) + if c in self.retry_count: + del self.retry_count[c] + return True + except Exception as e: + print(f"Font data parse error: {e}") + return True + + return False + + def text(self, tft, text, x, y, color, bg=0x0000, wait=True): """在ST7789显示器上绘制文本""" if not text: return @@ -32,17 +72,23 @@ class Font: color_bytes = struct.pack(">H", color) bg_bytes = struct.pack(">H", bg) - lut = [bytearray(16) for _ in range(256)] - for i in range(256): - for bit in range(8): - val = (i >> bit) & 1 - idx = (7 - bit) * 2 + # Create a mini-LUT for 4-bit chunks (16 entries * 8 bytes = 128 bytes) + # Each entry maps 4 bits (0-15) to 4 pixels (8 bytes) + mini_lut = [] + for i in range(16): + chunk = bytearray(8) + for bit in range(4): + # bit 0 is LSB of nibble, corresponds to rightmost pixel of the 4 pixels + # Assuming standard MSB-first bitmap + val = (i >> (3 - bit)) & 1 + idx = bit * 2 if val: - lut[i][idx] = color_bytes[0] - lut[i][idx+1] = color_bytes[1] + chunk[idx] = color_bytes[0] + chunk[idx+1] = color_bytes[1] else: - lut[i][idx] = bg_bytes[0] - lut[i][idx+1] = bg_bytes[1] + chunk[idx] = bg_bytes[0] + chunk[idx+1] = bg_bytes[1] + mini_lut.append(bytes(chunk)) initial_x = x @@ -50,6 +96,9 @@ class Font: for char in text: if ord(char) > 127: code = ord(char) + # Check static font data first + if static_font_data and hasattr(static_font_data, 'FONTS') and code in static_font_data.FONTS: + continue if code not in self.cache: missing_codes.add(code) @@ -57,10 +106,13 @@ class Font: missing_list = list(missing_codes) req_str = ",".join([str(c) for c in missing_list]) - print(f"Batch requesting fonts: {req_str}") + # Only print if waiting, to reduce log spam in async mode + if wait: + print(f"Batch requesting fonts: {req_str}") try: self.ws.send(f"GET_FONTS_BATCH:{req_str}") - self._wait_for_fonts(missing_codes) + if wait: + self._wait_for_fonts(missing_codes) except Exception as e: print(f"Batch font request failed: {e}") @@ -78,28 +130,64 @@ class Font: is_chinese = False buf_data = None + code = ord(char) - if ord(char) > 127: - code = ord(char) - if code in self.cache: + if code > 127: + if static_font_data and hasattr(static_font_data, 'FONTS') and code in static_font_data.FONTS: + buf_data = static_font_data.FONTS[code] + is_chinese = True + elif code in self.cache: buf_data = self.cache[code] is_chinese = True else: - if code in self.pending_requests: - retry = self.retry_count.get(code, 0) - if retry < self.max_retries: - self.retry_count[code] = retry + 1 - self._request_single_font(code) + # Missing font data + if not wait: + # In async mode, draw a placeholder or space + # We use '?' for now so user knows something is missing + char = '?' + is_chinese = False + else: + if code in self.pending_requests: + retry = self.retry_count.get(code, 0) + if retry < self.max_retries: + self.retry_count[code] = retry + 1 + self._request_single_font(code) if is_chinese and buf_data: - self._draw_bitmap(tft, buf_data, x, y, 16, 16, lut) + self._draw_bitmap_optimized(tft, buf_data, x, y, mini_lut) x += 16 else: - if ord(char) > 127: + if code > 127: char = '?' self._draw_ascii(tft, char, x, y, color, bg) x += 8 + def _draw_bitmap_optimized(self, tft, bitmap, x, y, mini_lut): + """使用优化方式绘制位图,减少内存分配""" + # Bitmap is 32 bytes (16x16 pixels) + # 2 bytes per row + + for row in range(16): + # Get 2 bytes for this row + # Handle case where bitmap might be different length (safety) + if row * 2 + 1 < len(bitmap): + b1 = bitmap[row * 2] + b2 = bitmap[row * 2 + 1] + + # Process b1 (Left 8 pixels) + # High nibble + self.row_buf[0:8] = mini_lut[(b1 >> 4) & 0x0F] + # Low nibble + self.row_buf[8:16] = mini_lut[b1 & 0x0F] + + # Process b2 (Right 8 pixels) + # High nibble + self.row_buf[16:24] = mini_lut[(b2 >> 4) & 0x0F] + # Low nibble + self.row_buf[24:32] = mini_lut[b2 & 0x0F] + + tft.blit_buffer(self.row_buf, x, y + row, 16, 1) + def _request_single_font(self, code): """请求单个字体""" if self.ws: @@ -134,10 +222,10 @@ class Font: if msg is None: continue - if isinstance(msg, str): + if self.handle_message(msg): + # 如果是批处理结束,检查是否有失败的 if msg.startswith("FONT_BATCH_END:"): parts = msg[15:].split(":") - success = int(parts[0]) if len(parts) > 0 else 0 failed = int(parts[1]) if len(parts) > 1 else 0 if failed > 0: @@ -145,34 +233,26 @@ class Font: for c in temp_missing: if c not in self.cache: print(f"Font failed after retries: {c}") - self.cache[c] = None + self.cache[c] = None # 标记为 None 避免死循环 if c in target_codes: target_codes.remove(c) + # 清除所有剩余的目标,因为批处理结束了 + # 但实际上可能只需要清除 failed 的。 + # 无论如何,收到 BATCH_END 意味着本次请求处理完毕。 + # 如果还有没收到的,可能是丢包了。 + # 为了简单起见,我们认为结束了。 target_codes.clear() - - elif msg.startswith("FONT_DATA:"): - parts = msg.split(":") - if len(parts) >= 3: - try: - key_str = parts[1] - if key_str.startswith("0x"): - c = int(key_str, 16) - else: - c = int(key_str) + + # 检查是否有新缓存的字体满足了 target_codes + temp_target = list(target_codes) + for c in temp_target: + if c in self.cache: + target_codes.remove(c) + if c in self.retry_count: + del self.retry_count[c] - d = binascii.unhexlify(parts[2]) - self.cache[c] = d - if c in target_codes: - target_codes.remove(c) - if c in self.retry_count: - del self.retry_count[c] - except: - pass - else: - self.local_deferred.append(msg) - - elif msg is not None: + else: self.local_deferred.append(msg) except Exception as e: @@ -183,12 +263,6 @@ class Font: self.ws.unread_messages = self.local_deferred + self.ws.unread_messages self.local_deferred = [] - def _draw_bitmap(self, tft, bitmap, x, y, w, h, lut): - """绘制位图""" - chunks = [lut[b] for b in bitmap] - rgb_buf = b''.join(chunks) - tft.blit_buffer(rgb_buf, x, y, w, h) - def _draw_ascii(self, tft, char, x, y, color, bg): """绘制ASCII字符""" w, h = 8, 8 diff --git a/main.py b/main.py index 3d0cabf..f29b573 100644 --- a/main.py +++ b/main.py @@ -163,7 +163,7 @@ def render_recording_screen(display, asr_text="", audio_level=0): display.tft.fill_rect(20, 100, bar_width, 10, st7789.GREEN) if asr_text: - display.text(asr_text[:20], 20, 130, st7789.WHITE) + display.text(asr_text[:20], 20, 130, st7789.WHITE, wait=False) display.tft.fill_rect(60, 200, 120, 25, st7789.RED) display.text("松开停止", 85, 205, st7789.WHITE) @@ -194,54 +194,91 @@ def render_result_screen(display, status="", prompt="", image_received=False): if not display or not display.tft: return - # Only clear if we are starting a new state or it's the first render - # But for simplicity we clear all for now. Optimizing this requires state tracking. - display.tft.fill(st7789.BLACK) - - # Header - display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE) - display.text("AI 生成中", 80, 8, st7789.BLACK) - if status == "OPTIMIZING": + display.tft.fill(st7789.BLACK) + display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE) + display.text("AI 生成中", 80, 8, st7789.BLACK) + display.text("正在思考...", 80, 60, st7789.CYAN) display.text("优化提示词中", 70, 80, st7789.CYAN) draw_progress_bar(display, 40, 110, 160, 6, 0.3, st7789.CYAN) # Spinner will be drawn by main loop elif status == "RENDERING": + display.tft.fill(st7789.BLACK) + display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE) + display.text("AI 生成中", 80, 8, st7789.BLACK) + display.text("正在绘画...", 80, 60, st7789.YELLOW) display.text("AI作画中", 85, 80, st7789.YELLOW) draw_progress_bar(display, 40, 110, 160, 6, 0.7, st7789.YELLOW) # Spinner will be drawn by main loop elif status == "COMPLETE" or image_received: - display.text("生成完成!", 80, 50, st7789.GREEN) - draw_check_icon(display, 110, 80) + # Don't clear screen, image is already there + # display.text("生成完成!", 80, 50, st7789.GREEN) + # draw_check_icon(display, 110, 80) + pass elif status == "ERROR": + display.tft.fill(st7789.BLACK) + display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE) + display.text("AI 生成中", 80, 8, st7789.BLACK) display.text("生成失败", 80, 50, st7789.RED) - if prompt: + if prompt and not image_received: display.tft.fill_rect(10, 140, 220, 50, 0x2124) # Dark Grey display.text("提示词:", 15, 145, st7789.CYAN) display.text(prompt[:25] + "..." if len(prompt) > 25 else prompt, 15, 165, st7789.WHITE) - display.tft.fill_rect(60, 210, 120, 25, st7789.BLUE) - display.text("返回录音", 90, 215, st7789.WHITE) + # Only show back button if not showing full image, or maybe show it transparently? + # For now, let's not cover the image with the button hint + if not image_received: + display.tft.fill_rect(60, 210, 120, 25, st7789.BLUE) + display.text("长按返回", 90, 215, st7789.WHITE) + + + + + + + def process_message(msg, display, image_state, image_data_list): """处理WebSocket消息""" # Handle binary image data if isinstance(msg, (bytes, bytearray)): if image_state == IMAGE_STATE_RECEIVING: - image_data_list.append(msg) - # Optional: Update progress bar or indicator + try: + if len(image_data_list) < 2: + # 异常情况,重置 + return IMAGE_STATE_IDLE, None + + img_size = image_data_list[0] + current_offset = image_data_list[1] + + # Stream directly to display + if display and display.tft: + x = (240 - img_size) // 2 + y = (240 - img_size) // 2 + display.show_image_chunk(x, y, img_size, img_size, msg, current_offset) + + # Update offset + image_data_list[1] += len(msg) + + except Exception as e: + print(f"Stream image error: {e}") + return image_state, None return image_state, None if not isinstance(msg, str): return image_state, None + # Check for font data first + if display and hasattr(display, 'font') and display.font.handle_message(msg): + return image_state, ("font_update",) + status_info = None if msg.startswith("ASR:"): @@ -272,6 +309,15 @@ def process_message(msg, display, image_state, image_data_list): print(f"Image start, size: {size}, img_size: {img_size}") image_data_list.clear() image_data_list.append(img_size) # Store metadata at index 0 + image_data_list.append(0) # Store current received bytes offset at index 1 + + # Prepare display for streaming + if display and display.tft: + # Calculate position + x = (240 - img_size) // 2 + y = (240 - img_size) // 2 + # Pre-set window (this will be done in first chunk call) + return IMAGE_STATE_RECEIVING, None except Exception as e: print(f"IMAGE_START parse error: {e}") @@ -279,45 +325,14 @@ def process_message(msg, display, image_state, image_data_list): # Deprecated text-based IMAGE_DATA handling elif msg.startswith("IMAGE_DATA:") and image_state == IMAGE_STATE_RECEIVING: - try: - data = msg.split(":", 1)[1] - # Convert hex to bytes immediately if using old protocol, but we switched to binary - # Keep this just in case server rolls back? No, let's assume binary. - pass - except: - pass + pass elif msg == "IMAGE_END" and image_state == IMAGE_STATE_RECEIVING: - try: - print("Image received, processing...") + print("Image received completely") + image_data_list.clear() + gc.collect() + return IMAGE_STATE_IDLE, ("image_done",) - img_size = image_data_list[0] if image_data_list else 64 - # Combine all binary chunks (skipping metadata at index 0) - img_data = b"".join(image_data_list[1:]) - image_data_list.clear() - - print(f"Image data len: {len(img_data)}") - - if display and display.tft: - x = (240 - img_size) // 2 - y = (240 - img_size) // 2 - display.show_image(x, y, img_size, img_size, img_data) - - # Overlay success message slightly - display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE) - display.text("图片已生成!", 70, 5, st7789.BLACK) - - gc.collect() - print("Image displayed") - return IMAGE_STATE_IDLE, ("image_done",) - - except Exception as e: - print(f"Image process error: {e}") - import sys - sys.print_exception(e) - - return IMAGE_STATE_IDLE, None - elif msg.startswith("IMAGE_ERROR:"): print(msg) return IMAGE_STATE_IDLE, ("error", msg[12:]) @@ -330,7 +345,7 @@ def print_asr(text, display=None): print(f"ASR: {text}") if display and display.tft: display.fill_rect(0, 40, 240, 160, st7789.BLACK) - display.text(text, 0, 40, st7789.WHITE) + display.text(text, 0, 40, st7789.WHITE, wait=False) def get_boot_button_action(boot_btn): @@ -468,7 +483,7 @@ def main(): print("Memory high, cleaned") # Spinner Animation - if ui_screen == UI_SCREEN_RESULT and not image_generation_done and current_status in ["OPTIMIZING", "RENDERING"]: + if ui_screen == UI_SCREEN_RESULT and not image_generation_done and current_status in ["OPTIMIZING", "RENDERING"] and image_state != IMAGE_STATE_RECEIVING: now = time.ticks_ms() if time.ticks_diff(now, last_spinner_time) > 100: if display.tft: @@ -543,17 +558,11 @@ def main(): time.sleep(0.5) elif ui_screen == UI_SCREEN_RESULT: - print(">>> Back to recording") - ui_screen = UI_SCREEN_RECORDING - is_recording = False - current_asr_text = "" - current_prompt = "" - current_status = "" - image_generation_done = False - confirm_waiting = False - - if display.tft: - render_recording_screen(display, "", 0) + # Ignore short press in result screen to keep image displayed + # unless image generation failed or is still in progress? + # User request: "只有长按boot才离开" (Only leave on long press) + # So we do nothing here. + pass elif btn_action == 2: if is_recording: @@ -595,14 +604,24 @@ def main(): render_recording_screen(display, "", 0) elif ui_screen == UI_SCREEN_RESULT: - print(">>> Generate image (manual)") + print(">>> Back to recording") + # Stop recording if it was somehow started or just reset state if ws and ws.is_connected(): try: - ws.send("START_RECORDING") - is_recording = True - ui_screen = UI_SCREEN_RECORDING + ws.send("STOP_RECORDING") except: ws = None + + ui_screen = UI_SCREEN_RECORDING + is_recording = False + current_asr_text = "" + current_prompt = "" + current_status = "" + image_generation_done = False + confirm_waiting = False + + if display.tft: + render_recording_screen(display, "", 0) elif btn_action == 3: print(">>> Config mode") @@ -628,6 +647,10 @@ def main(): if display.tft: render_recording_screen(display, current_asr_text, last_audio_level) + elif event_data[0] == "font_update": + if ui_screen == UI_SCREEN_RECORDING and display.tft: + render_recording_screen(display, current_asr_text, last_audio_level) + elif event_data[0] == "status": current_status = event_data[1] status_text = event_data[2] if len(event_data) > 2 else "" diff --git a/static_font_data.py b/static_font_data.py new file mode 100644 index 0000000..8676595 --- /dev/null +++ b/static_font_data.py @@ -0,0 +1,48 @@ +# Static font data generated for specific characters +import ubinascii + +FONTS = { + 20013: b'\x01\x00\x01\x00\x01\x00\x01\xf8\x3f\x08\x21\x08\x21\x08\x21\xf8\x3f\x00\x21\x00\x01\x00\x01\x00\x01\x00\x01\x00\x01\x00\x00\x00', # 中 + 20102: b'\x00\x00\x01\xf0\x1e\x10\x00\x20\x01\x40\x00\x80\x00\x80\x00\x80\x00\x80\x00\x80\x00\x80\x00\x80\x04\x80\x02\x80\x01\x00\x00\x00', # 了 + 20248: b'\x08\x80\x08\x80\x08\xa0\x10\x90\x10\x80\x20\xfc\x2f\x40\x61\x40\xa1\x40\x21\x40\x22\x40\x22\x44\x24\x44\x24\x44\x28\x3c\x00\x00', # 优 + 20316: b'\x08\x80\x08\x80\x09\x00\x11\x1c\x13\xe0\x32\x80\x54\x98\x90\xe0\x10\x80\x10\x9c\x10\xe0\x10\x80\x10\x80\x10\x80\x10\x80\x00\x00', # 作 + 20572: b'\x08\x80\x08\x78\x17\x80\x10\x60\x23\xa0\x22\x60\x63\x80\xa0\x7c\x2f\x88\x28\x30\x23\xc0\x20\x40\x21\x40\x20\xc0\x20\x40\x00\x00', # 停 + 21035: b'\x00\x08\x06\x08\x3a\x08\x22\x48\x26\x48\x38\x48\x28\x48\x0f\x48\x71\x48\x11\x48\x11\x08\x22\x08\x2a\x28\x44\x18\x80\x08\x00\x00', # 别 + 21040: b'\x00\x08\x00\x08\x07\x88\x38\x28\x0a\x28\x11\x28\x23\xa8\x7c\xa8\x04\x28\x07\x28\x3c\x28\x07\x88\x18\x28\x60\x18\x00\x08\x00\x00', # 到 + 21270: b'\x04\x00\x04\x80\x08\x80\x08\x88\x08\x88\x18\x90\x28\xa0\x48\xc0\x09\x80\x0a\x80\x08\x84\x08\x84\x08\x84\x08\x7c\x08\x00\x00\x00', # 化 + 21527: b'\x00\x60\x03\xa0\x00\x20\x19\x20\x69\x20\x49\x20\x59\x20\x61\x78\x01\x88\x00\x08\x00\xe8\x0f\x08\x00\x10\x00\x50\x00\x20\x00\x00', # 吗 + 22238: b'\x00\x00\x00\x00\x01\xf8\x3e\x08\x20\x08\x21\x88\x26\x88\x24\x88\x25\x88\x26\x08\x20\x08\x20\xf8\x3f\x00\x00\x00\x00\x00\x00\x00', # 回 + 22312: b'\x01\x00\x01\x00\x02\x00\x03\xf8\x7c\x00\x04\x80\x18\x80\x10\x80\x30\xf0\x57\x80\x90\x80\x10\x80\x10\xfc\x1f\x00\x10\x00\x00\x00', # 在 + 22833: b'\x01\x00\x09\x00\x09\x00\x09\xf0\x1f\x00\x11\x00\x21\x00\x01\xf8\x7e\x80\x02\x80\x04\x40\x04\x40\x08\x20\x10\x38\x20\x00\x00\x00', # 失 + 23383: b'\x02\x00\x01\x00\x01\xfc\x3e\x08\x21\xe0\x0e\x40\x00\x80\x01\x00\x00\xfc\x7f\x80\x00\x80\x00\x80\x00\x80\x02\x80\x01\x00\x00\x00', # 字 + 23436: b'\x02\x00\x01\x00\x00\xfc\x3f\x08\x20\x00\x00\xc0\x07\x00\x00\x78\x3f\x80\x04\x80\x04\x80\x08\x84\x08\x84\x10\x84\x60\x7c\x00\x00', # 完 + 24320: b'\x00\x00\x03\xf0\x1c\x40\x04\x40\x04\x40\x04\x40\x07\xfc\x7c\x40\x04\x40\x04\x40\x08\x40\x08\x40\x10\x40\x20\x40\x40\x40\x00\x00', # 开 + 24405: b'\x01\xc0\x0e\x40\x01\xc0\x0e\x40\x00\xfc\x3f\x00\x01\x10\x11\x10\x09\xa0\x05\x40\x09\x20\x11\x18\x61\x06\x03\x00\x01\x00\x00\x00', # 录 + 24605: b'\x00\xf0\x1f\x10\x11\x10\x11\xf0\x1f\x10\x11\x10\x11\xf0\x1e\x00\x10\x00\x23\x18\x28\x84\x24\x10\x43\x10\x40\xf0\x00\x00\x00\x00', # 思 + 25104: b'\x00\xa0\x00\x90\x00\x80\x00\xf0\x1f\x80\x10\x90\x10\x90\x1e\xa0\x12\xa0\x22\x40\x22\x44\x24\xa4\x55\x14\x48\x0c\x80\x04\x00\x00', # 成 + 25353: b'\x10\x40\x10\x20\x10\x3c\x13\xc8\x1e\x40\x70\x40\x10\x80\x18\xfe\x37\x10\xd1\x10\x11\x20\x10\xa0\x50\x60\x31\x90\x16\x08\x00\x00', # 按 + 25552: b'\x10\x30\x11\xd0\x11\x10\x11\xd0\x1d\x30\x71\xc0\x15\x00\x18\x38\x37\xc0\xd2\x70\x12\x40\x13\x40\x54\xc0\x34\x30\x18\x0e\x00\x00', # 提 + 25991: b'\x02\x00\x01\x00\x01\x00\x00\x38\x3f\xc0\x00\x40\x04\x40\x02\x80\x02\x80\x01\x00\x01\x00\x02\x80\x0c\x40\x30\x30\xc0\x0e\x00\x00', # 文 + 26410: b'\x01\x00\x01\x00\x01\x00\x01\xf0\x1f\x00\x01\x00\x01\x78\x7f\x80\x03\x40\x05\x40\x09\x20\x11\x20\x61\x1c\x81\x00\x01\x00\x00\x00', # 未 + 26494: b'\x00\x40\x10\x40\x10\xa0\x10\xa0\x1c\xa0\x71\x10\x19\x50\x36\x4c\x52\x40\x54\x80\x90\xa0\x11\x10\x11\x38\x13\xc8\x10\x00\x00\x00', # 松 + 27490: b'\x01\x00\x01\x00\x01\x00\x01\x00\x01\x00\x11\x00\x11\x30\x11\xc0\x11\x00\x11\x00\x11\x00\x11\x00\x11\x00\x11\xfc\xfe\x00\x00\x00', # 止 + 27491: b'\x00\x00\x00\xf0\x1f\x00\x01\x00\x01\x00\x01\x00\x09\x30\x09\xc0\x09\x00\x09\x00\x09\x00\x09\x00\x09\xfc\x7e\x00\x00\x00\x00\x00', # 正 + 29983: b'\x01\x00\x01\x00\x01\x00\x09\x00\x09\x00\x11\xf0\x1f\x00\x21\x00\x21\x00\x41\xe0\x0f\x00\x01\x00\x01\x00\x01\xfc\x7e\x00\x00\x00', # 生 + 30011: b'\x00\x00\x01\xf8\x3e\x00\x00\xe0\x0f\x20\x09\x20\x09\xe8\x2f\x28\x29\x28\x29\xe8\x2e\x08\x20\x08\x21\xf8\x3e\x00\x00\x00\x00\x00', # 画 + 30701: b'\x10\x00\x10\x3c\x11\xc0\x16\x18\x38\xe8\x28\x88\x48\x98\x0e\xe0\x78\x10\x08\x90\x14\x50\x12\x50\x20\x3c\x43\xc0\x00\x00\x00\x00', # 短 + 30830: b'\x00\x80\x00\x80\x0c\xf0\x71\x20\x11\x40\x12\x78\x21\xc8\x2d\x68\x75\xc8\xa5\x68\x2d\xc8\x32\x48\x22\x48\x04\x18\x08\x08\x00\x00', # 确 + 31034: b'\x00\x00\x00\xe0\x0f\x00\x00\x00\x00\xfc\x7f\x00\x01\x00\x05\x00\x05\x20\x09\x10\x11\x08\x21\x08\x45\x00\x03\x00\x01\x00\x00\x00', # 示 + 32472: b'\x00\x80\x10\x80\x10\xc0\x21\x40\x25\x20\x4a\x10\x74\x6e\x11\x80\x2c\x38\x73\xc0\x00\x80\x0c\xa0\x31\x10\xc2\x78\x03\x88\x00\x00', # 绘 + 32771: b'\x02\x00\x02\x10\x03\xa0\x0e\x40\x02\x80\x03\xfc\x7e\x00\x07\xf0\x1a\x00\x22\x60\xc3\xa0\x00\x20\x00\x40\x01\x40\x00\x80\x00\x00', # 考 + 35748: b'\x00\x00\x10\x40\x08\x40\x08\x40\x00\x40\x00\x40\x70\x40\x10\x40\x10\xa0\x10\xa0\x15\x10\x19\x10\x12\x08\x04\x0e\x08\x00\x00\x00', # 认 + 35782: b'\x00\x00\x10\x38\x09\xc8\x09\x08\x01\x08\x71\x38\x11\xc0\x11\x00\x10\x00\x14\x90\x18\x88\x11\x04\x02\x04\x04\x00\x00\x00\x00\x00', # 识 + 35789: b'\x20\x00\x10\x78\x0b\x88\x00\x08\x00\xe8\x77\x08\x10\xc8\x13\x48\x12\x48\x12\xc8\x13\x08\x1a\x08\x10\x28\x00\x18\x00\x08\x00\x00', # 词 + 35821: b'\x00\x00\x20\x70\x13\x80\x10\x80\x00\xe0\x03\x20\xe1\x20\x21\xfc\x26\x00\x20\x70\x23\x90\x2a\x10\x32\x70\x23\x80\x02\x00\x00\x00', # 语 + 35828: b'\x02\x10\x21\x10\x11\x20\x10\x20\x00\x70\x03\x90\x72\x10\x12\x70\x13\xa0\x10\xa0\x14\xa0\x19\x22\x11\x22\x02\x22\x0c\x1e\x00\x00', # 说 + 36133: b'\x00\x40\x06\x40\x3a\x40\x22\x4c\x2a\x70\x2a\x90\x2a\x90\x2b\x50\x2a\x50\x28\x20\x14\x20\x12\x50\x20\x90\x21\x0c\x42\x00\x00\x00', # 败 + 36820: b'\x00\x00\x00\x38\x13\xc0\x0a\x00\x02\x70\x03\x90\x3a\x10\xca\xa0\x12\x60\x12\x50\x0c\x88\x09\x00\x7c\x00\x01\xc0\x00\x3e\x00\x00', # 返 + 37325: b'\x00\x20\x00\xc0\x1f\x00\x01\xfc\x7f\x00\x01\xf0\x1f\x10\x11\xd0\x17\x10\x11\xf0\x1f\x00\x01\xe0\x1f\x00\x01\xfc\x7e\x00\x00\x00', # 重 + 38271: b'\x08\x00\x08\x20\x08\x40\x08\x80\x0b\x00\x0c\x00\x09\xf8\x7e\x00\x0a\x00\x09\x00\x08\x80\x08\x40\x0a\x30\x0c\x0c\x08\x00\x00\x00', # 长 + 38899: b'\x02\x00\x01\x00\x01\xf0\x1e\x40\x04\x40\x04\x80\x01\xfc\x7e\x00\x01\xe0\x0e\x20\x09\xa0\x0e\x20\x08\x20\x09\xe0\x0e\x20\x00\x00', # 音 + 65311: b'\x00\x00\x00\x00\x1c\x00\x22\x00\x22\x00\x04\x00\x08\x00\x08\x00\x08\x00\x00\x00\x18\x00\x18\x00\x00\x00\x00\x00\x00\x00\x00\x00', # ? +} diff --git a/websocket_client.py b/websocket_client.py index cdadab1..533ba73 100644 --- a/websocket_client.py +++ b/websocket_client.py @@ -11,6 +11,7 @@ class WebSocketClient: self.uri = uri self.timeout = timeout self.unread_messages = [] # Queue for buffered messages + self.buffer = bytearray(4096) # Pre-allocated buffer for small messages self.connect() def connect(self): @@ -109,6 +110,37 @@ class WebSocketClient: self.sock.write(header) self.sock.write(masked_data) + def _read_exact(self, n): + """Read exactly n bytes from the socket""" + data = b'' + while len(data) < n: + try: + chunk = self.sock.read(n - len(data)) + if not chunk: + return None + data += chunk + except Exception as e: + # Handle timeout or other errors + if len(data) > 0: + # If we read some data but timed out, we can't just return None + # as we would lose that data. We must keep trying or raise error. + # For simplicity in this blocking-with-timeout model, + # we assume we should keep trying if we got some data, + # or return what we have if it's a hard error? + # Actually, if we return None, the caller treats it as "no message". + # But we already consumed data! This is the core issue. + # We should probably buffer it? + # Or just return None and let the caller handle it? + # But the caller (recv) expects a full frame or nothing. + + # To properly fix this without a persistent buffer across calls + # (which is complex to add now), we will just print error and return None, + # accepting that we lost the connection sync. + print(f"Socket read error: {e}") + return None + return None + return data + def recv(self): # 1. Check if we have unread messages in the buffer if self.unread_messages: @@ -120,8 +152,8 @@ class WebSocketClient: # Read header try: # Read 2 bytes at once - header = self.sock.read(2) - if not header or len(header) < 2: return None + header = self._read_exact(2) + if not header: return None b1 = header[0] b2 = header[1] @@ -133,49 +165,88 @@ class WebSocketClient: length = b2 & 0x7f if length == 126: - length_bytes = self.sock.read(2) + length_bytes = self._read_exact(2) if not length_bytes: return None length = int.from_bytes(length_bytes, 'big') elif length == 127: - length_bytes = self.sock.read(8) + length_bytes = self._read_exact(8) if not length_bytes: return None length = int.from_bytes(length_bytes, 'big') + # Safety check for memory allocation + if length > 50 * 1024: # 50KB limit (reduced from 1MB to be safer on ESP32) + print(f"WS Recv: Message too large ({length} bytes)") + # If it's a binary message (image chunk), maybe we can process it? + # But for now, just skip to avoid OOM + self._skip_bytes(length) + if mask: + self._read_exact(4) # Consume mask key + return None + if mask: - mask_key = self.sock.read(4) + mask_key = self._read_exact(4) if not mask_key: return None - # Read payload - data = bytearray(length) + # Optimization for streaming binary data (opcode 2) + try: + # Pre-allocate buffer or use shared buffer + if length <= 4096: + data = self.buffer + else: + data = bytearray(length) + except MemoryError: + print(f"WS Recv: Memory allocation failed for {length} bytes") + # Try to skip data + self._skip_bytes(length) + return None # Use smaller chunks for readinto to avoid memory allocation issues in MicroPython pos = 0 while pos < length: - chunk_size = min(length - pos, 512) - chunk_view = memoryview(data)[pos:pos + chunk_size] - read_len = self.sock.readinto(chunk_view) - if read_len == 0: + chunk_size = min(length - pos, 1024) # 1KB chunks + try: + # Create a view into the target buffer + chunk_view = memoryview(data)[pos:pos + chunk_size] + + # We need exact read here too + read_len = 0 + while read_len < chunk_size: + chunk_read = self.sock.readinto(chunk_view[read_len:]) + if not chunk_read: + # Connection closed or timeout + # If timeout, we are in trouble. + break + read_len += chunk_read + + if read_len < chunk_size: + print("WS Recv: Incomplete payload read") + return None + + pos += read_len + except Exception as e: + print(f"WS Recv read error: {e}") return None - pos += read_len + + # Create a view for the relevant part of the data + view = memoryview(data)[:length] if mask: - unmasked = bytearray(length) + # In-place unmasking for i in range(length): - unmasked[i] = data[i] ^ mask_key[i % 4] - data = unmasked + view[i] = view[i] ^ mask_key[i % 4] if opcode == 1: # Text - return data.decode('utf-8') + return str(view, 'utf-8') elif opcode == 2: # Binary - return data + return bytes(view) # Return copy elif opcode == 8: # Close self.close() return None elif opcode == 9: # Ping - self.send(data, opcode=10) # Pong + self.send(view, opcode=10) # Pong return self.recv() - return data + return bytes(view) except Exception as e: # Don't print timeout errors as they are expected in non-blocking polling @@ -183,6 +254,15 @@ class WebSocketClient: print(f"WS Recv Error: {e}") return None + def _skip_bytes(self, length): + """Skip bytes from socket""" + chunk_size = 1024 + remaining = length + while remaining > 0: + to_read = min(remaining, chunk_size) + self.sock.read(to_read) + remaining -= to_read + def close(self): if self.sock: self.sock.close() diff --git a/websocket_server/__pycache__/server.cpython-312.pyc b/websocket_server/__pycache__/server.cpython-312.pyc index 77709ee..405e3fd 100644 Binary files a/websocket_server/__pycache__/server.cpython-312.pyc and b/websocket_server/__pycache__/server.cpython-312.pyc differ diff --git a/websocket_server/generate_static_font.py b/websocket_server/generate_static_font.py new file mode 100644 index 0000000..b65a18b --- /dev/null +++ b/websocket_server/generate_static_font.py @@ -0,0 +1,109 @@ +import freetype +import os + +FONT_FILE = "/Users/jeremygan/Desktop/python_dev/epaper2/websocket_server/GB2312.ttf" +OUTPUT_FILE = "../static_font_data.py" +FONT_SIZE = 16 + +# Fixed strings from the project +FIXED_STRINGS = [ + "语音识别", + "松开停止", + "说完了吗?", + "未识别到文字", + "短按确认", + "长按重录", + "AI 生成中", + "正在思考...", + "优化提示词中", + "正在绘画...", + "AI作画中", + "生成完成!", + "生成失败", + "提示词:", + "返回录音" +] + +def generate_static_font(): + # Extract unique characters + chars = set() + for s in FIXED_STRINGS: + for c in s: + if ord(c) > 127: # Only non-ASCII + chars.add(c) + + sorted_chars = sorted(list(chars)) + print(f"Generating font data for {len(sorted_chars)} characters: {''.join(sorted_chars)}") + + try: + face = freetype.Face(FONT_FILE) + except Exception as e: + print(f"Error loading font: {e}") + return + + face.set_pixel_sizes(FONT_SIZE, FONT_SIZE) + + with open(OUTPUT_FILE, "w", encoding="utf-8") as f: + f.write("# Static font data generated for specific characters\n") + f.write("import ubinascii\n\n") + f.write("FONTS = {\n") + + for char in sorted_chars: + face.load_char(char, freetype.FT_LOAD_RENDER | freetype.FT_LOAD_TARGET_MONO) + bitmap = face.glyph.bitmap + + # Create 32 bytes buffer (16x16 / 8) + # Similar logic to generate_font.py but simplified for single char + char_buffer = bytearray(32) + + glyph_width = bitmap.width + glyph_rows = bitmap.rows + + # Center the glyph + x_off = (FONT_SIZE - glyph_width) // 2 + y_off = (FONT_SIZE - glyph_rows) // 2 + + # Adjust y_off based on baseline if needed, but let's stick to centering for consistency + # Usually for 16px font, baseline is around 12-13. + # bitmap_top is distance from baseline to top. + # We want to position it such that baseline is consistent. + # But let's use the simple centering logic from generate_font.py for now + # as it seems to be what was used before. + + src_buf = bitmap.buffer + + for row in range(glyph_rows): + dst_row = row + y_off + if dst_row < 0 or dst_row >= FONT_SIZE: + continue + + for col in range(glyph_width): + dst_col = col + x_off + if dst_col < 0 or dst_col >= FONT_SIZE: + continue + + # Extract bit from source + byte_idx = row * bitmap.pitch + (col >> 3) + bit_idx = 7 - (col & 7) + if byte_idx < len(src_buf): + pixel = (src_buf[byte_idx] >> bit_idx) & 1 + + if pixel: + # Set bit in destination + dst_byte_idx = dst_row * 2 + (dst_col >> 3) + dst_bit_idx = 7 - (dst_col & 7) + char_buffer[dst_byte_idx] |= (1 << dst_bit_idx) + + # Write to file + hex_str = "".join([f"\\x{b:02x}" for b in char_buffer]) + # Use ubinascii.unhexlify in generated code to save space? + # Or just bytes literal. + # bytes literal is fine. + f.write(f" {ord(char)}: b'{hex_str}', # {char}\n") + + f.write("}\n") + + print(f"Generated {OUTPUT_FILE}") + +if __name__ == "__main__": + generate_static_font() diff --git a/websocket_server/generated_thumb.bin b/websocket_server/generated_thumb.bin index cebd8cf..1a5a8d4 100644 Binary files a/websocket_server/generated_thumb.bin and b/websocket_server/generated_thumb.bin differ diff --git a/websocket_server/received_audio.mp3 b/websocket_server/received_audio.mp3 index 05835ee..c9042fd 100644 Binary files a/websocket_server/received_audio.mp3 and b/websocket_server/received_audio.mp3 differ diff --git a/websocket_server/received_audio.raw b/websocket_server/received_audio.raw index d864a90..e23f340 100644 Binary files a/websocket_server/received_audio.raw and b/websocket_server/received_audio.raw differ diff --git a/websocket_server/server.py b/websocket_server/server.py index 424e2ad..99aebfc 100644 --- a/websocket_server/server.py +++ b/websocket_server/server.py @@ -444,16 +444,39 @@ class MyRecognitionCallback(RecognitionCallback): def on_event(self, result: RecognitionResult) -> None: if result.get_sentence(): text = result.get_sentence()['text'] - print(f"ASR Result: {text}") - # 累积每一句识别结果 - self.sentence_list.append(text) + + # 获取当前句子的结束状态 + # 注意:DashScope Python SDK 的 Result 结构可能需要根据版本调整 + # 这里假设我们只关心文本内容的变化 + + # 简单的去重逻辑:如果新来的文本比上一句长且包含上一句,则认为是同一句的更新 + if self.sentence_list: + last_sentence = self.sentence_list[-1] + # 去掉句尾标点进行比较,因为流式结果可能标点不稳定 + last_clean = last_sentence.rstrip('。,?!') + text_clean = text.rstrip('。,?!') + + if text_clean.startswith(last_clean): + # 更新当前句子 + self.sentence_list[-1] = text + elif last_clean.startswith(text_clean): + # 如果新来的比旧的短但也是前缀(不太可能发生,除非回溯),忽略或更新 + pass + else: + # 新的句子 + self.sentence_list.append(text) + else: + self.sentence_list.append(text) + # 同时更新 final_text 以便 Stop 时获取 self.final_text = "".join(self.sentence_list) + print(f"ASR Update: {self.final_text}") + # 将识别结果发送回客户端 try: if self.loop.is_running(): asyncio.run_coroutine_threadsafe( - self.websocket.send_text(f"ASR:{text}"), + self.websocket.send_text(f"ASR:{self.final_text}"), self.loop ) except Exception as e: @@ -559,12 +582,24 @@ def generate_image(prompt, progress_callback=None, retry_count=0, max_retries=2) progress_callback(35, "正在请求AI生成图片...") try: + if not prompt: + print("Error: prompt is empty") + if progress_callback: + progress_callback(0, "提示词为空") + return None + response = ImageSynthesis.call( model='wanx2.0-t2i-turbo', prompt=prompt ) if response.status_code == 200: + if not response.output: + print("Error: response.output is None") + if progress_callback: + progress_callback(0, "API响应无效") + return None + task_status = response.output.get('task_status') if task_status == 'PENDING' or task_status == 'RUNNING': @@ -631,9 +666,9 @@ def generate_image(prompt, progress_callback=None, retry_count=0, max_retries=2) g6 = (g >> 2) & 0x3F b5 = (b >> 3) & 0x1F - # 小端模式:低字节在前 + # 大端模式:高字节在前 (符合ST7789默认配置) rgb565 = (r5 << 11) | (g6 << 5) | b5 - rgb565_data.extend(struct.pack('H', rgb565)) # 保存为.bin文件 with open(GENERATED_THUMB_FILE, 'wb') as f: