1

2026-03-03 22:45:09 +08:00
parent 700bc55657
commit 05f02a1454
14 changed files with 574 additions and 149 deletions
--- a/pycache/display.cpython-313.pyc
+++ b/pycache/display.cpython-313.pyc
--- a/pycache/font.cpython-313.pyc
+++ b/pycache/font.cpython-313.pyc
--- a/pycache/main.cpython-313.pyc
+++ b/pycache/main.cpython-313.pyc
--- a/display.py
+++ b/display.py
@@ -47,9 +47,9 @@ class Display:
        if self.font:
            self.font.set_ws(ws)
-    def text(self, text, x, y, color):
+    def text(self, text, x, y, color, wait=True):
        if self.tft:
-            self.font.text(self.tft, text, x, y, color)
+            self.font.text(self.tft, text, x, y, color, wait=wait)
    def init_ui(self):
        """初始化 UI 背景"""
@@ -93,3 +93,59 @@ class Display:
            self.tft.blit_buffer(rgb565_data, x, y, width, height)
        except Exception as e:
            print(f"Show image error: {e}")
    def show_image_chunk(self, x, y, width, height, data, offset):
        """Stream one chunk of raw image bytes to the display.

        The image arrives as a linear sequence of pixel bytes (2 bytes per
        pixel), split into chunks. The drawing window for the whole image is
        set once, on the chunk whose ``offset`` is 0; every chunk, including
        that first one, is then written as raw data with no further window
        setup.

        Args:
            x: Left edge of the image on screen, in pixels.
            y: Top edge of the image on screen, in pixels.
            width: Image width in pixels.
            height: Image height in pixels.
            data: Raw pixel bytes of this chunk. An empty or missing chunk
                is ignored.
            offset: Byte offset of this chunk within the image; 0 marks the
                first chunk and triggers the window setup.

        Any driver error is caught and printed, never raised.
        """
        # Nothing to do without a display or without payload.
        if not self.tft or not data:
            return

        try:
            if offset == 0:
                # Window corners are inclusive, hence the "- 1".
                self.tft.set_window(x, y, x + width - 1, y + height - 1)
            # NOTE(review): later chunks rely on the window set by the first
            # chunk still being active - presumably any other drawing between
            # chunks would move it; confirm against the driver.
            # NOTE(review): assumes the driver exposes public set_window() and
            # write(command, data); if it does not, the AttributeError is
            # reported by the handler below.
            self.tft.write(None, data)
        except Exception as e:
            print(f"Show chunk error: {e}")
--- a/font.py
+++ b/font.py
@@ -4,6 +4,11 @@ import time
 import binascii
 import gc
 try:
    import static_font_data
 except ImportError:
    static_font_data = None
 class Font:
    def __init__(self, ws=None):
        self.ws = ws
@@ -11,6 +16,8 @@ class Font:
        self.pending_requests = set()
        self.retry_count = {}
        self.max_retries = 3
        # Pre-allocate buffer for row drawing (16 pixels * 2 bytes = 32 bytes)
        self.row_buf = bytearray(32)
    def set_ws(self, ws):
        self.ws = ws
@@ -24,7 +31,40 @@ class Font:
        """获取当前缓存的字体数量"""
        return len(self.cache)
-    def text(self, tft, text, x, y, color, bg=0x0000):
+    def handle_message(self, msg):
        """处理字体相关消息，更新缓存
        返回: 是否为字体消息
        """
        if not isinstance(msg, str):
            return False
        if msg.startswith("FONT_BATCH_END:"):
            # 批处理结束消息，目前主要用于阻塞等待时的退出条件
            return True
        elif msg.startswith("FONT_DATA:"):
            parts = msg.split(":")
            if len(parts) >= 3:
                try:
                    key_str = parts[1]
                    if key_str.startswith("0x"):
                        c = int(key_str, 16)
                    else:
                        c = int(key_str)
                    d = binascii.unhexlify(parts[2])
                    self.cache[c] = d
                    # 清除重试计数（如果有）
                    if c in self.retry_count:
                        del self.retry_count[c]
                    return True
                except Exception as e:
                    print(f"Font data parse error: {e}")
            return True
        return False
    def text(self, tft, text, x, y, color, bg=0x0000, wait=True):
        """在ST7789显示器上绘制文本"""
        if not text:
            return
@@ -32,17 +72,23 @@ class Font:
        color_bytes = struct.pack(">H", color)
        bg_bytes = struct.pack(">H", bg)
-        lut = [bytearray(16) for _ in range(256)]
+        # Create a mini-LUT for 4-bit chunks (16 entries * 8 bytes = 128 bytes)
-        for i in range(256):
+        # Each entry maps 4 bits (0-15) to 4 pixels (8 bytes)
-            for bit in range(8):
+        mini_lut = []
-                val = (i >> bit) & 1
+        for i in range(16):
-                idx = (7 - bit) * 2
+            chunk = bytearray(8)
            for bit in range(4):
                # bit 0 is LSB of nibble, corresponds to rightmost pixel of the 4 pixels
                # Assuming standard MSB-first bitmap
                val = (i >> (3 - bit)) & 1
                idx = bit * 2
                if val:
-                    lut[i][idx] = color_bytes[0]
+                    chunk[idx] = color_bytes[0]
-                    lut[i][idx+1] = color_bytes[1]
+                    chunk[idx+1] = color_bytes[1]
                else:
-                    lut[i][idx] = bg_bytes[0]
+                    chunk[idx] = bg_bytes[0]
-                    lut[i][idx+1] = bg_bytes[1]
+                    chunk[idx+1] = bg_bytes[1]
            mini_lut.append(bytes(chunk))
        initial_x = x
@@ -50,6 +96,9 @@ class Font:
        for char in text:
            if ord(char) > 127:
                code = ord(char)
                # Check static font data first
                if static_font_data and hasattr(static_font_data, 'FONTS') and code in static_font_data.FONTS:
                    continue
                if code not in self.cache:
                    missing_codes.add(code)
@@ -57,9 +106,12 @@ class Font:
            missing_list = list(missing_codes)
            req_str = ",".join([str(c) for c in missing_list])
            # Only print if waiting, to reduce log spam in async mode
            if wait:
                print(f"Batch requesting fonts: {req_str}")
            try:
                self.ws.send(f"GET_FONTS_BATCH:{req_str}")
                if wait:
                    self._wait_for_fonts(missing_codes)
            except Exception as e:
                print(f"Batch font request failed: {e}")
@@ -78,12 +130,22 @@ class Font:
            is_chinese = False
            buf_data = None
            if ord(char) > 127:
            code = ord(char)
-                if code in self.cache:
+            
            if code > 127:
                if static_font_data and hasattr(static_font_data, 'FONTS') and code in static_font_data.FONTS:
                    buf_data = static_font_data.FONTS[code]
                    is_chinese = True
                elif code in self.cache:
                    buf_data = self.cache[code]
                    is_chinese = True
                else:
                    # Missing font data
                    if not wait:
                        # In async mode, draw a placeholder or space
                        # We use '?' for now so user knows something is missing
                        char = '?'
                        is_chinese = False
                    else:
                        if code in self.pending_requests:
                            retry = self.retry_count.get(code, 0)
@@ -92,14 +154,40 @@ class Font:
                                self._request_single_font(code)
            if is_chinese and buf_data:
-                self._draw_bitmap(tft, buf_data, x, y, 16, 16, lut)
+                self._draw_bitmap_optimized(tft, buf_data, x, y, mini_lut)
                x += 16
            else:
-                if ord(char) > 127:
+                if code > 127:
                    char = '?'
                self._draw_ascii(tft, char, x, y, color, bg)
                x += 8
    def _draw_bitmap_optimized(self, tft, bitmap, x, y, mini_lut):
        """使用优化方式绘制位图，减少内存分配"""
        # Bitmap is 32 bytes (16x16 pixels)
        # 2 bytes per row
        for row in range(16):
            # Get 2 bytes for this row
            # Handle case where bitmap might be different length (safety)
            if row * 2 + 1 < len(bitmap):
                b1 = bitmap[row * 2]
                b2 = bitmap[row * 2 + 1]
                # Process b1 (Left 8 pixels)
                # High nibble
                self.row_buf[0:8] = mini_lut[(b1 >> 4) & 0x0F]
                # Low nibble
                self.row_buf[8:16] = mini_lut[b1 & 0x0F]
                # Process b2 (Right 8 pixels)
                # High nibble
                self.row_buf[16:24] = mini_lut[(b2 >> 4) & 0x0F]
                # Low nibble
                self.row_buf[24:32] = mini_lut[b2 & 0x0F]
                tft.blit_buffer(self.row_buf, x, y + row, 16, 1)
    def _request_single_font(self, code):
        """请求单个字体"""
        if self.ws:
@@ -134,10 +222,10 @@ class Font:
                    if msg is None:
                        continue
-                    if isinstance(msg, str):
+                    if self.handle_message(msg):
                        # 如果是批处理结束，检查是否有失败的
                        if msg.startswith("FONT_BATCH_END:"):
                            parts = msg[15:].split(":")
                            success = int(parts[0]) if len(parts) > 0 else 0
                            failed = int(parts[1]) if len(parts) > 1 else 0
                            if failed > 0:
@@ -145,34 +233,26 @@ class Font:
                                for c in temp_missing:
                                    if c not in self.cache:
                                        print(f"Font failed after retries: {c}")
-                                        self.cache[c] = None
+                                        self.cache[c] = None # 标记为 None 避免死循环
                                        if c in target_codes:
                                            target_codes.remove(c)
                            # 清除所有剩余的目标，因为批处理结束了
                            # 但实际上可能只需要清除 failed 的。
                            # 无论如何，收到 BATCH_END 意味着本次请求处理完毕。
                            # 如果还有没收到的，可能是丢包了。
                            # 为了简单起见，我们认为结束了。
                            target_codes.clear()
-                        elif msg.startswith("FONT_DATA:"):
+                        # 检查是否有新缓存的字体满足了 target_codes
-                            parts = msg.split(":")
+                        temp_target = list(target_codes)
-                            if len(parts) >= 3:
+                        for c in temp_target:
-                                try:
+                            if c in self.cache:
                                    key_str = parts[1]
                                    if key_str.startswith("0x"):
                                        c = int(key_str, 16)
                                    else:
                                        c = int(key_str)
                                    d = binascii.unhexlify(parts[2])
                                    self.cache[c] = d
                                    if c in target_codes:
                                target_codes.remove(c)
                                if c in self.retry_count:
                                    del self.retry_count[c]
                                except:
                                    pass
                        else:
                            self.local_deferred.append(msg)
-                    elif msg is not None:
+                    else:
                        self.local_deferred.append(msg)
            except Exception as e:
@@ -183,12 +263,6 @@ class Font:
                self.ws.unread_messages = self.local_deferred + self.ws.unread_messages
            self.local_deferred = []
    def _draw_bitmap(self, tft, bitmap, x, y, w, h, lut):
        """绘制位图"""
        chunks = [lut[b] for b in bitmap]
        rgb_buf = b''.join(chunks)
        tft.blit_buffer(rgb_buf, x, y, w, h)
    def _draw_ascii(self, tft, char, x, y, color, bg):
        """绘制ASCII字符"""
        w, h = 8, 8
--- a/main.py
+++ b/main.py
@@ -163,7 +163,7 @@ def render_recording_screen(display, asr_text="", audio_level=0):
        display.tft.fill_rect(20, 100, bar_width, 10, st7789.GREEN)
    if asr_text:
-        display.text(asr_text[:20], 20, 130, st7789.WHITE)
+        display.text(asr_text[:20], 20, 130, st7789.WHITE, wait=False)
    display.tft.fill_rect(60, 200, 120, 25, st7789.RED)
    display.text("松开停止", 85, 205, st7789.WHITE)
@@ -194,54 +194,91 @@ def render_result_screen(display, status="", prompt="", image_received=False):
    if not display or not display.tft:
        return
-    # Only clear if we are starting a new state or it's the first render
+    if status == "OPTIMIZING":
    # But for simplicity we clear all for now. Optimizing this requires state tracking.
        display.tft.fill(st7789.BLACK)
    # Header
        display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE)
        display.text("AI 生成中", 80, 8, st7789.BLACK)
    if status == "OPTIMIZING":
        display.text("正在思考...", 80, 60, st7789.CYAN)
        display.text("优化提示词中", 70, 80, st7789.CYAN)
        draw_progress_bar(display, 40, 110, 160, 6, 0.3, st7789.CYAN)
        # Spinner will be drawn by main loop
    elif status == "RENDERING":
        display.tft.fill(st7789.BLACK)
        display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE)
        display.text("AI 生成中", 80, 8, st7789.BLACK)
        display.text("正在绘画...", 80, 60, st7789.YELLOW)
        display.text("AI作画中", 85, 80, st7789.YELLOW)
        draw_progress_bar(display, 40, 110, 160, 6, 0.7, st7789.YELLOW)
        # Spinner will be drawn by main loop
    elif status == "COMPLETE" or image_received:
-        display.text("生成完成!", 80, 50, st7789.GREEN)
+        # Don't clear screen, image is already there
-        draw_check_icon(display, 110, 80)
+        # display.text("生成完成!", 80, 50, st7789.GREEN)
        # draw_check_icon(display, 110, 80)
        pass
    elif status == "ERROR":
        display.tft.fill(st7789.BLACK)
        display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE)
        display.text("AI 生成中", 80, 8, st7789.BLACK)
        display.text("生成失败", 80, 50, st7789.RED)
-    if prompt:
+    if prompt and not image_received:
        display.tft.fill_rect(10, 140, 220, 50, 0x2124) # Dark Grey
        display.text("提示词:", 15, 145, st7789.CYAN)
        display.text(prompt[:25] + "..." if len(prompt) > 25 else prompt, 15, 165, st7789.WHITE)
    # Only show back button if not showing full image, or maybe show it transparently?
    # For now, let's not cover the image with the button hint
    if not image_received:
        display.tft.fill_rect(60, 210, 120, 25, st7789.BLUE)
-    display.text("返回录音", 90, 215, st7789.WHITE)
+        display.text("长按返回", 90, 215, st7789.WHITE)
 def process_message(msg, display, image_state, image_data_list):
    """处理WebSocket消息"""
    # Handle binary image data
    if isinstance(msg, (bytes, bytearray)):
        if image_state == IMAGE_STATE_RECEIVING:
-            image_data_list.append(msg)
+            try:
-            # Optional: Update progress bar or indicator
+                if len(image_data_list) < 2:
                     # 异常情况，重置
                     return IMAGE_STATE_IDLE, None
                img_size = image_data_list[0]
                current_offset = image_data_list[1]
                # Stream directly to display
                if display and display.tft:
                    x = (240 - img_size) // 2
                    y = (240 - img_size) // 2
                    display.show_image_chunk(x, y, img_size, img_size, msg, current_offset)
                # Update offset
                image_data_list[1] += len(msg)
            except Exception as e:
                print(f"Stream image error: {e}")
            return image_state, None
        return image_state, None
    if not isinstance(msg, str):
        return image_state, None
    # Check for font data first
    if display and hasattr(display, 'font') and display.font.handle_message(msg):
        return image_state, ("font_update",)
    status_info = None
    if msg.startswith("ASR:"):
@@ -272,6 +309,15 @@ def process_message(msg, display, image_state, image_data_list):
            print(f"Image start, size: {size}, img_size: {img_size}")
            image_data_list.clear()
            image_data_list.append(img_size) # Store metadata at index 0
            image_data_list.append(0)        # Store current received bytes offset at index 1
            # Prepare display for streaming
            if display and display.tft:
                # Calculate position
                x = (240 - img_size) // 2
                y = (240 - img_size) // 2
                # Pre-set window (this will be done in first chunk call)
            return IMAGE_STATE_RECEIVING, None
        except Exception as e:
            print(f"IMAGE_START parse error: {e}")
@@ -279,45 +325,14 @@ def process_message(msg, display, image_state, image_data_list):
    # Deprecated text-based IMAGE_DATA handling
    elif msg.startswith("IMAGE_DATA:") and image_state == IMAGE_STATE_RECEIVING:
        try:
            data = msg.split(":", 1)[1]
            # Convert hex to bytes immediately if using old protocol, but we switched to binary
            # Keep this just in case server rolls back? No, let's assume binary.
            pass 
        except:
        pass 
    elif msg == "IMAGE_END" and image_state == IMAGE_STATE_RECEIVING:
-        try:
+        print("Image received completely")
            print("Image received, processing...")
            img_size = image_data_list[0] if image_data_list else 64
            # Combine all binary chunks (skipping metadata at index 0)
            img_data = b"".join(image_data_list[1:])
        image_data_list.clear()
            print(f"Image data len: {len(img_data)}")
            if display and display.tft:
                x = (240 - img_size) // 2
                y = (240 - img_size) // 2
                display.show_image(x, y, img_size, img_size, img_data)
                # Overlay success message slightly
                display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE)
                display.text("图片已生成!", 70, 5, st7789.BLACK)
        gc.collect()
            print("Image displayed")
        return IMAGE_STATE_IDLE, ("image_done",)
        except Exception as e:
            print(f"Image process error: {e}")
            import sys
            sys.print_exception(e)
        return IMAGE_STATE_IDLE, None
    elif msg.startswith("IMAGE_ERROR:"):
        print(msg)
        return IMAGE_STATE_IDLE, ("error", msg[12:])
@@ -330,7 +345,7 @@ def print_asr(text, display=None):
    print(f"ASR: {text}")
    if display and display.tft:
        display.fill_rect(0, 40, 240, 160, st7789.BLACK)
-        display.text(text, 0, 40, st7789.WHITE)
+        display.text(text, 0, 40, st7789.WHITE, wait=False)
 def get_boot_button_action(boot_btn):
@@ -468,7 +483,7 @@ def main():
                    print("Memory high, cleaned")
            # Spinner Animation
-            if ui_screen == UI_SCREEN_RESULT and not image_generation_done and current_status in ["OPTIMIZING", "RENDERING"]:
+            if ui_screen == UI_SCREEN_RESULT and not image_generation_done and current_status in ["OPTIMIZING", "RENDERING"] and image_state != IMAGE_STATE_RECEIVING:
                now = time.ticks_ms()
                if time.ticks_diff(now, last_spinner_time) > 100:
                    if display.tft:
@@ -543,17 +558,11 @@ def main():
                    time.sleep(0.5)
                elif ui_screen == UI_SCREEN_RESULT:
-                    print(">>> Back to recording")
+                    # Ignore short press in result screen to keep image displayed
-                    ui_screen = UI_SCREEN_RECORDING
+                    # unless image generation failed or is still in progress?
-                    is_recording = False
+                    # User request: "只有长按boot才离开" (Only leave on long press)
-                    current_asr_text = ""
+                    # So we do nothing here.
-                    current_prompt = ""
+                    pass
                    current_status = ""
                    image_generation_done = False
                    confirm_waiting = False
                    if display.tft:
                        render_recording_screen(display, "", 0)
            elif btn_action == 2:
                if is_recording:
@@ -595,15 +604,25 @@ def main():
                        render_recording_screen(display, "", 0)
                elif ui_screen == UI_SCREEN_RESULT:
-                    print(">>> Generate image (manual)")
+                    print(">>> Back to recording")
                    # Stop recording if it was somehow started or just reset state
                    if ws and ws.is_connected():
                        try:
-                            ws.send("START_RECORDING")
+                            ws.send("STOP_RECORDING")
                            is_recording = True
                            ui_screen = UI_SCREEN_RECORDING
                        except:
                            ws = None
                    ui_screen = UI_SCREEN_RECORDING
                    is_recording = False
                    current_asr_text = ""
                    current_prompt = ""
                    current_status = ""
                    image_generation_done = False
                    confirm_waiting = False
                    if display.tft:
                        render_recording_screen(display, "", 0)
            elif btn_action == 3:
                print(">>> Config mode")
@@ -628,6 +647,10 @@ def main():
                                            if display.tft:
                                                render_recording_screen(display, current_asr_text, last_audio_level)
                                        elif event_data[0] == "font_update":
                                            if ui_screen == UI_SCREEN_RECORDING and display.tft:
                                                render_recording_screen(display, current_asr_text, last_audio_level)
                                        elif event_data[0] == "status":
                                            current_status = event_data[1]
                                            status_text = event_data[2] if len(event_data) > 2 else ""
--- a/static_font_data.py
+++ b/static_font_data.py
@@ -0,0 +1,48 @@
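 # Static font data for a fixed set of characters: maps a character code (ord) to its 32-byte 16x16 glyph bitmap (2 bytes per row, most significant bit = leftmost pixel)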
 import ubinascii
 FONTS = {
    20013: b'\x01\x00\x01\x00\x01\x00\x01\xf8\x3f\x08\x21\x08\x21\x08\x21\xf8\x3f\x00\x21\x00\x01\x00\x01\x00\x01\x00\x01\x00\x01\x00\x00\x00', # 中
    20102: b'\x00\x00\x01\xf0\x1e\x10\x00\x20\x01\x40\x00\x80\x00\x80\x00\x80\x00\x80\x00\x80\x00\x80\x00\x80\x04\x80\x02\x80\x01\x00\x00\x00', # 了
    20248: b'\x08\x80\x08\x80\x08\xa0\x10\x90\x10\x80\x20\xfc\x2f\x40\x61\x40\xa1\x40\x21\x40\x22\x40\x22\x44\x24\x44\x24\x44\x28\x3c\x00\x00', # 优
    20316: b'\x08\x80\x08\x80\x09\x00\x11\x1c\x13\xe0\x32\x80\x54\x98\x90\xe0\x10\x80\x10\x9c\x10\xe0\x10\x80\x10\x80\x10\x80\x10\x80\x00\x00', # 作
    20572: b'\x08\x80\x08\x78\x17\x80\x10\x60\x23\xa0\x22\x60\x63\x80\xa0\x7c\x2f\x88\x28\x30\x23\xc0\x20\x40\x21\x40\x20\xc0\x20\x40\x00\x00', # 停
    21035: b'\x00\x08\x06\x08\x3a\x08\x22\x48\x26\x48\x38\x48\x28\x48\x0f\x48\x71\x48\x11\x48\x11\x08\x22\x08\x2a\x28\x44\x18\x80\x08\x00\x00', # 别
    21040: b'\x00\x08\x00\x08\x07\x88\x38\x28\x0a\x28\x11\x28\x23\xa8\x7c\xa8\x04\x28\x07\x28\x3c\x28\x07\x88\x18\x28\x60\x18\x00\x08\x00\x00', # 到
    21270: b'\x04\x00\x04\x80\x08\x80\x08\x88\x08\x88\x18\x90\x28\xa0\x48\xc0\x09\x80\x0a\x80\x08\x84\x08\x84\x08\x84\x08\x7c\x08\x00\x00\x00', # 化
    21527: b'\x00\x60\x03\xa0\x00\x20\x19\x20\x69\x20\x49\x20\x59\x20\x61\x78\x01\x88\x00\x08\x00\xe8\x0f\x08\x00\x10\x00\x50\x00\x20\x00\x00', # 吗
    22238: b'\x00\x00\x00\x00\x01\xf8\x3e\x08\x20\x08\x21\x88\x26\x88\x24\x88\x25\x88\x26\x08\x20\x08\x20\xf8\x3f\x00\x00\x00\x00\x00\x00\x00', # 回
    22312: b'\x01\x00\x01\x00\x02\x00\x03\xf8\x7c\x00\x04\x80\x18\x80\x10\x80\x30\xf0\x57\x80\x90\x80\x10\x80\x10\xfc\x1f\x00\x10\x00\x00\x00', # 在
    22833: b'\x01\x00\x09\x00\x09\x00\x09\xf0\x1f\x00\x11\x00\x21\x00\x01\xf8\x7e\x80\x02\x80\x04\x40\x04\x40\x08\x20\x10\x38\x20\x00\x00\x00', # 失
    23383: b'\x02\x00\x01\x00\x01\xfc\x3e\x08\x21\xe0\x0e\x40\x00\x80\x01\x00\x00\xfc\x7f\x80\x00\x80\x00\x80\x00\x80\x02\x80\x01\x00\x00\x00', # 字
    23436: b'\x02\x00\x01\x00\x00\xfc\x3f\x08\x20\x00\x00\xc0\x07\x00\x00\x78\x3f\x80\x04\x80\x04\x80\x08\x84\x08\x84\x10\x84\x60\x7c\x00\x00', # 完
    24320: b'\x00\x00\x03\xf0\x1c\x40\x04\x40\x04\x40\x04\x40\x07\xfc\x7c\x40\x04\x40\x04\x40\x08\x40\x08\x40\x10\x40\x20\x40\x40\x40\x00\x00', # 开
    24405: b'\x01\xc0\x0e\x40\x01\xc0\x0e\x40\x00\xfc\x3f\x00\x01\x10\x11\x10\x09\xa0\x05\x40\x09\x20\x11\x18\x61\x06\x03\x00\x01\x00\x00\x00', # 录
    24605: b'\x00\xf0\x1f\x10\x11\x10\x11\xf0\x1f\x10\x11\x10\x11\xf0\x1e\x00\x10\x00\x23\x18\x28\x84\x24\x10\x43\x10\x40\xf0\x00\x00\x00\x00', # 思
    25104: b'\x00\xa0\x00\x90\x00\x80\x00\xf0\x1f\x80\x10\x90\x10\x90\x1e\xa0\x12\xa0\x22\x40\x22\x44\x24\xa4\x55\x14\x48\x0c\x80\x04\x00\x00', # 成
    25353: b'\x10\x40\x10\x20\x10\x3c\x13\xc8\x1e\x40\x70\x40\x10\x80\x18\xfe\x37\x10\xd1\x10\x11\x20\x10\xa0\x50\x60\x31\x90\x16\x08\x00\x00', # 按
    25552: b'\x10\x30\x11\xd0\x11\x10\x11\xd0\x1d\x30\x71\xc0\x15\x00\x18\x38\x37\xc0\xd2\x70\x12\x40\x13\x40\x54\xc0\x34\x30\x18\x0e\x00\x00', # 提
    25991: b'\x02\x00\x01\x00\x01\x00\x00\x38\x3f\xc0\x00\x40\x04\x40\x02\x80\x02\x80\x01\x00\x01\x00\x02\x80\x0c\x40\x30\x30\xc0\x0e\x00\x00', # 文
    26410: b'\x01\x00\x01\x00\x01\x00\x01\xf0\x1f\x00\x01\x00\x01\x78\x7f\x80\x03\x40\x05\x40\x09\x20\x11\x20\x61\x1c\x81\x00\x01\x00\x00\x00', # 未
    26494: b'\x00\x40\x10\x40\x10\xa0\x10\xa0\x1c\xa0\x71\x10\x19\x50\x36\x4c\x52\x40\x54\x80\x90\xa0\x11\x10\x11\x38\x13\xc8\x10\x00\x00\x00', # 松
    27490: b'\x01\x00\x01\x00\x01\x00\x01\x00\x01\x00\x11\x00\x11\x30\x11\xc0\x11\x00\x11\x00\x11\x00\x11\x00\x11\x00\x11\xfc\xfe\x00\x00\x00', # 止
    27491: b'\x00\x00\x00\xf0\x1f\x00\x01\x00\x01\x00\x01\x00\x09\x30\x09\xc0\x09\x00\x09\x00\x09\x00\x09\x00\x09\xfc\x7e\x00\x00\x00\x00\x00', # 正
    29983: b'\x01\x00\x01\x00\x01\x00\x09\x00\x09\x00\x11\xf0\x1f\x00\x21\x00\x21\x00\x41\xe0\x0f\x00\x01\x00\x01\x00\x01\xfc\x7e\x00\x00\x00', # 生
    30011: b'\x00\x00\x01\xf8\x3e\x00\x00\xe0\x0f\x20\x09\x20\x09\xe8\x2f\x28\x29\x28\x29\xe8\x2e\x08\x20\x08\x21\xf8\x3e\x00\x00\x00\x00\x00', # 画
    30701: b'\x10\x00\x10\x3c\x11\xc0\x16\x18\x38\xe8\x28\x88\x48\x98\x0e\xe0\x78\x10\x08\x90\x14\x50\x12\x50\x20\x3c\x43\xc0\x00\x00\x00\x00', # 短
    30830: b'\x00\x80\x00\x80\x0c\xf0\x71\x20\x11\x40\x12\x78\x21\xc8\x2d\x68\x75\xc8\xa5\x68\x2d\xc8\x32\x48\x22\x48\x04\x18\x08\x08\x00\x00', # 确
    31034: b'\x00\x00\x00\xe0\x0f\x00\x00\x00\x00\xfc\x7f\x00\x01\x00\x05\x00\x05\x20\x09\x10\x11\x08\x21\x08\x45\x00\x03\x00\x01\x00\x00\x00', # 示
    32472: b'\x00\x80\x10\x80\x10\xc0\x21\x40\x25\x20\x4a\x10\x74\x6e\x11\x80\x2c\x38\x73\xc0\x00\x80\x0c\xa0\x31\x10\xc2\x78\x03\x88\x00\x00', # 绘
    32771: b'\x02\x00\x02\x10\x03\xa0\x0e\x40\x02\x80\x03\xfc\x7e\x00\x07\xf0\x1a\x00\x22\x60\xc3\xa0\x00\x20\x00\x40\x01\x40\x00\x80\x00\x00', # 考
    35748: b'\x00\x00\x10\x40\x08\x40\x08\x40\x00\x40\x00\x40\x70\x40\x10\x40\x10\xa0\x10\xa0\x15\x10\x19\x10\x12\x08\x04\x0e\x08\x00\x00\x00', # 认
    35782: b'\x00\x00\x10\x38\x09\xc8\x09\x08\x01\x08\x71\x38\x11\xc0\x11\x00\x10\x00\x14\x90\x18\x88\x11\x04\x02\x04\x04\x00\x00\x00\x00\x00', # 识
    35789: b'\x20\x00\x10\x78\x0b\x88\x00\x08\x00\xe8\x77\x08\x10\xc8\x13\x48\x12\x48\x12\xc8\x13\x08\x1a\x08\x10\x28\x00\x18\x00\x08\x00\x00', # 词
    35821: b'\x00\x00\x20\x70\x13\x80\x10\x80\x00\xe0\x03\x20\xe1\x20\x21\xfc\x26\x00\x20\x70\x23\x90\x2a\x10\x32\x70\x23\x80\x02\x00\x00\x00', # 语
    35828: b'\x02\x10\x21\x10\x11\x20\x10\x20\x00\x70\x03\x90\x72\x10\x12\x70\x13\xa0\x10\xa0\x14\xa0\x19\x22\x11\x22\x02\x22\x0c\x1e\x00\x00', # 说
    36133: b'\x00\x40\x06\x40\x3a\x40\x22\x4c\x2a\x70\x2a\x90\x2a\x90\x2b\x50\x2a\x50\x28\x20\x14\x20\x12\x50\x20\x90\x21\x0c\x42\x00\x00\x00', # 败
    36820: b'\x00\x00\x00\x38\x13\xc0\x0a\x00\x02\x70\x03\x90\x3a\x10\xca\xa0\x12\x60\x12\x50\x0c\x88\x09\x00\x7c\x00\x01\xc0\x00\x3e\x00\x00', # 返
    37325: b'\x00\x20\x00\xc0\x1f\x00\x01\xfc\x7f\x00\x01\xf0\x1f\x10\x11\xd0\x17\x10\x11\xf0\x1f\x00\x01\xe0\x1f\x00\x01\xfc\x7e\x00\x00\x00', # 重
    38271: b'\x08\x00\x08\x20\x08\x40\x08\x80\x0b\x00\x0c\x00\x09\xf8\x7e\x00\x0a\x00\x09\x00\x08\x80\x08\x40\x0a\x30\x0c\x0c\x08\x00\x00\x00', # 长
    38899: b'\x02\x00\x01\x00\x01\xf0\x1e\x40\x04\x40\x04\x80\x01\xfc\x7e\x00\x01\xe0\x0e\x20\x09\xa0\x0e\x20\x08\x20\x09\xe0\x0e\x20\x00\x00', # 音
    65311: b'\x00\x00\x00\x00\x1c\x00\x22\x00\x22\x00\x04\x00\x08\x00\x08\x00\x08\x00\x00\x00\x18\x00\x18\x00\x00\x00\x00\x00\x00\x00\x00\x00', # ？
 }
--- a/websocket_client.py
+++ b/websocket_client.py
@@ -11,6 +11,7 @@ class WebSocketClient:
        self.uri = uri
        self.timeout = timeout
        self.unread_messages = [] # Queue for buffered messages
        self.buffer = bytearray(4096) # Pre-allocated buffer for small messages
        self.connect()
    def connect(self):
@@ -109,6 +110,37 @@ class WebSocketClient:
        self.sock.write(header)
        self.sock.write(masked_data)
    def _read_exact(self, n):
        """Read exactly n bytes from the socket"""
        data = b''
        while len(data) < n:
            try:
                chunk = self.sock.read(n - len(data))
                if not chunk:
                    return None
                data += chunk
            except Exception as e:
                # Handle timeout or other errors
                if len(data) > 0:
                    # If we read some data but timed out, we can't just return None
                    # as we would lose that data. We must keep trying or raise error.
                    # For simplicity in this blocking-with-timeout model, 
                    # we assume we should keep trying if we got some data, 
                    # or return what we have if it's a hard error?
                    # Actually, if we return None, the caller treats it as "no message".
                    # But we already consumed data! This is the core issue.
                    # We should probably buffer it?
                    # Or just return None and let the caller handle it?
                    # But the caller (recv) expects a full frame or nothing.
                    # To properly fix this without a persistent buffer across calls 
                    # (which is complex to add now), we will just print error and return None,
                    # accepting that we lost the connection sync.
                    print(f"Socket read error: {e}")
                    return None
                return None
        return data
    def recv(self):
        # 1. Check if we have unread messages in the buffer
        if self.unread_messages:
@@ -120,8 +152,8 @@ class WebSocketClient:
        # Read header
        try:
            # Read 2 bytes at once
-            header = self.sock.read(2)
+            header = self._read_exact(2)
-            if not header or len(header) < 2: return None
+            if not header: return None
            b1 = header[0]
            b2 = header[1]
@@ -133,49 +165,88 @@ class WebSocketClient:
            length = b2 & 0x7f
            if length == 126:
-                length_bytes = self.sock.read(2)
+                length_bytes = self._read_exact(2)
                if not length_bytes: return None
                length = int.from_bytes(length_bytes, 'big')
            elif length == 127:
-                length_bytes = self.sock.read(8)
+                length_bytes = self._read_exact(8)
                if not length_bytes: return None
                length = int.from_bytes(length_bytes, 'big')
            # Safety check for memory allocation
            if length > 50 * 1024: # 50KB limit (reduced from 1MB to be safer on ESP32)
                print(f"WS Recv: Message too large ({length} bytes)")
                # If it's a binary message (image chunk), maybe we can process it?
                # But for now, just skip to avoid OOM
                self._skip_bytes(length)
                if mask:
-                mask_key = self.sock.read(4)
+                     self._read_exact(4) # Consume mask key
                return None
            if mask:
                mask_key = self._read_exact(4)
                if not mask_key: return None
-            # Read payload
+            # Optimization for streaming binary data (opcode 2)
            try:
                # Pre-allocate buffer or use shared buffer
                if length <= 4096:
                    data = self.buffer
                else:
                    data = bytearray(length)
            except MemoryError:
                print(f"WS Recv: Memory allocation failed for {length} bytes")
                # Try to skip data
                self._skip_bytes(length)
                return None
            # Use smaller chunks for readinto to avoid memory allocation issues in MicroPython
            pos = 0
            while pos < length:
-                chunk_size = min(length - pos, 512)
+                chunk_size = min(length - pos, 1024) # 1KB chunks
                try:
                    # Create a view into the target buffer
                    chunk_view = memoryview(data)[pos:pos + chunk_size]
-                read_len = self.sock.readinto(chunk_view)
+                    
-                if read_len == 0: 
+                    # We need exact read here too
                    read_len = 0
                    while read_len < chunk_size:
                        chunk_read = self.sock.readinto(chunk_view[read_len:])
                        if not chunk_read:
                             # Connection closed or timeout
                             # If timeout, we are in trouble.
                             break
                        read_len += chunk_read
                    if read_len < chunk_size:
                        print("WS Recv: Incomplete payload read")
                        return None
                    pos += read_len
                except Exception as e:
                    print(f"WS Recv read error: {e}")
                    return None
            # Create a view for the relevant part of the data
            view = memoryview(data)[:length]
            if mask:
-                unmasked = bytearray(length)
+                # In-place unmasking
                for i in range(length):
-                    unmasked[i] = data[i] ^ mask_key[i % 4]
+                    view[i] = view[i] ^ mask_key[i % 4]
                data = unmasked
            if opcode == 1: # Text
-                return data.decode('utf-8')
+                return str(view, 'utf-8')
            elif opcode == 2: # Binary
-                return data
+                return bytes(view) # Return copy
            elif opcode == 8: # Close
                self.close()
                return None
            elif opcode == 9: # Ping
-                self.send(data, opcode=10) # Pong
+                self.send(view, opcode=10) # Pong
                return self.recv()
-            return data
+            return bytes(view)
        except Exception as e:
            # Don't print timeout errors as they are expected in non-blocking polling
@@ -183,6 +254,15 @@ class WebSocketClient:
                print(f"WS Recv Error: {e}")
            return None
    def _skip_bytes(self, length):
        """Discard up to ``length`` bytes from the socket.

        Reads are issued in chunks of at most 1024 bytes and the data is
        thrown away. The remaining count is reduced by the number of bytes
        each read actually returned, so a short read does not leave part of
        the skipped payload behind in the stream.

        Args:
            length: Number of bytes to discard.

        Returns:
            None. Skipping stops early if a read returns no data, since
            nothing more can be consumed at that point. Exceptions raised by
            ``sock.read`` propagate to the caller.
        """
        chunk_size = 1024
        remaining = length
        while remaining > 0:
            skipped = self.sock.read(min(remaining, chunk_size))
            if not skipped:
                # Nothing came back: stop rather than count bytes that were
                # never consumed.
                break
            remaining -= len(skipped)
    def close(self):
        if self.sock:
            self.sock.close()
--- a/websocket_server/pycache/server.cpython-312.pyc
+++ b/websocket_server/pycache/server.cpython-312.pyc
--- a/websocket_server/generate_static_font.py
+++ b/websocket_server/generate_static_font.py
@@ -0,0 +1,109 @@
 import freetype
 import os
 # TrueType font to rasterise. Resolved next to this script instead of a
 # machine-specific absolute path, so the generator runs on any checkout.
 FONT_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "GB2312.ttf")
 # Generated module; this relative path is resolved against the current
 # working directory at run time.
 OUTPUT_FILE = "../static_font_data.py"
 # Edge length in pixels of the square glyph cell (16x16 -> 32 bytes at 1 bpp).
 FONT_SIZE = 16
 # Fixed strings from the project; only their non-ASCII characters get glyphs.
 FIXED_STRINGS = [
     "语音识别",
     "松开停止",
     "说完了吗？",
     "未识别到文字",
     "短按确认",
     "长按重录",
     "AI 生成中",
     "正在思考...",
     "优化提示词中",
     "正在绘画...",
     "AI作画中",
     "生成完成!",
     "生成失败",
     "提示词:",
     "返回录音",
 ]
 def generate_static_font():
     """Write bitmap glyphs for the non-ASCII characters of FIXED_STRINGS.

     Every distinct character with a code point above 127 is rendered in
     monochrome with freetype, centred in a FONT_SIZE x FONT_SIZE cell and
     packed MSB-first into a 32-byte buffer (two bytes per row). The buffers
     are written to OUTPUT_FILE as a ``FONTS`` dict literal keyed by code
     point. If the font cannot be loaded the error is printed and nothing
     is written.
     """
     # Collect the distinct non-ASCII characters, then order them.
     chars = {c for s in FIXED_STRINGS for c in s if ord(c) > 127}
     sorted_chars = sorted(chars)
     print(f"Generating font data for {len(sorted_chars)} characters: {''.join(sorted_chars)}")

     try:
         face = freetype.Face(FONT_FILE)
     except Exception as e:
         print(f"Error loading font: {e}")
         return
     face.set_pixel_sizes(FONT_SIZE, FONT_SIZE)

     with open(OUTPUT_FILE, "w", encoding="utf-8") as out:
         # Header of the generated module.
         out.write("# Static font data generated for specific characters\n")
         out.write("import ubinascii\n\n")
         out.write("FONTS = {\n")

         for char in sorted_chars:
             face.load_char(char, freetype.FT_LOAD_RENDER | freetype.FT_LOAD_TARGET_MONO)
             glyph = face.glyph.bitmap

             # One 16x16 cell at 1 bit per pixel: 32 bytes, 2 bytes per row.
             cell = bytearray(32)
             width = glyph.width
             rows = glyph.rows

             # Plain centring of the rendered bitmap inside the cell; the
             # baseline (bitmap_top) is intentionally not taken into account.
             left = (FONT_SIZE - width) // 2
             top = (FONT_SIZE - rows) // 2

             pixels = glyph.buffer
             for src_y in range(rows):
                 cell_y = src_y + top
                 if not 0 <= cell_y < FONT_SIZE:
                     continue
                 for src_x in range(width):
                     cell_x = src_x + left
                     if not 0 <= cell_x < FONT_SIZE:
                         continue
                     # Source rows are `pitch` bytes apart, 8 pixels per byte.
                     src_index = src_y * glyph.pitch + (src_x >> 3)
                     if src_index >= len(pixels):
                         continue
                     if (pixels[src_index] >> (7 - (src_x & 7))) & 1:
                         # Mirror the set pixel into the packed cell.
                         cell[cell_y * 2 + (cell_x >> 3)] |= 1 << (7 - (cell_x & 7))

             # Emit the cell as an escaped bytes literal keyed by code point.
             hex_str = "".join(f"\\x{b:02x}" for b in cell)
             out.write(f"    {ord(char)}: b'{hex_str}', # {char}\n")

         out.write("}\n")
     print(f"Generated {OUTPUT_FILE}")
 # Script entry point: regenerate the static font table only when this file
 # is executed directly, not when it is imported.
 if __name__ == "__main__":
    generate_static_font()
--- a/websocket_server/generated_thumb.bin
+++ b/websocket_server/generated_thumb.bin
--- a/websocket_server/received_audio.mp3
+++ b/websocket_server/received_audio.mp3
--- a/websocket_server/received_audio.raw
+++ b/websocket_server/received_audio.raw
--- a/websocket_server/server.py
+++ b/websocket_server/server.py
@@ -444,16 +444,39 @@ class MyRecognitionCallback(RecognitionCallback):
    def on_event(self, result: RecognitionResult) -> None:
        if result.get_sentence():
             text = result.get_sentence()['text']
-             print(f"ASR Result: {text}")
+             
-             # 累积每一句识别结果
+             # 获取当前句子的结束状态
             # 注意：DashScope Python SDK 的 Result 结构可能需要根据版本调整
             # 这里假设我们只关心文本内容的变化
             # 简单的去重逻辑：如果新来的文本比上一句长且包含上一句，则认为是同一句的更新
             if self.sentence_list:
                 last_sentence = self.sentence_list[-1]
                 # 去掉句尾标点进行比较，因为流式结果可能标点不稳定
                 last_clean = last_sentence.rstrip('。，？！')
                 text_clean = text.rstrip('。，？！')
                 if text_clean.startswith(last_clean):
                     # 更新当前句子
                     self.sentence_list[-1] = text
                 elif last_clean.startswith(text_clean):
                     # 如果新来的比旧的短但也是前缀（不太可能发生，除非回溯），忽略或更新
                     pass
                 else:
                     # 新的句子
                     self.sentence_list.append(text)
             else:
                 self.sentence_list.append(text)
             # 同时更新 final_text 以便 Stop 时获取
             self.final_text = "".join(self.sentence_list)
             print(f"ASR Update: {self.final_text}")
             # 将识别结果发送回客户端
             try:
                 if self.loop.is_running():
                     asyncio.run_coroutine_threadsafe(
-                         self.websocket.send_text(f"ASR:{text}"), 
+                         self.websocket.send_text(f"ASR:{self.final_text}"), 
                         self.loop
                     )
             except Exception as e:
@@ -559,12 +582,24 @@ def generate_image(prompt, progress_callback=None, retry_count=0, max_retries=2)
        progress_callback(35, "正在请求AI生成图片...")
    try:
        if not prompt:
            print("Error: prompt is empty")
            if progress_callback:
                progress_callback(0, "提示词为空")
            return None
        response = ImageSynthesis.call(
            model='wanx2.0-t2i-turbo',
            prompt=prompt
        )
        if response.status_code == 200:
            if not response.output:
                print("Error: response.output is None")
                if progress_callback:
                    progress_callback(0, "API响应无效")
                return None
            task_status = response.output.get('task_status')
            if task_status == 'PENDING' or task_status == 'RUNNING':
@@ -631,9 +666,9 @@ def generate_image(prompt, progress_callback=None, retry_count=0, max_retries=2)
                        g6 = (g >> 2) & 0x3F
                        b5 = (b >> 3) & 0x1F
-                        # 小端模式：低字节在前
+                        # 大端模式：高字节在前 (符合ST7789默认配置)
                        rgb565 = (r5 << 11) | (g6 << 5) | b5
-                        rgb565_data.extend(struct.pack('<H', rgb565))
+                        rgb565_data.extend(struct.pack('>H', rgb565))
                # 保存为.bin文件
                with open(GENERATED_THUMB_FILE, 'wb') as f: