1

2026-03-03 22:45:09 +08:00
parent 700bc55657
commit 05f02a1454
14 changed files with 574 additions and 149 deletions
--- a/pycache/display.cpython-313.pyc
+++ b/pycache/display.cpython-313.pyc
--- a/pycache/font.cpython-313.pyc
+++ b/pycache/font.cpython-313.pyc
--- a/pycache/main.cpython-313.pyc
+++ b/pycache/main.cpython-313.pyc
--- a/display.py
+++ b/display.py
@@ -47,9 +47,9 @@ class Display:
        if self.font:
            self.font.set_ws(ws)

-    def text(self, text, x, y, color):
+    def text(self, text, x, y, color, wait=True):
        if self.tft:
-            self.font.text(self.tft, text, x, y, color)
+            self.font.text(self.tft, text, x, y, color, wait=wait)

    def init_ui(self):
        """初始化 UI 背景"""
@@ -93,3 +93,59 @@ class Display:
            self.tft.blit_buffer(rgb565_data, x, y, width, height)
        except Exception as e:
            print(f"Show image error: {e}")
+
+    def show_image_chunk(self, x, y, width, height, data, offset):
+        """Stream one chunk of raw image data to the display.
+
+        The first chunk (``offset == 0``) opens a drawing window covering the
+        whole ``width`` x ``height`` image at (``x``, ``y``). Every chunk,
+        including the first, is then written to the panel unchanged.
+
+        Args:
+            x, y: Top-left corner of the image on screen.
+            width, height: Image size in pixels.
+            data: Raw pixel bytes for this chunk (presumably RGB565, two
+                bytes per pixel -- confirm against the sender).
+            offset: Byte offset of this chunk within the image; it is only
+                compared against 0 to detect the first chunk.
+
+        Does nothing when no display is attached. Driver errors are printed
+        rather than raised.
+
+        NOTE(review): chunks after the first rely on the window opened by the
+        first one, so this assumes nothing else draws to the panel while an
+        image is being streamed -- confirm with the callers.
+        """
+        if not self.tft:
+            return
+
+        try:
+            if offset == 0:
+                # Open the window once for the whole image; later chunks
+                # continue writing into it.
+                self.tft.set_window(x, y, x + width - 1, y + height - 1)
+
+            # No command byte, only pixel data.
+            self.tft.write(None, data)
+        except Exception as e:
+            print(f"Show chunk error: {e}")
--- a/font.py
+++ b/font.py
@@ -4,6 +4,11 @@ import time
 import binascii
 import gc

+try:
+    import static_font_data
+except ImportError:
+    static_font_data = None
+
 class Font:
    def __init__(self, ws=None):
        self.ws = ws
@@ -11,6 +16,8 @@ class Font:
        self.pending_requests = set()
        self.retry_count = {}
        self.max_retries = 3
+        # Pre-allocate buffer for row drawing (16 pixels * 2 bytes = 32 bytes)
+        self.row_buf = bytearray(32)

    def set_ws(self, ws):
        self.ws = ws
@@ -24,7 +31,40 @@ class Font:
        """获取当前缓存的字体数量"""
        return len(self.cache)

-    def text(self, tft, text, x, y, color, bg=0x0000):
+    def handle_message(self, msg):
+        """Consume a font-related message and update the glyph cache.
+
+        ``FONT_DATA:<code>:<hex>`` stores the decoded bytes in ``self.cache``
+        under the integer code (decimal, or hexadecimal when prefixed with
+        ``0x``) and drops any retry counter kept for that code.
+        ``FONT_BATCH_END:`` is recognised but changes no state here.
+
+        Returns:
+            True when ``msg`` is a font message (even if its payload could
+            not be parsed), False for anything else, including non-strings.
+        """
+        if not isinstance(msg, str):
+            return False
+
+        # End-of-batch marker: nothing to store, just claim the message.
+        if msg.startswith("FONT_BATCH_END:"):
+            return True
+
+        if not msg.startswith("FONT_DATA:"):
+            return False
+
+        fields = msg.split(":")
+        if len(fields) >= 3:
+            try:
+                raw_key = fields[1]
+                code = int(raw_key, 16) if raw_key.startswith("0x") else int(raw_key)
+                glyph = binascii.unhexlify(fields[2])
+                self.cache[code] = glyph
+                # The glyph arrived, so its retry bookkeeping is obsolete.
+                self.retry_count.pop(code, None)
+            except Exception as e:
+                print(f"Font data parse error: {e}")
+        return True
+
+    def text(self, tft, text, x, y, color, bg=0x0000, wait=True):
        """在ST7789显示器上绘制文本"""
        if not text:
            return
@@ -32,17 +72,23 @@ class Font:
        color_bytes = struct.pack(">H", color)
        bg_bytes = struct.pack(">H", bg)
        
-        lut = [bytearray(16) for _ in range(256)]
-        for i in range(256):
-            for bit in range(8):
-                val = (i >> bit) & 1
-                idx = (7 - bit) * 2
+        # Create a mini-LUT for 4-bit chunks (16 entries * 8 bytes = 128 bytes)
+        # Each entry maps 4 bits (0-15) to 4 pixels (8 bytes)
+        mini_lut = []
+        for i in range(16):
+            chunk = bytearray(8)
+            for bit in range(4):
+                # bit 0 is LSB of nibble, corresponds to rightmost pixel of the 4 pixels
+                # Assuming standard MSB-first bitmap
+                val = (i >> (3 - bit)) & 1
+                idx = bit * 2
                if val:
-                    lut[i][idx] = color_bytes[0]
-                    lut[i][idx+1] = color_bytes[1]
+                    chunk[idx] = color_bytes[0]
+                    chunk[idx+1] = color_bytes[1]
                else:
-                    lut[i][idx] = bg_bytes[0]
-                    lut[i][idx+1] = bg_bytes[1]
+                    chunk[idx] = bg_bytes[0]
+                    chunk[idx+1] = bg_bytes[1]
+            mini_lut.append(bytes(chunk))
                    
        initial_x = x
        
@@ -50,6 +96,9 @@ class Font:
        for char in text:
            if ord(char) > 127:
                code = ord(char)
+                # Check static font data first
+                if static_font_data and hasattr(static_font_data, 'FONTS') and code in static_font_data.FONTS:
+                    continue
                if code not in self.cache:
                    missing_codes.add(code)
        
@@ -57,9 +106,12 @@ class Font:
            missing_list = list(missing_codes)
            
            req_str = ",".join([str(c) for c in missing_list])
+            # Only print if waiting, to reduce log spam in async mode
+            if wait:
                print(f"Batch requesting fonts: {req_str}")
            try:
                self.ws.send(f"GET_FONTS_BATCH:{req_str}")
+                if wait:
                    self._wait_for_fonts(missing_codes)
            except Exception as e:
                print(f"Batch font request failed: {e}")
@@ -78,12 +130,22 @@ class Font:
                
            is_chinese = False
            buf_data = None
-            
-            if ord(char) > 127:
            code = ord(char)
-                if code in self.cache:
+            
+            if code > 127:
+                if static_font_data and hasattr(static_font_data, 'FONTS') and code in static_font_data.FONTS:
+                    buf_data = static_font_data.FONTS[code]
+                    is_chinese = True
+                elif code in self.cache:
                    buf_data = self.cache[code]
                    is_chinese = True
+                else:
+                    # Missing font data
+                    if not wait:
+                        # In async mode, draw a placeholder or space
+                        # We use '?' for now so user knows something is missing
+                        char = '?'
+                        is_chinese = False
                    else:
                        if code in self.pending_requests:
                            retry = self.retry_count.get(code, 0)
@@ -92,14 +154,40 @@ class Font:
                                self._request_single_font(code)
            
            if is_chinese and buf_data:
-                self._draw_bitmap(tft, buf_data, x, y, 16, 16, lut)
+                self._draw_bitmap_optimized(tft, buf_data, x, y, mini_lut)
                x += 16
            else:
-                if ord(char) > 127:
+                if code > 127:
                    char = '?'
                self._draw_ascii(tft, char, x, y, color, bg)
                x += 8

+    def _draw_bitmap_optimized(self, tft, bitmap, x, y, mini_lut):
+        """Draw a 16-pixel-wide glyph one row at a time via ``self.row_buf``.
+
+        Each row consumes two bytes of ``bitmap``; every 4-bit nibble is
+        expanded to 8 bytes through ``mini_lut`` and the resulting 32-byte
+        row is blitted at (``x``, ``y + row``). At most 16 rows are drawn;
+        a shorter bitmap simply yields fewer rows.
+        """
+        line = self.row_buf
+        # Only rows whose two bytes are both present are drawn.
+        complete_rows = min(16, len(bitmap) // 2)
+
+        for row in range(complete_rows):
+            left = bitmap[row * 2]
+            right = bitmap[row * 2 + 1]
+
+            # Left byte -> pixels 0..7, right byte -> pixels 8..15;
+            # the high nibble goes first within each byte.
+            line[0:8] = mini_lut[(left >> 4) & 0x0F]
+            line[8:16] = mini_lut[left & 0x0F]
+            line[16:24] = mini_lut[(right >> 4) & 0x0F]
+            line[24:32] = mini_lut[right & 0x0F]
+
+            tft.blit_buffer(line, x, y + row, 16, 1)
+
    def _request_single_font(self, code):
        """请求单个字体"""
        if self.ws:
@@ -134,10 +222,10 @@ class Font:
                    if msg is None:
                        continue
                        
-                    if isinstance(msg, str):
+                    if self.handle_message(msg):
+                        # 如果是批处理结束，检查是否有失败的
                        if msg.startswith("FONT_BATCH_END:"):
                            parts = msg[15:].split(":")
-                            success = int(parts[0]) if len(parts) > 0 else 0
                            failed = int(parts[1]) if len(parts) > 1 else 0
                            
                            if failed > 0:
@@ -145,34 +233,26 @@ class Font:
                                for c in temp_missing:
                                    if c not in self.cache:
                                        print(f"Font failed after retries: {c}")
-                                        self.cache[c] = None
+                                        self.cache[c] = None # 标记为 None 避免死循环
                                        if c in target_codes:
                                            target_codes.remove(c)
                            
+                            # 清除所有剩余的目标，因为批处理结束了
+                            # 但实际上可能只需要清除 failed 的。
+                            # 无论如何，收到 BATCH_END 意味着本次请求处理完毕。
+                            # 如果还有没收到的，可能是丢包了。
+                            # 为了简单起见，我们认为结束了。
                            target_codes.clear()
                        
-                        elif msg.startswith("FONT_DATA:"):
-                            parts = msg.split(":")
-                            if len(parts) >= 3:
-                                try:
-                                    key_str = parts[1]
-                                    if key_str.startswith("0x"):
-                                        c = int(key_str, 16)
-                                    else:
-                                        c = int(key_str)
-                                    
-                                    d = binascii.unhexlify(parts[2])
-                                    self.cache[c] = d
-                                    if c in target_codes:
+                        # 检查是否有新缓存的字体满足了 target_codes
+                        temp_target = list(target_codes)
+                        for c in temp_target:
+                            if c in self.cache:
                                target_codes.remove(c)
                                if c in self.retry_count:
                                    del self.retry_count[c]
-                                except:
-                                    pass
-                        else:
-                            self.local_deferred.append(msg)
                                    
-                    elif msg is not None:
+                    else:
                        self.local_deferred.append(msg)
                        
            except Exception as e:
@@ -183,12 +263,6 @@ class Font:
                self.ws.unread_messages = self.local_deferred + self.ws.unread_messages
            self.local_deferred = []

-    def _draw_bitmap(self, tft, bitmap, x, y, w, h, lut):
-        """绘制位图"""
-        chunks = [lut[b] for b in bitmap]
-        rgb_buf = b''.join(chunks)
-        tft.blit_buffer(rgb_buf, x, y, w, h)
-
    def _draw_ascii(self, tft, char, x, y, color, bg):
        """绘制ASCII字符"""
        w, h = 8, 8
--- a/main.py
+++ b/main.py
@@ -163,7 +163,7 @@ def render_recording_screen(display, asr_text="", audio_level=0):
        display.tft.fill_rect(20, 100, bar_width, 10, st7789.GREEN)
    
    if asr_text:
-        display.text(asr_text[:20], 20, 130, st7789.WHITE)
+        display.text(asr_text[:20], 20, 130, st7789.WHITE, wait=False)
    
    display.tft.fill_rect(60, 200, 120, 25, st7789.RED)
    display.text("松开停止", 85, 205, st7789.WHITE)
@@ -194,54 +194,91 @@ def render_result_screen(display, status="", prompt="", image_received=False):
    if not display or not display.tft:
        return
    
-    # Only clear if we are starting a new state or it's the first render
-    # But for simplicity we clear all for now. Optimizing this requires state tracking.
+    if status == "OPTIMIZING":
        display.tft.fill(st7789.BLACK)
-    
-    # Header
        display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE)
        display.text("AI 生成中", 80, 8, st7789.BLACK)
        
-    if status == "OPTIMIZING":
        display.text("正在思考...", 80, 60, st7789.CYAN)
        display.text("优化提示词中", 70, 80, st7789.CYAN)
        draw_progress_bar(display, 40, 110, 160, 6, 0.3, st7789.CYAN)
        # Spinner will be drawn by main loop
        
    elif status == "RENDERING":
+        display.tft.fill(st7789.BLACK)
+        display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE)
+        display.text("AI 生成中", 80, 8, st7789.BLACK)
+        
        display.text("正在绘画...", 80, 60, st7789.YELLOW)
        display.text("AI作画中", 85, 80, st7789.YELLOW)
        draw_progress_bar(display, 40, 110, 160, 6, 0.7, st7789.YELLOW)
        # Spinner will be drawn by main loop
        
    elif status == "COMPLETE" or image_received:
-        display.text("生成完成!", 80, 50, st7789.GREEN)
-        draw_check_icon(display, 110, 80)
+        # Don't clear screen, image is already there
+        # display.text("生成完成!", 80, 50, st7789.GREEN)
+        # draw_check_icon(display, 110, 80)
+        pass
        
    elif status == "ERROR":
+        display.tft.fill(st7789.BLACK)
+        display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE)
+        display.text("AI 生成中", 80, 8, st7789.BLACK)
        display.text("生成失败", 80, 50, st7789.RED)
        
-    if prompt:
+    if prompt and not image_received:
        display.tft.fill_rect(10, 140, 220, 50, 0x2124) # Dark Grey
        display.text("提示词:", 15, 145, st7789.CYAN)
        display.text(prompt[:25] + "..." if len(prompt) > 25 else prompt, 15, 165, st7789.WHITE)
    
+    # Only show back button if not showing full image, or maybe show it transparently?
+    # For now, let's not cover the image with the button hint
+    if not image_received:
        display.tft.fill_rect(60, 210, 120, 25, st7789.BLUE)
-    display.text("返回录音", 90, 215, st7789.WHITE)
+        display.text("长按返回", 90, 215, st7789.WHITE)
+
+
+
+
+
+
+

 def process_message(msg, display, image_state, image_data_list):
    """处理WebSocket消息"""
    # Handle binary image data
    if isinstance(msg, (bytes, bytearray)):
        if image_state == IMAGE_STATE_RECEIVING:
-            image_data_list.append(msg)
-            # Optional: Update progress bar or indicator
+            try:
+                if len(image_data_list) < 2:
+                     # 异常情况，重置
+                     return IMAGE_STATE_IDLE, None
+                
+                img_size = image_data_list[0]
+                current_offset = image_data_list[1]
+                
+                # Stream directly to display
+                if display and display.tft:
+                    x = (240 - img_size) // 2
+                    y = (240 - img_size) // 2
+                    display.show_image_chunk(x, y, img_size, img_size, msg, current_offset)
+                
+                # Update offset
+                image_data_list[1] += len(msg)
+                
+            except Exception as e:
+                print(f"Stream image error: {e}")
+            
            return image_state, None
        return image_state, None

    if not isinstance(msg, str):
        return image_state, None
    
+    # Check for font data first
+    if display and hasattr(display, 'font') and display.font.handle_message(msg):
+        return image_state, ("font_update",)
+    
    status_info = None
    
    if msg.startswith("ASR:"):
@@ -272,6 +309,15 @@ def process_message(msg, display, image_state, image_data_list):
            print(f"Image start, size: {size}, img_size: {img_size}")
            image_data_list.clear()
            image_data_list.append(img_size) # Store metadata at index 0
+            image_data_list.append(0)        # Store current received bytes offset at index 1
+            
+            # Prepare display for streaming
+            if display and display.tft:
+                # Calculate position
+                x = (240 - img_size) // 2
+                y = (240 - img_size) // 2
+                # Pre-set window (this will be done in first chunk call)
+            
            return IMAGE_STATE_RECEIVING, None
        except Exception as e:
            print(f"IMAGE_START parse error: {e}")
@@ -279,45 +325,14 @@ def process_message(msg, display, image_state, image_data_list):
    
    # Deprecated text-based IMAGE_DATA handling
    elif msg.startswith("IMAGE_DATA:") and image_state == IMAGE_STATE_RECEIVING:
-        try:
-            data = msg.split(":", 1)[1]
-            # Convert hex to bytes immediately if using old protocol, but we switched to binary
-            # Keep this just in case server rolls back? No, let's assume binary.
-            pass 
-        except:
        pass 
    
    elif msg == "IMAGE_END" and image_state == IMAGE_STATE_RECEIVING:
-        try:
-            print("Image received, processing...")
-            
-            img_size = image_data_list[0] if image_data_list else 64
-            # Combine all binary chunks (skipping metadata at index 0)
-            img_data = b"".join(image_data_list[1:])
+        print("Image received completely")
        image_data_list.clear()
-            
-            print(f"Image data len: {len(img_data)}")
-            
-            if display and display.tft:
-                x = (240 - img_size) // 2
-                y = (240 - img_size) // 2
-                display.show_image(x, y, img_size, img_size, img_data)
-                
-                # Overlay success message slightly
-                display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE)
-                display.text("图片已生成!", 70, 5, st7789.BLACK)
-            
        gc.collect()
-            print("Image displayed")
        return IMAGE_STATE_IDLE, ("image_done",)
            
-        except Exception as e:
-            print(f"Image process error: {e}")
-            import sys
-            sys.print_exception(e)
-        
-        return IMAGE_STATE_IDLE, None
-    
    elif msg.startswith("IMAGE_ERROR:"):
        print(msg)
        return IMAGE_STATE_IDLE, ("error", msg[12:])
@@ -330,7 +345,7 @@ def print_asr(text, display=None):
    print(f"ASR: {text}")
    if display and display.tft:
        display.fill_rect(0, 40, 240, 160, st7789.BLACK)
-        display.text(text, 0, 40, st7789.WHITE)
+        display.text(text, 0, 40, st7789.WHITE, wait=False)


 def get_boot_button_action(boot_btn):
@@ -468,7 +483,7 @@ def main():
                    print("Memory high, cleaned")
            
            # Spinner Animation
-            if ui_screen == UI_SCREEN_RESULT and not image_generation_done and current_status in ["OPTIMIZING", "RENDERING"]:
+            if ui_screen == UI_SCREEN_RESULT and not image_generation_done and current_status in ["OPTIMIZING", "RENDERING"] and image_state != IMAGE_STATE_RECEIVING:
                now = time.ticks_ms()
                if time.ticks_diff(now, last_spinner_time) > 100:
                    if display.tft:
@@ -543,17 +558,11 @@ def main():
                    time.sleep(0.5)
                
                elif ui_screen == UI_SCREEN_RESULT:
-                    print(">>> Back to recording")
-                    ui_screen = UI_SCREEN_RECORDING
-                    is_recording = False
-                    current_asr_text = ""
-                    current_prompt = ""
-                    current_status = ""
-                    image_generation_done = False
-                    confirm_waiting = False
-                    
-                    if display.tft:
-                        render_recording_screen(display, "", 0)
+                    # Ignore short press in result screen to keep image displayed
+                    # unless image generation failed or is still in progress?
+                    # User request: "只有长按boot才离开" (Only leave on long press)
+                    # So we do nothing here.
+                    pass
            
            elif btn_action == 2:
                if is_recording:
@@ -595,15 +604,25 @@ def main():
                        render_recording_screen(display, "", 0)
                
                elif ui_screen == UI_SCREEN_RESULT:
-                    print(">>> Generate image (manual)")
+                    print(">>> Back to recording")
+                    # Stop recording if it was somehow started or just reset state
                    if ws and ws.is_connected():
                        try:
-                            ws.send("START_RECORDING")
-                            is_recording = True
-                            ui_screen = UI_SCREEN_RECORDING
+                            ws.send("STOP_RECORDING")
                        except:
                            ws = None
                    
+                    ui_screen = UI_SCREEN_RECORDING
+                    is_recording = False
+                    current_asr_text = ""
+                    current_prompt = ""
+                    current_status = ""
+                    image_generation_done = False
+                    confirm_waiting = False
+                    
+                    if display.tft:
+                        render_recording_screen(display, "", 0)
+            
            elif btn_action == 3:
                print(">>> Config mode")
            
@@ -628,6 +647,10 @@ def main():
                                            if display.tft:
                                                render_recording_screen(display, current_asr_text, last_audio_level)
                                        
+                                        elif event_data[0] == "font_update":
+                                            if ui_screen == UI_SCREEN_RECORDING and display.tft:
+                                                render_recording_screen(display, current_asr_text, last_audio_level)
+                                        
                                        elif event_data[0] == "status":
                                            current_status = event_data[1]
                                            status_text = event_data[2] if len(event_data) > 2 else ""
--- a/static_font_data.py
+++ b/static_font_data.py
@@ -0,0 +1,48 @@
+# Static font data generated for specific characters
+import ubinascii
+
+FONTS = {
+    20013: b'\x01\x00\x01\x00\x01\x00\x01\xf8\x3f\x08\x21\x08\x21\x08\x21\xf8\x3f\x00\x21\x00\x01\x00\x01\x00\x01\x00\x01\x00\x01\x00\x00\x00', # 中
+    20102: b'\x00\x00\x01\xf0\x1e\x10\x00\x20\x01\x40\x00\x80\x00\x80\x00\x80\x00\x80\x00\x80\x00\x80\x00\x80\x04\x80\x02\x80\x01\x00\x00\x00', # 了
+    20248: b'\x08\x80\x08\x80\x08\xa0\x10\x90\x10\x80\x20\xfc\x2f\x40\x61\x40\xa1\x40\x21\x40\x22\x40\x22\x44\x24\x44\x24\x44\x28\x3c\x00\x00', # 优
+    20316: b'\x08\x80\x08\x80\x09\x00\x11\x1c\x13\xe0\x32\x80\x54\x98\x90\xe0\x10\x80\x10\x9c\x10\xe0\x10\x80\x10\x80\x10\x80\x10\x80\x00\x00', # 作
+    20572: b'\x08\x80\x08\x78\x17\x80\x10\x60\x23\xa0\x22\x60\x63\x80\xa0\x7c\x2f\x88\x28\x30\x23\xc0\x20\x40\x21\x40\x20\xc0\x20\x40\x00\x00', # 停
+    21035: b'\x00\x08\x06\x08\x3a\x08\x22\x48\x26\x48\x38\x48\x28\x48\x0f\x48\x71\x48\x11\x48\x11\x08\x22\x08\x2a\x28\x44\x18\x80\x08\x00\x00', # 别
+    21040: b'\x00\x08\x00\x08\x07\x88\x38\x28\x0a\x28\x11\x28\x23\xa8\x7c\xa8\x04\x28\x07\x28\x3c\x28\x07\x88\x18\x28\x60\x18\x00\x08\x00\x00', # 到
+    21270: b'\x04\x00\x04\x80\x08\x80\x08\x88\x08\x88\x18\x90\x28\xa0\x48\xc0\x09\x80\x0a\x80\x08\x84\x08\x84\x08\x84\x08\x7c\x08\x00\x00\x00', # 化
+    21527: b'\x00\x60\x03\xa0\x00\x20\x19\x20\x69\x20\x49\x20\x59\x20\x61\x78\x01\x88\x00\x08\x00\xe8\x0f\x08\x00\x10\x00\x50\x00\x20\x00\x00', # 吗
+    22238: b'\x00\x00\x00\x00\x01\xf8\x3e\x08\x20\x08\x21\x88\x26\x88\x24\x88\x25\x88\x26\x08\x20\x08\x20\xf8\x3f\x00\x00\x00\x00\x00\x00\x00', # 回
+    22312: b'\x01\x00\x01\x00\x02\x00\x03\xf8\x7c\x00\x04\x80\x18\x80\x10\x80\x30\xf0\x57\x80\x90\x80\x10\x80\x10\xfc\x1f\x00\x10\x00\x00\x00', # 在
+    22833: b'\x01\x00\x09\x00\x09\x00\x09\xf0\x1f\x00\x11\x00\x21\x00\x01\xf8\x7e\x80\x02\x80\x04\x40\x04\x40\x08\x20\x10\x38\x20\x00\x00\x00', # 失
+    23383: b'\x02\x00\x01\x00\x01\xfc\x3e\x08\x21\xe0\x0e\x40\x00\x80\x01\x00\x00\xfc\x7f\x80\x00\x80\x00\x80\x00\x80\x02\x80\x01\x00\x00\x00', # 字
+    23436: b'\x02\x00\x01\x00\x00\xfc\x3f\x08\x20\x00\x00\xc0\x07\x00\x00\x78\x3f\x80\x04\x80\x04\x80\x08\x84\x08\x84\x10\x84\x60\x7c\x00\x00', # 完
+    24320: b'\x00\x00\x03\xf0\x1c\x40\x04\x40\x04\x40\x04\x40\x07\xfc\x7c\x40\x04\x40\x04\x40\x08\x40\x08\x40\x10\x40\x20\x40\x40\x40\x00\x00', # 开
+    24405: b'\x01\xc0\x0e\x40\x01\xc0\x0e\x40\x00\xfc\x3f\x00\x01\x10\x11\x10\x09\xa0\x05\x40\x09\x20\x11\x18\x61\x06\x03\x00\x01\x00\x00\x00', # 录
+    24605: b'\x00\xf0\x1f\x10\x11\x10\x11\xf0\x1f\x10\x11\x10\x11\xf0\x1e\x00\x10\x00\x23\x18\x28\x84\x24\x10\x43\x10\x40\xf0\x00\x00\x00\x00', # 思
+    25104: b'\x00\xa0\x00\x90\x00\x80\x00\xf0\x1f\x80\x10\x90\x10\x90\x1e\xa0\x12\xa0\x22\x40\x22\x44\x24\xa4\x55\x14\x48\x0c\x80\x04\x00\x00', # 成
+    25353: b'\x10\x40\x10\x20\x10\x3c\x13\xc8\x1e\x40\x70\x40\x10\x80\x18\xfe\x37\x10\xd1\x10\x11\x20\x10\xa0\x50\x60\x31\x90\x16\x08\x00\x00', # 按
+    25552: b'\x10\x30\x11\xd0\x11\x10\x11\xd0\x1d\x30\x71\xc0\x15\x00\x18\x38\x37\xc0\xd2\x70\x12\x40\x13\x40\x54\xc0\x34\x30\x18\x0e\x00\x00', # 提
+    25991: b'\x02\x00\x01\x00\x01\x00\x00\x38\x3f\xc0\x00\x40\x04\x40\x02\x80\x02\x80\x01\x00\x01\x00\x02\x80\x0c\x40\x30\x30\xc0\x0e\x00\x00', # 文
+    26410: b'\x01\x00\x01\x00\x01\x00\x01\xf0\x1f\x00\x01\x00\x01\x78\x7f\x80\x03\x40\x05\x40\x09\x20\x11\x20\x61\x1c\x81\x00\x01\x00\x00\x00', # 未
+    26494: b'\x00\x40\x10\x40\x10\xa0\x10\xa0\x1c\xa0\x71\x10\x19\x50\x36\x4c\x52\x40\x54\x80\x90\xa0\x11\x10\x11\x38\x13\xc8\x10\x00\x00\x00', # 松
+    27490: b'\x01\x00\x01\x00\x01\x00\x01\x00\x01\x00\x11\x00\x11\x30\x11\xc0\x11\x00\x11\x00\x11\x00\x11\x00\x11\x00\x11\xfc\xfe\x00\x00\x00', # 止
+    27491: b'\x00\x00\x00\xf0\x1f\x00\x01\x00\x01\x00\x01\x00\x09\x30\x09\xc0\x09\x00\x09\x00\x09\x00\x09\x00\x09\xfc\x7e\x00\x00\x00\x00\x00', # 正
+    29983: b'\x01\x00\x01\x00\x01\x00\x09\x00\x09\x00\x11\xf0\x1f\x00\x21\x00\x21\x00\x41\xe0\x0f\x00\x01\x00\x01\x00\x01\xfc\x7e\x00\x00\x00', # 生
+    30011: b'\x00\x00\x01\xf8\x3e\x00\x00\xe0\x0f\x20\x09\x20\x09\xe8\x2f\x28\x29\x28\x29\xe8\x2e\x08\x20\x08\x21\xf8\x3e\x00\x00\x00\x00\x00', # 画
+    30701: b'\x10\x00\x10\x3c\x11\xc0\x16\x18\x38\xe8\x28\x88\x48\x98\x0e\xe0\x78\x10\x08\x90\x14\x50\x12\x50\x20\x3c\x43\xc0\x00\x00\x00\x00', # 短
+    30830: b'\x00\x80\x00\x80\x0c\xf0\x71\x20\x11\x40\x12\x78\x21\xc8\x2d\x68\x75\xc8\xa5\x68\x2d\xc8\x32\x48\x22\x48\x04\x18\x08\x08\x00\x00', # 确
+    31034: b'\x00\x00\x00\xe0\x0f\x00\x00\x00\x00\xfc\x7f\x00\x01\x00\x05\x00\x05\x20\x09\x10\x11\x08\x21\x08\x45\x00\x03\x00\x01\x00\x00\x00', # 示
+    32472: b'\x00\x80\x10\x80\x10\xc0\x21\x40\x25\x20\x4a\x10\x74\x6e\x11\x80\x2c\x38\x73\xc0\x00\x80\x0c\xa0\x31\x10\xc2\x78\x03\x88\x00\x00', # 绘
+    32771: b'\x02\x00\x02\x10\x03\xa0\x0e\x40\x02\x80\x03\xfc\x7e\x00\x07\xf0\x1a\x00\x22\x60\xc3\xa0\x00\x20\x00\x40\x01\x40\x00\x80\x00\x00', # 考
+    35748: b'\x00\x00\x10\x40\x08\x40\x08\x40\x00\x40\x00\x40\x70\x40\x10\x40\x10\xa0\x10\xa0\x15\x10\x19\x10\x12\x08\x04\x0e\x08\x00\x00\x00', # 认
+    35782: b'\x00\x00\x10\x38\x09\xc8\x09\x08\x01\x08\x71\x38\x11\xc0\x11\x00\x10\x00\x14\x90\x18\x88\x11\x04\x02\x04\x04\x00\x00\x00\x00\x00', # 识
+    35789: b'\x20\x00\x10\x78\x0b\x88\x00\x08\x00\xe8\x77\x08\x10\xc8\x13\x48\x12\x48\x12\xc8\x13\x08\x1a\x08\x10\x28\x00\x18\x00\x08\x00\x00', # 词
+    35821: b'\x00\x00\x20\x70\x13\x80\x10\x80\x00\xe0\x03\x20\xe1\x20\x21\xfc\x26\x00\x20\x70\x23\x90\x2a\x10\x32\x70\x23\x80\x02\x00\x00\x00', # 语
+    35828: b'\x02\x10\x21\x10\x11\x20\x10\x20\x00\x70\x03\x90\x72\x10\x12\x70\x13\xa0\x10\xa0\x14\xa0\x19\x22\x11\x22\x02\x22\x0c\x1e\x00\x00', # 说
+    36133: b'\x00\x40\x06\x40\x3a\x40\x22\x4c\x2a\x70\x2a\x90\x2a\x90\x2b\x50\x2a\x50\x28\x20\x14\x20\x12\x50\x20\x90\x21\x0c\x42\x00\x00\x00', # 败
+    36820: b'\x00\x00\x00\x38\x13\xc0\x0a\x00\x02\x70\x03\x90\x3a\x10\xca\xa0\x12\x60\x12\x50\x0c\x88\x09\x00\x7c\x00\x01\xc0\x00\x3e\x00\x00', # 返
+    37325: b'\x00\x20\x00\xc0\x1f\x00\x01\xfc\x7f\x00\x01\xf0\x1f\x10\x11\xd0\x17\x10\x11\xf0\x1f\x00\x01\xe0\x1f\x00\x01\xfc\x7e\x00\x00\x00', # 重
+    38271: b'\x08\x00\x08\x20\x08\x40\x08\x80\x0b\x00\x0c\x00\x09\xf8\x7e\x00\x0a\x00\x09\x00\x08\x80\x08\x40\x0a\x30\x0c\x0c\x08\x00\x00\x00', # 长
+    38899: b'\x02\x00\x01\x00\x01\xf0\x1e\x40\x04\x40\x04\x80\x01\xfc\x7e\x00\x01\xe0\x0e\x20\x09\xa0\x0e\x20\x08\x20\x09\xe0\x0e\x20\x00\x00', # 音
+    65311: b'\x00\x00\x00\x00\x1c\x00\x22\x00\x22\x00\x04\x00\x08\x00\x08\x00\x08\x00\x00\x00\x18\x00\x18\x00\x00\x00\x00\x00\x00\x00\x00\x00', # ？
+}
--- a/websocket_client.py
+++ b/websocket_client.py
@@ -11,6 +11,7 @@ class WebSocketClient:
        self.uri = uri
        self.timeout = timeout
        self.unread_messages = [] # Queue for buffered messages
+        self.buffer = bytearray(4096) # Pre-allocated buffer for small messages
        self.connect()

    def connect(self):
@@ -109,6 +110,37 @@ class WebSocketClient:
        self.sock.write(header)
        self.sock.write(masked_data)

+    def _read_exact(self, n):
+        """Read exactly ``n`` bytes from ``self.sock``.
+
+        ``self.sock.read`` is called repeatedly, each time asking only for
+        the bytes still missing, until ``n`` bytes have been collected.
+
+        Returns:
+            The ``n`` bytes that were read, or ``None`` if a read returned
+            nothing or raised before the full count had arrived.
+
+        NOTE: bytes collected before a failure are dropped, not buffered
+        for a later call. After a failure part-way through a frame the
+        stream is therefore no longer aligned with the WebSocket framing.
+        """
+        collected = bytearray()
+        while len(collected) < n:
+            try:
+                piece = self.sock.read(n - len(collected))
+                if piece:
+                    collected += piece
+                    continue
+            except Exception as e:
+                # A failure with nothing read yet is reported silently;
+                # only a failure after a partial read is logged.
+                if collected:
+                    print(f"Socket read error: {e}")
+            # Either the read returned nothing or it raised: give up.
+            return None
+        return bytes(collected)
+
    def recv(self):
        # 1. Check if we have unread messages in the buffer
        if self.unread_messages:
@@ -120,8 +152,8 @@ class WebSocketClient:
        # Read header
        try:
            # Read 2 bytes at once
-            header = self.sock.read(2)
-            if not header or len(header) < 2: return None
+            header = self._read_exact(2)
+            if not header: return None
            
            b1 = header[0]
            b2 = header[1]
@@ -133,49 +165,88 @@ class WebSocketClient:
            length = b2 & 0x7f

            if length == 126:
-                length_bytes = self.sock.read(2)
+                length_bytes = self._read_exact(2)
                if not length_bytes: return None
                length = int.from_bytes(length_bytes, 'big')
            elif length == 127:
-                length_bytes = self.sock.read(8)
+                length_bytes = self._read_exact(8)
                if not length_bytes: return None
                length = int.from_bytes(length_bytes, 'big')

+            # Safety check for memory allocation
+            if length > 50 * 1024: # 50KB limit (reduced from 1MB to be safer on ESP32)
+                print(f"WS Recv: Message too large ({length} bytes)")
+                # If it's a binary message (image chunk), maybe we can process it?
+                # But for now, just skip to avoid OOM
+                self._skip_bytes(length)
                if mask:
-                mask_key = self.sock.read(4)
+                     self._read_exact(4) # Consume mask key
+                return None
+
+            if mask:
+                mask_key = self._read_exact(4)
                if not mask_key: return None

-            # Read payload
+            # Optimization for streaming binary data (opcode 2)
+            try:
+                # Pre-allocate buffer or use shared buffer
+                if length <= 4096:
+                    data = self.buffer
+                else:
                    data = bytearray(length)
+            except MemoryError:
+                print(f"WS Recv: Memory allocation failed for {length} bytes")
+                # Try to skip data
+                self._skip_bytes(length)
+                return None
            
            # Use smaller chunks for readinto to avoid memory allocation issues in MicroPython
            pos = 0
            while pos < length:
-                chunk_size = min(length - pos, 512)
+                chunk_size = min(length - pos, 1024) # 1KB chunks
+                try:
+                    # Create a view into the target buffer
                    chunk_view = memoryview(data)[pos:pos + chunk_size]
-                read_len = self.sock.readinto(chunk_view)
-                if read_len == 0: 
+                    
+                    # We need exact read here too
+                    read_len = 0
+                    while read_len < chunk_size:
+                        chunk_read = self.sock.readinto(chunk_view[read_len:])
+                        if not chunk_read:
+                             # Connection closed or timeout
+                             # If timeout, we are in trouble.
+                             break
+                        read_len += chunk_read
+                    
+                    if read_len < chunk_size:
+                        print("WS Recv: Incomplete payload read")
                        return None
+                        
                    pos += read_len
+                except Exception as e:
+                    print(f"WS Recv read error: {e}")
+                    return None
+            
+            # Create a view for the relevant part of the data
+            view = memoryview(data)[:length]
            
            if mask:
-                unmasked = bytearray(length)
+                # In-place unmasking
                for i in range(length):
-                    unmasked[i] = data[i] ^ mask_key[i % 4]
-                data = unmasked
+                    view[i] = view[i] ^ mask_key[i % 4]

            if opcode == 1: # Text
-                return data.decode('utf-8')
+                return str(view, 'utf-8')
            elif opcode == 2: # Binary
-                return data
+                return bytes(view) # Return copy
            elif opcode == 8: # Close
                self.close()
                return None
            elif opcode == 9: # Ping
-                self.send(data, opcode=10) # Pong
+                self.send(view, opcode=10) # Pong
                return self.recv()
            
-            return data
+            return bytes(view)
            
        except Exception as e:
            # Don't print timeout errors as they are expected in non-blocking polling
@@ -183,6 +254,15 @@ class WebSocketClient:
                print(f"WS Recv Error: {e}")
            return None

+    def _skip_bytes(self, length):
+        """Read and discard ``length`` bytes from ``self.sock``.
+
+        The data is drained in pieces of at most 1024 bytes so that a large
+        payload is never held in memory at once.
+
+        Args:
+            length: number of bytes to discard.
+
+        Stops early if a read returns nothing, since there is then nothing
+        left to discard. Exceptions raised by ``self.sock.read`` propagate
+        to the caller.
+        """
+        chunk_size = 1024
+        remaining = length
+        while remaining > 0:
+            chunk = self.sock.read(min(remaining, chunk_size))
+            if not chunk:
+                # Nothing came back: stop instead of pretending it was skipped.
+                break
+            # Count what actually arrived; a read may return fewer bytes
+            # than requested, and the rest must still be drained.
+            remaining -= len(chunk)
+
    def close(self):
        if self.sock:
            self.sock.close()
--- a/websocket_server/pycache/server.cpython-312.pyc
+++ b/websocket_server/pycache/server.cpython-312.pyc
--- a/websocket_server/generate_static_font.py
+++ b/websocket_server/generate_static_font.py
@@ -0,0 +1,109 @@
+import freetype
+import os
+
+# Path of the TrueType font to rasterize. The default is the original
+# author's local path; set the STATIC_FONT_FILE environment variable to use
+# a font stored elsewhere without editing this script.
+FONT_FILE = os.environ.get(
+    "STATIC_FONT_FILE",
+    "/Users/jeremygan/Desktop/python_dev/epaper2/websocket_server/GB2312.ttf",
+)
+# Path of the generated module. It is relative, so it is resolved against
+# the current working directory when the script runs.
+OUTPUT_FILE = "../static_font_data.py"
+# Edge length, in pixels, of the square cell each glyph is rendered into.
+FONT_SIZE = 16
+
+# Fixed UI strings used by the project. Only their non-ASCII characters end
+# up in the generated font table.
+FIXED_STRINGS = [
+    "语音识别",
+    "松开停止",
+    "说完了吗？",
+    "未识别到文字",
+    "短按确认",
+    "长按重录",
+    "AI 生成中",
+    "正在思考...",
+    "优化提示词中",
+    "正在绘画...",
+    "AI作画中",
+    "生成完成!",
+    "生成失败",
+    "提示词:",
+    "返回录音",
+]
+
+def generate_static_font():
+    """Write a Python module holding 1-bit glyph bitmaps for the fixed strings.
+
+    Every non-ASCII character found in FIXED_STRINGS is rendered from
+    FONT_FILE with FreeType in monochrome mode at FONT_SIZE pixels, centered
+    in a FONT_SIZE x FONT_SIZE cell, and packed row by row with the leftmost
+    pixel in the most significant bit. The result is written to OUTPUT_FILE
+    as a ``FONTS`` dict mapping each code point to a bytes literal.
+
+    If the font cannot be loaded, an error is printed and the function
+    returns without writing anything.
+    """
+    # Only non-ASCII characters are collected.
+    chars = {c for s in FIXED_STRINGS for c in s if ord(c) > 127}
+    sorted_chars = sorted(chars)
+    print(f"Generating font data for {len(sorted_chars)} characters: {''.join(sorted_chars)}")
+
+    try:
+        face = freetype.Face(FONT_FILE)
+    except Exception as e:
+        print(f"Error loading font: {e}")
+        return
+
+    face.set_pixel_sizes(FONT_SIZE, FONT_SIZE)
+
+    # Cell geometry is derived from FONT_SIZE instead of being hard-coded;
+    # for a 16 px cell this is 2 bytes per row and 32 bytes per glyph.
+    bytes_per_row = (FONT_SIZE + 7) // 8
+    bytes_per_glyph = bytes_per_row * FONT_SIZE
+
+    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
+        f.write("# Static font data generated for specific characters\n")
+        # NOTE(review): nothing written below references ubinascii; the
+        # import line is kept so the generated file stays unchanged.
+        f.write("import ubinascii\n\n")
+        f.write("FONTS = {\n")
+
+        for char in sorted_chars:
+            face.load_char(char, freetype.FT_LOAD_RENDER | freetype.FT_LOAD_TARGET_MONO)
+            bitmap = face.glyph.bitmap
+
+            char_buffer = bytearray(bytes_per_glyph)
+
+            glyph_width = bitmap.width
+            glyph_rows = bitmap.rows
+
+            # Center the rendered bitmap in the cell. The glyph's baseline
+            # offset is not taken into account, only its bitmap size.
+            x_off = (FONT_SIZE - glyph_width) // 2
+            y_off = (FONT_SIZE - glyph_rows) // 2
+
+            src_buf = bitmap.buffer
+
+            for row in range(glyph_rows):
+                dst_row = row + y_off
+                if dst_row < 0 or dst_row >= FONT_SIZE:
+                    # Row falls outside the cell: clip it.
+                    continue
+
+                for col in range(glyph_width):
+                    dst_col = col + x_off
+                    if dst_col < 0 or dst_col >= FONT_SIZE:
+                        # Column falls outside the cell: clip it.
+                        continue
+
+                    # Source rows are bitmap.pitch bytes apart, 8 pixels
+                    # per byte, leftmost pixel in the highest bit.
+                    byte_idx = row * bitmap.pitch + (col >> 3)
+                    if byte_idx >= len(src_buf):
+                        continue
+                    if not (src_buf[byte_idx] >> (7 - (col & 7))) & 1:
+                        continue
+
+                    # Set the matching bit in the destination cell.
+                    dst_byte_idx = dst_row * bytes_per_row + (dst_col >> 3)
+                    char_buffer[dst_byte_idx] |= 1 << (7 - (dst_col & 7))
+
+            # Emit the glyph as an escaped bytes literal keyed by code point.
+            hex_str = "".join(f"\\x{b:02x}" for b in char_buffer)
+            f.write(f"    {ord(char)}: b'{hex_str}', # {char}\n")
+
+        f.write("}\n")
+
+    print(f"Generated {OUTPUT_FILE}")
+
+# Run the generator only when this file is executed as a script, not when
+# it is imported.
+if __name__ == "__main__":
+    generate_static_font()
--- a/websocket_server/generated_thumb.bin
+++ b/websocket_server/generated_thumb.bin
--- a/websocket_server/received_audio.mp3
+++ b/websocket_server/received_audio.mp3
--- a/websocket_server/received_audio.raw
+++ b/websocket_server/received_audio.raw
--- a/websocket_server/server.py
+++ b/websocket_server/server.py
@@ -444,16 +444,39 @@ class MyRecognitionCallback(RecognitionCallback):
    def on_event(self, result: RecognitionResult) -> None:
        if result.get_sentence():
             text = result.get_sentence()['text']
-             print(f"ASR Result: {text}")
-             # 累积每一句识别结果
+             
+             # 获取当前句子的结束状态
+             # 注意：DashScope Python SDK 的 Result 结构可能需要根据版本调整
+             # 这里假设我们只关心文本内容的变化
+             
+             # 简单的去重逻辑：如果新来的文本比上一句长且包含上一句，则认为是同一句的更新
+             if self.sentence_list:
+                 last_sentence = self.sentence_list[-1]
+                 # 去掉句尾标点进行比较，因为流式结果可能标点不稳定
+                 last_clean = last_sentence.rstrip('。，？！')
+                 text_clean = text.rstrip('。，？！')
+                 
+                 if text_clean.startswith(last_clean):
+                     # 更新当前句子
+                     self.sentence_list[-1] = text
+                 elif last_clean.startswith(text_clean):
+                     # 如果新来的比旧的短但也是前缀（不太可能发生，除非回溯），忽略或更新
+                     pass
+                 else:
+                     # 新的句子
                     self.sentence_list.append(text)
+             else:
+                 self.sentence_list.append(text)
+             
             # 同时更新 final_text 以便 Stop 时获取
             self.final_text = "".join(self.sentence_list)
+             print(f"ASR Update: {self.final_text}")
+             
             # 将识别结果发送回客户端
             try:
                 if self.loop.is_running():
                     asyncio.run_coroutine_threadsafe(
-                         self.websocket.send_text(f"ASR:{text}"), 
+                         self.websocket.send_text(f"ASR:{self.final_text}"), 
                         self.loop
                     )
             except Exception as e:
@@ -559,12 +582,24 @@ def generate_image(prompt, progress_callback=None, retry_count=0, max_retries=2)
        progress_callback(35, "正在请求AI生成图片...")
    
    try:
+        if not prompt:
+            print("Error: prompt is empty")
+            if progress_callback:
+                progress_callback(0, "提示词为空")
+            return None
+            
        response = ImageSynthesis.call(
            model='wanx2.0-t2i-turbo',
            prompt=prompt
        )
        
        if response.status_code == 200:
+            if not response.output:
+                print("Error: response.output is None")
+                if progress_callback:
+                    progress_callback(0, "API响应无效")
+                return None
+                
            task_status = response.output.get('task_status')
            
            if task_status == 'PENDING' or task_status == 'RUNNING':
@@ -631,9 +666,9 @@ def generate_image(prompt, progress_callback=None, retry_count=0, max_retries=2)
                        g6 = (g >> 2) & 0x3F
                        b5 = (b >> 3) & 0x1F
                        
-                        # 小端模式：低字节在前
+                        # 大端模式：高字节在前 (符合ST7789默认配置)
                        rgb565 = (r5 << 11) | (g6 << 5) | b5
-                        rgb565_data.extend(struct.pack('<H', rgb565))
+                        rgb565_data.extend(struct.pack('>H', rgb565))
                
                # 保存为.bin文件
                with open(GENERATED_THUMB_FILE, 'wb') as f: