1

2026-03-03 22:45:09 +08:00
parent 700bc55657
commit 05f02a1454
14 changed files with 574 additions and 149 deletions
--- a/main.py
+++ b/main.py
@@ -163,7 +163,7 @@ def render_recording_screen(display, asr_text="", audio_level=0):
        display.tft.fill_rect(20, 100, bar_width, 10, st7789.GREEN)
    
    if asr_text:
-        display.text(asr_text[:20], 20, 130, st7789.WHITE)
+        display.text(asr_text[:20], 20, 130, st7789.WHITE, wait=False)
    
    display.tft.fill_rect(60, 200, 120, 25, st7789.RED)
    display.text("松开停止", 85, 205, st7789.WHITE)
@@ -194,54 +194,91 @@ def render_result_screen(display, status="", prompt="", image_received=False):
    if not display or not display.tft:
        return
    
-    # Only clear if we are starting a new state or it's the first render
-    # But for simplicity we clear all for now. Optimizing this requires state tracking.
-    display.tft.fill(st7789.BLACK)
-    
-    # Header
-    display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE)
-    display.text("AI 生成中", 80, 8, st7789.BLACK)
-    
    if status == "OPTIMIZING":
+        display.tft.fill(st7789.BLACK)
+        display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE)
+        display.text("AI 生成中", 80, 8, st7789.BLACK)
+        
        display.text("正在思考...", 80, 60, st7789.CYAN)
        display.text("优化提示词中", 70, 80, st7789.CYAN)
        draw_progress_bar(display, 40, 110, 160, 6, 0.3, st7789.CYAN)
        # Spinner will be drawn by main loop
        
    elif status == "RENDERING":
+        display.tft.fill(st7789.BLACK)
+        display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE)
+        display.text("AI 生成中", 80, 8, st7789.BLACK)
+        
        display.text("正在绘画...", 80, 60, st7789.YELLOW)
        display.text("AI作画中", 85, 80, st7789.YELLOW)
        draw_progress_bar(display, 40, 110, 160, 6, 0.7, st7789.YELLOW)
        # Spinner will be drawn by main loop
        
    elif status == "COMPLETE" or image_received:
-        display.text("生成完成!", 80, 50, st7789.GREEN)
-        draw_check_icon(display, 110, 80)
+        # Don't clear screen, image is already there
+        # display.text("生成完成!", 80, 50, st7789.GREEN)
+        # draw_check_icon(display, 110, 80)
+        pass
        
    elif status == "ERROR":
+        display.tft.fill(st7789.BLACK)
+        display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE)
+        display.text("AI 生成中", 80, 8, st7789.BLACK)
        display.text("生成失败", 80, 50, st7789.RED)
        
-    if prompt:
+    if prompt and not image_received:
        display.tft.fill_rect(10, 140, 220, 50, 0x2124) # Dark Grey
        display.text("提示词:", 15, 145, st7789.CYAN)
        display.text(prompt[:25] + "..." if len(prompt) > 25 else prompt, 15, 165, st7789.WHITE)
    
-    display.tft.fill_rect(60, 210, 120, 25, st7789.BLUE)
-    display.text("返回录音", 90, 215, st7789.WHITE)
+    # Only show back button if not showing full image, or maybe show it transparently?
+    # For now, let's not cover the image with the button hint
+    if not image_received:
+        display.tft.fill_rect(60, 210, 120, 25, st7789.BLUE)
+        display.text("长按返回", 90, 215, st7789.WHITE)
+
+
+
+
+
+
+

 def process_message(msg, display, image_state, image_data_list):
    """处理WebSocket消息"""
    # Handle binary image data
    if isinstance(msg, (bytes, bytearray)):
        if image_state == IMAGE_STATE_RECEIVING:
-            image_data_list.append(msg)
-            # Optional: Update progress bar or indicator
+            try:
+                if len(image_data_list) < 2:
+                     # 异常情况，重置
+                     return IMAGE_STATE_IDLE, None
+                
+                img_size = image_data_list[0]
+                current_offset = image_data_list[1]
+                
+                # Stream directly to display
+                if display and display.tft:
+                    x = (240 - img_size) // 2
+                    y = (240 - img_size) // 2
+                    display.show_image_chunk(x, y, img_size, img_size, msg, current_offset)
+                
+                # Update offset
+                image_data_list[1] += len(msg)
+                
+            except Exception as e:
+                print(f"Stream image error: {e}")
+            
            return image_state, None
        return image_state, None

    if not isinstance(msg, str):
        return image_state, None
    
+    # Check for font data first
+    if display and hasattr(display, 'font') and display.font.handle_message(msg):
+        return image_state, ("font_update",)
+    
    status_info = None
    
    if msg.startswith("ASR:"):
@@ -272,6 +309,15 @@ def process_message(msg, display, image_state, image_data_list):
            print(f"Image start, size: {size}, img_size: {img_size}")
            image_data_list.clear()
            image_data_list.append(img_size) # Store metadata at index 0
+            image_data_list.append(0)        # Store current received bytes offset at index 1
+            
+            # Prepare display for streaming
+            if display and display.tft:
+                # Calculate position
+                x = (240 - img_size) // 2
+                y = (240 - img_size) // 2
+                # Pre-set window (this will be done in first chunk call)
+            
            return IMAGE_STATE_RECEIVING, None
        except Exception as e:
            print(f"IMAGE_START parse error: {e}")
@@ -279,45 +325,14 @@ def process_message(msg, display, image_state, image_data_list):
    
    # Deprecated text-based IMAGE_DATA handling
    elif msg.startswith("IMAGE_DATA:") and image_state == IMAGE_STATE_RECEIVING:
-        try:
-            data = msg.split(":", 1)[1]
-            # Convert hex to bytes immediately if using old protocol, but we switched to binary
-            # Keep this just in case server rolls back? No, let's assume binary.
-            pass 
-        except:
-            pass
+        pass 
    
    elif msg == "IMAGE_END" and image_state == IMAGE_STATE_RECEIVING:
-        try:
-            print("Image received, processing...")
+        print("Image received completely")
+        image_data_list.clear()
+        gc.collect()
+        return IMAGE_STATE_IDLE, ("image_done",)
            
-            img_size = image_data_list[0] if image_data_list else 64
-            # Combine all binary chunks (skipping metadata at index 0)
-            img_data = b"".join(image_data_list[1:])
-            image_data_list.clear()
-            
-            print(f"Image data len: {len(img_data)}")
-            
-            if display and display.tft:
-                x = (240 - img_size) // 2
-                y = (240 - img_size) // 2
-                display.show_image(x, y, img_size, img_size, img_data)
-                
-                # Overlay success message slightly
-                display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE)
-                display.text("图片已生成!", 70, 5, st7789.BLACK)
-            
-            gc.collect()
-            print("Image displayed")
-            return IMAGE_STATE_IDLE, ("image_done",)
-            
-        except Exception as e:
-            print(f"Image process error: {e}")
-            import sys
-            sys.print_exception(e)
-        
-        return IMAGE_STATE_IDLE, None
-    
    elif msg.startswith("IMAGE_ERROR:"):
        print(msg)
        return IMAGE_STATE_IDLE, ("error", msg[12:])
@@ -330,7 +345,7 @@ def print_asr(text, display=None):
    print(f"ASR: {text}")
    if display and display.tft:
        display.fill_rect(0, 40, 240, 160, st7789.BLACK)
-        display.text(text, 0, 40, st7789.WHITE)
+        display.text(text, 0, 40, st7789.WHITE, wait=False)


 def get_boot_button_action(boot_btn):
@@ -468,7 +483,7 @@ def main():
                    print("Memory high, cleaned")
            
            # Spinner Animation
-            if ui_screen == UI_SCREEN_RESULT and not image_generation_done and current_status in ["OPTIMIZING", "RENDERING"]:
+            if ui_screen == UI_SCREEN_RESULT and not image_generation_done and current_status in ["OPTIMIZING", "RENDERING"] and image_state != IMAGE_STATE_RECEIVING:
                now = time.ticks_ms()
                if time.ticks_diff(now, last_spinner_time) > 100:
                    if display.tft:
@@ -543,17 +558,11 @@ def main():
                    time.sleep(0.5)
                
                elif ui_screen == UI_SCREEN_RESULT:
-                    print(">>> Back to recording")
-                    ui_screen = UI_SCREEN_RECORDING
-                    is_recording = False
-                    current_asr_text = ""
-                    current_prompt = ""
-                    current_status = ""
-                    image_generation_done = False
-                    confirm_waiting = False
-                    
-                    if display.tft:
-                        render_recording_screen(display, "", 0)
+                    # Ignore short press in result screen to keep image displayed
+                    # unless image generation failed or is still in progress?
+                    # User request: "只有长按boot才离开" (Only leave on long press)
+                    # So we do nothing here.
+                    pass
            
            elif btn_action == 2:
                if is_recording:
@@ -595,14 +604,24 @@ def main():
                        render_recording_screen(display, "", 0)
                
                elif ui_screen == UI_SCREEN_RESULT:
-                    print(">>> Generate image (manual)")
+                    print(">>> Back to recording")
+                    # Stop recording if it was somehow started or just reset state
                    if ws and ws.is_connected():
                        try:
-                            ws.send("START_RECORDING")
-                            is_recording = True
-                            ui_screen = UI_SCREEN_RECORDING
+                            ws.send("STOP_RECORDING")
                        except:
                            ws = None
+                    
+                    ui_screen = UI_SCREEN_RECORDING
+                    is_recording = False
+                    current_asr_text = ""
+                    current_prompt = ""
+                    current_status = ""
+                    image_generation_done = False
+                    confirm_waiting = False
+                    
+                    if display.tft:
+                        render_recording_screen(display, "", 0)
            
            elif btn_action == 3:
                print(">>> Config mode")
@@ -628,6 +647,10 @@ def main():
                                            if display.tft:
                                                render_recording_screen(display, current_asr_text, last_audio_level)
                                        
+                                        elif event_data[0] == "font_update":
+                                            if ui_screen == UI_SCREEN_RECORDING and display.tft:
+                                                render_recording_screen(display, current_asr_text, last_audio_level)
+                                        
                                        elif event_data[0] == "status":
                                            current_status = event_data[1]
                                            status_text = event_data[2] if len(event_data) > 2 else ""