finish

2026-03-03 23:31:06 +08:00
parent 0aa8f5f473
commit 20d2e72c51
10 changed files with 220 additions and 201 deletions
--- a/main.py
+++ b/main.py
@@ -28,7 +28,7 @@ UI_SCREEN_RECORDING = 1
 UI_SCREEN_CONFIRM = 2
 UI_SCREEN_RESULT = 3

-BOOT_SHORT_MS = 500
+BOOT_SHORT_MS = 100
 BOOT_LONG_MS = 2000
 BOOT_EXTRA_LONG_MS = 5000

@@ -170,7 +170,7 @@ def draw_progress_bar(display, x, y, width, height, progress, color=st7789.CYAN)
        display.tft.fill_rect(x, y, bar_width, height, color)


-def render_recording_screen(display, asr_text="", audio_level=0):
+def render_recording_screen(display, asr_text="", audio_level=0, is_recording=False):
    """渲染录音界面"""
    if not display or not display.tft:
        return
@@ -190,7 +190,10 @@ def render_recording_screen(display, asr_text="", audio_level=0):
        display.text(asr_text[:20], 20, 130, st7789.WHITE, wait=False)
    
    display.tft.fill_rect(60, 200, 120, 25, st7789.RED)
-    display.text("松开停止", 85, 205, st7789.WHITE)
+    if is_recording:
+        display.text("松开停止", 85, 205, st7789.WHITE)
+    else:
+        display.text("长按录音", 85, 205, st7789.WHITE)


 def render_confirm_screen(display, asr_text=""):
@@ -251,14 +254,14 @@ def render_result_screen(display, status="", prompt="", image_received=False):
        display.text("AI 生成中", 80, 8, st7789.BLACK)
        display.text("生成失败", 80, 50, st7789.RED)
        
-    if prompt and not image_received and not image_generation_done:
+    if prompt and not image_received:
        display.tft.fill_rect(10, 140, 220, 50, 0x2124) # Dark Grey
        display.text("提示词:", 15, 145, st7789.CYAN)
        display.text(prompt[:25] + "..." if len(prompt) > 25 else prompt, 15, 165, st7789.WHITE)
    
    # Only show back button if not showing full image, or maybe show it transparently?
    # For now, let's not cover the image with the button hint
-    if not image_received and not image_generation_done:
+    if not image_received:
        display.tft.fill_rect(60, 210, 120, 25, st7789.BLUE)
        display.text("长按返回", 90, 215, st7789.WHITE)

@@ -502,19 +505,20 @@ def main():
        # WiFi 和 WS 都连接成功后，进入录音界面
        ui_screen = UI_SCREEN_RECORDING
        if display.tft:
-            render_recording_screen(display, "", 0)
+            render_recording_screen(display, "", 0, False)
    else:
        print("Running in offline mode")
        # 即使离线也进入录音界面（虽然不能用）
        ui_screen = UI_SCREEN_RECORDING
        if display.tft:
-            render_recording_screen(display, "离线模式", 0)
+            render_recording_screen(display, "离线模式", 0, False)
    
    read_buf = bytearray(4096)
    last_audio_level = 0
    memory_check_counter = 0
    spinner_angle = 0
    last_spinner_time = 0
+    wait_for_release = False
    
    while True:
        try:
@@ -544,134 +548,67 @@ def main():

            btn_action = get_boot_button_action(boot_btn)
            
-            if btn_action == 1:
-                if is_recording:
-                    print(">>> Stop recording")
-                    if ws and ws.is_connected():
-                        try:
-                            ws.send("STOP_RECORDING")
-                        except:
-                            ws = None
-                    
-                    is_recording = False
-                    ui_screen = UI_SCREEN_RESULT
-                    image_generation_done = False
-                    
-                    if display.tft:
-                        render_result_screen(display, "OPTIMIZING", current_asr_text, False)
-                    
-                    time.sleep(0.5)
-                
-                elif ui_screen == UI_SCREEN_RECORDING:
-                    if not is_recording:
-                        print(">>> Recording...")
-                        is_recording = True
-                        confirm_waiting = False
-                        current_asr_text = ""
-                        current_prompt = ""
-                        current_status = ""
-                        image_generation_done = False
-                        
-                        if display.tft:
-                            render_recording_screen(display, "", 0)
-                        
-                        if ws is None or not ws.is_connected():
-                            connect_ws()
-                        
-                        if ws and ws.is_connected():
-                            try:
-                                ws.send("START_RECORDING")
-                            except:
-                                ws = None
-                
-                elif ui_screen == UI_SCREEN_CONFIRM:
-                    print(">>> Confirm and generate")
-                    
-                    # 发送生成图片指令
-                    if ws and ws.is_connected():
-                        try:
-                            # 明确发送生成指令
-                            ws.send(f"GENERATE_IMAGE:{current_asr_text}")
-                        except:
-                            ws = None
-                    
-                    is_recording = False
-                    ui_screen = UI_SCREEN_RESULT
-                    image_generation_done = False
-                    
-                    if display.tft:
-                        render_result_screen(display, "OPTIMIZING", current_asr_text, False)
-                    
-                    time.sleep(0.5)
-                
-                elif ui_screen == UI_SCREEN_RESULT:
-                    # Ignore short press in result screen to keep image displayed
-                    # unless image generation failed or is still in progress?
-                    # User request: "只有长按boot才离开" (Only leave on long press)
-                    # So we do nothing here.
-                    pass
-            
-            elif btn_action == 2:
-                if is_recording:
-                    print(">>> Stop recording (long press)")
-                    if ws and ws.is_connected():
-                        try:
-                            ws.send("STOP_RECORDING")
-                        except:
-                            ws = None
-                    
-                    is_recording = False
-                
-                # If in recording screen or (not recording AND not result screen), then regenerate/re-record
-                # This ensures result screen is handled by its own block below
-                if ui_screen == UI_SCREEN_RECORDING:
-                    if current_asr_text:
-                        print(">>> Generate image with ASR text")
-                        ui_screen = UI_SCREEN_RESULT
-                        image_generation_done = False
-                        
-                        if display.tft:
-                            render_result_screen(display, "OPTIMIZING", current_asr_text, False)
-                        
-                        time.sleep(0.5)
-                    else:
-                        print(">>> Re-record")
-                        current_asr_text = ""
-                        confirm_waiting = False
-                        ui_screen = UI_SCREEN_RECORDING
-                        
-                        if display.tft:
-                            render_recording_screen(display, "", 0)
-                
-                elif ui_screen == UI_SCREEN_CONFIRM:
-                    print(">>> Re-record")
-                    current_asr_text = ""
+            # Hold to Record Logic (Press to Start, Release to Stop)
+            if ui_screen == UI_SCREEN_RECORDING:
+                if boot_btn.value() == 0 and not is_recording:
+                    print(">>> Start recording (Hold)")
+                    is_recording = True
                    confirm_waiting = False
-                    ui_screen = UI_SCREEN_RECORDING
-                    
-                    if display.tft:
-                        render_recording_screen(display, "", 0)
-                
-                elif ui_screen == UI_SCREEN_RESULT:
-                    print(">>> Back to recording")
-                    # Stop recording if it was somehow started or just reset state
-                    if ws and ws.is_connected():
-                        try:
-                            ws.send("STOP_RECORDING")
-                        except:
-                            ws = None
-                    
-                    ui_screen = UI_SCREEN_RECORDING
-                    is_recording = False
                    current_asr_text = ""
                    current_prompt = ""
                    current_status = ""
                    image_generation_done = False
-                    confirm_waiting = False
-                    
                    if display.tft:
-                        render_recording_screen(display, "", 0)
+                        render_recording_screen(display, "", 0, True)
+                    if ws is None or not ws.is_connected():
+                        connect_ws()
+                    if ws and ws.is_connected():
+                        try:
+                            ws.send("START_RECORDING")
+                        except:
+                            ws = None
+                elif boot_btn.value() == 1 and is_recording:
+                    print(">>> Stop recording (Release)")
+                    if ws and ws.is_connected():
+                        try:
+                            ws.send("STOP_RECORDING")
+                        except:
+                            ws = None
+                    is_recording = False
+                    ui_screen = UI_SCREEN_CONFIRM
+                    image_generation_done = False
+                    if display.tft:
+                        render_confirm_screen(display, current_asr_text)
+                    # Consume action to prevent triggering other events
+                    btn_action = 0
+
+            if btn_action == 1:
+                if ui_screen == UI_SCREEN_CONFIRM:
+                    print(">>> Confirm and generate")
+                    if ws and ws.is_connected():
+                        try:
+                            ws.send(f"GENERATE_IMAGE:{current_asr_text}")
+                        except:
+                            ws = None
+                    is_recording = False
+                    ui_screen = UI_SCREEN_RESULT
+                    image_generation_done = False
+                    if display.tft:
+                        render_result_screen(display, "OPTIMIZING", current_asr_text, False)
+                    time.sleep(0.5)
            
+            elif btn_action == 2:
+                if ui_screen == UI_SCREEN_CONFIRM or ui_screen == UI_SCREEN_RESULT:
+                    print(">>> Re-record")
+                    current_asr_text = ""
+                    confirm_waiting = False
+                    ui_screen = UI_SCREEN_RECORDING
+                    is_recording = False
+                    image_generation_done = False
+                    if display.tft:
+                        render_recording_screen(display, "", 0, False)
+                    time.sleep(0.5)
+
            elif btn_action == 3:
                print(">>> Config mode")
            
@@ -684,82 +621,58 @@ def main():
                                ws.send(read_buf[:num_read], opcode=2)
                                
                                # 移除录音时的消息接收，确保录音流畅
-                                # poller = uselect.poll()
-                                # poller.register(ws.sock, uselect.POLLIN)
-                                # events = poller.poll(0)
-                                # if events:
-                                #     msg = ws.recv()
-                                #     image_state, event_data = process_message(msg, display, image_state, image_data_list)
-                                #     
-                                #     if event_data:
-                                #         if event_data[0] == "asr":
-                                #             current_asr_text = event_data[1]
-                                #             if display.tft:
-                                #                 render_recording_screen(display, current_asr_text, last_audio_level)
-                                #         
-                                #         elif event_data[0] == "font_update":
-                                #             if ui_screen == UI_SCREEN_RECORDING and display.tft:
-                                #                 render_recording_screen(display, current_asr_text, last_audio_level)
-                                #         
-                                #         elif event_data[0] == "status":
-                                #             current_status = event_data[1]
-                                #             status_text = event_data[2] if len(event_data) > 2 else ""
-                                #             if display.tft:
-                                #                 render_result_screen(display, current_status, current_prompt, image_generation_done)
-                                #         
-                                #         elif event_data[0] == "prompt":
-                                #             current_prompt = event_data[1]
-                                #         
-                                #         elif event_data[0] == "image_done":
-                                #             image_generation_done = True
-                                #             if display.tft:
-                                #                 render_result_screen(display, "COMPLETE", current_prompt, True)
-                                #         
-                                #         elif event_data[0] == "error":
-                                #             if display.tft:
-                                #                 render_result_screen(display, "ERROR", current_prompt, False)
-                                    
                            except:
                                ws = None
            
-            if ui_screen == UI_SCREEN_RESULT and ws and ws.is_connected():
-                try:
-                    poller = uselect.poll()
-                    poller.register(ws.sock, uselect.POLLIN)
-                    events = poller.poll(100)
-                    if events:
-                        msg = ws.recv()
-                        if msg:
-                            image_state, event_data = process_message(msg, display, image_state, image_data_list)
-                            
-                            if event_data:
-                                if event_data[0] == "asr":
-                                    current_asr_text = event_data[1]
+            # Poll for server messages on any screen (RECORDING, CONFIRM or RESULT), but only while not actively recording
+            if (ui_screen == UI_SCREEN_CONFIRM or ui_screen == UI_SCREEN_RESULT or ui_screen == UI_SCREEN_RECORDING) and not is_recording:
+                if ws and ws.is_connected():
+                    try:
+                        poller = uselect.poll()
+                        poller.register(ws.sock, uselect.POLLIN)
+                        events = poller.poll(100)
+                        if events:
+                            msg = ws.recv()
+                            if msg:
+                                image_state, event_data = process_message(msg, display, image_state, image_data_list)
                                
-                                elif event_data[0] == "status":
-                                    current_status = event_data[1]
-                                    status_text = event_data[2] if len(event_data) > 2 else ""
-                                    if display.tft:
-                                        render_result_screen(display, current_status, current_prompt, image_generation_done)
-                                
-                                elif event_data[0] == "prompt":
-                                    current_prompt = event_data[1]
-                                    if display.tft:
-                                        render_result_screen(display, current_status, current_prompt, image_generation_done)
-                                
-                                elif event_data[0] == "image_done":
-                                    image_generation_done = True
-                                    if display.tft:
-                                        render_result_screen(display, "COMPLETE", current_prompt, True)
-                                
-                                elif event_data[0] == "error":
-                                    if display.tft:
-                                        render_result_screen(display, "ERROR", current_prompt, False)
-                except:
-                    pass
+                                if event_data:
+                                    if event_data[0] == "asr":
+                                        current_asr_text = event_data[1]
+                                        print(f"Received ASR: {current_asr_text}")
+                                        
+                                        # 收到 ASR 结果，跳转到 CONFIRM 界面
+                                        if ui_screen == UI_SCREEN_RECORDING or ui_screen == UI_SCREEN_CONFIRM:
+                                            ui_screen = UI_SCREEN_CONFIRM
+                                            if display.tft:
+                                                render_confirm_screen(display, current_asr_text)
+                                    
+                                    elif event_data[0] == "font_update":
+                                        # font_update is currently ignored here: no screen is redrawn for it
+                                        pass
+                                    
+                                    elif event_data[0] == "status":
+                                        current_status = event_data[1]
+                                        status_text = event_data[2] if len(event_data) > 2 else ""
+                                        if display.tft and ui_screen == UI_SCREEN_RESULT:
+                                            render_result_screen(display, current_status, current_prompt, image_generation_done)
+                                    
+                                    elif event_data[0] == "prompt":
+                                        current_prompt = event_data[1]
+                                        if display.tft and ui_screen == UI_SCREEN_RESULT:
+                                            render_result_screen(display, current_status, current_prompt, image_generation_done)
+                                    
+                                    elif event_data[0] == "image_done":
+                                        image_generation_done = True
+                                        if display.tft and ui_screen == UI_SCREEN_RESULT:
+                                            render_result_screen(display, "COMPLETE", current_prompt, True)
+                                    
+                                    elif event_data[0] == "error":
+                                        if display.tft and ui_screen == UI_SCREEN_RESULT:
+                                            render_result_screen(display, "ERROR", current_prompt, False)
+                    except Exception as e:
+                        print(f"WS Recv Error: {e}")
                
-                continue
-            
            time.sleep(0.01)
            
        except Exception as e: