This commit is contained in:
jeremygan2021
2026-03-03 23:31:06 +08:00
parent 0aa8f5f473
commit 20d2e72c51
10 changed files with 220 additions and 201 deletions

311
main.py
View File

@@ -28,7 +28,7 @@ UI_SCREEN_RECORDING = 1
UI_SCREEN_CONFIRM = 2
UI_SCREEN_RESULT = 3
BOOT_SHORT_MS = 500
BOOT_SHORT_MS = 100
BOOT_LONG_MS = 2000
BOOT_EXTRA_LONG_MS = 5000
@@ -170,7 +170,7 @@ def draw_progress_bar(display, x, y, width, height, progress, color=st7789.CYAN)
display.tft.fill_rect(x, y, bar_width, height, color)
def render_recording_screen(display, asr_text="", audio_level=0):
def render_recording_screen(display, asr_text="", audio_level=0, is_recording=False):
"""渲染录音界面"""
if not display or not display.tft:
return
@@ -190,7 +190,10 @@ def render_recording_screen(display, asr_text="", audio_level=0):
display.text(asr_text[:20], 20, 130, st7789.WHITE, wait=False)
display.tft.fill_rect(60, 200, 120, 25, st7789.RED)
display.text("松开停止", 85, 205, st7789.WHITE)
if is_recording:
display.text("松开停止", 85, 205, st7789.WHITE)
else:
display.text("长按录音", 85, 205, st7789.WHITE)
def render_confirm_screen(display, asr_text=""):
@@ -251,14 +254,14 @@ def render_result_screen(display, status="", prompt="", image_received=False):
display.text("AI 生成中", 80, 8, st7789.BLACK)
display.text("生成失败", 80, 50, st7789.RED)
if prompt and not image_received and not image_generation_done:
if prompt and not image_received:
display.tft.fill_rect(10, 140, 220, 50, 0x2124) # Dark Grey
display.text("提示词:", 15, 145, st7789.CYAN)
display.text(prompt[:25] + "..." if len(prompt) > 25 else prompt, 15, 165, st7789.WHITE)
# Only show back button if not showing full image, or maybe show it transparently?
# For now, let's not cover the image with the button hint
if not image_received and not image_generation_done:
if not image_received:
display.tft.fill_rect(60, 210, 120, 25, st7789.BLUE)
display.text("长按返回", 90, 215, st7789.WHITE)
@@ -502,19 +505,20 @@ def main():
# WiFi 和 WS 都连接成功后,进入录音界面
ui_screen = UI_SCREEN_RECORDING
if display.tft:
render_recording_screen(display, "", 0)
render_recording_screen(display, "", 0, False)
else:
print("Running in offline mode")
# 即使离线也进入录音界面(虽然不能用)
ui_screen = UI_SCREEN_RECORDING
if display.tft:
render_recording_screen(display, "离线模式", 0)
render_recording_screen(display, "离线模式", 0, False)
read_buf = bytearray(4096)
last_audio_level = 0
memory_check_counter = 0
spinner_angle = 0
last_spinner_time = 0
wait_for_release = False
while True:
try:
@@ -544,134 +548,67 @@ def main():
btn_action = get_boot_button_action(boot_btn)
if btn_action == 1:
if is_recording:
print(">>> Stop recording")
if ws and ws.is_connected():
try:
ws.send("STOP_RECORDING")
except:
ws = None
is_recording = False
ui_screen = UI_SCREEN_RESULT
image_generation_done = False
if display.tft:
render_result_screen(display, "OPTIMIZING", current_asr_text, False)
time.sleep(0.5)
elif ui_screen == UI_SCREEN_RECORDING:
if not is_recording:
print(">>> Recording...")
is_recording = True
confirm_waiting = False
current_asr_text = ""
current_prompt = ""
current_status = ""
image_generation_done = False
if display.tft:
render_recording_screen(display, "", 0)
if ws is None or not ws.is_connected():
connect_ws()
if ws and ws.is_connected():
try:
ws.send("START_RECORDING")
except:
ws = None
elif ui_screen == UI_SCREEN_CONFIRM:
print(">>> Confirm and generate")
# 发送生成图片指令
if ws and ws.is_connected():
try:
# 明确发送生成指令
ws.send(f"GENERATE_IMAGE:{current_asr_text}")
except:
ws = None
is_recording = False
ui_screen = UI_SCREEN_RESULT
image_generation_done = False
if display.tft:
render_result_screen(display, "OPTIMIZING", current_asr_text, False)
time.sleep(0.5)
elif ui_screen == UI_SCREEN_RESULT:
# Ignore short press in result screen to keep image displayed
# unless image generation failed or is still in progress?
# User request: "只有长按boot才离开" (Only leave on long press)
# So we do nothing here.
pass
elif btn_action == 2:
if is_recording:
print(">>> Stop recording (long press)")
if ws and ws.is_connected():
try:
ws.send("STOP_RECORDING")
except:
ws = None
is_recording = False
# If in recording screen or (not recording AND not result screen), then regenerate/re-record
# This ensures result screen is handled by its own block below
if ui_screen == UI_SCREEN_RECORDING:
if current_asr_text:
print(">>> Generate image with ASR text")
ui_screen = UI_SCREEN_RESULT
image_generation_done = False
if display.tft:
render_result_screen(display, "OPTIMIZING", current_asr_text, False)
time.sleep(0.5)
else:
print(">>> Re-record")
current_asr_text = ""
confirm_waiting = False
ui_screen = UI_SCREEN_RECORDING
if display.tft:
render_recording_screen(display, "", 0)
elif ui_screen == UI_SCREEN_CONFIRM:
print(">>> Re-record")
current_asr_text = ""
# Hold to Record Logic (Press to Start, Release to Stop)
if ui_screen == UI_SCREEN_RECORDING:
if boot_btn.value() == 0 and not is_recording:
print(">>> Start recording (Hold)")
is_recording = True
confirm_waiting = False
ui_screen = UI_SCREEN_RECORDING
if display.tft:
render_recording_screen(display, "", 0)
elif ui_screen == UI_SCREEN_RESULT:
print(">>> Back to recording")
# Stop recording if it was somehow started or just reset state
if ws and ws.is_connected():
try:
ws.send("STOP_RECORDING")
except:
ws = None
ui_screen = UI_SCREEN_RECORDING
is_recording = False
current_asr_text = ""
current_prompt = ""
current_status = ""
image_generation_done = False
confirm_waiting = False
if display.tft:
render_recording_screen(display, "", 0)
render_recording_screen(display, "", 0, True)
if ws is None or not ws.is_connected():
connect_ws()
if ws and ws.is_connected():
try:
ws.send("START_RECORDING")
except:
ws = None
elif boot_btn.value() == 1 and is_recording:
print(">>> Stop recording (Release)")
if ws and ws.is_connected():
try:
ws.send("STOP_RECORDING")
except:
ws = None
is_recording = False
ui_screen = UI_SCREEN_CONFIRM
image_generation_done = False
if display.tft:
render_confirm_screen(display, current_asr_text)
# Consume action to prevent triggering other events
btn_action = 0
if btn_action == 1:
if ui_screen == UI_SCREEN_CONFIRM:
print(">>> Confirm and generate")
if ws and ws.is_connected():
try:
ws.send(f"GENERATE_IMAGE:{current_asr_text}")
except:
ws = None
is_recording = False
ui_screen = UI_SCREEN_RESULT
image_generation_done = False
if display.tft:
render_result_screen(display, "OPTIMIZING", current_asr_text, False)
time.sleep(0.5)
elif btn_action == 2:
if ui_screen == UI_SCREEN_CONFIRM or ui_screen == UI_SCREEN_RESULT:
print(">>> Re-record")
current_asr_text = ""
confirm_waiting = False
ui_screen = UI_SCREEN_RECORDING
is_recording = False
image_generation_done = False
if display.tft:
render_recording_screen(display, "", 0, False)
time.sleep(0.5)
elif btn_action == 3:
print(">>> Config mode")
@@ -684,82 +621,58 @@ def main():
ws.send(read_buf[:num_read], opcode=2)
# 移除录音时的消息接收,确保录音流畅
# poller = uselect.poll()
# poller.register(ws.sock, uselect.POLLIN)
# events = poller.poll(0)
# if events:
# msg = ws.recv()
# image_state, event_data = process_message(msg, display, image_state, image_data_list)
#
# if event_data:
# if event_data[0] == "asr":
# current_asr_text = event_data[1]
# if display.tft:
# render_recording_screen(display, current_asr_text, last_audio_level)
#
# elif event_data[0] == "font_update":
# if ui_screen == UI_SCREEN_RECORDING and display.tft:
# render_recording_screen(display, current_asr_text, last_audio_level)
#
# elif event_data[0] == "status":
# current_status = event_data[1]
# status_text = event_data[2] if len(event_data) > 2 else ""
# if display.tft:
# render_result_screen(display, current_status, current_prompt, image_generation_done)
#
# elif event_data[0] == "prompt":
# current_prompt = event_data[1]
#
# elif event_data[0] == "image_done":
# image_generation_done = True
# if display.tft:
# render_result_screen(display, "COMPLETE", current_prompt, True)
#
# elif event_data[0] == "error":
# if display.tft:
# render_result_screen(display, "ERROR", current_prompt, False)
except:
ws = None
if ui_screen == UI_SCREEN_RESULT and ws and ws.is_connected():
try:
poller = uselect.poll()
poller.register(ws.sock, uselect.POLLIN)
events = poller.poll(100)
if events:
msg = ws.recv()
if msg:
image_state, event_data = process_message(msg, display, image_state, image_data_list)
if event_data:
if event_data[0] == "asr":
current_asr_text = event_data[1]
# 在录音结束后CONFIRM状态或 RESULT 状态,才接收消息
if (ui_screen == UI_SCREEN_CONFIRM or ui_screen == UI_SCREEN_RESULT or ui_screen == UI_SCREEN_RECORDING) and not is_recording:
if ws and ws.is_connected():
try:
poller = uselect.poll()
poller.register(ws.sock, uselect.POLLIN)
events = poller.poll(100)
if events:
msg = ws.recv()
if msg:
image_state, event_data = process_message(msg, display, image_state, image_data_list)
elif event_data[0] == "status":
current_status = event_data[1]
status_text = event_data[2] if len(event_data) > 2 else ""
if display.tft:
render_result_screen(display, current_status, current_prompt, image_generation_done)
elif event_data[0] == "prompt":
current_prompt = event_data[1]
if display.tft:
render_result_screen(display, current_status, current_prompt, image_generation_done)
elif event_data[0] == "image_done":
image_generation_done = True
if display.tft:
render_result_screen(display, "COMPLETE", current_prompt, True)
elif event_data[0] == "error":
if display.tft:
render_result_screen(display, "ERROR", current_prompt, False)
except:
pass
if event_data:
if event_data[0] == "asr":
current_asr_text = event_data[1]
print(f"Received ASR: {current_asr_text}")
# 收到 ASR 结果,跳转到 CONFIRM 界面
if ui_screen == UI_SCREEN_RECORDING or ui_screen == UI_SCREEN_CONFIRM:
ui_screen = UI_SCREEN_CONFIRM
if display.tft:
render_confirm_screen(display, current_asr_text)
elif event_data[0] == "font_update":
# 如果还在录音界面等待,刷新一下(虽然可能已经跳到 CONFIRM 了)
pass
elif event_data[0] == "status":
current_status = event_data[1]
status_text = event_data[2] if len(event_data) > 2 else ""
if display.tft and ui_screen == UI_SCREEN_RESULT:
render_result_screen(display, current_status, current_prompt, image_generation_done)
elif event_data[0] == "prompt":
current_prompt = event_data[1]
if display.tft and ui_screen == UI_SCREEN_RESULT:
render_result_screen(display, current_status, current_prompt, image_generation_done)
elif event_data[0] == "image_done":
image_generation_done = True
if display.tft and ui_screen == UI_SCREEN_RESULT:
render_result_screen(display, "COMPLETE", current_prompt, True)
elif event_data[0] == "error":
if display.tft and ui_screen == UI_SCREEN_RESULT:
render_result_screen(display, "ERROR", current_prompt, False)
except Exception as e:
print(f"WS Recv Error: {e}")
continue
time.sleep(0.01)
except Exception as e: