This commit is contained in:
jeremygan2021
2026-03-03 22:45:09 +08:00
parent 700bc55657
commit 05f02a1454
14 changed files with 574 additions and 149 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -47,9 +47,9 @@ class Display:
if self.font:
self.font.set_ws(ws)
def text(self, text, x, y, color):
def text(self, text, x, y, color, wait=True):
if self.tft:
self.font.text(self.tft, text, x, y, color)
self.font.text(self.tft, text, x, y, color, wait=wait)
def init_ui(self):
"""初始化 UI 背景"""
@@ -93,3 +93,59 @@ class Display:
self.tft.blit_buffer(rgb565_data, x, y, width, height)
except Exception as e:
print(f"Show image error: {e}")
def show_image_chunk(self, x, y, width, height, data, offset):
    """Stream one chunk of raw pixel data into the image area of the display.

    The window covering the whole image is opened once, when the first chunk
    (``offset == 0``) arrives; every chunk is then written as raw data so no
    full-frame buffer has to be held in memory.

    NOTE(review): this relies on the display driver exposing
    ``set_window(x0, y0, x1, y1)`` and ``write(command, data)`` and on the
    controller continuing to fill the same window across consecutive data
    writes -- confirm against the st7789 driver in use. Nothing else should
    draw to the display between the first and the last chunk.

    Args:
        x, y: top-left corner of the image on screen.
        width, height: image size in pixels (2 bytes per pixel).
        data: bytes-like chunk of pixel data.
        offset: byte offset of this chunk within the whole image.

    Returns:
        None. Driver errors are caught and printed, never raised.
    """
    if not self.tft:
        return
    try:
        # Bytes of the image that have not been written yet.
        remaining = width * height * 2 - offset
        if not data or remaining <= 0:
            # Empty chunk, or the image is already complete: nothing to write.
            return
        if len(data) > remaining:
            # Drop surplus bytes so they cannot spill past the image window.
            data = memoryview(data)[:remaining]
        if offset == 0:
            # First chunk: open the window for the whole image.
            self.tft.set_window(x, y, x + width - 1, y + height - 1)
        # Write the raw pixel data (no command byte).
        self.tft.write(None, data)
    except Exception as e:
        print(f"Show chunk error: {e}")

158
font.py
View File

@@ -4,6 +4,11 @@ import time
import binascii
import gc
try:
import static_font_data
except ImportError:
static_font_data = None
class Font:
def __init__(self, ws=None):
self.ws = ws
@@ -11,6 +16,8 @@ class Font:
self.pending_requests = set()
self.retry_count = {}
self.max_retries = 3
# Pre-allocate buffer for row drawing (16 pixels * 2 bytes = 32 bytes)
self.row_buf = bytearray(32)
def set_ws(self, ws):
self.ws = ws
@@ -24,7 +31,40 @@ class Font:
"""获取当前缓存的字体数量"""
return len(self.cache)
def text(self, tft, text, x, y, color, bg=0x0000):
def handle_message(self, msg):
"""Handle a font-related message and update the glyph cache.

Recognises text messages starting with "FONT_BATCH_END:" or "FONT_DATA:".
Returns whether the message was a font message.
"""
# Only text messages can be font messages.
if not isinstance(msg, str):
return False
if msg.startswith("FONT_BATCH_END:"):
# End-of-batch marker; currently mainly used as the exit condition while blocking on fonts.
return True
elif msg.startswith("FONT_DATA:"):
# Colon-separated fields: parts[1] is the glyph key, parts[2] the hex-encoded payload.
parts = msg.split(":")
if len(parts) >= 3:
try:
key_str = parts[1]
# The key is parsed as hexadecimal when it carries a "0x" prefix, otherwise as decimal.
if key_str.startswith("0x"):
c = int(key_str, 16)
else:
c = int(key_str)
d = binascii.unhexlify(parts[2])
self.cache[c] = d
# Clear the retry counter for this glyph, if there is one.
if c in self.retry_count:
del self.retry_count[c]
return True
except Exception as e:
print(f"Font data parse error: {e}")
# NOTE(review): indentation is lost in this view, so it is unclear whether this
# return belongs to the except branch or to the enclosing elif -- confirm.
return True
return False
def text(self, tft, text, x, y, color, bg=0x0000, wait=True):
"""在ST7789显示器上绘制文本"""
if not text:
return
@@ -32,17 +72,23 @@ class Font:
color_bytes = struct.pack(">H", color)
bg_bytes = struct.pack(">H", bg)
lut = [bytearray(16) for _ in range(256)]
for i in range(256):
for bit in range(8):
val = (i >> bit) & 1
idx = (7 - bit) * 2
# Create a mini-LUT for 4-bit chunks (16 entries * 8 bytes = 128 bytes)
# Each entry maps 4 bits (0-15) to 4 pixels (8 bytes)
mini_lut = []
for i in range(16):
chunk = bytearray(8)
for bit in range(4):
# bit 0 is LSB of nibble, corresponds to rightmost pixel of the 4 pixels
# Assuming standard MSB-first bitmap
val = (i >> (3 - bit)) & 1
idx = bit * 2
if val:
lut[i][idx] = color_bytes[0]
lut[i][idx+1] = color_bytes[1]
chunk[idx] = color_bytes[0]
chunk[idx+1] = color_bytes[1]
else:
lut[i][idx] = bg_bytes[0]
lut[i][idx+1] = bg_bytes[1]
chunk[idx] = bg_bytes[0]
chunk[idx+1] = bg_bytes[1]
mini_lut.append(bytes(chunk))
initial_x = x
@@ -50,6 +96,9 @@ class Font:
for char in text:
if ord(char) > 127:
code = ord(char)
# Check static font data first
if static_font_data and hasattr(static_font_data, 'FONTS') and code in static_font_data.FONTS:
continue
if code not in self.cache:
missing_codes.add(code)
@@ -57,9 +106,12 @@ class Font:
missing_list = list(missing_codes)
req_str = ",".join([str(c) for c in missing_list])
# Only print if waiting, to reduce log spam in async mode
if wait:
print(f"Batch requesting fonts: {req_str}")
try:
self.ws.send(f"GET_FONTS_BATCH:{req_str}")
if wait:
self._wait_for_fonts(missing_codes)
except Exception as e:
print(f"Batch font request failed: {e}")
@@ -78,12 +130,22 @@ class Font:
is_chinese = False
buf_data = None
if ord(char) > 127:
code = ord(char)
if code in self.cache:
if code > 127:
if static_font_data and hasattr(static_font_data, 'FONTS') and code in static_font_data.FONTS:
buf_data = static_font_data.FONTS[code]
is_chinese = True
elif code in self.cache:
buf_data = self.cache[code]
is_chinese = True
else:
# Missing font data
if not wait:
# In async mode, draw a placeholder or space
# We use '?' for now so user knows something is missing
char = '?'
is_chinese = False
else:
if code in self.pending_requests:
retry = self.retry_count.get(code, 0)
@@ -92,14 +154,40 @@ class Font:
self._request_single_font(code)
if is_chinese and buf_data:
self._draw_bitmap(tft, buf_data, x, y, 16, 16, lut)
self._draw_bitmap_optimized(tft, buf_data, x, y, mini_lut)
x += 16
else:
if ord(char) > 127:
if code > 127:
char = '?'
self._draw_ascii(tft, char, x, y, color, bg)
x += 8
def _draw_bitmap_optimized(self, tft, bitmap, x, y, mini_lut):
"""使用优化方式绘制位图,减少内存分配"""
# Bitmap is 32 bytes (16x16 pixels)
# 2 bytes per row
for row in range(16):
# Get 2 bytes for this row
# Handle case where bitmap might be different length (safety)
if row * 2 + 1 < len(bitmap):
b1 = bitmap[row * 2]
b2 = bitmap[row * 2 + 1]
# Process b1 (Left 8 pixels)
# High nibble
self.row_buf[0:8] = mini_lut[(b1 >> 4) & 0x0F]
# Low nibble
self.row_buf[8:16] = mini_lut[b1 & 0x0F]
# Process b2 (Right 8 pixels)
# High nibble
self.row_buf[16:24] = mini_lut[(b2 >> 4) & 0x0F]
# Low nibble
self.row_buf[24:32] = mini_lut[b2 & 0x0F]
tft.blit_buffer(self.row_buf, x, y + row, 16, 1)
def _request_single_font(self, code):
"""请求单个字体"""
if self.ws:
@@ -134,10 +222,10 @@ class Font:
if msg is None:
continue
if isinstance(msg, str):
if self.handle_message(msg):
# 如果是批处理结束,检查是否有失败的
if msg.startswith("FONT_BATCH_END:"):
parts = msg[15:].split(":")
success = int(parts[0]) if len(parts) > 0 else 0
failed = int(parts[1]) if len(parts) > 1 else 0
if failed > 0:
@@ -145,34 +233,26 @@ class Font:
for c in temp_missing:
if c not in self.cache:
print(f"Font failed after retries: {c}")
self.cache[c] = None
self.cache[c] = None # 标记为 None 避免死循环
if c in target_codes:
target_codes.remove(c)
# 清除所有剩余的目标,因为批处理结束了
# 但实际上可能只需要清除 failed 的。
# 无论如何,收到 BATCH_END 意味着本次请求处理完毕。
# 如果还有没收到的,可能是丢包了。
# 为了简单起见,我们认为结束了。
target_codes.clear()
elif msg.startswith("FONT_DATA:"):
parts = msg.split(":")
if len(parts) >= 3:
try:
key_str = parts[1]
if key_str.startswith("0x"):
c = int(key_str, 16)
else:
c = int(key_str)
d = binascii.unhexlify(parts[2])
self.cache[c] = d
if c in target_codes:
# 检查是否有新缓存的字体满足了 target_codes
temp_target = list(target_codes)
for c in temp_target:
if c in self.cache:
target_codes.remove(c)
if c in self.retry_count:
del self.retry_count[c]
except:
pass
else:
self.local_deferred.append(msg)
elif msg is not None:
else:
self.local_deferred.append(msg)
except Exception as e:
@@ -183,12 +263,6 @@ class Font:
self.ws.unread_messages = self.local_deferred + self.ws.unread_messages
self.local_deferred = []
def _draw_bitmap(self, tft, bitmap, x, y, w, h, lut):
"""绘制位图"""
chunks = [lut[b] for b in bitmap]
rgb_buf = b''.join(chunks)
tft.blit_buffer(rgb_buf, x, y, w, h)
def _draw_ascii(self, tft, char, x, y, color, bg):
"""绘制ASCII字符"""
w, h = 8, 8

145
main.py
View File

@@ -163,7 +163,7 @@ def render_recording_screen(display, asr_text="", audio_level=0):
display.tft.fill_rect(20, 100, bar_width, 10, st7789.GREEN)
if asr_text:
display.text(asr_text[:20], 20, 130, st7789.WHITE)
display.text(asr_text[:20], 20, 130, st7789.WHITE, wait=False)
display.tft.fill_rect(60, 200, 120, 25, st7789.RED)
display.text("松开停止", 85, 205, st7789.WHITE)
@@ -194,54 +194,91 @@ def render_result_screen(display, status="", prompt="", image_received=False):
if not display or not display.tft:
return
# Only clear if we are starting a new state or it's the first render
# But for simplicity we clear all for now. Optimizing this requires state tracking.
if status == "OPTIMIZING":
display.tft.fill(st7789.BLACK)
# Header
display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE)
display.text("AI 生成中", 80, 8, st7789.BLACK)
if status == "OPTIMIZING":
display.text("正在思考...", 80, 60, st7789.CYAN)
display.text("优化提示词中", 70, 80, st7789.CYAN)
draw_progress_bar(display, 40, 110, 160, 6, 0.3, st7789.CYAN)
# Spinner will be drawn by main loop
elif status == "RENDERING":
display.tft.fill(st7789.BLACK)
display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE)
display.text("AI 生成中", 80, 8, st7789.BLACK)
display.text("正在绘画...", 80, 60, st7789.YELLOW)
display.text("AI作画中", 85, 80, st7789.YELLOW)
draw_progress_bar(display, 40, 110, 160, 6, 0.7, st7789.YELLOW)
# Spinner will be drawn by main loop
elif status == "COMPLETE" or image_received:
display.text("生成完成!", 80, 50, st7789.GREEN)
draw_check_icon(display, 110, 80)
# Don't clear screen, image is already there
# display.text("生成完成!", 80, 50, st7789.GREEN)
# draw_check_icon(display, 110, 80)
pass
elif status == "ERROR":
display.tft.fill(st7789.BLACK)
display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE)
display.text("AI 生成中", 80, 8, st7789.BLACK)
display.text("生成失败", 80, 50, st7789.RED)
if prompt:
if prompt and not image_received:
display.tft.fill_rect(10, 140, 220, 50, 0x2124) # Dark Grey
display.text("提示词:", 15, 145, st7789.CYAN)
display.text(prompt[:25] + "..." if len(prompt) > 25 else prompt, 15, 165, st7789.WHITE)
# Only show back button if not showing full image, or maybe show it transparently?
# For now, let's not cover the image with the button hint
if not image_received:
display.tft.fill_rect(60, 210, 120, 25, st7789.BLUE)
display.text("返回录音", 90, 215, st7789.WHITE)
display.text("长按返回", 90, 215, st7789.WHITE)
def process_message(msg, display, image_state, image_data_list):
"""处理WebSocket消息"""
# Handle binary image data
if isinstance(msg, (bytes, bytearray)):
if image_state == IMAGE_STATE_RECEIVING:
image_data_list.append(msg)
# Optional: Update progress bar or indicator
try:
if len(image_data_list) < 2:
# 异常情况,重置
return IMAGE_STATE_IDLE, None
img_size = image_data_list[0]
current_offset = image_data_list[1]
# Stream directly to display
if display and display.tft:
x = (240 - img_size) // 2
y = (240 - img_size) // 2
display.show_image_chunk(x, y, img_size, img_size, msg, current_offset)
# Update offset
image_data_list[1] += len(msg)
except Exception as e:
print(f"Stream image error: {e}")
return image_state, None
return image_state, None
if not isinstance(msg, str):
return image_state, None
# Check for font data first
if display and hasattr(display, 'font') and display.font.handle_message(msg):
return image_state, ("font_update",)
status_info = None
if msg.startswith("ASR:"):
@@ -272,6 +309,15 @@ def process_message(msg, display, image_state, image_data_list):
print(f"Image start, size: {size}, img_size: {img_size}")
image_data_list.clear()
image_data_list.append(img_size) # Store metadata at index 0
image_data_list.append(0) # Store current received bytes offset at index 1
# Prepare display for streaming
if display and display.tft:
# Calculate position
x = (240 - img_size) // 2
y = (240 - img_size) // 2
# Pre-set window (this will be done in first chunk call)
return IMAGE_STATE_RECEIVING, None
except Exception as e:
print(f"IMAGE_START parse error: {e}")
@@ -279,45 +325,14 @@ def process_message(msg, display, image_state, image_data_list):
# Deprecated text-based IMAGE_DATA handling
elif msg.startswith("IMAGE_DATA:") and image_state == IMAGE_STATE_RECEIVING:
try:
data = msg.split(":", 1)[1]
# Convert hex to bytes immediately if using old protocol, but we switched to binary
# Keep this just in case server rolls back? No, let's assume binary.
pass
except:
pass
elif msg == "IMAGE_END" and image_state == IMAGE_STATE_RECEIVING:
try:
print("Image received, processing...")
img_size = image_data_list[0] if image_data_list else 64
# Combine all binary chunks (skipping metadata at index 0)
img_data = b"".join(image_data_list[1:])
print("Image received completely")
image_data_list.clear()
print(f"Image data len: {len(img_data)}")
if display and display.tft:
x = (240 - img_size) // 2
y = (240 - img_size) // 2
display.show_image(x, y, img_size, img_size, img_data)
# Overlay success message slightly
display.tft.fill_rect(0, 0, 240, 30, st7789.WHITE)
display.text("图片已生成!", 70, 5, st7789.BLACK)
gc.collect()
print("Image displayed")
return IMAGE_STATE_IDLE, ("image_done",)
except Exception as e:
print(f"Image process error: {e}")
import sys
sys.print_exception(e)
return IMAGE_STATE_IDLE, None
elif msg.startswith("IMAGE_ERROR:"):
print(msg)
return IMAGE_STATE_IDLE, ("error", msg[12:])
@@ -330,7 +345,7 @@ def print_asr(text, display=None):
print(f"ASR: {text}")
if display and display.tft:
display.fill_rect(0, 40, 240, 160, st7789.BLACK)
display.text(text, 0, 40, st7789.WHITE)
display.text(text, 0, 40, st7789.WHITE, wait=False)
def get_boot_button_action(boot_btn):
@@ -468,7 +483,7 @@ def main():
print("Memory high, cleaned")
# Spinner Animation
if ui_screen == UI_SCREEN_RESULT and not image_generation_done and current_status in ["OPTIMIZING", "RENDERING"]:
if ui_screen == UI_SCREEN_RESULT and not image_generation_done and current_status in ["OPTIMIZING", "RENDERING"] and image_state != IMAGE_STATE_RECEIVING:
now = time.ticks_ms()
if time.ticks_diff(now, last_spinner_time) > 100:
if display.tft:
@@ -543,17 +558,11 @@ def main():
time.sleep(0.5)
elif ui_screen == UI_SCREEN_RESULT:
print(">>> Back to recording")
ui_screen = UI_SCREEN_RECORDING
is_recording = False
current_asr_text = ""
current_prompt = ""
current_status = ""
image_generation_done = False
confirm_waiting = False
if display.tft:
render_recording_screen(display, "", 0)
# Ignore short press in result screen to keep image displayed
# unless image generation failed or is still in progress?
# User request: "只有长按boot才离开" (Only leave on long press)
# So we do nothing here.
pass
elif btn_action == 2:
if is_recording:
@@ -595,15 +604,25 @@ def main():
render_recording_screen(display, "", 0)
elif ui_screen == UI_SCREEN_RESULT:
print(">>> Generate image (manual)")
print(">>> Back to recording")
# Stop recording if it was somehow started or just reset state
if ws and ws.is_connected():
try:
ws.send("START_RECORDING")
is_recording = True
ui_screen = UI_SCREEN_RECORDING
ws.send("STOP_RECORDING")
except:
ws = None
ui_screen = UI_SCREEN_RECORDING
is_recording = False
current_asr_text = ""
current_prompt = ""
current_status = ""
image_generation_done = False
confirm_waiting = False
if display.tft:
render_recording_screen(display, "", 0)
elif btn_action == 3:
print(">>> Config mode")
@@ -628,6 +647,10 @@ def main():
if display.tft:
render_recording_screen(display, current_asr_text, last_audio_level)
elif event_data[0] == "font_update":
if ui_screen == UI_SCREEN_RECORDING and display.tft:
render_recording_screen(display, current_asr_text, last_audio_level)
elif event_data[0] == "status":
current_status = event_data[1]
status_text = event_data[2] if len(event_data) > 2 else ""

48
static_font_data.py Normal file
View File

@@ -0,0 +1,48 @@
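# Static font data generated for specific characters.
# FONTS maps a Unicode code point to a 32-byte 16x16 monochrome bitmap:
# two bytes per row, top row first, most significant bit = leftmost pixel.
# Generated file: regenerate it with the font generator script instead of editing by hand.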
import ubinascii
FONTS = {
20013: b'\x01\x00\x01\x00\x01\x00\x01\xf8\x3f\x08\x21\x08\x21\x08\x21\xf8\x3f\x00\x21\x00\x01\x00\x01\x00\x01\x00\x01\x00\x01\x00\x00\x00', # 中
20102: b'\x00\x00\x01\xf0\x1e\x10\x00\x20\x01\x40\x00\x80\x00\x80\x00\x80\x00\x80\x00\x80\x00\x80\x00\x80\x04\x80\x02\x80\x01\x00\x00\x00', # 了
20248: b'\x08\x80\x08\x80\x08\xa0\x10\x90\x10\x80\x20\xfc\x2f\x40\x61\x40\xa1\x40\x21\x40\x22\x40\x22\x44\x24\x44\x24\x44\x28\x3c\x00\x00', # 优
20316: b'\x08\x80\x08\x80\x09\x00\x11\x1c\x13\xe0\x32\x80\x54\x98\x90\xe0\x10\x80\x10\x9c\x10\xe0\x10\x80\x10\x80\x10\x80\x10\x80\x00\x00', # 作
20572: b'\x08\x80\x08\x78\x17\x80\x10\x60\x23\xa0\x22\x60\x63\x80\xa0\x7c\x2f\x88\x28\x30\x23\xc0\x20\x40\x21\x40\x20\xc0\x20\x40\x00\x00', # 停
21035: b'\x00\x08\x06\x08\x3a\x08\x22\x48\x26\x48\x38\x48\x28\x48\x0f\x48\x71\x48\x11\x48\x11\x08\x22\x08\x2a\x28\x44\x18\x80\x08\x00\x00', # 别
21040: b'\x00\x08\x00\x08\x07\x88\x38\x28\x0a\x28\x11\x28\x23\xa8\x7c\xa8\x04\x28\x07\x28\x3c\x28\x07\x88\x18\x28\x60\x18\x00\x08\x00\x00', # 到
21270: b'\x04\x00\x04\x80\x08\x80\x08\x88\x08\x88\x18\x90\x28\xa0\x48\xc0\x09\x80\x0a\x80\x08\x84\x08\x84\x08\x84\x08\x7c\x08\x00\x00\x00', # 化
21527: b'\x00\x60\x03\xa0\x00\x20\x19\x20\x69\x20\x49\x20\x59\x20\x61\x78\x01\x88\x00\x08\x00\xe8\x0f\x08\x00\x10\x00\x50\x00\x20\x00\x00', # 吗
22238: b'\x00\x00\x00\x00\x01\xf8\x3e\x08\x20\x08\x21\x88\x26\x88\x24\x88\x25\x88\x26\x08\x20\x08\x20\xf8\x3f\x00\x00\x00\x00\x00\x00\x00', # 回
22312: b'\x01\x00\x01\x00\x02\x00\x03\xf8\x7c\x00\x04\x80\x18\x80\x10\x80\x30\xf0\x57\x80\x90\x80\x10\x80\x10\xfc\x1f\x00\x10\x00\x00\x00', # 在
22833: b'\x01\x00\x09\x00\x09\x00\x09\xf0\x1f\x00\x11\x00\x21\x00\x01\xf8\x7e\x80\x02\x80\x04\x40\x04\x40\x08\x20\x10\x38\x20\x00\x00\x00', # 失
23383: b'\x02\x00\x01\x00\x01\xfc\x3e\x08\x21\xe0\x0e\x40\x00\x80\x01\x00\x00\xfc\x7f\x80\x00\x80\x00\x80\x00\x80\x02\x80\x01\x00\x00\x00', # 字
23436: b'\x02\x00\x01\x00\x00\xfc\x3f\x08\x20\x00\x00\xc0\x07\x00\x00\x78\x3f\x80\x04\x80\x04\x80\x08\x84\x08\x84\x10\x84\x60\x7c\x00\x00', # 完
24320: b'\x00\x00\x03\xf0\x1c\x40\x04\x40\x04\x40\x04\x40\x07\xfc\x7c\x40\x04\x40\x04\x40\x08\x40\x08\x40\x10\x40\x20\x40\x40\x40\x00\x00', # 开
24405: b'\x01\xc0\x0e\x40\x01\xc0\x0e\x40\x00\xfc\x3f\x00\x01\x10\x11\x10\x09\xa0\x05\x40\x09\x20\x11\x18\x61\x06\x03\x00\x01\x00\x00\x00', # 录
24605: b'\x00\xf0\x1f\x10\x11\x10\x11\xf0\x1f\x10\x11\x10\x11\xf0\x1e\x00\x10\x00\x23\x18\x28\x84\x24\x10\x43\x10\x40\xf0\x00\x00\x00\x00', # 思
25104: b'\x00\xa0\x00\x90\x00\x80\x00\xf0\x1f\x80\x10\x90\x10\x90\x1e\xa0\x12\xa0\x22\x40\x22\x44\x24\xa4\x55\x14\x48\x0c\x80\x04\x00\x00', # 成
25353: b'\x10\x40\x10\x20\x10\x3c\x13\xc8\x1e\x40\x70\x40\x10\x80\x18\xfe\x37\x10\xd1\x10\x11\x20\x10\xa0\x50\x60\x31\x90\x16\x08\x00\x00', # 按
25552: b'\x10\x30\x11\xd0\x11\x10\x11\xd0\x1d\x30\x71\xc0\x15\x00\x18\x38\x37\xc0\xd2\x70\x12\x40\x13\x40\x54\xc0\x34\x30\x18\x0e\x00\x00', # 提
25991: b'\x02\x00\x01\x00\x01\x00\x00\x38\x3f\xc0\x00\x40\x04\x40\x02\x80\x02\x80\x01\x00\x01\x00\x02\x80\x0c\x40\x30\x30\xc0\x0e\x00\x00', # 文
26410: b'\x01\x00\x01\x00\x01\x00\x01\xf0\x1f\x00\x01\x00\x01\x78\x7f\x80\x03\x40\x05\x40\x09\x20\x11\x20\x61\x1c\x81\x00\x01\x00\x00\x00', # 未
26494: b'\x00\x40\x10\x40\x10\xa0\x10\xa0\x1c\xa0\x71\x10\x19\x50\x36\x4c\x52\x40\x54\x80\x90\xa0\x11\x10\x11\x38\x13\xc8\x10\x00\x00\x00', # 松
27490: b'\x01\x00\x01\x00\x01\x00\x01\x00\x01\x00\x11\x00\x11\x30\x11\xc0\x11\x00\x11\x00\x11\x00\x11\x00\x11\x00\x11\xfc\xfe\x00\x00\x00', # 止
27491: b'\x00\x00\x00\xf0\x1f\x00\x01\x00\x01\x00\x01\x00\x09\x30\x09\xc0\x09\x00\x09\x00\x09\x00\x09\x00\x09\xfc\x7e\x00\x00\x00\x00\x00', # 正
29983: b'\x01\x00\x01\x00\x01\x00\x09\x00\x09\x00\x11\xf0\x1f\x00\x21\x00\x21\x00\x41\xe0\x0f\x00\x01\x00\x01\x00\x01\xfc\x7e\x00\x00\x00', # 生
30011: b'\x00\x00\x01\xf8\x3e\x00\x00\xe0\x0f\x20\x09\x20\x09\xe8\x2f\x28\x29\x28\x29\xe8\x2e\x08\x20\x08\x21\xf8\x3e\x00\x00\x00\x00\x00', # 画
30701: b'\x10\x00\x10\x3c\x11\xc0\x16\x18\x38\xe8\x28\x88\x48\x98\x0e\xe0\x78\x10\x08\x90\x14\x50\x12\x50\x20\x3c\x43\xc0\x00\x00\x00\x00', # 短
30830: b'\x00\x80\x00\x80\x0c\xf0\x71\x20\x11\x40\x12\x78\x21\xc8\x2d\x68\x75\xc8\xa5\x68\x2d\xc8\x32\x48\x22\x48\x04\x18\x08\x08\x00\x00', # 确
31034: b'\x00\x00\x00\xe0\x0f\x00\x00\x00\x00\xfc\x7f\x00\x01\x00\x05\x00\x05\x20\x09\x10\x11\x08\x21\x08\x45\x00\x03\x00\x01\x00\x00\x00', # 示
32472: b'\x00\x80\x10\x80\x10\xc0\x21\x40\x25\x20\x4a\x10\x74\x6e\x11\x80\x2c\x38\x73\xc0\x00\x80\x0c\xa0\x31\x10\xc2\x78\x03\x88\x00\x00', # 绘
32771: b'\x02\x00\x02\x10\x03\xa0\x0e\x40\x02\x80\x03\xfc\x7e\x00\x07\xf0\x1a\x00\x22\x60\xc3\xa0\x00\x20\x00\x40\x01\x40\x00\x80\x00\x00', # 考
35748: b'\x00\x00\x10\x40\x08\x40\x08\x40\x00\x40\x00\x40\x70\x40\x10\x40\x10\xa0\x10\xa0\x15\x10\x19\x10\x12\x08\x04\x0e\x08\x00\x00\x00', # 认
35782: b'\x00\x00\x10\x38\x09\xc8\x09\x08\x01\x08\x71\x38\x11\xc0\x11\x00\x10\x00\x14\x90\x18\x88\x11\x04\x02\x04\x04\x00\x00\x00\x00\x00', # 识
35789: b'\x20\x00\x10\x78\x0b\x88\x00\x08\x00\xe8\x77\x08\x10\xc8\x13\x48\x12\x48\x12\xc8\x13\x08\x1a\x08\x10\x28\x00\x18\x00\x08\x00\x00', # 词
35821: b'\x00\x00\x20\x70\x13\x80\x10\x80\x00\xe0\x03\x20\xe1\x20\x21\xfc\x26\x00\x20\x70\x23\x90\x2a\x10\x32\x70\x23\x80\x02\x00\x00\x00', # 语
35828: b'\x02\x10\x21\x10\x11\x20\x10\x20\x00\x70\x03\x90\x72\x10\x12\x70\x13\xa0\x10\xa0\x14\xa0\x19\x22\x11\x22\x02\x22\x0c\x1e\x00\x00', # 说
36133: b'\x00\x40\x06\x40\x3a\x40\x22\x4c\x2a\x70\x2a\x90\x2a\x90\x2b\x50\x2a\x50\x28\x20\x14\x20\x12\x50\x20\x90\x21\x0c\x42\x00\x00\x00', # 败
36820: b'\x00\x00\x00\x38\x13\xc0\x0a\x00\x02\x70\x03\x90\x3a\x10\xca\xa0\x12\x60\x12\x50\x0c\x88\x09\x00\x7c\x00\x01\xc0\x00\x3e\x00\x00', # 返
37325: b'\x00\x20\x00\xc0\x1f\x00\x01\xfc\x7f\x00\x01\xf0\x1f\x10\x11\xd0\x17\x10\x11\xf0\x1f\x00\x01\xe0\x1f\x00\x01\xfc\x7e\x00\x00\x00', # 重
38271: b'\x08\x00\x08\x20\x08\x40\x08\x80\x0b\x00\x0c\x00\x09\xf8\x7e\x00\x0a\x00\x09\x00\x08\x80\x08\x40\x0a\x30\x0c\x0c\x08\x00\x00\x00', # 长
38899: b'\x02\x00\x01\x00\x01\xf0\x1e\x40\x04\x40\x04\x80\x01\xfc\x7e\x00\x01\xe0\x0e\x20\x09\xa0\x0e\x20\x08\x20\x09\xe0\x0e\x20\x00\x00', # 音
65311: b'\x00\x00\x00\x00\x1c\x00\x22\x00\x22\x00\x04\x00\x08\x00\x08\x00\x08\x00\x00\x00\x18\x00\x18\x00\x00\x00\x00\x00\x00\x00\x00\x00', #
}

View File

@@ -11,6 +11,7 @@ class WebSocketClient:
self.uri = uri
self.timeout = timeout
self.unread_messages = [] # Queue for buffered messages
self.buffer = bytearray(4096) # Pre-allocated buffer for small messages
self.connect()
def connect(self):
@@ -109,6 +110,37 @@ class WebSocketClient:
self.sock.write(header)
self.sock.write(masked_data)
def _read_exact(self, n):
"""Read exactly n bytes from the socket"""
data = b''
while len(data) < n:
try:
chunk = self.sock.read(n - len(data))
if not chunk:
return None
data += chunk
except Exception as e:
# Handle timeout or other errors
if len(data) > 0:
# If we read some data but timed out, we can't just return None
# as we would lose that data. We must keep trying or raise error.
# For simplicity in this blocking-with-timeout model,
# we assume we should keep trying if we got some data,
# or return what we have if it's a hard error?
# Actually, if we return None, the caller treats it as "no message".
# But we already consumed data! This is the core issue.
# We should probably buffer it?
# Or just return None and let the caller handle it?
# But the caller (recv) expects a full frame or nothing.
# To properly fix this without a persistent buffer across calls
# (which is complex to add now), we will just print error and return None,
# accepting that we lost the connection sync.
print(f"Socket read error: {e}")
return None
return None
return data
def recv(self):
# 1. Check if we have unread messages in the buffer
if self.unread_messages:
@@ -120,8 +152,8 @@ class WebSocketClient:
# Read header
try:
# Read 2 bytes at once
header = self.sock.read(2)
if not header or len(header) < 2: return None
header = self._read_exact(2)
if not header: return None
b1 = header[0]
b2 = header[1]
@@ -133,49 +165,88 @@ class WebSocketClient:
length = b2 & 0x7f
if length == 126:
length_bytes = self.sock.read(2)
length_bytes = self._read_exact(2)
if not length_bytes: return None
length = int.from_bytes(length_bytes, 'big')
elif length == 127:
length_bytes = self.sock.read(8)
length_bytes = self._read_exact(8)
if not length_bytes: return None
length = int.from_bytes(length_bytes, 'big')
# Safety check for memory allocation
if length > 50 * 1024: # 50KB limit (reduced from 1MB to be safer on ESP32)
print(f"WS Recv: Message too large ({length} bytes)")
# If it's a binary message (image chunk), maybe we can process it?
# But for now, just skip to avoid OOM
self._skip_bytes(length)
if mask:
mask_key = self.sock.read(4)
self._read_exact(4) # Consume mask key
return None
if mask:
mask_key = self._read_exact(4)
if not mask_key: return None
# Read payload
# Optimization for streaming binary data (opcode 2)
try:
# Pre-allocate buffer or use shared buffer
if length <= 4096:
data = self.buffer
else:
data = bytearray(length)
except MemoryError:
print(f"WS Recv: Memory allocation failed for {length} bytes")
# Try to skip data
self._skip_bytes(length)
return None
# Use smaller chunks for readinto to avoid memory allocation issues in MicroPython
pos = 0
while pos < length:
chunk_size = min(length - pos, 512)
chunk_size = min(length - pos, 1024) # 1KB chunks
try:
# Create a view into the target buffer
chunk_view = memoryview(data)[pos:pos + chunk_size]
read_len = self.sock.readinto(chunk_view)
if read_len == 0:
# We need exact read here too
read_len = 0
while read_len < chunk_size:
chunk_read = self.sock.readinto(chunk_view[read_len:])
if not chunk_read:
# Connection closed or timeout
# If timeout, we are in trouble.
break
read_len += chunk_read
if read_len < chunk_size:
print("WS Recv: Incomplete payload read")
return None
pos += read_len
except Exception as e:
print(f"WS Recv read error: {e}")
return None
# Create a view for the relevant part of the data
view = memoryview(data)[:length]
if mask:
unmasked = bytearray(length)
# In-place unmasking
for i in range(length):
unmasked[i] = data[i] ^ mask_key[i % 4]
data = unmasked
view[i] = view[i] ^ mask_key[i % 4]
if opcode == 1: # Text
return data.decode('utf-8')
return str(view, 'utf-8')
elif opcode == 2: # Binary
return data
return bytes(view) # Return copy
elif opcode == 8: # Close
self.close()
return None
elif opcode == 9: # Ping
self.send(data, opcode=10) # Pong
self.send(view, opcode=10) # Pong
return self.recv()
return data
return bytes(view)
except Exception as e:
# Don't print timeout errors as they are expected in non-blocking polling
@@ -183,6 +254,15 @@ class WebSocketClient:
print(f"WS Recv Error: {e}")
return None
def _skip_bytes(self, length):
"""Skip bytes from socket"""
chunk_size = 1024
remaining = length
while remaining > 0:
to_read = min(remaining, chunk_size)
self.sock.read(to_read)
remaining -= to_read
def close(self):
if self.sock:
self.sock.close()

View File

@@ -0,0 +1,109 @@
import freetype
import os
# TrueType font file the glyphs are rendered from.
# NOTE(review): absolute path on one developer's machine -- adjust before running elsewhere.
FONT_FILE = "/Users/jeremygan/Desktop/python_dev/epaper2/websocket_server/GB2312.ttf"
# Destination of the generated module; a relative path, so it depends on the working directory.
OUTPUT_FILE = "../static_font_data.py"
# Pixel size requested from FreeType and side length of the glyph cell.
# The packing code below writes 2 bytes per row into a 32-byte buffer, i.e. it assumes 16.
FONT_SIZE = 16
# Fixed UI strings from the project; only their non-ASCII characters get a static glyph.
FIXED_STRINGS = [
"语音识别",
"松开停止",
"说完了吗?",
"未识别到文字",
"短按确认",
"长按重录",
"AI 生成中",
"正在思考...",
"优化提示词中",
"正在绘画...",
"AI作画中",
"生成完成!",
"生成失败",
"提示词:",
"返回录音"
]
def generate_static_font():
# Extract unique characters
chars = set()
for s in FIXED_STRINGS:
for c in s:
if ord(c) > 127: # Only non-ASCII
chars.add(c)
sorted_chars = sorted(list(chars))
print(f"Generating font data for {len(sorted_chars)} characters: {''.join(sorted_chars)}")
try:
face = freetype.Face(FONT_FILE)
except Exception as e:
print(f"Error loading font: {e}")
return
face.set_pixel_sizes(FONT_SIZE, FONT_SIZE)
with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
f.write("# Static font data generated for specific characters\n")
f.write("import ubinascii\n\n")
f.write("FONTS = {\n")
for char in sorted_chars:
face.load_char(char, freetype.FT_LOAD_RENDER | freetype.FT_LOAD_TARGET_MONO)
bitmap = face.glyph.bitmap
# Create 32 bytes buffer (16x16 / 8)
# Similar logic to generate_font.py but simplified for single char
char_buffer = bytearray(32)
glyph_width = bitmap.width
glyph_rows = bitmap.rows
# Center the glyph
x_off = (FONT_SIZE - glyph_width) // 2
y_off = (FONT_SIZE - glyph_rows) // 2
# Adjust y_off based on baseline if needed, but let's stick to centering for consistency
# Usually for 16px font, baseline is around 12-13.
# bitmap_top is distance from baseline to top.
# We want to position it such that baseline is consistent.
# But let's use the simple centering logic from generate_font.py for now
# as it seems to be what was used before.
src_buf = bitmap.buffer
for row in range(glyph_rows):
dst_row = row + y_off
if dst_row < 0 or dst_row >= FONT_SIZE:
continue
for col in range(glyph_width):
dst_col = col + x_off
if dst_col < 0 or dst_col >= FONT_SIZE:
continue
# Extract bit from source
byte_idx = row * bitmap.pitch + (col >> 3)
bit_idx = 7 - (col & 7)
if byte_idx < len(src_buf):
pixel = (src_buf[byte_idx] >> bit_idx) & 1
if pixel:
# Set bit in destination
dst_byte_idx = dst_row * 2 + (dst_col >> 3)
dst_bit_idx = 7 - (dst_col & 7)
char_buffer[dst_byte_idx] |= (1 << dst_bit_idx)
# Write to file
hex_str = "".join([f"\\x{b:02x}" for b in char_buffer])
# Use ubinascii.unhexlify in generated code to save space?
# Or just bytes literal.
# bytes literal is fine.
f.write(f" {ord(char)}: b'{hex_str}', # {char}\n")
f.write("}\n")
print(f"Generated {OUTPUT_FILE}")
if __name__ == "__main__":
generate_static_font()

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -444,16 +444,39 @@ class MyRecognitionCallback(RecognitionCallback):
def on_event(self, result: RecognitionResult) -> None:
if result.get_sentence():
text = result.get_sentence()['text']
print(f"ASR Result: {text}")
# 累积每一句识别结果
# 获取当前句子的结束状态
# 注意DashScope Python SDK 的 Result 结构可能需要根据版本调整
# 这里假设我们只关心文本内容的变化
# 简单的去重逻辑:如果新来的文本比上一句长且包含上一句,则认为是同一句的更新
if self.sentence_list:
last_sentence = self.sentence_list[-1]
# 去掉句尾标点进行比较,因为流式结果可能标点不稳定
last_clean = last_sentence.rstrip('。,?!')
text_clean = text.rstrip('。,?!')
if text_clean.startswith(last_clean):
# 更新当前句子
self.sentence_list[-1] = text
elif last_clean.startswith(text_clean):
# 如果新来的比旧的短但也是前缀(不太可能发生,除非回溯),忽略或更新
pass
else:
# 新的句子
self.sentence_list.append(text)
else:
self.sentence_list.append(text)
# 同时更新 final_text 以便 Stop 时获取
self.final_text = "".join(self.sentence_list)
print(f"ASR Update: {self.final_text}")
# 将识别结果发送回客户端
try:
if self.loop.is_running():
asyncio.run_coroutine_threadsafe(
self.websocket.send_text(f"ASR:{text}"),
self.websocket.send_text(f"ASR:{self.final_text}"),
self.loop
)
except Exception as e:
@@ -559,12 +582,24 @@ def generate_image(prompt, progress_callback=None, retry_count=0, max_retries=2)
progress_callback(35, "正在请求AI生成图片...")
try:
if not prompt:
print("Error: prompt is empty")
if progress_callback:
progress_callback(0, "提示词为空")
return None
response = ImageSynthesis.call(
model='wanx2.0-t2i-turbo',
prompt=prompt
)
if response.status_code == 200:
if not response.output:
print("Error: response.output is None")
if progress_callback:
progress_callback(0, "API响应无效")
return None
task_status = response.output.get('task_status')
if task_status == 'PENDING' or task_status == 'RUNNING':
@@ -631,9 +666,9 @@ def generate_image(prompt, progress_callback=None, retry_count=0, max_retries=2)
g6 = (g >> 2) & 0x3F
b5 = (b >> 3) & 0x1F
# 端模式:字节在前
# 端模式:字节在前 (符合ST7789默认配置)
rgb565 = (r5 << 11) | (g6 << 5) | b5
rgb565_data.extend(struct.pack('<H', rgb565))
rgb565_data.extend(struct.pack('>H', rgb565))
# 保存为.bin文件
with open(GENERATED_THUMB_FILE, 'wb') as f: