This commit is contained in:
jeremygan2021
2026-03-02 22:58:02 +08:00
parent 4c51f52654
commit 124b185b8a
7 changed files with 297 additions and 191 deletions

273
font.py
View File

@@ -19,8 +19,47 @@ class Font:
color_bytes = struct.pack(">H", color)
bg_bytes = struct.pack(">H", bg)
# Create LUT for current color/bg
lut = [bytearray(16) for _ in range(256)]
for i in range(256):
for bit in range(8):
# bit 7 is first pixel (leftmost)
# target index: (7-bit)*2
val = (i >> bit) & 1
idx = (7 - bit) * 2
if val:
lut[i][idx] = color_bytes[0]
lut[i][idx+1] = color_bytes[1]
else:
lut[i][idx] = bg_bytes[0]
lut[i][idx+1] = bg_bytes[1]
initial_x = x
# 1. Identify missing fonts
missing_codes = set()
for char in text:
if ord(char) > 127:
code = ord(char)
if code not in self.cache:
missing_codes.add(code)
# 2. Batch request missing fonts
if missing_codes and self.ws:
# Convert to list for consistent order/string
missing_list = list(missing_codes)
# Limit batch size? Maybe 20 chars at a time?
# For short ASR result, usually < 20 chars.
req_str = ",".join([str(c) for c in missing_list])
print(f"Batch requesting fonts: {req_str}")
try:
self.ws.send(f"GET_FONTS_BATCH:{req_str}")
self._wait_for_fonts(missing_codes)
except Exception as e:
print(f"Batch font request failed: {e}")
# 3. Draw text
for char in text:
# Handle newlines
if char == '\n':
@@ -38,56 +77,20 @@ class Font:
is_chinese = False
buf_data = None
# Check if it's Chinese
# Check if it's Chinese (or non-ASCII)
if ord(char) > 127:
try:
gb = char.encode('gb2312')
if len(gb) == 2:
code = struct.unpack('>H', gb)[0]
# Try to get from cache
if code in self.cache:
buf_data = self.cache[code]
is_chinese = True
else:
# Need to fetch from server
# Since we can't block easily here (unless we use a blocking socket recv or a callback mechanism),
# we have to rely on the main loop to handle responses.
# But we want to draw *now*.
#
# Solution:
# 1. Send request
# 2. Wait for response with timeout (blocking wait)
# This is slow for long text but works for small amounts.
if self.ws:
# Send request: GET_FONT:0xA1A1
hex_code = "0x{:04X}".format(code)
print(f"Requesting font for {hex_code} ({char})")
self.ws.send(f"GET_FONT:{hex_code}")
# Wait for response
# We need to peek/read from WS until we get FONT_DATA
buf_data = self._wait_for_font(hex_code)
if buf_data:
self.cache[code] = buf_data
is_chinese = True
print(f"Font loaded for {hex_code}")
else:
print(f"Font fetch timeout for {hex_code}")
# Fallback: draw question mark or box
self._draw_ascii(tft, '?', x, y, color, bg)
x += 8
continue # Skip drawing bitmap logic
else:
print("WS not available for font fetch")
except Exception as e:
print(f"Font error: {e}")
code = ord(char)
if code in self.cache:
buf_data = self.cache[code]
is_chinese = True
else:
# Still missing after batch request?
# Could be timeout or invalid char.
pass
if is_chinese and buf_data:
# Draw Chinese character (16x16)
self._draw_bitmap(tft, buf_data, x, y, 16, 16, color_bytes, bg_bytes)
self._draw_bitmap(tft, buf_data, x, y, 16, 16, lut)
x += 16
else:
# Draw ASCII (8x16) using built-in framebuf font (8x8 actually)
@@ -97,78 +100,148 @@ class Font:
self._draw_ascii(tft, char, x, y, color, bg)
x += 8
def _wait_for_fonts(self, target_codes):
    """
    Block until the glyphs for ``target_codes`` arrive or a timeout expires.

    Reads WebSocket messages for up to 2 seconds:

    * ``FONT_DATA:<code>:<hex>`` is decoded and stored in ``self.cache``.
      ``<code>`` may be ``0x``-prefixed hex or decimal.
    * ``FONT_BATCH_END`` caches ``None`` for every code still outstanding
      and ends the wait.
    * Any other message (text or binary) is held back and, when the wait
      ends, put back at the front of ``self.ws.unread_messages`` in arrival
      order so later ``recv()`` calls still return it.

    Args:
        target_codes: mutable set of integer glyph codes to wait for. It is
            consumed in place: codes are removed as they are resolved.

    Returns:
        None. Results are delivered through ``self.cache``.
    """
    if not self.ws or not target_codes:
        return

    # MicroPython-only module; imported here so it is only required when
    # a wait actually happens.
    import uselect

    self.local_deferred = []
    poller = None  # created on first use, then reused for every poll
    start = time.ticks_ms()
    try:
        # 2 seconds timeout for the whole batch
        while target_codes and time.ticks_diff(time.ticks_ms(), start) < 2000:
            try:
                # recv() hands out buffered messages before touching the
                # socket, so only poll when that buffer is empty.
                if not self.ws.unread_messages:
                    if not self.ws.sock:
                        # Connection is gone: nothing more can arrive, and
                        # registering a missing socket would just raise on
                        # every iteration until the deadline.
                        break
                    if poller is None:
                        poller = uselect.poll()
                        poller.register(self.ws.sock, uselect.POLLIN)
                    if not poller.poll(100):  # 100ms
                        continue

                msg = self.ws.recv()
                if msg is None:
                    # Nothing usable was read; the socket check above ends
                    # the loop if the connection was dropped.
                    continue

                if not isinstance(msg, str):
                    # Binary message: not font data, hand it back later.
                    self.local_deferred.append(msg)
                elif msg == "FONT_BATCH_END":
                    # Batch complete: remember the failures so these codes
                    # are not requested again.
                    for code in target_codes:
                        print(f"Batch missing/failed: {code}")
                        self.cache[code] = None  # Cache failure
                    target_codes.clear()
                elif msg.startswith("FONT_DATA:"):
                    parts = msg.split(":")
                    if len(parts) >= 3:
                        key_str = parts[1]
                        try:
                            if key_str.startswith("0x"):
                                code = int(key_str, 16)
                            else:
                                code = int(key_str)
                            glyph = binascii.unhexlify(parts[2])
                        except ValueError as e:
                            # Malformed reply: report it and keep waiting;
                            # the code stays outstanding.
                            print(f"Bad font data for {key_str}: {e}")
                        else:
                            self.cache[code] = glyph
                            target_codes.discard(code)
                else:
                    # Unrelated text message: hand it back later.
                    self.local_deferred.append(msg)
            except Exception as e:
                print(f"Wait font error: {e}")
    finally:
        # Restore deferred messages ahead of anything still buffered so the
        # original arrival order is preserved.
        if self.local_deferred:
            self.ws.unread_messages = self.local_deferred + self.ws.unread_messages
        self.local_deferred = []
def _draw_bitmap(self, tft, bitmap, x, y, w, h, color_bytes, bg_bytes):
# Convert 1bpp bitmap to RGB565 buffer
# bitmap length is w * h / 8 = 32 bytes for 16x16
def _wait_for_font(self, target_code_str):
# Compatibility wrapper or deprecated?
# The new logic uses batch wait.
pass
def _draw_bitmap(self, tft, bitmap, x, y, w, h, lut):
# Convert 1bpp bitmap to RGB565 buffer using LUT
# Optimize buffer allocation
rgb_buf = bytearray(w * h * 2)
idx = 0
for byte in bitmap:
for i in range(7, -1, -1):
if (byte >> i) & 1:
rgb_buf[idx] = color_bytes[0]
rgb_buf[idx+1] = color_bytes[1]
else:
rgb_buf[idx] = bg_bytes[0]
rgb_buf[idx+1] = bg_bytes[1]
idx += 2
# bitmap length is w * h / 8 = 32 bytes for 16x16
# Create list of chunks
chunks = [lut[b] for b in bitmap]
# Join chunks into one buffer
rgb_buf = b''.join(chunks)
tft.blit_buffer(rgb_buf, x, y, w, h)
def _draw_ascii(self, tft, char, x, y, color, bg):

81
main.py
View File

@@ -406,89 +406,32 @@ def main():
if display.tft:
display.init_ui()
# 停止录音并等待回放
# 停止录音并通知服务器
if ws:
try:
print(">>> Sending STOP & Waiting for playback...")
print(">>> Sending STOP to server...")
ws.send("STOP_RECORDING")
# 重新初始化 Speaker (16kHz Mono 16-bit)
if speaker.i2s:
cfg = speaker.config
speaker.i2s.deinit()
speaker.i2s = machine.I2S(
0,
sck=machine.Pin(cfg['bck']),
ws=machine.Pin(cfg['ws']),
sd=machine.Pin(cfg['sd']),
mode=machine.I2S.TX,
bits=16,
format=machine.I2S.MONO,
rate=16000,
ibuf=40000,
)
# 接收回放循环
playback_timeout = 5000 # 5秒无数据则退出
last_data_time = time.ticks_ms()
while True:
# Check for data with timeout
# 不再等待回放,直接退出录音状态
# 稍微等待一下可能的最后 ASR 结果 (非阻塞)
# 等待 500ms 接收剩余的 ASR 结果
t_wait = time.ticks_add(time.ticks_ms(), 500)
while time.ticks_diff(t_wait, time.ticks_ms()) > 0:
poller = uselect.poll()
poller.register(ws.sock, uselect.POLLIN)
events = poller.poll(100) # 100ms wait
events = poller.poll(100)
if events:
msg = ws.recv()
last_data_time = time.ticks_ms()
if isinstance(msg, str):
if msg == "START_PLAYBACK":
print(">>> Server starting playback stream...")
continue
elif msg == "STOP_PLAYBACK":
print(">>> Server finished playback.")
break
elif msg.startswith("ASR:"):
print_nice_asr(msg[4:], display)
elif isinstance(msg, bytes):
# 播放接收到的音频数据
if speaker.i2s:
# 使用 try-except 防止 write 阻塞导致的问题
try:
speaker.i2s.write(msg)
except Exception as e:
print(f"I2S Write Error: {e}")
elif msg is None:
print("WS Connection closed or error (recv returned None)")
try:
ws.close()
except:
pass
ws = None
break
else:
# No data received in this poll window
if time.ticks_diff(time.ticks_ms(), last_data_time) > playback_timeout:
print("Playback timeout - no data received for 5 seconds")
break
# Feed watchdog or do other small tasks if needed
# time.sleep(0.01)
if isinstance(msg, str) and msg.startswith("ASR:"):
print_nice_asr(msg[4:], display)
# 不需要处理其他类型的消息了
except Exception as e:
print(f"Playback loop error: {e}")
print(f"Stop recording error: {e}")
try:
ws.close()
except:
pass
ws = None
# 恢复 Speaker 原始配置
if speaker.i2s: speaker.i2s.deinit()
speaker._init_audio()
gc.collect()

View File

@@ -10,6 +10,7 @@ class WebSocketClient:
self.sock = None
self.uri = uri
self.timeout = timeout
self.unread_messages = [] # Queue for buffered messages
self.connect()
def connect(self):
@@ -109,6 +110,13 @@ class WebSocketClient:
self.sock.write(masked_data)
def recv(self):
# 1. Check if we have unread messages in the buffer
if self.unread_messages:
return self.unread_messages.pop(0)
if not self.sock:
return None
# Read header
try:
# Read 2 bytes at once
@@ -176,3 +184,4 @@ class WebSocketClient:
if self.sock:
self.sock.close()
self.sock = None
self.unread_messages = []

Binary file not shown.

Binary file not shown.

View File

@@ -167,30 +167,110 @@ async def websocket_endpoint(websocket: WebSocket):
except Exception as e:
print(f"Error converting to MP3: {e}")
# 4. 发送回客户端播放
print("Sending audio back...")
await websocket.send_text("START_PLAYBACK")
# 4. 不再发送回客户端播放,提升性能
# print("Sending audio back...")
# await websocket.send_text("START_PLAYBACK")
# 分块发送
chunk_size = 4096
for i in range(0, len(processed_audio), chunk_size):
chunk = processed_audio[i:i+chunk_size]
await websocket.send_bytes(chunk)
# 小延时,避免发送过快导致 ESP32 缓冲区溢出
# 4096 bytes / 32000 bytes/s (16k*2) = ~0.128s
# 0.04s 约为 3 倍速发送,既保证缓冲又不至于拥塞
await asyncio.sleep(0.04)
# chunk_size = 4096
# for i in range(0, len(processed_audio), chunk_size):
# chunk = processed_audio[i:i+chunk_size]
# await websocket.send_bytes(chunk)
# # 小延时,避免发送过快导致 ESP32 缓冲区溢出
# # 4096 bytes / 32000 bytes/s (16k*2) = ~0.128s
# # 0.04s 约为 3 倍速发送,既保证缓冲又不至于拥塞
# await asyncio.sleep(0.04)
await websocket.send_text("STOP_PLAYBACK")
print("Audio sent back finished.")
# await websocket.send_text("STOP_PLAYBACK")
print("Server processing finished (No playback sent).")
elif text.startswith("GET_FONT:"):
# 格式: GET_FONT:0xA1A1
elif text.startswith("GET_FONTS_BATCH:"):
# Format: GET_FONTS_BATCH:code1,code2,code3 (decimal unicode)
try:
print(f"Font Request Received: {text}")
hex_code = text.split(":")[1]
code = int(hex_code, 16)
codes_str = text.split(":")[1]
code_list = codes_str.split(",")
print(f"Batch Font Request for {len(code_list)} chars: {code_list}")
for code_str in code_list:
if not code_str: continue
try:
unicode_val = int(code_str)
char = chr(unicode_val)
gb_bytes = char.encode('gb2312')
if len(gb_bytes) == 2:
code = struct.unpack('>H', gb_bytes)[0]
else:
print(f"Character {char} is not a valid 2-byte GB2312 char")
# Send empty/dummy? Or just skip.
# Better to send something so client doesn't wait forever if it counts responses.
# But client probably uses a set of missing chars.
continue
# Calc offset
area = (code >> 8) - 0xA0
index = (code & 0xFF) - 0xA0
if area >= 1 and index >= 1:
offset = ((area - 1) * 94 + (index - 1)) * 32
# Read font file
# Optimization: Open file once outside loop?
# For simplicity, keep it here, OS caching helps.
script_dir = os.path.dirname(os.path.abspath(__file__))
font_path = os.path.join(script_dir, FONT_FILE)
if not os.path.exists(font_path):
font_path = os.path.join(script_dir, "..", FONT_FILE)
if not os.path.exists(font_path):
font_path = FONT_FILE
if os.path.exists(font_path):
with open(font_path, "rb") as f:
f.seek(offset)
font_data = f.read(32)
if len(font_data) == 32:
import binascii
hex_data = binascii.hexlify(font_data).decode('utf-8')
response = f"FONT_DATA:{code_str}:{hex_data}"
await websocket.send_text(response)
# Small yield to let network flush?
# await asyncio.sleep(0.001)
except Exception as e:
print(f"Error processing batch item {code_str}: {e}")
# Send a completion marker
await websocket.send_text("FONT_BATCH_END")
except Exception as e:
print(f"Error handling BATCH FONT request: {e}")
await websocket.send_text("FONT_BATCH_END") # Ensure we unblock client
elif text.startswith("GET_FONT_UNICODE:") or text.startswith("GET_FONT:"):
# 格式: GET_FONT_UNICODE:12345 (decimal) or GET_FONT:0xA1A1 (hex)
try:
is_unicode = text.startswith("GET_FONT_UNICODE:")
code_str = text.split(":")[1]
target_code_str = code_str # Used for response
if is_unicode:
unicode_val = int(code_str)
char = chr(unicode_val)
try:
gb_bytes = char.encode('gb2312')
if len(gb_bytes) == 2:
code = struct.unpack('>H', gb_bytes)[0]
else:
print(f"Character {char} is not a valid 2-byte GB2312 char")
continue
except Exception as e:
print(f"Failed to encode {char} to gb2312: {e}")
continue
else:
code = int(code_str, 16)
# 计算偏移量
# GB2312 编码范围0xA1A1 - 0xFEFE
# 区码:高字节 - 0xA0
@@ -225,17 +305,18 @@ async def websocket_endpoint(websocket: WebSocket):
if len(font_data) == 32:
import binascii
hex_data = binascii.hexlify(font_data).decode('utf-8')
response = f"FONT_DATA:{hex_code}:{hex_data}"
print(f"Sending Font Response: {response[:30]}...")
# Return the original requested code (unicode or hex) so client can map it back
response = f"FONT_DATA:{target_code_str}:{hex_data}"
# print(f"Sending Font Response: {response[:30]}...")
await websocket.send_text(response)
else:
print(f"Error: Read {len(font_data)} bytes for font data (expected 32)")
else:
print(f"Font file not found: {font_path}")
else:
print(f"Invalid GB2312 code: {hex_code} (Area: {area}, Index: {index})")
print(f"Invalid GB2312 code derived: {code:X} (Area: {area}, Index: {index})")
except Exception as e:
print(f"Error handling GET_FONT: {e}")
print(f"Error handling FONT request: {e}")
elif "bytes" in message:
# 接收音频数据并追加到缓冲区