This commit is contained in:
jeremygan2021
2026-03-03 22:45:09 +08:00
parent 700bc55657
commit 05f02a1454
14 changed files with 574 additions and 149 deletions

View File

@@ -0,0 +1,109 @@
import freetype
import os
# Path to the source TTF font.  The default is a developer-machine absolute
# path; override with the FONT_FILE environment variable for portability.
FONT_FILE = os.environ.get(
    "FONT_FILE",
    "/Users/jeremygan/Desktop/python_dev/epaper2/websocket_server/GB2312.ttf",
)
# Destination Python module that will hold the generated bitmap dict.
OUTPUT_FILE = os.environ.get("OUTPUT_FILE", "../static_font_data.py")
# Glyphs are rendered into a FONT_SIZE x FONT_SIZE monochrome cell
# (16x16 pixels -> 32 bytes per character).
FONT_SIZE = 16

# Fixed UI strings from the project; only their non-ASCII characters
# are rendered into the static font table.
FIXED_STRINGS = [
    "语音识别",
    "松开停止",
    "说完了吗?",
    "未识别到文字",
    "短按确认",
    "长按重录",
    "AI 生成中",
    "正在思考...",
    "优化提示词中",
    "正在绘画...",
    "AI作画中",
    "生成完成!",
    "生成失败",
    "提示词:",
    "返回录音"
]
def _collect_target_chars(strings):
    """Return the sorted unique non-ASCII characters found in *strings*."""
    return sorted({c for s in strings for c in s if ord(c) > 127})


def _pack_glyph(src_buf, pitch, width, rows):
    """Pack a FreeType 1-bpp bitmap into a centered FONT_SIZE x FONT_SIZE cell.

    src_buf/pitch/width/rows come from ``face.glyph.bitmap`` after a MONO
    render; the returned bytearray stores rows top-to-bottom, MSB-first
    (32 bytes for a 16x16 cell).
    """
    cell = bytearray(FONT_SIZE * FONT_SIZE // 8)
    # Center the glyph in the cell.  NOTE(review): this ignores the font
    # baseline (glyph.bitmap_top); centering matches the previous generator's
    # behaviour, but glyphs of different heights will not share a baseline.
    x_off = (FONT_SIZE - width) // 2
    y_off = (FONT_SIZE - rows) // 2
    for row in range(rows):
        dst_row = row + y_off
        if not 0 <= dst_row < FONT_SIZE:
            continue  # clipped vertically
        for col in range(width):
            dst_col = col + x_off
            if not 0 <= dst_col < FONT_SIZE:
                continue  # clipped horizontally
            # Source bitmap is 1 bit per pixel, MSB-first, `pitch` bytes/row.
            byte_idx = row * pitch + (col >> 3)
            if byte_idx >= len(src_buf):
                continue  # guard against short buffers
            if (src_buf[byte_idx] >> (7 - (col & 7))) & 1:
                # Destination row is 2 bytes wide (16 px / 8).
                cell[dst_row * 2 + (dst_col >> 3)] |= 1 << (7 - (dst_col & 7))
    return cell


def generate_static_font():
    """Render every unique non-ASCII char of FIXED_STRINGS at FONT_SIZE px
    and write them to OUTPUT_FILE as a ``FONTS = {codepoint: bytes}`` module.

    Prints an error and returns early if the font file cannot be loaded.
    """
    sorted_chars = _collect_target_chars(FIXED_STRINGS)
    print(f"Generating font data for {len(sorted_chars)} characters: {''.join(sorted_chars)}")
    try:
        face = freetype.Face(FONT_FILE)
    except Exception as e:
        print(f"Error loading font: {e}")
        return
    face.set_pixel_sizes(FONT_SIZE, FONT_SIZE)
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        f.write("# Static font data generated for specific characters\n")
        f.write("import ubinascii\n\n")
        f.write("FONTS = {\n")
        for char in sorted_chars:
            # MONO target gives a 1-bit-per-pixel buffer we can pack directly.
            face.load_char(char, freetype.FT_LOAD_RENDER | freetype.FT_LOAD_TARGET_MONO)
            bitmap = face.glyph.bitmap
            cell = _pack_glyph(bitmap.buffer, bitmap.pitch, bitmap.width, bitmap.rows)
            # Emit as an escaped bytes literal so the generated module needs
            # no decoding at import time on the device.
            hex_str = "".join(f"\\x{b:02x}" for b in cell)
            f.write(f" {ord(char)}: b'{hex_str}', # {char}\n")
        f.write("}\n")
    print(f"Generated {OUTPUT_FILE}")
# Run the generator only when executed as a script (not on import).
if __name__ == "__main__":
    generate_static_font()

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -444,16 +444,39 @@ class MyRecognitionCallback(RecognitionCallback):
def on_event(self, result: RecognitionResult) -> None:
if result.get_sentence():
text = result.get_sentence()['text']
print(f"ASR Result: {text}")
# 累积每一句识别结果
self.sentence_list.append(text)
# 获取当前句子的结束状态
# 注意DashScope Python SDK 的 Result 结构可能需要根据版本调整
# 这里假设我们只关心文本内容的变化
# 简单的去重逻辑:如果新来的文本比上一句长且包含上一句,则认为是同一句的更新
if self.sentence_list:
last_sentence = self.sentence_list[-1]
# 去掉句尾标点进行比较,因为流式结果可能标点不稳定
last_clean = last_sentence.rstrip('。,?!')
text_clean = text.rstrip('。,?!')
if text_clean.startswith(last_clean):
# 更新当前句子
self.sentence_list[-1] = text
elif last_clean.startswith(text_clean):
# 如果新来的比旧的短但也是前缀(不太可能发生,除非回溯),忽略或更新
pass
else:
# 新的句子
self.sentence_list.append(text)
else:
self.sentence_list.append(text)
# 同时更新 final_text 以便 Stop 时获取
self.final_text = "".join(self.sentence_list)
print(f"ASR Update: {self.final_text}")
# 将识别结果发送回客户端
try:
if self.loop.is_running():
asyncio.run_coroutine_threadsafe(
self.websocket.send_text(f"ASR:{text}"),
self.websocket.send_text(f"ASR:{self.final_text}"),
self.loop
)
except Exception as e:
@@ -559,12 +582,24 @@ def generate_image(prompt, progress_callback=None, retry_count=0, max_retries=2)
progress_callback(35, "正在请求AI生成图片...")
try:
if not prompt:
print("Error: prompt is empty")
if progress_callback:
progress_callback(0, "提示词为空")
return None
response = ImageSynthesis.call(
model='wanx2.0-t2i-turbo',
prompt=prompt
)
if response.status_code == 200:
if not response.output:
print("Error: response.output is None")
if progress_callback:
progress_callback(0, "API响应无效")
return None
task_status = response.output.get('task_status')
if task_status == 'PENDING' or task_status == 'RUNNING':
@@ -631,9 +666,9 @@ def generate_image(prompt, progress_callback=None, retry_count=0, max_retries=2)
g6 = (g >> 2) & 0x3F
b5 = (b >> 3) & 0x1F
# 端模式:字节在前
# 端模式:字节在前 (符合ST7789默认配置)
rgb565 = (r5 << 11) | (g6 << 5) | b5
rgb565_data.extend(struct.pack('<H', rgb565))
rgb565_data.extend(struct.pack('>H', rgb565))
# 保存为.bin文件
with open(GENERATED_THUMB_FILE, 'wb') as f: