diff --git a/.trae/rules/rule.md b/.trae/rules/rule.md
index 9601240..7a43358 100644
--- a/.trae/rules/rule.md
+++ b/.trae/rules/rule.md
@@ -1,4 +1,5 @@
 这是一个esp32 s3项目
 用的是Micropython
 使用的spi7789 方形的屏幕封装
-硬件是基于c++文件夹里的代码改到MicroPython上面
\ No newline at end of file
+硬件是基于c++文件夹里的代码改到MicroPython上面
+websocket_server是这个esp32的服务器项目
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index ecd5bbf..0000000
--- a/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2019 Ivan Belokobylskiy
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/audio.py b/audio.py
index f3c7eb1..37d2c1a 100644
--- a/audio.py
+++ b/audio.py
@@ -42,33 +42,70 @@ class AudioPlayer:
             self.i2s = None
 
     def play_tone(self, frequency, duration_ms, volume=0.5):
-        """播放指定频率的音调"""
+        """Play a square wave of `frequency` Hz for `duration_ms` ms by looping one small buffer; `volume` scales the peak amplitude."""
         if self.i2s is None: return
         
         sample_rate = self.config.get('sample_rate', 24000)
-        n_samples = int(sample_rate * duration_ms / 1000)
+        
+        if frequency <= 0:
+            # A non-positive frequency is a rest: wait out the duration in silence
+            time.sleep_ms(duration_ms)
+            return
+
+        # Peak sample value
         amplitude = int(32767 * volume)
         
-        # STEREO: 每个采样 2 个声道 (L+R)，每个声道 2 字节 (16-bit) -> 4 字节/帧
-        buffer = bytearray(n_samples * 4)
-        if frequency > 0:
-            period = sample_rate // frequency
-            half_period = period // 2
+        # Samples per cycle, forced to an int >= 2. A float frequency would otherwise
+        # make the buffer size a float, frequency > sample_rate would give a period of 0
+        # (division by zero below), and a period of 1 would be a constant level.
+        period = max(2, int(sample_rate // frequency))
+        
+        # Aim for a buffer of about 2048 bytes (a few-byte buffer would cause underrun)
+        target_size = 2048
+        frame_size = 4 # 16bit stereo
+        
+        # Number of whole cycles that fit in the buffer
+        period_bytes = period * frame_size
+        repeats = max(1, target_size // period_bytes)
+        buffer_bytes = repeats * period_bytes
+        buffer = bytearray(buffer_bytes)
+        
+        # Fill the buffer
+        half_period = period // 2
+        
+        # Precompute the low/high bytes of the two sample values
+        pos_val = amplitude
+        neg_val = -amplitude
+        pos_low = pos_val & 0xFF
+        pos_high = (pos_val >> 8) & 0xFF
+        neg_low = neg_val & 0xFF
+        neg_high = (neg_val >> 8) & 0xFF
+        
+        for i in range(period * repeats):
+            # Square wave: high level for the first half of the cycle, low for the rest
+            if (i % period) < half_period:
+                low, high = pos_low, pos_high
+            else:
+                low, high = neg_low, neg_high
             
-            for i in range(n_samples):
-                # 方波：前半周期高电平，后半周期低电平
-                sample = amplitude if (i % period) < half_period else -amplitude
-                # 左声道
-                struct.pack_into('<h', buffer, i * 4, sample)
-                # 右声道
-                struct.pack_into('<h', buffer, i * 4 + 2, sample)
-        else:
-            # 静音，缓冲区默认为0
-            pass
+            idx = i * 4
+            buffer[idx] = low
+            buffer[idx+1] = high
+            buffer[idx+2] = low
+            buffer[idx+3] = high
 
+        # Total payload in whole frames, so the last write never ends in the middle of a frame
+        total_bytes = int(sample_rate * duration_ms / 1000) * frame_size
+        tail = memoryview(buffer)  # lets the final partial write slice without copying
+        written = 0
         try:
-            # 写入多次以确保缓冲区填满并开始播放
-            self.i2s.write(buffer) 
+            while written < total_bytes:
+                to_write = min(len(buffer), total_bytes - written)
+                if to_write == len(buffer):
+                    self.i2s.write(buffer)
+                else:
+                    self.i2s.write(tail[:to_write])
+                written += to_write
         except Exception as e:
             print(f"Write error: {e}")
 
diff --git a/display.py b/display.py
index bf10b30..acabb97 100644
--- a/display.py
+++ b/display.py
@@ -1,6 +1,7 @@
 import machine
 import st7789py as st7789
 from config import CURRENT_CONFIG
+import font
 
 class Display:
     def __init__(self):
@@ -8,6 +9,7 @@ class Display:
         self.width = 240
         self.height = 240
         self._init_display()
+        self.font = font.Font()
 
     def _init_display(self):
         print(">>> Initializing Display...")
@@ -41,6 +43,18 @@ class Display:
         if self.tft:
             self.tft.fill_rect(x, y, w, h, color)
 
+    def set_ws(self, ws):
+        """Hand the WebSocket client `ws` to the font renderer, if one exists."""
+        if not self.font:
+            return
+        self.font.set_ws(ws)
+
+    def text(self, text, x, y, color):
+        """Draw `text` at (x, y) in `color` via the font renderer; does nothing without a TFT."""
+        if not self.tft:
+            return
+        self.font.text(self.tft, text, x, y, color)
+
     def init_ui(self):
         """初始化 UI 背景"""
         if self.tft:
diff --git a/font.py b/font.py
new file mode 100644
index 0000000..aa6e669
--- /dev/null
+++ b/font.py
@@ -0,0 +1,217 @@
+import framebuf
+import struct
+import time
+import binascii
+
+class Font:
+    """Text renderer for an ST7789 panel.
+
+    Characters above code point 127 are drawn as 16x16 bitmaps requested from
+    the server over a WebSocket (GET_FONT / FONT_DATA text messages) and
+    cached; everything else is drawn with the 8x8 font built into framebuf.
+    """
+
+    GLYPH_SIZE = 16          # width and height of a fetched glyph, in pixels
+    GLYPH_BYTES = 32         # 16x16 pixels at 1 bit per pixel
+    MAX_CACHE = 128          # cap on cached glyphs so the cache cannot exhaust RAM
+    FETCH_TIMEOUT_MS = 1000  # how long to wait for one glyph reply
+
+    def __init__(self, ws=None):
+        """Create a renderer; `ws` is the WebSocket client used to fetch glyphs (may be None)."""
+        self.ws = ws
+        self.cache = {}  # {2-byte character code as int: 1bpp bitmap bytes}
+
+    def set_ws(self, ws):
+        """Replace the WebSocket client used for glyph requests."""
+        self.ws = ws
+
+    def text(self, tft, text, x, y, color, bg=0x0000):
+        """Draw `text` on `tft` starting at (x, y), wrapping at the right edge.
+
+        Args:
+            tft: display object providing `width`, `height` and `blit_buffer`.
+            text: string to draw; a newline starts a new 16-pixel row at the initial x.
+            x, y: top-left pixel of the first character cell.
+            color: RGB565 foreground colour.
+            bg: RGB565 background colour.
+
+        Drawing stops once a row would extend past the bottom of the display.
+        Characters whose glyph cannot be obtained are drawn as '?'.
+        """
+        color_bytes = struct.pack(">H", color)
+        bg_bytes = struct.pack(">H", bg)
+        size = self.GLYPH_SIZE
+        initial_x = x
+
+        for char in text:
+            if char == '\n':
+                x = initial_x
+                y += size
+                continue
+
+            # A full 16-pixel cell is reserved even for 8-pixel ASCII characters.
+            if x + size > tft.width:
+                x = initial_x
+                y += size
+            if y + size > tft.height:
+                break
+
+            bitmap = self._glyph_for(char) if ord(char) > 127 else None
+            if bitmap:
+                self._draw_bitmap(tft, bitmap, x, y, size, size, color_bytes, bg_bytes)
+                x += size
+            else:
+                # framebuf's font only covers ASCII, so anything else becomes '?'.
+                self._draw_ascii(tft, char if ord(char) <= 127 else '?', x, y, color, bg)
+                x += 8
+
+    def _glyph_for(self, char):
+        """Return the cached or freshly fetched bitmap for `char`, or None if unavailable."""
+        try:
+            # NOTE(review): stock MicroPython may ignore the codec argument and
+            # return UTF-8 here, in which case the length check below never
+            # passes -- confirm 'gb2312' is supported by the target firmware.
+            gb = char.encode('gb2312')
+            if len(gb) != 2:
+                return None
+            code = struct.unpack('>H', gb)[0]
+
+            bitmap = self.cache.get(code)
+            if bitmap:
+                return bitmap
+            if not self.ws:
+                print("WS not available for font fetch")
+                return None
+
+            # Blocking round trip: slow for long text but fine for short strings.
+            hex_code = "0x{:04X}".format(code)
+            print(f"Requesting font for {hex_code} ({char})")
+            self.ws.send(f"GET_FONT:{hex_code}")
+            bitmap = self._wait_for_font(hex_code)
+            if bitmap:
+                self._remember(code, bitmap)
+                print(f"Font loaded for {hex_code}")
+            else:
+                print(f"Font fetch timeout for {hex_code}")
+            return bitmap
+        except Exception as e:
+            print(f"Font error: {e}")
+            return None
+
+    def _remember(self, code, bitmap):
+        """Cache `bitmap` under `code`, evicting an arbitrary entry when the cache is full."""
+        if code not in self.cache and len(self.cache) >= self.MAX_CACHE:
+            self.cache.pop(next(iter(self.cache)))
+        self.cache[code] = bitmap
+
+    def _wait_for_font(self, target_hex_code):
+        """Wait up to FETCH_TIMEOUT_MS for the FONT_DATA reply to one glyph request.
+
+        Returns the decoded bitmap bytes, or None on timeout, on a receive
+        error, or when recv() reports None.
+
+        WARNING: every message read while waiting is consumed. Glyphs for other
+        codes are cached; any other text message (e.g. START_PLAYBACK) is
+        printed and lost, because there is no queue to put it back on.
+        """
+        if not self.ws:
+            return None
+
+        import uselect  # imported lazily, as before, so the module loads without it
+
+        # Build the poller once instead of once per loop iteration.
+        poller = uselect.poll()
+        poller.register(self.ws.sock, uselect.POLLIN)
+
+        start = time.ticks_ms()
+        while time.ticks_diff(time.ticks_ms(), start) < self.FETCH_TIMEOUT_MS:
+            # Poll in 200 ms slices so a silent server cannot block us forever.
+            if not poller.poll(200):
+                continue
+
+            try:
+                msg = self.ws.recv()
+            except Exception as e:
+                print(f"Font recv error: {e}")
+                return None
+            if msg is None:
+                # main.py treats a None from recv() as a closed connection.
+                return None
+            if not isinstance(msg, str):
+                continue  # non-text frames are dropped here, as they always were
+
+            if not msg.startswith("FONT_DATA:"):
+                print(f"Ignored msg during font fetch: {msg}")
+                continue
+
+            parts = msg.split(":")
+            if len(parts) < 3:
+                continue
+            wanted = parts[1] == target_hex_code
+            try:
+                bitmap = binascii.unhexlify(parts[2])
+                code = None if wanted else int(parts[1], 16)
+            except ValueError as e:
+                print(f"Bad font message: {e}")
+                continue
+            if len(bitmap) != self.GLYPH_BYTES:
+                # A wrong-sized bitmap would overrun or underfill the pixel buffer.
+                print(f"Bad font size for {parts[1]}: {len(bitmap)}")
+                continue
+
+            if wanted:
+                return bitmap
+            # A reply for some other character: keep it for later.
+            self._remember(code, bitmap)
+
+        return None
+
+    def _draw_bitmap(self, tft, bitmap, x, y, w, h, color_bytes, bg_bytes):
+        """Expand a 1-bit-per-pixel `bitmap` (MSB first) to RGB565 and blit it at (x, y).
+
+        `color_bytes` / `bg_bytes` are the 2-byte RGB565 values written for set
+        and clear bits; `bitmap` is expected to hold w * h / 8 bytes.
+        """
+        fg_hi, fg_lo = color_bytes[0], color_bytes[1]
+        bg_hi, bg_lo = bg_bytes[0], bg_bytes[1]
+        rgb_buf = bytearray(w * h * 2)
+        idx = 0
+        for byte in bitmap:
+            for bit in range(7, -1, -1):
+                if (byte >> bit) & 1:
+                    rgb_buf[idx] = fg_hi
+                    rgb_buf[idx + 1] = fg_lo
+                else:
+                    rgb_buf[idx] = bg_hi
+                    rgb_buf[idx + 1] = bg_lo
+                idx += 2
+        tft.blit_buffer(rgb_buf, x, y, w, h)
+
+    def _draw_ascii(self, tft, char, x, y, color, bg):
+        """Draw one character with framebuf's 8x8 font, centred in an 8x16 cell at (x, y)."""
+        # Render the glyph into a 1bpp scratch buffer; with MONO_VLSB each byte
+        # is one column of 8 pixels.
+        buf = bytearray(8)
+        fb = framebuf.FrameBuffer(buf, 8, 8, framebuf.MONO_VLSB)
+        fb.fill(0)
+        fb.text(char, 0, 0, 1)
+
+        bg_high, bg_low = bg >> 8, bg & 0xFF
+        color_high, color_low = color >> 8, color & 0xFF
+
+        # Start from an 8x16 RGB565 cell filled with the background colour.
+        rgb_buf = bytearray(8 * 16 * 2)
+        for i in range(0, len(rgb_buf), 2):
+            rgb_buf[i] = bg_high
+            rgb_buf[i + 1] = bg_low
+
+        # Copy the set pixels, shifted down 4 rows to centre the 8-pixel glyph.
+        for col in range(8):
+            byte = buf[col]
+            for row in range(8):
+                if (byte >> row) & 1:
+                    pos = ((row + 4) * 8 + col) * 2
+                    rgb_buf[pos] = color_high
+                    rgb_buf[pos + 1] = color_low
+
+        tft.blit_buffer(rgb_buf, x, y, 8, 16)
diff --git a/main.py b/main.py
index 1786af0..d3d85ac 100644
--- a/main.py
+++ b/main.py
@@ -4,15 +4,195 @@ import math
 import struct
 import array
 import gc
+import network
 import st7789py as st7789
 from config import CURRENT_CONFIG
 from audio import AudioPlayer, Microphone
 from display import Display
+from websocket_client import WebSocketClient
+import uselect
+
+# =============================================================================
+# Network configuration (NOTE(review): WiFi credentials are hard-coded in source)
+# =============================================================================
+WIFI_SSID = "Tangledup-AI"
+WIFI_PASS = "djt12345678"
+# Change this to your computer's IP address
+SERVER_IP = "6.6.6.88" 
+SERVER_PORT = 8000
+SERVER_URL = f"ws://{SERVER_IP}:{SERVER_PORT}/ws/audio"
+
+def diagnose_wifi():
+    """
+    Diagnose the WiFi module state and print detailed debug information.
+    """
+    print("\n" + "="*50)
+    print("WiFi DIAGNOSTIC INFORMATION")
+    print("="*50)
+    
+    wlan = network.WLAN(network.STA_IF)
+    
+    # Basic status
+    print(f"WiFi Module Active: {wlan.active()}")
+    print(f"Connection Status: {wlan.isconnected()}")
+    
+    if wlan.isconnected():
+        print(f"Network Config: {wlan.ifconfig()}")
+        print(f"Network SSID: {wlan.config('essid')}")
+        print(f"Signal Strength: {wlan.status('rssi')} dBm")
+    
+    # Scan for available networks
+    try:
+        print("\nScanning for available networks...")
+        wlan.active(True)  # the interface is left active after the scan
+        time.sleep(1)
+        
+        networks = wlan.scan()
+        print(f"Found {len(networks)} networks:")
+        
+        for net in networks:
+            ssid = net[0].decode('utf-8') if net[0] else "Hidden"
+            bssid = ':'.join(['%02x' % b for b in net[1]])
+            channel = net[2]
+            rssi = net[3]
+            security = net[4]
+            
+            # Mark the network we intend to join
+            marker = " [TARGET]" if ssid == WIFI_SSID else ""
+            
+            print(f"  {ssid}{marker}")
+            print(f"    BSSID: {bssid}, Channel: {channel}, RSSI: {rssi}dBm")
+            
+            # Translate the RSSI into a human-readable quality label
+            if rssi > -50:
+                signal_desc = "Excellent"
+            elif rssi > -60:
+                signal_desc = "Good"
+            elif rssi > -70:
+                signal_desc = "Fair"
+            else:
+                signal_desc = "Weak"
+            
+            print(f"    Signal: {signal_desc}")
+            print("")
+            
+    except Exception as e:
+        print(f"Network scan failed: {e}")
+    
+    print("="*50 + "\n")
+
+
+def connect_wifi(max_retries=3):
+    """
+    Connect to the WiFi network with error handling and a retry loop.
+    
+    Args:
+        max_retries: maximum number of connection attempts (default 3)
+        
+    Returns:
+        bool: True once connected, False if every attempt failed
+    """
+    wlan = network.WLAN(network.STA_IF)
+    
+    # Power-cycle the interface first so we start from a clean state
+    try:
+        wlan.active(False)  # turn WiFi off
+        time.sleep(1)       # give the module a second to shut down
+        wlan.active(True)   # turn it back on
+        time.sleep(1)       # give the module a second to initialise
+    except Exception as e:
+        print(f"WiFi module initialization error: {e}")
+        return False
+    
+    # Connection attempts with retries
+    for attempt in range(max_retries):
+        try:
+            print(f"WiFi connection attempt {attempt + 1}/{max_retries}")
+            
+            # Nothing to do if we are already connected
+            if wlan.isconnected():
+                print('Already connected to WiFi')
+                print('Network config:', wlan.ifconfig())
+                return True
+            
+            # Start the connection
+            print(f'Connecting to WiFi {WIFI_SSID}...')
+            wlan.connect(WIFI_SSID, WIFI_PASS)
+            
+            # Wait for the connection to come up, with a timeout
+            start_time = time.time()
+            while not wlan.isconnected():
+                if time.time() - start_time > 20:  # 20 s limit per attempt
+                    print("WiFi connection timeout!")
+                    break
+                time.sleep(0.5)
+                print(".", end="")
+            
+            print("")  # end the progress-dot line
+            
+            # Check the outcome of this attempt
+            if wlan.isconnected():
+                print('WiFi connected successfully!')
+                print('Network config:', wlan.ifconfig())
+                return True
+            else:
+                print(f"Connection attempt {attempt + 1} failed")
+                
+                # Clean up before retrying
+                if attempt < max_retries - 1:  # skip after the last attempt
+                    print("Resetting WiFi module for retry...")
+                    wlan.disconnect()  # drop the half-open connection
+                    time.sleep(2)      # wait 2 seconds
+                    
+        except OSError as e:
+            print(f"WiFi connection error on attempt {attempt + 1}: {e}")
+            if "Wifi Internal State Error" in str(e):
+                print("Detected internal state error, resetting WiFi module...")
+                try:
+                    wlan.active(False)
+                    time.sleep(2)
+                    wlan.active(True)
+                    time.sleep(1)
+                except Exception as reset_err:  # not bare: keep Ctrl-C working
+                    print(f"WiFi reset failed: {reset_err}")
+            
+            if attempt < max_retries - 1:
+                print(f"Retrying in 3 seconds...")
+                time.sleep(3)
+        
+        except Exception as e:
+            print(f"Unexpected error on attempt {attempt + 1}: {e}")
+            if attempt < max_retries - 1:
+                time.sleep(2)
+    
+    # Every attempt failed
+    print("All WiFi connection attempts failed!")
+    try:
+        wlan.active(False)  # switch the WiFi module off to save power
+    except Exception as off_err:  # not bare: keep Ctrl-C working
+        print(f"WiFi power-down failed: {off_err}")
+    return False
 
 # =============================================================================
 # 硬件引脚配置 (从 config.py 获取)
 # =============================================================================
 
+def print_nice_asr(text, display=None):
+    """Pretty-print the ASR result to the console and, if a display is given, draw it on screen."""
+    print("\n" + "*"*40)
+    print("   ASR RESULT:")
+    print(f"   {text}")
+    print("*"*40 + "\n")
+    
+    if display and display.tft:
+        # Clear the previous text area (keeping the top status bar and the bottom visualisation bar).
+        # Assumes a 30 px status bar and roughly 240-200=40 px at the bottom for the visualisation (unverified).
+        # init_ui draws the white bar over rows 0-30.
+        # update_audio_bar draws its bar at 240-bar_height.
+        # So the text uses the middle region, rows 40-200.
+        display.fill_rect(0, 40, 240, 160, st7789.BLACK)
+        display.text(text, 0, 40, st7789.WHITE)
+
 def main():
     print("\n" + "="*40)
     print("AUDIO & MIC DIAGNOSTIC V5 (Modular & Clean)")
@@ -35,7 +215,44 @@ def main():
     speaker = AudioPlayer()
     if speaker.i2s:
         # 默认播放马里奥
-        speaker.play_mario()
+        # speaker.play_mario()
+        
+        # 播放简单方波 (1kHz, 1秒)
+        # 直接在 main.py 中实现分块播放，避免因 audio.py 未同步导致的 MemoryError
+        print("Playing 1kHz square wave...")
+        try:
+            import struct
+            
+            # 1. 参数设置
+            sr = 24000 # 默认采样率
+            if hasattr(speaker, 'config') and speaker.config:
+                sr = speaker.config.get('sample_rate', 24000)
+            freq = 1000
+            duration = 1000 # ms
+            vol = 10000     # 音量 (max 32767)
+            
+            # 2. 准备缓冲区 (只生成一小段，循环播放)
+            # 1kHz @ 24kHz -> 24 samples/cycle
+            period = sr // freq 
+            # 生成约 1000 字节的 buffer (包含整数个周期)
+            cycles_in_buf = 10 
+            buf = bytearray(period * cycles_in_buf * 4) # 16bit stereo = 4 bytes/frame
+            
+            # 3. 填充方波数据
+            for i in range(period * cycles_in_buf):
+                # 方波逻辑
+                sample = vol if (i % period) < (period // 2) else -vol
+                # 写入左右声道 (Little Endian, 16-bit signed)
+                struct.pack_into('<hh', buf, i*4, sample, sample)
+                
+            # 4. 循环写入 I2S
+            t_end = time.ticks_add(time.ticks_ms(), duration)
+            while time.ticks_diff(t_end, time.ticks_ms()) > 0:
+                speaker.i2s.write(buf)
+                
+        except Exception as e:
+            print(f"Tone error: {e}")
+
     else:
         print("!!! Speaker initialization failed")
 
@@ -57,7 +274,49 @@ def main():
     
     # 录音状态变量
     is_recording = False
-    recorded_chunks = []
+    
+    # WebSocket 连接
+    ws = None
+    
+    # 定义连接函数
+    def connect_ws():
+        """(Re)connect `ws` to SERVER_URL, hand it to the display, and return True on success."""
+        nonlocal ws
+        try:
+            if ws:
+                ws.close()
+        except Exception as e:  # not bare: keep Ctrl-C working, and report the failure
+            print(f"WS close error: {e}")
+        ws = None  # drop the old client so a failed connect leaves no stale object
+        
+        try:
+            print(f"Connecting to WebSocket Server: {SERVER_URL}")
+            ws = WebSocketClient(SERVER_URL)
+            print("WebSocket connected successfully!")
+            
+            # Pass WebSocket to display for font loading
+            if display:
+                display.set_ws(ws)
+                
+            return True
+        except Exception as e:
+            print(f"WebSocket connection failed: {e}")
+            return False
+
+    # 先运行WiFi诊断
+    print("Running WiFi diagnostics...")
+    diagnose_wifi()
+    
+    # 尝试连接WiFi
+    print("Starting WiFi connection process...")
+    if connect_wifi(max_retries=3):
+        print("WiFi connected successfully!")
+        connect_ws()
+    else:
+        print("WiFi connection failed after all attempts!")
+        print("Continuing in offline mode without WebSocket functionality...")
+        print("You can still use the device for local audio recording and visualization.")
+
     
     # 调试：打印一次 Boot 键状态
     print(f"Boot Button Initial State: {boot_btn.value()}")
@@ -86,68 +345,151 @@ def main():
                 if not is_recording:
                     print("\n>>> Start Recording (Boot Pressed)...")
                     is_recording = True
-                    recorded_chunks = []
                     if display.tft:
                         print(">>> Filling Screen WHITE")
                         display.fill(st7789.WHITE)
                     else:
                         print(">>> Display TFT is None!")
+                    
+                    # 尝试重连 WS
+                    if ws is None or not ws.is_connected():
+                        print(">>> WS not connected, trying to reconnect...")
+                        connect_ws()
+                    
+                    # 发送开始录音指令
+                    if ws and ws.is_connected():
+                        try:
+                            ws.send("START_RECORDING")
+                        except Exception as e:
+                            print(f"WS Send Error: {e}")
+                            ws = None # Disconnect on error
+                    else:
+                        print(">>> Warning: No WebSocket connection! Audio will be discarded.")
                 
-                # 录音
+                # 录音并流式传输
                 if mic.i2s:
                     num_read = mic.readinto(read_buf)
                     if num_read > 0:
-                        try:
-                            recorded_chunks.append(bytes(read_buf[:num_read]))
-                        except MemoryError:
-                            print("Memory Full!")
+                        if ws and ws.is_connected():
+                            try:
+                                # 发送二进制数据
+                                ws.send(read_buf[:num_read], opcode=2)
+                                
+                                # 检查是否有回传的 ASR 结果 (非阻塞)
+                                poller = uselect.poll()
+                                poller.register(ws.sock, uselect.POLLIN)
+                                events = poller.poll(0) # 0 = return immediately
+                                if events:
+                                    msg = ws.recv()
+                                    if isinstance(msg, str) and msg.startswith("ASR:"):
+                                        print_nice_asr(msg[4:], display)
+                                    
+                            except Exception as e:
+                                print(f"WS Send/Recv Error: {e}")
+                                # 如果发送失败，视为断开
+                                try:
+                                    ws.close()
+                                except:
+                                    pass
+                                ws = None
+                        else:
+                            # 如果没有 WS，就不保存了，避免内存溢出
+                            pass
+                
                 continue # 跳过可视化逻辑
 
             # === 按键释放处理 ===
             elif is_recording:
-                print(f"\n>>> Stop Recording. Captured {len(recorded_chunks)} chunks.")
+                print(f"\n>>> Stop Recording.")
                 is_recording = False
                 
                 if display.tft:
                     display.init_ui()
                 
-                # 播放录音
-                if speaker.i2s and len(recorded_chunks) > 0:
-                    print(">>> Playing...")
+                # 停止录音并等待回放
+                if ws:
                     try:
-                        cfg = speaker.config
-                        # 重新初始化 Speaker (16kHz Mono 16-bit) 以匹配 Mic 数据
-                        speaker.i2s.deinit()
-                        speaker.i2s = machine.I2S(
-                            0,
-                            sck=machine.Pin(cfg['bck']),
-                            ws=machine.Pin(cfg['ws']),
-                            sd=machine.Pin(cfg['sd']),
-                            mode=machine.I2S.TX,
-                            bits=16,
-                            format=machine.I2S.MONO,
-                            rate=16000,
-                            ibuf=20000,
-                        )
+                        print(">>> Sending STOP & Waiting for playback...")
+                        ws.send("STOP_RECORDING")
                         
-                        # 播放数据
-                        for chunk in recorded_chunks:
-                            # 32-bit Mono -> 16-bit Mono (取高16位)
-                            # chunk 是 bytes, 转为 array('h') 方便访问 16-bit word
-                            # 32-bit 数据: LowWord, HighWord
-                            # 我们需要 HighWord
-                            arr = array.array('h', chunk)
-                            samples = arr[1::2]
-                            speaker.i2s.write(samples)
+                        # 重新初始化 Speaker (16kHz Mono 16-bit)
+                        if speaker.i2s:
+                            cfg = speaker.config
+                            speaker.i2s.deinit()
+                            speaker.i2s = machine.I2S(
+                                0,
+                                sck=machine.Pin(cfg['bck']),
+                                ws=machine.Pin(cfg['ws']),
+                                sd=machine.Pin(cfg['sd']),
+                                mode=machine.I2S.TX,
+                                bits=16,
+                                format=machine.I2S.MONO,
+                                rate=16000,
+                                ibuf=40000,
+                            )
+                        
+                        # 接收回放循环
+                        playback_timeout = 5000 # 5秒无数据则退出
+                        last_data_time = time.ticks_ms()
+                        
+                        while True:
+                            # Check for data with timeout
+                            poller = uselect.poll()
+                            poller.register(ws.sock, uselect.POLLIN)
+                            events = poller.poll(100) # 100ms wait
+                            
+                            if events:
+                                msg = ws.recv()
+                                last_data_time = time.ticks_ms()
+                                
+                                if isinstance(msg, str):
+                                    if msg == "START_PLAYBACK":
+                                        print(">>> Server starting playback stream...")
+                                        continue
+                                    elif msg == "STOP_PLAYBACK":
+                                        print(">>> Server finished playback.")
+                                        break
+                                    elif msg.startswith("ASR:"):
+                                        print_nice_asr(msg[4:], display)
+                                        
+                                elif isinstance(msg, bytes):
+                                    # 播放接收到的音频数据
+                                    if speaker.i2s:
+                                        # 使用 try-except 防止 write 阻塞导致的问题
+                                        try:
+                                            speaker.i2s.write(msg)
+                                        except Exception as e:
+                                            print(f"I2S Write Error: {e}")
+                                        
+                                elif msg is None:
+                                    print("WS Connection closed or error (recv returned None)")
+                                    try:
+                                        ws.close()
+                                    except:
+                                        pass
+                                    ws = None
+                                    break
+                            else:
+                                # No data received in this poll window
+                                if time.ticks_diff(time.ticks_ms(), last_data_time) > playback_timeout:
+                                    print("Playback timeout - no data received for 5 seconds")
+                                    break
+                                
+                                # Feed watchdog or do other small tasks if needed
+                                # time.sleep(0.01)
                             
                     except Exception as e:
-                        print(f"Playback error: {e}")
+                        print(f"Playback loop error: {e}")
+                        try:
+                            ws.close()
+                        except:
+                            pass
+                        ws = None
                     
                     # 恢复 Speaker 原始配置
                     if speaker.i2s: speaker.i2s.deinit()
                     speaker._init_audio()
                 
-                recorded_chunks = []
                 gc.collect()
 
             # === 原有的可视化逻辑 ===
@@ -178,10 +520,7 @@ def main():
                         last_print = time.ticks_ms()
                     
                     if display.tft:
-                        # 调整缩放比例，让显示更敏感
-                        # 你的日志显示安静时 Max ~2000-3000, 说话时 Max ~40000
-                        # 我们可以把 Max 40000 映射到满格
-                        
+                        # 调整缩放比例
                         bar_height = int((max_val / 40000) * 200) 
                         if bar_height > 200: bar_height = 200
                         if bar_height < 0: bar_height = 0
diff --git a/websocket_client.py b/websocket_client.py
new file mode 100644
index 0000000..8c4cbed
--- /dev/null
+++ b/websocket_client.py
@@ -0,0 +1,227 @@
+import usocket as socket
+import ubinascii
+import uos
+
+
+class WebSocketError(Exception):
+    """Raised when the handshake fails or the connection is unusable."""
+    pass
+
+
+class WebSocketClient:
+    """Minimal blocking WebSocket client (RFC 6455) for MicroPython.
+
+    Only plain ``ws://`` URIs are handled. The constructor connects
+    immediately, so creating an instance may raise OSError or
+    WebSocketError. Fragmented messages are not reassembled.
+    """
+
+    def __init__(self, uri, timeout=5):
+        self.sock = None
+        self.uri = uri
+        self.timeout = timeout
+        self.connect()
+
+    def connect(self):
+        """Open the TCP connection and perform the upgrade handshake.
+
+        Raises OSError when the TCP connection cannot be established and
+        WebSocketError when the server does not switch protocols. On any
+        failure the socket is closed and ``self.sock`` is reset to None, so
+        ``is_connected()`` never reports a half-open connection.
+        """
+        uri = self.uri
+        assert uri.startswith("ws://"), "only ws:// URIs are supported"
+
+        uri = uri[5:]
+        if "/" in uri:
+            host, path = uri.split("/", 1)
+        else:
+            host, path = uri, ""
+        path = "/" + path
+
+        if ":" in host:
+            host, port = host.split(":")
+            port = int(port)
+        else:
+            port = 80
+
+        print(f"Connecting to {host}:{port}{path}...")
+        self.sock = socket.socket()
+        try:
+            self.sock.settimeout(self.timeout)
+            self._handshake(host, port, path)
+        except Exception:
+            # Release the socket on failure: a stale self.sock would leak the
+            # descriptor and make is_connected() return True.
+            self.close()
+            raise
+
+        print("WebSocket connected!")
+
+    def _handshake(self, host, port, path):
+        """Connect self.sock to host:port and upgrade it to WebSocket."""
+        addr = socket.getaddrinfo(host, port)[0][-1]
+        print(f"Resolved address: {addr}")
+
+        try:
+            self.sock.connect(addr)
+        except OSError as e:
+            print(f"Socket connect failed: {e}")
+            if e.args and e.args[0] == 113:
+                print("Hint: Check firewall settings on server or if server is running.")
+            raise
+
+        # Random 16-byte nonce, base64-encoded, for Sec-WebSocket-Key.
+        key = ubinascii.b2a_base64(uos.urandom(16)).strip()
+
+        req = "GET {} HTTP/1.1\r\n".format(path)
+        req += "Host: {}:{}\r\n".format(host, port)
+        req += "Connection: Upgrade\r\n"
+        req += "Upgrade: websocket\r\n"
+        req += "Sec-WebSocket-Key: {}\r\n".format(key.decode())
+        req += "Sec-WebSocket-Version: 13\r\n"
+        req += "\r\n"
+        self.sock.write(req.encode())
+
+        # Read one byte at a time so that nothing past the blank line (the
+        # start of the first frame) is consumed.
+        header = b""
+        while b"\r\n\r\n" not in header:
+            chunk = self.sock.read(1)
+            if not chunk:
+                raise WebSocketError("Connection closed during handshake")
+            header += chunk
+
+        if b" 101 " not in header:
+            raise WebSocketError("Handshake failed: " + header.decode())
+
+    def is_connected(self):
+        """Return True while a socket object is held."""
+        return self.sock is not None
+
+    def send(self, data, opcode=1): # 1=Text, 2=Binary
+        """Send ``data`` as a single final, masked frame.
+
+        str payloads are UTF-8 encoded. Raises WebSocketError when the
+        client is not connected.
+        """
+        if not self.sock:
+            print("WebSocket is not connected (send called on closed socket)")
+            raise WebSocketError("Connection closed")
+
+        if isinstance(data, str):
+            data = data.encode('utf-8')
+
+        header = bytearray()
+        header.append(0x80 | opcode) # FIN + Opcode
+
+        length = len(data)
+        if length < 126:
+            header.append(0x80 | length) # Masked + length
+        elif length < 65536:
+            header.append(0x80 | 126)
+            header.extend(length.to_bytes(2, 'big'))
+        else:
+            header.append(0x80 | 127)
+            header.extend(length.to_bytes(8, 'big'))
+
+        mask = uos.urandom(4)
+        header.extend(mask)
+
+        masked_data = bytearray(length)
+        for i in range(length):
+            masked_data[i] = data[i] ^ mask[i % 4]
+
+        self.sock.write(header)
+        self.sock.write(masked_data)
+
+    def _read_exact(self, count):
+        """Read exactly ``count`` bytes; return None if the stream ends early."""
+        data = self.sock.read(count)
+        if not data or len(data) < count:
+            return None
+        return data
+
+    def recv(self):
+        """Receive the next data frame.
+
+        Returns a str for text frames, a bytearray for binary (and other
+        non-control) frames, and None on timeout, error, short read or when
+        a close frame arrives. Pings are answered and pongs are skipped.
+        """
+        if not self.sock:
+            return None
+
+        try:
+            # Loop instead of recursing so that a run of control frames
+            # cannot grow the call stack.
+            while True:
+                header = self._read_exact(2)
+                if header is None:
+                    return None
+
+                opcode = header[0] & 0x0f
+                mask = header[1] & 0x80
+                length = header[1] & 0x7f
+
+                # 126 / 127 announce a 16-bit / 64-bit extended length.
+                if length == 126:
+                    length_bytes = self._read_exact(2)
+                    if length_bytes is None:
+                        return None
+                    length = int.from_bytes(length_bytes, 'big')
+                elif length == 127:
+                    length_bytes = self._read_exact(8)
+                    if length_bytes is None:
+                        return None
+                    length = int.from_bytes(length_bytes, 'big')
+
+                mask_key = None
+                if mask:
+                    mask_key = self._read_exact(4)
+                    if mask_key is None:
+                        return None
+
+                # Read payload
+                data = bytearray(length)
+                view = memoryview(data)
+                pos = 0
+                while pos < length:
+                    read_len = self.sock.readinto(view[pos:])
+                    if not read_len:
+                        # 0 is EOF; None is "no data" on a non-blocking socket.
+                        return None
+                    pos += read_len
+
+                if mask:
+                    # Unmask in place to avoid a second payload-sized buffer.
+                    for i in range(length):
+                        data[i] ^= mask_key[i % 4]
+
+                if opcode == 1: # Text
+                    return data.decode('utf-8')
+                elif opcode == 8: # Close
+                    self.close()
+                    return None
+                elif opcode == 9: # Ping
+                    self.send(data, opcode=10) # Pong
+                    continue
+                elif opcode == 10: # Pong
+                    continue
+
+                return data
+
+        except Exception as e:
+            # Don't print timeout errors as they are expected in non-blocking polling
+            if "ETIMEDOUT" not in str(e) and "110" not in str(e):
+                print(f"WS Recv Error: {e}")
+            return None
+
+    def close(self):
+        """Close the socket (if any) and mark the client as disconnected."""
+        if self.sock:
+            try:
+                self.sock.close()
+            finally:
+                self.sock = None
diff --git a/websocket_server/.env b/websocket_server/.env
new file mode 100644
index 0000000..4e472f7
--- /dev/null
+++ b/websocket_server/.env
@@ -0,0 +1 @@
+DASHSCOPE_API_KEY=your-dashscope-api-key-here
\ No newline at end of file
diff --git a/websocket_server/GB2312-16.bin b/websocket_server/GB2312-16.bin
new file mode 100644
index 0000000..75b182a
Binary files /dev/null and b/websocket_server/GB2312-16.bin differ
diff --git a/websocket_server/README.md b/websocket_server/README.md
new file mode 100644
index 0000000..3a670c1
--- /dev/null
+++ b/websocket_server/README.md
@@ -0,0 +1,31 @@
+# WebSocket Audio Server
+
+This is a FastAPI server that receives audio from an ESP32 via WebSocket, saves it, processes it (converts 32-bit to 16-bit), and sends it back for playback.
+
+## Installation
+
+1. Install dependencies:
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+## Usage
+
+1. Start the server:
+   ```bash
+   python server.py
+   ```
+   Or:
+   ```bash
+   uvicorn server:app --host 0.0.0.0 --port 8000
+   ```
+
+2. Update the IP address in `main.py` on your ESP32 to match your computer's IP address.
+   Look for the `SERVER_IP` variable.
+
+## Features
+
+- Receives raw audio stream from ESP32.
+- Saves the processed 16-bit PCM audio to `received_audio.raw` and converts it to `received_audio.mp3` (requires `ffmpeg`).
+- Converts 32-bit audio (from ICS-43434) to 16-bit audio (for MAX98357A).
+- Streams processed audio back to ESP32 for playback.
diff --git a/websocket_server/__pycache__/server.cpython-312.pyc b/websocket_server/__pycache__/server.cpython-312.pyc
new file mode 100644
index 0000000..6d9d5fe
Binary files /dev/null and b/websocket_server/__pycache__/server.cpython-312.pyc differ
diff --git a/websocket_server/__pycache__/server.cpython-313.pyc b/websocket_server/__pycache__/server.cpython-313.pyc
new file mode 100644
index 0000000..e4204b4
Binary files /dev/null and b/websocket_server/__pycache__/server.cpython-313.pyc differ
diff --git a/websocket_server/generate_font.py b/websocket_server/generate_font.py
new file mode 100644
index 0000000..ba28aa5
--- /dev/null
+++ b/websocket_server/generate_font.py
@@ -0,0 +1,95 @@
+import struct
+import freetype
+import os
+
+# Font file and output file, resolved relative to this script so the tool
+# runs from any checkout. The output is written to the parent directory.
+_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+FONT_FILE = os.path.join(_SCRIPT_DIR, "GB2312.ttf")
+OUTPUT_FILE = os.path.join(os.path.dirname(_SCRIPT_DIR), "GB2312-16.bin")
+
+# Font size (16x16)
+FONT_SIZE = 16
+# Bytes per glyph: FONT_SIZE rows of FONT_SIZE one-bit pixels.
+GLYPH_BYTES = FONT_SIZE * FONT_SIZE // 8
+
+
+def _render_glyph(face, char):
+    """Render ``char`` into a row-major, MSB-first one-bit bitmap.
+
+    The glyph is centred in the FONT_SIZE x FONT_SIZE cell and pixels that
+    fall outside the cell are clipped.
+    """
+    face.load_char(char, freetype.FT_LOAD_RENDER | freetype.FT_LOAD_TARGET_MONO)
+    bitmap = face.glyph.bitmap
+    src_buf = bitmap.buffer
+    pitch = bitmap.pitch # Bytes per source row
+    row_bytes = FONT_SIZE // 8
+
+    # Plain centring; baseline metrics such as bitmap_top are not used.
+    x_off = (FONT_SIZE - bitmap.width) // 2
+    y_off = (FONT_SIZE - bitmap.rows) // 2
+
+    cell = bytearray(GLYPH_BYTES)
+    for row in range(bitmap.rows):
+        dst_row = row + y_off
+        if dst_row < 0 or dst_row >= FONT_SIZE:
+            continue
+
+        for col in range(bitmap.width):
+            dst_col = col + x_off
+            if dst_col < 0 or dst_col >= FONT_SIZE:
+                continue
+
+            # The mono bitmap packs 8 pixels per byte, leftmost in bit 7.
+            if (src_buf[row * pitch + (col >> 3)] >> (7 - (col & 7))) & 1:
+                cell[dst_row * row_bytes + (dst_col >> 3)] |= 1 << (7 - (dst_col & 7))
+    return cell
+
+
+def create_gb2312_font(font_file=FONT_FILE, output_file=OUTPUT_FILE):
+    """Generate a fixed-size bitmap font for the 94 x 94 GB2312 table.
+
+    Every code gets GLYPH_BYTES bytes, area by area, so a glyph lives at
+    ``((area - 1) * 94 + (position - 1)) * GLYPH_BYTES``. Codes that cannot
+    be decoded or rendered are written as zeros to keep the offsets valid.
+    """
+    # Load the face
+    try:
+        face = freetype.Face(font_file)
+    except Exception as e:
+        print(f"Error loading font: {e}")
+        return
+
+    # Set char size
+    face.set_pixel_sizes(FONT_SIZE, FONT_SIZE)
+
+    print(f"Generating GB2312 font file: {output_file}")
+
+    empty_char = bytes(GLYPH_BYTES)
+    count = 0
+
+    with open(output_file, 'wb') as f:
+        # Both bytes of a GB2312 code run from 0xA1 to 0xFE (94 values each).
+        for area in range(0xA1, 0xFF):
+            for index in range(0xA1, 0xFF):
+                try:
+                    char = bytes([area, index]).decode('gb2312')
+                    glyph = _render_glyph(face, char)
+                except Exception:
+                    # Unassigned code or rendering failure: leave the slot blank.
+                    f.write(empty_char)
+                    continue
+
+                f.write(glyph)
+                count += 1
+                # Report only when the counter advances; checking on every
+                # iteration repeated the message for each blank slot.
+                if count % 1000 == 0:
+                    print(f"Processed {count} characters...")
+
+    print(f"Done! Generated {output_file} with size {os.path.getsize(output_file)} bytes.")
+
+
+if __name__ == "__main__":
+    create_gb2312_font()
diff --git a/websocket_server/received_audio.mp3 b/websocket_server/received_audio.mp3
new file mode 100644
index 0000000..fb471c3
Binary files /dev/null and b/websocket_server/received_audio.mp3 differ
diff --git a/websocket_server/received_audio.raw b/websocket_server/received_audio.raw
new file mode 100644
index 0000000..8d06a40
Binary files /dev/null and b/websocket_server/received_audio.raw differ
diff --git a/websocket_server/requirements.txt b/websocket_server/requirements.txt
new file mode 100644
index 0000000..c69469c
--- /dev/null
+++ b/websocket_server/requirements.txt
@@ -0,0 +1,6 @@
+fastapi
+uvicorn
+websockets
+pydub
+dashscope
+python-dotenv
diff --git a/websocket_server/server.py b/websocket_server/server.py
new file mode 100644
index 0000000..4424f28
--- /dev/null
+++ b/websocket_server/server.py
@@ -0,0 +1,332 @@
+from fastapi import FastAPI, WebSocket, WebSocketDisconnect
+import uvicorn
+import asyncio
+import os
+import subprocess
+import struct
+from dotenv import load_dotenv
+import dashscope
+from dashscope.audio.asr import Recognition, RecognitionCallback, RecognitionResult
+import json
+
+# Load environment variables (DASHSCOPE_API_KEY) from a local .env file.
+load_dotenv()
+dashscope.api_key = os.getenv("DASHSCOPE_API_KEY")
+
+app = FastAPI()
+
+# Legacy module-level buffer, kept for importers. Every connection uses its
+# own buffers so concurrent clients cannot overwrite each other's recording.
+audio_buffer = bytearray()
+RECORDING_RAW_FILE = "received_audio.raw"
+RECORDING_MP3_FILE = "received_audio.mp3"
+VOLUME_GAIN = 10.0 # Amplification factor
+FONT_FILE = "GB2312-16.bin"
+GLYPH_BYTES = 32 # One 16x16 monochrome glyph
+GB2312_CELLS = 94 # Areas in the table, and positions per area
+PLAYBACK_CHUNK_SIZE = 4096
+# 4096 bytes last ~0.128 s at 16 kHz 16-bit mono, so pausing 0.04 s per chunk
+# sends at roughly 3x real time: the ESP32 buffer stays full without flooding.
+PLAYBACK_CHUNK_DELAY = 0.04
+
+
+class MyRecognitionCallback(RecognitionCallback):
+    """Forward DashScope ASR sentences to the client as ``ASR:<text>``.
+
+    Sends are scheduled with ``run_coroutine_threadsafe``; presumably the SDK
+    invokes these callbacks from a worker thread rather than the event loop
+    thread (TODO confirm against the DashScope SDK).
+    """
+
+    def __init__(self, websocket: WebSocket, loop: asyncio.AbstractEventLoop):
+        self.websocket = websocket
+        self.loop = loop
+
+    def on_open(self) -> None:
+        print("ASR Session started")
+
+    def on_close(self) -> None:
+        print("ASR Session closed")
+
+    def on_event(self, result: RecognitionResult) -> None:
+        sentence = result.get_sentence()
+        if not sentence:
+            return
+
+        text = sentence['text']
+        print(f"ASR Result: {text}")
+        # Send the recognition result back to the client.
+        try:
+            future = asyncio.run_coroutine_threadsafe(
+                self.websocket.send_text(f"ASR:{text}"),
+                self.loop
+            )
+            future.add_done_callback(self._report_send_error)
+        except Exception as e:
+            print(f"Failed to send ASR result to client: {e}")
+
+    @staticmethod
+    def _report_send_error(future) -> None:
+        """Log a failed send; the try block above only covers scheduling."""
+        if future.cancelled():
+            return
+        error = future.exception()
+        if error is not None:
+            print(f"Failed to send ASR result to client: {error}")
+
+
+def process_chunk_32_to_16(chunk_bytes, gain=1.0):
+    """Convert 32-bit little-endian samples to amplified 16-bit PCM.
+
+    The upper two bytes of every 4-byte sample are read as a signed 16-bit
+    value, multiplied by ``gain`` and clamped to the int16 range. Trailing
+    bytes that do not form a complete 4-byte sample are ignored.
+    """
+    processed_chunk = bytearray()
+    # Stop 3 bytes early so an incomplete trailing sample is skipped.
+    for i in range(0, len(chunk_bytes) - 3, 4):
+        sample = int(struct.unpack_from('<h', chunk_bytes, i + 2)[0] * gain)
+        # Clamp to prevent wrap-around distortion after amplification.
+        sample = max(-32768, min(32767, sample))
+        processed_chunk.extend(struct.pack('<h', sample))
+    return processed_chunk
+
+
+def _start_asr(websocket: WebSocket, loop: asyncio.AbstractEventLoop):
+    """Start a realtime DashScope session; return it, or None on failure."""
+    try:
+        recognition = Recognition(
+            model='paraformer-realtime-v2',
+            format='pcm',
+            sample_rate=16000,
+            callback=MyRecognitionCallback(websocket, loop)
+        )
+        recognition.start()
+        print("DashScope ASR started")
+        return recognition
+    except Exception as e:
+        print(f"Failed to start ASR: {e}")
+        return None
+
+
+def _stop_asr(recognition, announce=False):
+    """Stop ``recognition`` if one is active; errors are logged, not raised."""
+    if not recognition:
+        return
+    try:
+        recognition.stop()
+        if announce:
+            print("DashScope ASR stopped")
+    except Exception as e:
+        print(f"Error stopping ASR: {e}")
+
+
+async def _convert_raw_to_mp3():
+    """Encode RECORDING_RAW_FILE to RECORDING_MP3_FILE with ffmpeg."""
+    # The ffmpeg command line is used to avoid the Python 3.13 audioop issue.
+    cmd = [
+        "ffmpeg",
+        "-y", # Overwrite the output file
+        "-f", "s16le", # Input format: signed 16-bit little endian
+        "-ar", "16000", # Input sample rate
+        "-ac", "1", # Input channel count
+        "-i", RECORDING_RAW_FILE,
+        RECORDING_MP3_FILE
+    ]
+    print(f"Running command: {' '.join(cmd)}")
+
+    try:
+        # An asyncio subprocess keeps the event loop responsive while ffmpeg runs.
+        process = await asyncio.create_subprocess_exec(
+            *cmd,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE
+        )
+        _, stderr = await process.communicate()
+    except FileNotFoundError:
+        print("Error: ffmpeg not found. Please install ffmpeg.")
+        return
+    except Exception as e:
+        print(f"Error converting to MP3: {e}")
+        return
+
+    if process.returncode != 0:
+        print(f"Error converting to MP3: ffmpeg exited with status {process.returncode}")
+        print(f"FFmpeg stderr: {stderr.decode(errors='replace')}")
+        return
+
+    print(f"Saved MP3 to {RECORDING_MP3_FILE}")
+
+
+async def _finish_recording(websocket: WebSocket, processed_audio):
+    """Save the processed recording, encode it, and stream it back."""
+    print(f"Processed audio size: {len(processed_audio)} bytes (Gain: {VOLUME_GAIN}x)")
+
+    # Save the processed 16-bit PCM stream.
+    with open(RECORDING_RAW_FILE, "wb") as f:
+        f.write(processed_audio)
+
+    await _convert_raw_to_mp3()
+
+    # Send the audio back to the client for playback, chunk by chunk.
+    print("Sending audio back...")
+    await websocket.send_text("START_PLAYBACK")
+
+    for start in range(0, len(processed_audio), PLAYBACK_CHUNK_SIZE):
+        chunk = processed_audio[start:start + PLAYBACK_CHUNK_SIZE]
+        await websocket.send_bytes(bytes(chunk))
+        # Short pause so the ESP32 receive buffer is not overrun.
+        await asyncio.sleep(PLAYBACK_CHUNK_DELAY)
+
+    await websocket.send_text("STOP_PLAYBACK")
+    print("Audio sent back finished.")
+
+
+def _locate_font_file():
+    """Return the font path: script directory, then its parent, then CWD."""
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    for candidate in (
+        os.path.join(script_dir, FONT_FILE),
+        os.path.join(script_dir, "..", FONT_FILE),
+    ):
+        if os.path.exists(candidate):
+            return candidate
+    return FONT_FILE
+
+
+async def _handle_font_request(websocket: WebSocket, text):
+    """Answer ``GET_FONT:<hex code>`` with ``FONT_DATA:<hex code>:<hex glyph>``.
+
+    Example request: ``GET_FONT:0xA1A1``. Invalid codes and missing font
+    data are logged and produce no reply.
+    """
+    try:
+        print(f"Font Request Received: {text}")
+        hex_code = text.split(":")[1]
+        code = int(hex_code, 16)
+
+        # GB2312 codes span 0xA1A1 - 0xFEFE: area = high byte - 0xA0,
+        # position = low byte - 0xA0, both 1-based.
+        area = (code >> 8) - 0xA0
+        index = (code & 0xFF) - 0xA0
+
+        # Check the upper bounds too: a position above 94 would otherwise
+        # silently address a glyph belonging to the next area.
+        if not (1 <= area <= GB2312_CELLS and 1 <= index <= GB2312_CELLS):
+            print(f"Invalid GB2312 code: {hex_code} (Area: {area}, Index: {index})")
+            return
+
+        offset = ((area - 1) * GB2312_CELLS + (index - 1)) * GLYPH_BYTES
+
+        # The file is opened per request for simplicity; cache the content if
+        # request volume ever becomes a concern.
+        font_path = _locate_font_file()
+        if not os.path.exists(font_path):
+            print(f"Font file not found: {font_path}")
+            return
+
+        print(f"Reading font from: {font_path} (Offset: {offset})")
+        with open(font_path, "rb") as f:
+            f.seek(offset)
+            font_data = f.read(GLYPH_BYTES)
+
+        if len(font_data) != GLYPH_BYTES:
+            print(f"Error: Read {len(font_data)} bytes for font data (expected 32)")
+            return
+
+        response = f"FONT_DATA:{hex_code}:{font_data.hex()}"
+        print(f"Sending Font Response: {response[:30]}...")
+        await websocket.send_text(response)
+    except Exception as e:
+        print(f"Error handling GET_FONT: {e}")
+
+
+@app.websocket("/ws/audio")
+async def websocket_endpoint(websocket: WebSocket):
+    """Handle one ESP32 client: record, transcribe, play back, serve glyphs.
+
+    Text messages are commands (START_RECORDING, STOP_RECORDING,
+    GET_FONT:<code>); binary messages are raw 32-bit audio samples.
+    """
+    await websocket.accept()
+    print("Client connected")
+
+    recognition = None
+    raw_buffer = bytearray()
+    processed_buffer = bytearray()
+    loop = asyncio.get_running_loop()
+
+    try:
+        while True:
+            # Receive a message (either a text command or binary audio data).
+            try:
+                message = await websocket.receive()
+            except RuntimeError as e:
+                if "Cannot call \"receive\" once a disconnect message has been received" in str(e):
+                    print("Client disconnected (RuntimeError caught)")
+                    break
+                raise
+
+            # receive() returns the disconnect message instead of raising.
+            if message.get("type") == "websocket.disconnect":
+                print("Client disconnected")
+                break
+
+            # Either key may be present with a None value, so test the values.
+            text = message.get("text")
+            data = message.get("bytes")
+
+            if text is not None:
+                print(f"Received text: {text}")
+
+                if text == "START_RECORDING":
+                    print("Start recording...")
+                    # Drop any session left over from a recording that was
+                    # never stopped, then start with empty buffers.
+                    _stop_asr(recognition)
+                    raw_buffer = bytearray()
+                    processed_buffer = bytearray()
+                    recognition = _start_asr(websocket, loop)
+
+                elif text == "STOP_RECORDING":
+                    print(f"Stop recording. Total raw bytes: {len(raw_buffer)}")
+                    _stop_asr(recognition, announce=True)
+                    recognition = None
+                    await _finish_recording(websocket, processed_buffer)
+
+                elif text.startswith("GET_FONT:"):
+                    await _handle_font_request(websocket, text)
+
+            elif data is not None:
+                # Keep the raw bytes and convert them for ASR and playback.
+                raw_buffer.extend(data)
+                pcm_chunk = process_chunk_32_to_16(data, VOLUME_GAIN)
+                processed_buffer.extend(pcm_chunk)
+
+                if recognition:
+                    try:
+                        recognition.send_audio_frame(pcm_chunk)
+                    except Exception as e:
+                        print(f"Error sending audio frame to ASR: {e}")
+
+    except WebSocketDisconnect:
+        print("Client disconnected")
+    except Exception as e:
+        print(f"Error: {e}")
+    finally:
+        # Runs on every exit path, including the break above, so the ASR
+        # session is never left open.
+        _stop_asr(recognition)
+
+
+if __name__ == "__main__":
+    # Show this machine's IP address to make the ESP32 configuration easier.
+    import socket
+    try:
+        local_ip = socket.gethostbyname(socket.gethostname())
+    except OSError:
+        # The address is informational only; do not block startup on it.
+        local_ip = "<server-ip>"
+    print(f"Server running on ws://{local_ip}:8000/ws/audio")
+
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/websocket_server/test_font.py b/websocket_server/test_font.py
new file mode 100644
index 0000000..090c972
--- /dev/null
+++ b/websocket_server/test_font.py
@@ -0,0 +1,49 @@
+import os
+
+# Resolve the font next to this script instead of a machine-specific path.
+FONT_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "GB2312-16.bin")
+
+
+def test_font():
+    """Print size checks and an ASCII rendering of '中' from the font file."""
+    if not os.path.exists(FONT_FILE):
+        print(f"Error: File not found at {FONT_FILE}")
+        return
+
+    file_size = os.path.getsize(FONT_FILE)
+    print(f"Font file size: {file_size} bytes")
+
+    # Expected size for GB2312-16 (94x94 chars * 32 bytes)
+    expected_size = 94 * 94 * 32
+    print(f"Expected size: {expected_size} bytes")
+
+    if file_size != expected_size:
+        print(f"Warning: File size mismatch! (Diff: {file_size - expected_size})")
+
+    # Try to render '中' (0xD6D0)
+    # Area: 0xD6 - 0xA0 = 54
+    # Index: 0xD0 - 0xA0 = 48
+    area = 0xD6 - 0xA0
+    index = 0xD0 - 0xA0
+    offset = ((area - 1) * 94 + (index - 1)) * 32
+
+    print(f"Testing character '中' (0xD6D0)")
+    print(f"Area: {area}, Index: {index}, Offset: {offset}")
+
+    with open(FONT_FILE, "rb") as f:
+        f.seek(offset)
+        data = f.read(32)
+
+    if len(data) != 32:
+        print("Error: Could not read 32 bytes")
+        return
+
+    print("Bitmap data:")
+    for row in range(16):
+        # Each row is 2 bytes (16 bits), leftmost pixel in the top bit.
+        bits = (data[row * 2] << 8) | data[row * 2 + 1]
+        print("".join("##" if (bits >> (15 - col)) & 1 else ".." for col in range(16)))
+
+
+if __name__ == "__main__":
+    test_font()