diff --git a/.trae/rules/rule.md b/.trae/rules/rule.md index 9601240..7a43358 100644 --- a/.trae/rules/rule.md +++ b/.trae/rules/rule.md @@ -1,4 +1,5 @@ 这是一个esp32 s3项目 用的是Micropython 使用的spi7789 方形的屏幕封装 -硬件是基于c++文件夹里的代码改到MicroPython上面 \ No newline at end of file +硬件是基于c++文件夹里的代码改到MicroPython上面 +websocket_server是这个esp32的服务器项目 \ No newline at end of file diff --git a/LICENSE b/LICENSE deleted file mode 100644 index ecd5bbf..0000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2019 Ivan Belokobylskiy - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/audio.py b/audio.py index f3c7eb1..37d2c1a 100644 --- a/audio.py +++ b/audio.py @@ -42,33 +42,70 @@ class AudioPlayer: self.i2s = None def play_tone(self, frequency, duration_ms, volume=0.5): - """播放指定频率的音调""" + """播放指定频率的音调 (优化内存版)""" if self.i2s is None: return sample_rate = self.config.get('sample_rate', 24000) - n_samples = int(sample_rate * duration_ms / 1000) + + if frequency <= 0: + # 静音处理 + time.sleep_ms(duration_ms) + return + + # 振幅 amplitude = int(32767 * volume) - # STEREO: 每个采样 2 个声道 (L+R),每个声道 2 字节 (16-bit) -> 4 字节/帧 - buffer = bytearray(n_samples * 4) - if frequency > 0: - period = sample_rate // frequency - half_period = period // 2 + # 计算单周期采样数 + period = sample_rate // frequency + + # 目标 buffer 大小约 2048 字节 (防止 buffer 只有几字节导致 underrun) + target_size = 2048 + frame_size = 4 # 16bit stereo + + # 计算 buffer 中包含多少个完整周期 + period_bytes = period * frame_size + repeats = max(1, target_size // period_bytes) + buffer_bytes = repeats * period_bytes + + buffer = bytearray(buffer_bytes) + + # 填充 buffer + half_period = period // 2 + + # 预计算采样值的高低字节 + pos_val = amplitude + neg_val = -amplitude + + pos_low = pos_val & 0xFF + pos_high = (pos_val >> 8) & 0xFF + neg_low = neg_val & 0xFF + neg_high = (neg_val >> 8) & 0xFF + + for i in range(period * repeats): + # 方波:前半周期高电平,后半周期低电平 + if (i % period) < half_period: + low, high = pos_low, pos_high + else: + low, high = neg_low, neg_high - for i in range(n_samples): - # 方波:前半周期高电平,后半周期低电平 - sample = amplitude if (i % period) < half_period else -amplitude - # 左声道 - struct.pack_into('>> Initializing Display...") @@ -41,6 +43,14 @@ class Display: if self.tft: self.tft.fill_rect(x, y, w, h, color) + def set_ws(self, ws): + if self.font: + self.font.set_ws(ws) + + def text(self, text, x, y, color): + if self.tft: + self.font.text(self.tft, text, x, y, color) + def init_ui(self): """初始化 UI 背景""" if self.tft: diff --git a/font.py b/font.py new file mode 100644 index 0000000..aa6e669 --- /dev/null +++ b/font.py @@ -0,0 +1,208 @@ +import framebuf +import struct +import time +import binascii + +class Font: + def __init__(self, ws=None): + self.ws = ws + self.cache = {} # Simple cache for font bitmaps: {code: bytes} + + def set_ws(self, ws): + self.ws = ws + + def text(self, tft, text, x, y, color, bg=0x0000): + """ + Draw text on ST7789 display using WebSocket to fetch fonts + """ + # Pre-calculate color bytes + color_bytes = struct.pack(">H", color) + bg_bytes = struct.pack(">H", bg) + + initial_x = x + + for char in text: + # Handle newlines + if char == '\n': + x = initial_x + y += 16 + continue + + # Boundary check + if x + 16 > tft.width: + x = initial_x + y += 16 + if y + 16 > tft.height: + break + + is_chinese = False + buf_data = None + + # Check if it's Chinese + if ord(char) > 127: + try: + gb = char.encode('gb2312') + if len(gb) == 2: + code = struct.unpack('>H', gb)[0] + # Try to get from cache + if code in self.cache: + buf_data = self.cache[code] + is_chinese = True + else: + # Need to fetch from server + # Since we can't block easily here (unless we use a blocking socket recv or a callback mechanism), + # we have to rely on the main loop to handle responses. + # But we want to draw *now*. + # + # Solution: + # 1. Send request + # 2. Wait for response with timeout (blocking wait) + # This is slow for long text but works for small amounts. + + if self.ws: + # Send request: GET_FONT:0xA1A1 + hex_code = "0x{:04X}".format(code) + print(f"Requesting font for {hex_code} ({char})") + self.ws.send(f"GET_FONT:{hex_code}") + + # Wait for response + # We need to peek/read from WS until we get FONT_DATA + buf_data = self._wait_for_font(hex_code) + + if buf_data: + self.cache[code] = buf_data + is_chinese = True + print(f"Font loaded for {hex_code}") + else: + print(f"Font fetch timeout for {hex_code}") + # Fallback: draw question mark or box + self._draw_ascii(tft, '?', x, y, color, bg) + x += 8 + continue # Skip drawing bitmap logic + else: + print("WS not available for font fetch") + except Exception as e: + print(f"Font error: {e}") + pass + + if is_chinese and buf_data: + # Draw Chinese character (16x16) + self._draw_bitmap(tft, buf_data, x, y, 16, 16, color_bytes, bg_bytes) + x += 16 + else: + # Draw ASCII (8x16) using built-in framebuf font (8x8 actually) + # If char is not ASCII, replace with '?' to avoid framebuf errors + if ord(char) > 127: + char = '?' + self._draw_ascii(tft, char, x, y, color, bg) + x += 8 + + def _wait_for_font(self, target_hex_code): + """ + Blocking wait for specific font data from WebSocket. + Timeout 1s. + WARNING: This might consume other messages (like audio playback commands)! + We need to handle them or put them back? + WebSocketClient doesn't support peeking easily. + + This is a limitation. If we receive other messages, we should probably print them or ignore them. + But for ASR result display, usually we are in a state where we just received ASR result and are waiting for TTS. + """ + if not self.ws: + return None + + start = time.ticks_ms() + while time.ticks_diff(time.ticks_ms(), start) < 1000: + # We use a non-blocking poll if possible, but here we want to block until data arrives + # ws.recv() is blocking. + # But we might block forever if server doesn't reply. + # So we should use poll with timeout. + + # Using uselect in main.py, but here we don't have easy access to it unless passed in. + # Let's try a simple approach: set socket timeout temporarily? + # Or use select.poll() + + import uselect + poller = uselect.poll() + poller.register(self.ws.sock, uselect.POLLIN) + events = poller.poll(200) # 200ms timeout + + if events: + try: + msg = self.ws.recv() + if isinstance(msg, str): + if msg.startswith(f"FONT_DATA:{target_hex_code}:"): + # Found it! + hex_data = msg.split(":")[2] + return binascii.unhexlify(hex_data) + elif msg.startswith("FONT_DATA:"): + # Wrong font data? Ignore or cache it? + parts = msg.split(":") + if len(parts) >= 3: + c = int(parts[1], 16) + d = binascii.unhexlify(parts[2]) + self.cache[c] = d + else: + # Other message, e.g. START_PLAYBACK + # We can't put it back easily. + # For now, just print it and ignore (it will be lost!) + # ideally we should have a message queue. + print(f"Ignored msg during font fetch: {msg}") + except: + pass + + return None + + def _draw_bitmap(self, tft, bitmap, x, y, w, h, color_bytes, bg_bytes): + # Convert 1bpp bitmap to RGB565 buffer + # bitmap length is w * h / 8 = 32 bytes for 16x16 + + # Optimize buffer allocation + rgb_buf = bytearray(w * h * 2) + idx = 0 + for byte in bitmap: + for i in range(7, -1, -1): + if (byte >> i) & 1: + rgb_buf[idx] = color_bytes[0] + rgb_buf[idx+1] = color_bytes[1] + else: + rgb_buf[idx] = bg_bytes[0] + rgb_buf[idx+1] = bg_bytes[1] + idx += 2 + tft.blit_buffer(rgb_buf, x, y, w, h) + + def _draw_ascii(self, tft, char, x, y, color, bg): + # Use framebuf for ASCII + w, h = 8, 8 + buf = bytearray(w * h // 8) + fb = framebuf.FrameBuffer(buf, w, h, framebuf.MONO_VLSB) + fb.fill(0) + fb.text(char, 0, 0, 1) + + # Since framebuf.text is 8x8, we center it vertically in 16px height + # Drawing pixel by pixel is slow but compatible + # To optimize, we can build a small buffer + + # Create a 8x16 RGB565 buffer + rgb_buf = bytearray(8 * 16 * 2) + # Fill with background + bg_high, bg_low = bg >> 8, bg & 0xFF + color_high, color_low = color >> 8, color & 0xFF + + for i in range(0, len(rgb_buf), 2): + rgb_buf[i] = bg_high + rgb_buf[i+1] = bg_low + + # Draw the 8x8 character into the buffer (centered) + # MONO_VLSB: each byte is a column of 8 pixels + for col in range(8): # 0..7 + byte = buf[col] + for row in range(8): # 0..7 + if (byte >> row) & 1: + # Calculate position in rgb_buf + # Target: x=col, y=row+4 + pos = ((row + 4) * 8 + col) * 2 + rgb_buf[pos] = color_high + rgb_buf[pos+1] = color_low + + tft.blit_buffer(rgb_buf, x, y, 8, 16) diff --git a/main.py b/main.py index 1786af0..d3d85ac 100644 --- a/main.py +++ b/main.py @@ -4,15 +4,195 @@ import math import struct import array import gc +import network import st7789py as st7789 from config import CURRENT_CONFIG from audio import AudioPlayer, Microphone from display import Display +from websocket_client import WebSocketClient +import uselect + +# ============================================================================= +# 网络配置 +# ============================================================================= +WIFI_SSID = "Tangledup-AI" +WIFI_PASS = "djt12345678" +# 请修改为你的电脑 IP 地址 +SERVER_IP = "6.6.6.88" +SERVER_PORT = 8000 +SERVER_URL = f"ws://{SERVER_IP}:{SERVER_PORT}/ws/audio" + +def diagnose_wifi(): + """ + 诊断WiFi模块状态,打印详细的调试信息 + """ + print("\n" + "="*50) + print("WiFi DIAGNOSTIC INFORMATION") + print("="*50) + + wlan = network.WLAN(network.STA_IF) + + # 基本状态 + print(f"WiFi Module Active: {wlan.active()}") + print(f"Connection Status: {wlan.isconnected()}") + + if wlan.isconnected(): + print(f"Network Config: {wlan.ifconfig()}") + print(f"Network SSID: {wlan.config('essid')}") + print(f"Signal Strength: {wlan.status('rssi')} dBm") + + # 扫描可用网络 + try: + print("\nScanning for available networks...") + wlan.active(True) + time.sleep(1) + + networks = wlan.scan() + print(f"Found {len(networks)} networks:") + + for net in networks: + ssid = net[0].decode('utf-8') if net[0] else "Hidden" + bssid = ':'.join(['%02x' % b for b in net[1]]) + channel = net[2] + rssi = net[3] + security = net[4] + + # 标记目标网络 + marker = " [TARGET]" if ssid == WIFI_SSID else "" + + print(f" {ssid}{marker}") + print(f" BSSID: {bssid}, Channel: {channel}, RSSI: {rssi}dBm") + + # 信号强度解释 + if rssi > -50: + signal_desc = "Excellent" + elif rssi > -60: + signal_desc = "Good" + elif rssi > -70: + signal_desc = "Fair" + else: + signal_desc = "Weak" + + print(f" Signal: {signal_desc}") + print("") + + except Exception as e: + print(f"Network scan failed: {e}") + + print("="*50 + "\n") + + +def connect_wifi(max_retries=3): + """ + 连接WiFi网络,包含完整的错误处理和重试机制 + + Args: + max_retries: 最大重试次数,默认为3次 + + Returns: + bool: 连接成功返回True,失败返回False + """ + wlan = network.WLAN(network.STA_IF) + + # 首先确保WiFi模块处于干净状态 + try: + wlan.active(False) # 先关闭WiFi + time.sleep(1) # 等待1秒让模块完全关闭 + wlan.active(True) # 重新激活WiFi + time.sleep(1) # 等待模块初始化完成 + except Exception as e: + print(f"WiFi module initialization error: {e}") + return False + + # 尝试连接,包含重试机制 + for attempt in range(max_retries): + try: + print(f"WiFi connection attempt {attempt + 1}/{max_retries}") + + # 检查是否已连接 + if wlan.isconnected(): + print('Already connected to WiFi') + print('Network config:', wlan.ifconfig()) + return True + + # 尝试连接 + print(f'Connecting to WiFi {WIFI_SSID}...') + wlan.connect(WIFI_SSID, WIFI_PASS) + + # 等待连接完成,设置超时 + start_time = time.time() + while not wlan.isconnected(): + if time.time() - start_time > 20: # 单次连接超时20秒 + print("WiFi connection timeout!") + break + time.sleep(0.5) + print(".", end="") + + print("") # 换行 + + # 检查连接结果 + if wlan.isconnected(): + print('WiFi connected successfully!') + print('Network config:', wlan.ifconfig()) + return True + else: + print(f"Connection attempt {attempt + 1} failed") + + # 在重试前进行清理 + if attempt < max_retries - 1: # 如果不是最后一次尝试 + print("Resetting WiFi module for retry...") + wlan.disconnect() # 断开连接 + time.sleep(2) # 等待2秒 + + except OSError as e: + print(f"WiFi connection error on attempt {attempt + 1}: {e}") + if "Wifi Internal State Error" in str(e): + print("Detected internal state error, resetting WiFi module...") + try: + wlan.active(False) + time.sleep(2) + wlan.active(True) + time.sleep(1) + except: + pass + + if attempt < max_retries - 1: + print(f"Retrying in 3 seconds...") + time.sleep(3) + + except Exception as e: + print(f"Unexpected error on attempt {attempt + 1}: {e}") + if attempt < max_retries - 1: + time.sleep(2) + + # 所有尝试都失败 + print("All WiFi connection attempts failed!") + try: + wlan.active(False) # 关闭WiFi模块节省电力 + except: + pass + return False # ============================================================================= # 硬件引脚配置 (从 config.py 获取) # ============================================================================= +def print_nice_asr(text, display=None): + """在终端美观地打印ASR结果,并在屏幕显示""" + print("\n" + "*"*40) + print(" ASR RESULT:") + print(f" {text}") + print("*"*40 + "\n") + + if display and display.tft: + # 清除之前的文本区域 (保留顶部的状态栏和底部的可视化条) + # 假设状态栏 30px,底部 240-200=40px 用于可视化? + # init_ui 画了 0-30 的白条。 + # update_audio_bar 在 240-bar_height 画条。 + # 我们使用中间区域 40 - 200 + display.fill_rect(0, 40, 240, 160, st7789.BLACK) + display.text(text, 0, 40, st7789.WHITE) + def main(): print("\n" + "="*40) print("AUDIO & MIC DIAGNOSTIC V5 (Modular & Clean)") @@ -35,7 +215,44 @@ def main(): speaker = AudioPlayer() if speaker.i2s: # 默认播放马里奥 - speaker.play_mario() + # speaker.play_mario() + + # 播放简单方波 (1kHz, 1秒) + # 直接在 main.py 中实现分块播放,避免因 audio.py 未同步导致的 MemoryError + print("Playing 1kHz square wave...") + try: + import struct + + # 1. 参数设置 + sr = 24000 # 默认采样率 + if hasattr(speaker, 'config') and speaker.config: + sr = speaker.config.get('sample_rate', 24000) + freq = 1000 + duration = 1000 # ms + vol = 10000 # 音量 (max 32767) + + # 2. 准备缓冲区 (只生成一小段,循环播放) + # 1kHz @ 24kHz -> 24 samples/cycle + period = sr // freq + # 生成约 1000 字节的 buffer (包含整数个周期) + cycles_in_buf = 10 + buf = bytearray(period * cycles_in_buf * 4) # 16bit stereo = 4 bytes/frame + + # 3. 填充方波数据 + for i in range(period * cycles_in_buf): + # 方波逻辑 + sample = vol if (i % period) < (period // 2) else -vol + # 写入左右声道 (Little Endian, 16-bit signed) + struct.pack_into(' 0: + speaker.i2s.write(buf) + + except Exception as e: + print(f"Tone error: {e}") + else: print("!!! Speaker initialization failed") @@ -57,7 +274,49 @@ def main(): # 录音状态变量 is_recording = False - recorded_chunks = [] + + # WebSocket 连接 + ws = None + + # 定义连接函数 + def connect_ws(): + nonlocal ws + # Reset existing connection object to ensure clean slate + try: + if ws: + ws.close() + except: + pass + ws = None + + try: + print(f"Connecting to WebSocket Server: {SERVER_URL}") + ws = WebSocketClient(SERVER_URL) + print("WebSocket connected successfully!") + + # Pass WebSocket to display for font loading + if display: + display.set_ws(ws) + + return True + except Exception as e: + print(f"WebSocket connection failed: {e}") + return False + + # 先运行WiFi诊断 + print("Running WiFi diagnostics...") + diagnose_wifi() + + # 尝试连接WiFi + print("Starting WiFi connection process...") + if connect_wifi(max_retries=3): + print("WiFi connected successfully!") + connect_ws() + else: + print("WiFi connection failed after all attempts!") + print("Continuing in offline mode without WebSocket functionality...") + print("You can still use the device for local audio recording and visualization.") + # 调试:打印一次 Boot 键状态 print(f"Boot Button Initial State: {boot_btn.value()}") @@ -86,68 +345,151 @@ def main(): if not is_recording: print("\n>>> Start Recording (Boot Pressed)...") is_recording = True - recorded_chunks = [] if display.tft: print(">>> Filling Screen WHITE") display.fill(st7789.WHITE) else: print(">>> Display TFT is None!") + + # 尝试重连 WS + if ws is None or not ws.is_connected(): + print(">>> WS not connected, trying to reconnect...") + connect_ws() + + # 发送开始录音指令 + if ws and ws.is_connected(): + try: + ws.send("START_RECORDING") + except Exception as e: + print(f"WS Send Error: {e}") + ws = None # Disconnect on error + else: + print(">>> Warning: No WebSocket connection! Audio will be discarded.") - # 录音 + # 录音并流式传输 if mic.i2s: num_read = mic.readinto(read_buf) if num_read > 0: - try: - recorded_chunks.append(bytes(read_buf[:num_read])) - except MemoryError: - print("Memory Full!") + if ws and ws.is_connected(): + try: + # 发送二进制数据 + ws.send(read_buf[:num_read], opcode=2) + + # 检查是否有回传的 ASR 结果 (非阻塞) + poller = uselect.poll() + poller.register(ws.sock, uselect.POLLIN) + events = poller.poll(0) # 0 = return immediately + if events: + msg = ws.recv() + if isinstance(msg, str) and msg.startswith("ASR:"): + print_nice_asr(msg[4:], display) + + except Exception as e: + print(f"WS Send/Recv Error: {e}") + # 如果发送失败,视为断开 + try: + ws.close() + except: + pass + ws = None + else: + # 如果没有 WS,就不保存了,避免内存溢出 + pass + continue # 跳过可视化逻辑 # === 按键释放处理 === elif is_recording: - print(f"\n>>> Stop Recording. Captured {len(recorded_chunks)} chunks.") + print(f"\n>>> Stop Recording.") is_recording = False if display.tft: display.init_ui() - # 播放录音 - if speaker.i2s and len(recorded_chunks) > 0: - print(">>> Playing...") + # 停止录音并等待回放 + if ws: try: - cfg = speaker.config - # 重新初始化 Speaker (16kHz Mono 16-bit) 以匹配 Mic 数据 - speaker.i2s.deinit() - speaker.i2s = machine.I2S( - 0, - sck=machine.Pin(cfg['bck']), - ws=machine.Pin(cfg['ws']), - sd=machine.Pin(cfg['sd']), - mode=machine.I2S.TX, - bits=16, - format=machine.I2S.MONO, - rate=16000, - ibuf=20000, - ) + print(">>> Sending STOP & Waiting for playback...") + ws.send("STOP_RECORDING") - # 播放数据 - for chunk in recorded_chunks: - # 32-bit Mono -> 16-bit Mono (取高16位) - # chunk 是 bytes, 转为 array('h') 方便访问 16-bit word - # 32-bit 数据: LowWord, HighWord - # 我们需要 HighWord - arr = array.array('h', chunk) - samples = arr[1::2] - speaker.i2s.write(samples) + # 重新初始化 Speaker (16kHz Mono 16-bit) + if speaker.i2s: + cfg = speaker.config + speaker.i2s.deinit() + speaker.i2s = machine.I2S( + 0, + sck=machine.Pin(cfg['bck']), + ws=machine.Pin(cfg['ws']), + sd=machine.Pin(cfg['sd']), + mode=machine.I2S.TX, + bits=16, + format=machine.I2S.MONO, + rate=16000, + ibuf=40000, + ) + + # 接收回放循环 + playback_timeout = 5000 # 5秒无数据则退出 + last_data_time = time.ticks_ms() + + while True: + # Check for data with timeout + poller = uselect.poll() + poller.register(ws.sock, uselect.POLLIN) + events = poller.poll(100) # 100ms wait + + if events: + msg = ws.recv() + last_data_time = time.ticks_ms() + + if isinstance(msg, str): + if msg == "START_PLAYBACK": + print(">>> Server starting playback stream...") + continue + elif msg == "STOP_PLAYBACK": + print(">>> Server finished playback.") + break + elif msg.startswith("ASR:"): + print_nice_asr(msg[4:], display) + + elif isinstance(msg, bytes): + # 播放接收到的音频数据 + if speaker.i2s: + # 使用 try-except 防止 write 阻塞导致的问题 + try: + speaker.i2s.write(msg) + except Exception as e: + print(f"I2S Write Error: {e}") + + elif msg is None: + print("WS Connection closed or error (recv returned None)") + try: + ws.close() + except: + pass + ws = None + break + else: + # No data received in this poll window + if time.ticks_diff(time.ticks_ms(), last_data_time) > playback_timeout: + print("Playback timeout - no data received for 5 seconds") + break + + # Feed watchdog or do other small tasks if needed + # time.sleep(0.01) except Exception as e: - print(f"Playback error: {e}") + print(f"Playback loop error: {e}") + try: + ws.close() + except: + pass + ws = None # 恢复 Speaker 原始配置 if speaker.i2s: speaker.i2s.deinit() speaker._init_audio() - recorded_chunks = [] gc.collect() # === 原有的可视化逻辑 === @@ -178,10 +520,7 @@ def main(): last_print = time.ticks_ms() if display.tft: - # 调整缩放比例,让显示更敏感 - # 你的日志显示安静时 Max ~2000-3000, 说话时 Max ~40000 - # 我们可以把 Max 40000 映射到满格 - + # 调整缩放比例 bar_height = int((max_val / 40000) * 200) if bar_height > 200: bar_height = 200 if bar_height < 0: bar_height = 0 diff --git a/websocket_client.py b/websocket_client.py new file mode 100644 index 0000000..8c4cbed --- /dev/null +++ b/websocket_client.py @@ -0,0 +1,178 @@ +import usocket as socket +import ubinascii +import uos + +class WebSocketError(Exception): + pass + +class WebSocketClient: + def __init__(self, uri, timeout=5): + self.sock = None + self.uri = uri + self.timeout = timeout + self.connect() + + def connect(self): + uri = self.uri + assert uri.startswith("ws://") + + uri = uri[5:] + if "/" in uri: + host, path = uri.split("/", 1) + else: + host, path = uri, "" + path = "/" + path + + if ":" in host: + host, port = host.split(":") + port = int(port) + else: + port = 80 + + print(f"Connecting to {host}:{port}{path}...") + self.sock = socket.socket() + + # Add timeout + self.sock.settimeout(self.timeout) + + addr_info = socket.getaddrinfo(host, port) + addr = addr_info[0][-1] + print(f"Resolved address: {addr}") + + try: + self.sock.connect(addr) + except OSError as e: + print(f"Socket connect failed: {e}") + if e.args[0] == 113: + print("Hint: Check firewall settings on server or if server is running.") + raise + + # Random key + key = ubinascii.b2a_base64(uos.urandom(16)).strip() + + + req = "GET {} HTTP/1.1\r\n".format(path) + req += "Host: {}:{}\r\n".format(host, port) + req += "Connection: Upgrade\r\n" + req += "Upgrade: websocket\r\n" + req += "Sec-WebSocket-Key: {}\r\n".format(key.decode()) + req += "Sec-WebSocket-Version: 13\r\n" + req += "\r\n" + + self.sock.write(req.encode()) + + # Read handshake response + header = b"" + while b"\r\n\r\n" not in header: + chunk = self.sock.read(1) + if not chunk: + raise WebSocketError("Connection closed during handshake") + header += chunk + + if b" 101 " not in header: + raise WebSocketError("Handshake failed: " + header.decode()) + + print("WebSocket connected!") + + def is_connected(self): + return self.sock is not None + + def send(self, data, opcode=1): # 1=Text, 2=Binary + if not self.sock: + print("WebSocket is not connected (send called on closed socket)") + raise WebSocketError("Connection closed") + + if isinstance(data, str): + data = data.encode('utf-8') + + header = bytearray() + header.append(0x80 | opcode) # FIN + Opcode + + length = len(data) + if length < 126: + header.append(0x80 | length) # Masked + length + elif length < 65536: + header.append(0x80 | 126) + header.extend(length.to_bytes(2, 'big')) + else: + header.append(0x80 | 127) + header.extend(length.to_bytes(8, 'big')) + + mask = uos.urandom(4) + header.extend(mask) + + masked_data = bytearray(length) + for i in range(length): + masked_data[i] = data[i] ^ mask[i % 4] + + self.sock.write(header) + self.sock.write(masked_data) + + def recv(self): + # Read header + try: + # Read 2 bytes at once + header = self.sock.read(2) + if not header or len(header) < 2: return None + + b1 = header[0] + b2 = header[1] + + fin = b1 & 0x80 + opcode = b1 & 0x0f + + mask = b2 & 0x80 + length = b2 & 0x7f + + if length == 126: + length_bytes = self.sock.read(2) + if not length_bytes: return None + length = int.from_bytes(length_bytes, 'big') + elif length == 127: + length_bytes = self.sock.read(8) + if not length_bytes: return None + length = int.from_bytes(length_bytes, 'big') + + if mask: + mask_key = self.sock.read(4) + if not mask_key: return None + + # Read payload + data = bytearray(length) + view = memoryview(data) + pos = 0 + while pos < length: + read_len = self.sock.readinto(view[pos:]) + if read_len == 0: + return None + pos += read_len + + if mask: + unmasked = bytearray(length) + for i in range(length): + unmasked[i] = data[i] ^ mask_key[i % 4] + data = unmasked + + if opcode == 1: # Text + return data.decode('utf-8') + elif opcode == 2: # Binary + return data + elif opcode == 8: # Close + self.close() + return None + elif opcode == 9: # Ping + self.send(data, opcode=10) # Pong + return self.recv() + + return data + + except Exception as e: + # Don't print timeout errors as they are expected in non-blocking polling + if "ETIMEDOUT" not in str(e) and "110" not in str(e): + print(f"WS Recv Error: {e}") + return None + + def close(self): + if self.sock: + self.sock.close() + self.sock = None diff --git a/websocket_server/.env b/websocket_server/.env new file mode 100644 index 0000000..4e472f7 --- /dev/null +++ b/websocket_server/.env @@ -0,0 +1 @@ +DASHSCOPE_API_KEY=sk-a294f382488d46a1aa0d7cd8e750729b \ No newline at end of file diff --git a/websocket_server/GB2312-16.bin b/websocket_server/GB2312-16.bin new file mode 100644 index 0000000..75b182a Binary files /dev/null and b/websocket_server/GB2312-16.bin differ diff --git a/websocket_server/README.md b/websocket_server/README.md new file mode 100644 index 0000000..3a670c1 --- /dev/null +++ b/websocket_server/README.md @@ -0,0 +1,31 @@ +# WebSocket Audio Server + +This is a FastAPI server that receives audio from an ESP32 via WebSocket, saves it, processes it (converts 32-bit to 16-bit), and sends it back for playback. + +## Installation + +1. Install dependencies: + ```bash + pip install -r requirements.txt + ``` + +## Usage + +1. Start the server: + ```bash + python server.py + ``` + Or: + ```bash + uvicorn server:app --host 0.0.0.0 --port 8000 + ``` + +2. Update the IP address in `main.py` on your ESP32 to match your computer's IP address. + Look for `SERVER_IP` variable. + +## Features + +- Receives raw audio stream from ESP32. +- Saves raw audio to `received_audio.raw`. +- Converts 32-bit audio (from ICS-43434) to 16-bit audio (for MAX98357A). +- Streams processed audio back to ESP32 for playback. diff --git a/websocket_server/__pycache__/server.cpython-312.pyc b/websocket_server/__pycache__/server.cpython-312.pyc new file mode 100644 index 0000000..6d9d5fe Binary files /dev/null and b/websocket_server/__pycache__/server.cpython-312.pyc differ diff --git a/websocket_server/__pycache__/server.cpython-313.pyc b/websocket_server/__pycache__/server.cpython-313.pyc new file mode 100644 index 0000000..e4204b4 Binary files /dev/null and b/websocket_server/__pycache__/server.cpython-313.pyc differ diff --git a/websocket_server/generate_font.py b/websocket_server/generate_font.py new file mode 100644 index 0000000..ba28aa5 --- /dev/null +++ b/websocket_server/generate_font.py @@ -0,0 +1,127 @@ +import struct +import freetype +import os + +# Font file and output file +FONT_FILE = "/Users/jeremygan/Desktop/python_dev/epaper2/websocket_server/GB2312.ttf" +OUTPUT_FILE = "/Users/jeremygan/Desktop/python_dev/epaper2/GB2312-16.bin" + +# Font size (16x16) +FONT_SIZE = 16 + +def create_gb2312_font(): + # Load the face + try: + face = freetype.Face(FONT_FILE) + except Exception as e: + print(f"Error loading font: {e}") + return + + # Set char size + face.set_pixel_sizes(FONT_SIZE, FONT_SIZE) + + print(f"Generating GB2312 font file: {OUTPUT_FILE}") + + with open(OUTPUT_FILE, 'wb') as f: + # Iterate through GB2312 code points + # Area: 0xA1 - 0xFE (161 - 254) -> 94 areas + # Index: 0xA1 - 0xFE (161 - 254) -> 94 chars per area + + count = 0 + total_chars = 94 * 94 + + # Buffer for empty char (32 bytes of 0x00) + empty_char = b'\x00' * 32 + + for area in range(0xA1, 0xFF): + for index in range(0xA1, 0xFF): + # Construct GB2312 code + gb_code = bytes([area, index]) + + try: + # Decode to unicode character + char = gb_code.decode('gb2312') + + # Load glyph + face.load_char(char, freetype.FT_LOAD_RENDER | freetype.FT_LOAD_TARGET_MONO) + bitmap = face.glyph.bitmap + + # Convert bitmap to 32 bytes (16x16 / 8) + # The bitmap.buffer is a flat list of bytes. + # For mono rendering, each byte is 0 or 255? No, it's packed? + # FT_LOAD_TARGET_MONO packs 8 pixels into 1 byte. + + # We need to ensure it's 16x16. + # Center the glyph in 16x16 box. + + glyph_width = bitmap.width + glyph_rows = bitmap.rows + glyph_pitch = bitmap.pitch + + # Create a 16x16 buffer (32 bytes) + char_buffer = bytearray(32) + + # Calculate offsets to center + x_off = (FONT_SIZE - glyph_width) // 2 + # Vertical alignment is tricky. Let's use bearing Y or just center based on rows. + # A better way is using face.glyph.bitmap_top + # But for fixed height font generation, usually we just center or align baseline. + # Let's try simple centering for now. + y_off = (FONT_SIZE - glyph_rows) // 2 + # Adjust y_off if it's too high/low? + # Let's align to baseline approximately. + # Usually baseline is at 12-13px for 16px font. + # face.size.ascender might help but let's stick to bitmap center for simplicity first. + + # Copy bitmap to buffer + src_buf = bitmap.buffer + + for row in range(glyph_rows): + # Target row + dst_row = row + y_off + if dst_row < 0 or dst_row >= FONT_SIZE: + continue + + # Source row bytes + # pitch is bytes per row + src_start = row * glyph_pitch + + # We need to copy bits. + # This is getting complicated because FreeType mono bitmap format + # might not match our target format exactly (MSB/LSB). + # Let's iterate pixels. + + for col in range(glyph_width): + dst_col = col + x_off + if dst_col < 0 or dst_col >= FONT_SIZE: + continue + + # Get pixel from src + byte_idx = src_start + (col >> 3) + bit_idx = 7 - (col & 7) + pixel = (src_buf[byte_idx] >> bit_idx) & 1 + + if pixel: + # Set pixel in dst + # format: row by row, 2 bytes per row. + # row 0: byte 0, byte 1 + # byte 0: bits 0-7 (left to right) -> wait, usually MSB is left. + dst_byte_idx = dst_row * 2 + (dst_col >> 3) + dst_bit_idx = 7 - (dst_col & 7) + char_buffer[dst_byte_idx] |= (1 << dst_bit_idx) + + f.write(char_buffer) + count += 1 + + except Exception: + # Character not found or decode error + f.write(empty_char) + + # Progress + if count % 1000 == 0: + print(f"Processed {count} characters...") + + print(f"Done! Generated {OUTPUT_FILE} with size {os.path.getsize(OUTPUT_FILE)} bytes.") + +if __name__ == "__main__": + create_gb2312_font() diff --git a/websocket_server/received_audio.mp3 b/websocket_server/received_audio.mp3 new file mode 100644 index 0000000..fb471c3 Binary files /dev/null and b/websocket_server/received_audio.mp3 differ diff --git a/websocket_server/received_audio.raw b/websocket_server/received_audio.raw new file mode 100644 index 0000000..8d06a40 Binary files /dev/null and b/websocket_server/received_audio.raw differ diff --git a/websocket_server/requirements.txt b/websocket_server/requirements.txt new file mode 100644 index 0000000..c69469c --- /dev/null +++ b/websocket_server/requirements.txt @@ -0,0 +1,6 @@ +fastapi +uvicorn +websockets +pydub +dashscope +python-dotenv diff --git a/websocket_server/server.py b/websocket_server/server.py new file mode 100644 index 0000000..4424f28 --- /dev/null +++ b/websocket_server/server.py @@ -0,0 +1,277 @@ +from fastapi import FastAPI, WebSocket, WebSocketDisconnect +import uvicorn +import asyncio +import os +import subprocess +import struct +from dotenv import load_dotenv +import dashscope +from dashscope.audio.asr import Recognition, RecognitionCallback, RecognitionResult +import json + +# 加载环境变量 +load_dotenv() +dashscope.api_key = os.getenv("DASHSCOPE_API_KEY") + +app = FastAPI() + +# 存储接收到的音频数据 +audio_buffer = bytearray() +RECORDING_RAW_FILE = "received_audio.raw" +RECORDING_MP3_FILE = "received_audio.mp3" +VOLUME_GAIN = 10.0 # 放大倍数 +FONT_FILE = "GB2312-16.bin" + +class MyRecognitionCallback(RecognitionCallback): + def __init__(self, websocket: WebSocket, loop: asyncio.AbstractEventLoop): + self.websocket = websocket + self.loop = loop + + def on_open(self) -> None: + print("ASR Session started") + + def on_close(self) -> None: + print("ASR Session closed") + + def on_event(self, result: RecognitionResult) -> None: + if result.get_sentence(): + text = result.get_sentence()['text'] + print(f"ASR Result: {text}") + # 将识别结果发送回客户端 + try: + asyncio.run_coroutine_threadsafe( + self.websocket.send_text(f"ASR:{text}"), + self.loop + ) + except Exception as e: + print(f"Failed to send ASR result to client: {e}") + +def process_chunk_32_to_16(chunk_bytes, gain=1.0): + processed_chunk = bytearray() + # Iterate 4 bytes at a time + for i in range(0, len(chunk_bytes), 4): + if i+3 < len(chunk_bytes): + # 取 chunk[i+2] 和 chunk[i+3] 组成 16-bit signed int + sample = struct.unpack_from(' 32767: sample = 32767 + elif sample < -32768: sample = -32768 + + # 重新打包为 16-bit little-endian + processed_chunk.extend(struct.pack('> 8) - 0xA0 + index = (code & 0xFF) - 0xA0 + + if area >= 1 and index >= 1: + offset = ((area - 1) * 94 + (index - 1)) * 32 + + # 读取字体文件 + # 注意:这里为了简单,每次都打开文件。如果并发高,应该缓存文件句柄或内容。 + # 假设字体文件在当前目录或上级目录 + # Prioritize finding the file in the script's directory + script_dir = os.path.dirname(os.path.abspath(__file__)) + font_path = os.path.join(script_dir, FONT_FILE) + + # Fallback: check one level up + if not os.path.exists(font_path): + font_path = os.path.join(script_dir, "..", FONT_FILE) + + # Fallback: check current working directory + if not os.path.exists(font_path): + font_path = FONT_FILE + + if os.path.exists(font_path): + print(f"Reading font from: {font_path} (Offset: {offset})") + with open(font_path, "rb") as f: + f.seek(offset) + font_data = f.read(32) + + if len(font_data) == 32: + import binascii + hex_data = binascii.hexlify(font_data).decode('utf-8') + response = f"FONT_DATA:{hex_code}:{hex_data}" + print(f"Sending Font Response: {response[:30]}...") + await websocket.send_text(response) + else: + print(f"Error: Read {len(font_data)} bytes for font data (expected 32)") + else: + print(f"Font file not found: {font_path}") + else: + print(f"Invalid GB2312 code: {hex_code} (Area: {area}, Index: {index})") + except Exception as e: + print(f"Error handling GET_FONT: {e}") + + elif "bytes" in message: + # 接收音频数据并追加到缓冲区 + data = message["bytes"] + audio_buffer.extend(data) + + # 实时处理并发送给 ASR + pcm_chunk = process_chunk_32_to_16(data, VOLUME_GAIN) + processed_buffer.extend(pcm_chunk) + + if recognition: + try: + recognition.send_audio_frame(pcm_chunk) + except Exception as e: + print(f"Error sending audio frame to ASR: {e}") + + except WebSocketDisconnect: + print("Client disconnected") + if recognition: + try: + recognition.stop() + except: + pass + except Exception as e: + print(f"Error: {e}") + if recognition: + try: + recognition.stop() + except: + pass + +if __name__ == "__main__": + # 获取本机IP,方便ESP32连接 + import socket + hostname = socket.gethostname() + local_ip = socket.gethostbyname(hostname) + print(f"Server running on ws://{local_ip}:8000/ws/audio") + + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/websocket_server/test_font.py b/websocket_server/test_font.py new file mode 100644 index 0000000..090c972 --- /dev/null +++ b/websocket_server/test_font.py @@ -0,0 +1,55 @@ +import os + +FONT_FILE = "/Users/jeremygan/Desktop/python_dev/epaper2/websocket_server/GB2312-16.bin" + +def test_font(): + if not os.path.exists(FONT_FILE): + print(f"Error: File not found at {FONT_FILE}") + return + + file_size = os.path.getsize(FONT_FILE) + print(f"Font file size: {file_size} bytes") + + # Expected size for GB2312-16 (94x94 chars * 32 bytes) + expected_size = 94 * 94 * 32 + print(f"Expected size: {expected_size} bytes") + + if file_size != expected_size: + print(f"Warning: File size mismatch! (Diff: {file_size - expected_size})") + + # Try to render '中' (0xD6D0) + # Area: 0xD6 - 0xA0 = 54 + # Index: 0xD0 - 0xA0 = 48 + area = 0xD6 - 0xA0 + index = 0xD0 - 0xA0 + offset = ((area - 1) * 94 + (index - 1)) * 32 + + print(f"Testing character '中' (0xD6D0)") + print(f"Area: {area}, Index: {index}, Offset: {offset}") + + with open(FONT_FILE, "rb") as f: + f.seek(offset) + data = f.read(32) + + if len(data) != 32: + print("Error: Could not read 32 bytes") + return + + print("Bitmap data:") + for i in range(16): + # Each row is 2 bytes (16 bits) + byte1 = data[i*2] + byte2 = data[i*2+1] + + # Print as bits + line = "" + for b in range(8): + if (byte1 >> (7-b)) & 1: line += "##" + else: line += ".." + for b in range(8): + if (byte2 >> (7-b)) & 1: line += "##" + else: line += ".." + print(line) + +if __name__ == "__main__": + test_font()