This commit is contained in:
jeremygan2021
2026-03-02 22:43:04 +08:00
parent c0882a93a9
commit e0776a1839
18 changed files with 1331 additions and 82 deletions

View File

@@ -1,4 +1,5 @@
这是一个esp32 s3项目
用的是Micropython
使用的spi7789 方形的屏幕封装
硬件是基于c++文件夹里的代码改到MicroPython上面
硬件是基于c++文件夹里的代码改到MicroPython上面
websocket_server是这个esp32的服务器项目

21
LICENSE
View File

@@ -1,21 +0,0 @@
MIT License
Copyright (c) 2019 Ivan Belokobylskiy
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -42,33 +42,70 @@ class AudioPlayer:
self.i2s = None
def play_tone(self, frequency, duration_ms, volume=0.5):
"""播放指定频率的音调"""
"""播放指定频率的音调 (优化内存版)"""
if self.i2s is None: return
sample_rate = self.config.get('sample_rate', 24000)
n_samples = int(sample_rate * duration_ms / 1000)
if frequency <= 0:
# 静音处理
time.sleep_ms(duration_ms)
return
# 振幅
amplitude = int(32767 * volume)
# STEREO: 每个采样 2 个声道 (L+R),每个声道 2 字节 (16-bit) -> 4 字节/帧
buffer = bytearray(n_samples * 4)
if frequency > 0:
period = sample_rate // frequency
half_period = period // 2
# 计算单周期采样数
period = sample_rate // frequency
# 目标 buffer 大小约 2048 字节 (防止 buffer 只有几字节导致 underrun)
target_size = 2048
frame_size = 4 # 16bit stereo
# 计算 buffer 中包含多少个完整周期
period_bytes = period * frame_size
repeats = max(1, target_size // period_bytes)
buffer_bytes = repeats * period_bytes
buffer = bytearray(buffer_bytes)
# 填充 buffer
half_period = period // 2
# 预计算采样值的高低字节
pos_val = amplitude
neg_val = -amplitude
pos_low = pos_val & 0xFF
pos_high = (pos_val >> 8) & 0xFF
neg_low = neg_val & 0xFF
neg_high = (neg_val >> 8) & 0xFF
for i in range(period * repeats):
# 方波:前半周期高电平,后半周期低电平
if (i % period) < half_period:
low, high = pos_low, pos_high
else:
low, high = neg_low, neg_high
for i in range(n_samples):
# 方波:前半周期高电平,后半周期低电平
sample = amplitude if (i % period) < half_period else -amplitude
# 左声道
struct.pack_into('<h', buffer, i * 4, sample)
# 右声道
struct.pack_into('<h', buffer, i * 4 + 2, sample)
else:
# 静音缓冲区默认为0
pass
idx = i * 4
buffer[idx] = low
buffer[idx+1] = high
buffer[idx+2] = low
buffer[idx+3] = high
# 计算总共需要写入的数据量
total_bytes = int((sample_rate * duration_ms / 1000) * frame_size)
written = 0
try:
# 写入多次以确保缓冲区填满并开始播放
self.i2s.write(buffer)
while written < total_bytes:
to_write = min(len(buffer), total_bytes - written)
if to_write == len(buffer):
self.i2s.write(buffer)
else:
self.i2s.write(buffer[:to_write])
written += to_write
except Exception as e:
print(f"Write error: {e}")

View File

@@ -1,6 +1,7 @@
import machine
import st7789py as st7789
from config import CURRENT_CONFIG
import font
class Display:
def __init__(self):
@@ -8,6 +9,7 @@ class Display:
self.width = 240
self.height = 240
self._init_display()
self.font = font.Font()
def _init_display(self):
print(">>> Initializing Display...")
@@ -41,6 +43,14 @@ class Display:
if self.tft:
self.tft.fill_rect(x, y, w, h, color)
def set_ws(self, ws):
if self.font:
self.font.set_ws(ws)
def text(self, text, x, y, color):
if self.tft:
self.font.text(self.tft, text, x, y, color)
def init_ui(self):
"""初始化 UI 背景"""
if self.tft:

208
font.py Normal file
View File

@@ -0,0 +1,208 @@
import framebuf
import struct
import time
import binascii
class Font:
    """Network-backed text renderer for ST7789 displays.

    ASCII characters are drawn with framebuf's built-in 8x8 font (padded
    to an 8x16 cell); Chinese characters are 16x16 GB2312 bitmaps fetched
    on demand from the server over WebSocket ("GET_FONT:0xXXXX" request /
    "FONT_DATA:0xXXXX:<hex>" reply) and cached in RAM.
    """

    def __init__(self, ws=None):
        # WebSocket client used to fetch glyphs; may be attached later
        # via set_ws().
        self.ws = ws
        # Glyph cache: {gb2312 code (int): 32-byte 1bpp bitmap (bytes)}.
        # NOTE(review): unbounded — long sessions with varied text will
        # grow this until MemoryError; consider a size cap.
        self.cache = {}

    def set_ws(self, ws):
        """Attach (or replace) the WebSocket used for glyph fetches."""
        self.ws = ws

    def text(self, tft, text, x, y, color, bg=0x0000):
        """Draw *text* at (x, y) in RGB565 *color* over *bg* on *tft*.

        Wraps at the right edge and on '\\n'; stops when the bottom edge
        is reached.  Chinese glyphs advance 16 px, ASCII 8 px.
        """
        # Pre-pack colours once (big-endian RGB565, as blit_buffer expects).
        color_bytes = struct.pack(">H", color)
        bg_bytes = struct.pack(">H", bg)
        initial_x = x
        for char in text:
            if char == '\n':
                x = initial_x
                y += 16
                continue
            # Wrap before drawing if the worst-case 16 px glyph overflows.
            if x + 16 > tft.width:
                x = initial_x
                y += 16
            if y + 16 > tft.height:
                break
            is_chinese = False
            buf_data = None
            if ord(char) > 127:
                try:
                    gb = char.encode('gb2312')
                    if len(gb) == 2:
                        code = struct.unpack('>H', gb)[0]
                        if code in self.cache:
                            buf_data = self.cache[code]
                            is_chinese = True
                        elif self.ws:
                            # Fetch the glyph synchronously.  Blocking here
                            # is a deliberate trade-off: slow for long
                            # strings, but keeps the drawing API simple.
                            hex_code = "0x{:04X}".format(code)
                            print(f"Requesting font for {hex_code} ({char})")
                            self.ws.send(f"GET_FONT:{hex_code}")
                            buf_data = self._wait_for_font(hex_code)
                            if buf_data:
                                self.cache[code] = buf_data
                                is_chinese = True
                                print(f"Font loaded for {hex_code}")
                            else:
                                # Timeout: draw a placeholder and move on.
                                print(f"Font fetch timeout for {hex_code}")
                                self._draw_ascii(tft, '?', x, y, color, bg)
                                x += 8
                                continue
                        else:
                            print("WS not available for font fetch")
                except Exception as e:
                    print(f"Font error: {e}")
            if is_chinese and buf_data:
                # 16x16 Chinese glyph.
                self._draw_bitmap(tft, buf_data, x, y, 16, 16, color_bytes, bg_bytes)
                x += 16
            else:
                # Fall back to '?' for non-renderable code points so
                # framebuf never sees a multi-byte character.
                if ord(char) > 127:
                    char = '?'
                self._draw_ascii(tft, char, x, y, color, bg)
                x += 8

    def _wait_for_font(self, target_hex_code, timeout_ms=1000):
        """Block until FONT_DATA for *target_hex_code* arrives or time out.

        Polls the WebSocket in 200 ms slices for up to *timeout_ms*.
        FONT_DATA replies for *other* glyphs received meanwhile are cached;
        any other message is logged and DROPPED (known limitation — there
        is no message queue to re-deliver it to the main loop).

        Returns the 32-byte bitmap, or None on timeout / no connection.
        """
        if not self.ws:
            return None
        # FIX: hoisted out of the loop — the original re-imported uselect
        # and rebuilt/re-registered the poller on every 200 ms iteration.
        import uselect
        poller = uselect.poll()
        poller.register(self.ws.sock, uselect.POLLIN)
        start = time.ticks_ms()
        while time.ticks_diff(time.ticks_ms(), start) < timeout_ms:
            events = poller.poll(200)  # wait up to 200 ms per slice
            if not events:
                continue
            try:
                msg = self.ws.recv()
                if isinstance(msg, str):
                    if msg.startswith(f"FONT_DATA:{target_hex_code}:"):
                        # The glyph we asked for.
                        hex_data = msg.split(":")[2]
                        return binascii.unhexlify(hex_data)
                    elif msg.startswith("FONT_DATA:"):
                        # A reply for a different glyph: cache it anyway.
                        parts = msg.split(":")
                        if len(parts) >= 3:
                            c = int(parts[1], 16)
                            d = binascii.unhexlify(parts[2])
                            self.cache[c] = d
                    else:
                        # e.g. START_PLAYBACK — lost; see docstring.
                        print(f"Ignored msg during font fetch: {msg}")
            except:
                pass
        return None

    def _draw_bitmap(self, tft, bitmap, x, y, w, h, color_bytes, bg_bytes):
        """Expand a 1bpp bitmap (MSB = leftmost pixel) to RGB565 and blit."""
        # w * h pixels, 2 bytes each (32-byte source for a 16x16 glyph).
        rgb_buf = bytearray(w * h * 2)
        idx = 0
        for byte in bitmap:
            for bit in range(7, -1, -1):
                src = color_bytes if (byte >> bit) & 1 else bg_bytes
                rgb_buf[idx] = src[0]
                rgb_buf[idx + 1] = src[1]
                idx += 2
        tft.blit_buffer(rgb_buf, x, y, w, h)

    def _draw_ascii(self, tft, char, x, y, color, bg):
        """Draw one ASCII char as 8x16: framebuf's 8x8 glyph centred vertically."""
        w, h = 8, 8
        buf = bytearray(w * h // 8)
        fb = framebuf.FrameBuffer(buf, w, h, framebuf.MONO_VLSB)
        fb.fill(0)
        fb.text(char, 0, 0, 1)
        # Build an 8x16 RGB565 cell pre-filled with the background colour.
        rgb_buf = bytearray(8 * 16 * 2)
        bg_high, bg_low = bg >> 8, bg & 0xFF
        color_high, color_low = color >> 8, color & 0xFF
        for i in range(0, len(rgb_buf), 2):
            rgb_buf[i] = bg_high
            rgb_buf[i + 1] = bg_low
        # MONO_VLSB: buf[col] holds that column's 8 vertical pixels
        # (bit n = row n).  The 8x8 glyph is centred vertically in the
        # 16 px cell (rows 4..11).
        for col in range(8):
            column_bits = buf[col]
            for row in range(8):
                if (column_bits >> row) & 1:
                    pos = ((row + 4) * 8 + col) * 2
                    rgb_buf[pos] = color_high
                    rgb_buf[pos + 1] = color_low
        tft.blit_buffer(rgb_buf, x, y, 8, 16)

421
main.py
View File

@@ -4,15 +4,195 @@ import math
import struct
import array
import gc
import network
import st7789py as st7789
from config import CURRENT_CONFIG
from audio import AudioPlayer, Microphone
from display import Display
from websocket_client import WebSocketClient
import uselect
# =============================================================================
# 网络配置
# =============================================================================
WIFI_SSID = "Tangledup-AI"
WIFI_PASS = "djt12345678"
# 请修改为你的电脑 IP 地址
SERVER_IP = "6.6.6.88"
SERVER_PORT = 8000
SERVER_URL = f"ws://{SERVER_IP}:{SERVER_PORT}/ws/audio"
def diagnose_wifi():
    """
    Diagnose the WiFi module state and print detailed debug information.

    Prints activation/connection status, current network config if
    connected, then scans and lists nearby access points with signal
    quality, marking the configured target SSID.
    """
    print("\n" + "="*50)
    print("WiFi DIAGNOSTIC INFORMATION")
    print("="*50)
    wlan = network.WLAN(network.STA_IF)
    # Basic module status
    print(f"WiFi Module Active: {wlan.active()}")
    print(f"Connection Status: {wlan.isconnected()}")
    if wlan.isconnected():
        print(f"Network Config: {wlan.ifconfig()}")
        print(f"Network SSID: {wlan.config('essid')}")
        print(f"Signal Strength: {wlan.status('rssi')} dBm")
    # Scan for available networks (may raise on some ports/states)
    try:
        print("\nScanning for available networks...")
        wlan.active(True)
        time.sleep(1)
        networks = wlan.scan()
        print(f"Found {len(networks)} networks:")
        for net in networks:
            # scan() tuple: (ssid, bssid, channel, rssi, security, hidden)
            ssid = net[0].decode('utf-8') if net[0] else "Hidden"
            bssid = ':'.join(['%02x' % b for b in net[1]])
            channel = net[2]
            rssi = net[3]
            security = net[4]
            # Mark the SSID we intend to join
            marker = " [TARGET]" if ssid == WIFI_SSID else ""
            print(f" {ssid}{marker}")
            print(f" BSSID: {bssid}, Channel: {channel}, RSSI: {rssi}dBm")
            # Human-readable signal strength bucket
            if rssi > -50:
                signal_desc = "Excellent"
            elif rssi > -60:
                signal_desc = "Good"
            elif rssi > -70:
                signal_desc = "Fair"
            else:
                signal_desc = "Weak"
            print(f" Signal: {signal_desc}")
            print("")
    except Exception as e:
        print(f"Network scan failed: {e}")
    print("="*50 + "\n")
def connect_wifi(max_retries=3):
    """
    Connect to the configured WiFi network with error handling and retries.

    Args:
        max_retries: maximum number of connection attempts (default 3).

    Returns:
        bool: True if connected, False after all attempts fail.
    """
    wlan = network.WLAN(network.STA_IF)
    # First force the WiFi module into a clean state
    try:
        wlan.active(False)  # power the module down first
        time.sleep(1)       # let it shut down fully
        wlan.active(True)   # re-activate
        time.sleep(1)       # wait for initialisation to finish
    except Exception as e:
        print(f"WiFi module initialization error: {e}")
        return False
    # Connection attempts, with retry/cleanup between them
    for attempt in range(max_retries):
        try:
            print(f"WiFi connection attempt {attempt + 1}/{max_retries}")
            # Short-circuit if we are already connected
            if wlan.isconnected():
                print('Already connected to WiFi')
                print('Network config:', wlan.ifconfig())
                return True
            # Initiate the connection
            print(f'Connecting to WiFi {WIFI_SSID}...')
            wlan.connect(WIFI_SSID, WIFI_PASS)
            # Poll until connected or the 20 s per-attempt timeout expires
            start_time = time.time()
            while not wlan.isconnected():
                if time.time() - start_time > 20:  # per-attempt timeout: 20 s
                    print("WiFi connection timeout!")
                    break
                time.sleep(0.5)
                print(".", end="")
            print("")  # newline after the progress dots
            # Evaluate the attempt
            if wlan.isconnected():
                print('WiFi connected successfully!')
                print('Network config:', wlan.ifconfig())
                return True
            else:
                print(f"Connection attempt {attempt + 1} failed")
                # Clean up before retrying
                if attempt < max_retries - 1:  # not the last attempt
                    print("Resetting WiFi module for retry...")
                    wlan.disconnect()  # drop the half-open attempt
                    time.sleep(2)
        except OSError as e:
            print(f"WiFi connection error on attempt {attempt + 1}: {e}")
            # ESP32-specific recovery: a full module reset clears this state
            if "Wifi Internal State Error" in str(e):
                print("Detected internal state error, resetting WiFi module...")
                try:
                    wlan.active(False)
                    time.sleep(2)
                    wlan.active(True)
                    time.sleep(1)
                except:
                    pass
            if attempt < max_retries - 1:
                print(f"Retrying in 3 seconds...")
                time.sleep(3)
        except Exception as e:
            print(f"Unexpected error on attempt {attempt + 1}: {e}")
            if attempt < max_retries - 1:
                time.sleep(2)
    # All attempts failed
    print("All WiFi connection attempts failed!")
    try:
        wlan.active(False)  # power the module down to save energy
    except:
        pass
    return False
# =============================================================================
# 硬件引脚配置 (从 config.py 获取)
# =============================================================================
def print_nice_asr(text, display=None):
    """Pretty-print an ASR result to the console and mirror it on screen."""
    banner = "*" * 40
    print("\n" + banner)
    print(" ASR RESULT:")
    print(f" {text}")
    print(banner + "\n")
    # Mirror onto the display's middle region (y 40-200), leaving the top
    # status bar (0-30) and the bottom audio-level bar untouched.
    if display and display.tft:
        display.fill_rect(0, 40, 240, 160, st7789.BLACK)
        display.text(text, 0, 40, st7789.WHITE)
def main():
print("\n" + "="*40)
print("AUDIO & MIC DIAGNOSTIC V5 (Modular & Clean)")
@@ -35,7 +215,44 @@ def main():
speaker = AudioPlayer()
if speaker.i2s:
# 默认播放马里奥
speaker.play_mario()
# speaker.play_mario()
# 播放简单方波 (1kHz, 1秒)
# 直接在 main.py 中实现分块播放,避免因 audio.py 未同步导致的 MemoryError
print("Playing 1kHz square wave...")
try:
import struct
# 1. 参数设置
sr = 24000 # 默认采样率
if hasattr(speaker, 'config') and speaker.config:
sr = speaker.config.get('sample_rate', 24000)
freq = 1000
duration = 1000 # ms
vol = 10000 # 音量 (max 32767)
# 2. 准备缓冲区 (只生成一小段,循环播放)
# 1kHz @ 24kHz -> 24 samples/cycle
period = sr // freq
# 生成约 1000 字节的 buffer (包含整数个周期)
cycles_in_buf = 10
buf = bytearray(period * cycles_in_buf * 4) # 16bit stereo = 4 bytes/frame
# 3. 填充方波数据
for i in range(period * cycles_in_buf):
# 方波逻辑
sample = vol if (i % period) < (period // 2) else -vol
# 写入左右声道 (Little Endian, 16-bit signed)
struct.pack_into('<hh', buf, i*4, sample, sample)
# 4. 循环写入 I2S
t_end = time.ticks_add(time.ticks_ms(), duration)
while time.ticks_diff(t_end, time.ticks_ms()) > 0:
speaker.i2s.write(buf)
except Exception as e:
print(f"Tone error: {e}")
else:
print("!!! Speaker initialization failed")
@@ -57,7 +274,49 @@ def main():
# 录音状态变量
is_recording = False
recorded_chunks = []
# WebSocket 连接
ws = None
# 定义连接函数
def connect_ws():
nonlocal ws
# Reset existing connection object to ensure clean slate
try:
if ws:
ws.close()
except:
pass
ws = None
try:
print(f"Connecting to WebSocket Server: {SERVER_URL}")
ws = WebSocketClient(SERVER_URL)
print("WebSocket connected successfully!")
# Pass WebSocket to display for font loading
if display:
display.set_ws(ws)
return True
except Exception as e:
print(f"WebSocket connection failed: {e}")
return False
# 先运行WiFi诊断
print("Running WiFi diagnostics...")
diagnose_wifi()
# 尝试连接WiFi
print("Starting WiFi connection process...")
if connect_wifi(max_retries=3):
print("WiFi connected successfully!")
connect_ws()
else:
print("WiFi connection failed after all attempts!")
print("Continuing in offline mode without WebSocket functionality...")
print("You can still use the device for local audio recording and visualization.")
# 调试:打印一次 Boot 键状态
print(f"Boot Button Initial State: {boot_btn.value()}")
@@ -86,68 +345,151 @@ def main():
if not is_recording:
print("\n>>> Start Recording (Boot Pressed)...")
is_recording = True
recorded_chunks = []
if display.tft:
print(">>> Filling Screen WHITE")
display.fill(st7789.WHITE)
else:
print(">>> Display TFT is None!")
# 尝试重连 WS
if ws is None or not ws.is_connected():
print(">>> WS not connected, trying to reconnect...")
connect_ws()
# 发送开始录音指令
if ws and ws.is_connected():
try:
ws.send("START_RECORDING")
except Exception as e:
print(f"WS Send Error: {e}")
ws = None # Disconnect on error
else:
print(">>> Warning: No WebSocket connection! Audio will be discarded.")
# 录音
# 录音并流式传输
if mic.i2s:
num_read = mic.readinto(read_buf)
if num_read > 0:
try:
recorded_chunks.append(bytes(read_buf[:num_read]))
except MemoryError:
print("Memory Full!")
if ws and ws.is_connected():
try:
# 发送二进制数据
ws.send(read_buf[:num_read], opcode=2)
# 检查是否有回传的 ASR 结果 (非阻塞)
poller = uselect.poll()
poller.register(ws.sock, uselect.POLLIN)
events = poller.poll(0) # 0 = return immediately
if events:
msg = ws.recv()
if isinstance(msg, str) and msg.startswith("ASR:"):
print_nice_asr(msg[4:], display)
except Exception as e:
print(f"WS Send/Recv Error: {e}")
# 如果发送失败,视为断开
try:
ws.close()
except:
pass
ws = None
else:
# 如果没有 WS就不保存了避免内存溢出
pass
continue # 跳过可视化逻辑
# === 按键释放处理 ===
elif is_recording:
print(f"\n>>> Stop Recording. Captured {len(recorded_chunks)} chunks.")
print(f"\n>>> Stop Recording.")
is_recording = False
if display.tft:
display.init_ui()
# 播放录音
if speaker.i2s and len(recorded_chunks) > 0:
print(">>> Playing...")
# 停止录音并等待回放
if ws:
try:
cfg = speaker.config
# 重新初始化 Speaker (16kHz Mono 16-bit) 以匹配 Mic 数据
speaker.i2s.deinit()
speaker.i2s = machine.I2S(
0,
sck=machine.Pin(cfg['bck']),
ws=machine.Pin(cfg['ws']),
sd=machine.Pin(cfg['sd']),
mode=machine.I2S.TX,
bits=16,
format=machine.I2S.MONO,
rate=16000,
ibuf=20000,
)
print(">>> Sending STOP & Waiting for playback...")
ws.send("STOP_RECORDING")
# 播放数据
for chunk in recorded_chunks:
# 32-bit Mono -> 16-bit Mono (取高16位)
# chunk 是 bytes, 转为 array('h') 方便访问 16-bit word
# 32-bit 数据: LowWord, HighWord
# 我们需要 HighWord
arr = array.array('h', chunk)
samples = arr[1::2]
speaker.i2s.write(samples)
# 重新初始化 Speaker (16kHz Mono 16-bit)
if speaker.i2s:
cfg = speaker.config
speaker.i2s.deinit()
speaker.i2s = machine.I2S(
0,
sck=machine.Pin(cfg['bck']),
ws=machine.Pin(cfg['ws']),
sd=machine.Pin(cfg['sd']),
mode=machine.I2S.TX,
bits=16,
format=machine.I2S.MONO,
rate=16000,
ibuf=40000,
)
# 接收回放循环
playback_timeout = 5000 # 5秒无数据则退出
last_data_time = time.ticks_ms()
while True:
# Check for data with timeout
poller = uselect.poll()
poller.register(ws.sock, uselect.POLLIN)
events = poller.poll(100) # 100ms wait
if events:
msg = ws.recv()
last_data_time = time.ticks_ms()
if isinstance(msg, str):
if msg == "START_PLAYBACK":
print(">>> Server starting playback stream...")
continue
elif msg == "STOP_PLAYBACK":
print(">>> Server finished playback.")
break
elif msg.startswith("ASR:"):
print_nice_asr(msg[4:], display)
elif isinstance(msg, bytes):
# 播放接收到的音频数据
if speaker.i2s:
# 使用 try-except 防止 write 阻塞导致的问题
try:
speaker.i2s.write(msg)
except Exception as e:
print(f"I2S Write Error: {e}")
elif msg is None:
print("WS Connection closed or error (recv returned None)")
try:
ws.close()
except:
pass
ws = None
break
else:
# No data received in this poll window
if time.ticks_diff(time.ticks_ms(), last_data_time) > playback_timeout:
print("Playback timeout - no data received for 5 seconds")
break
# Feed watchdog or do other small tasks if needed
# time.sleep(0.01)
except Exception as e:
print(f"Playback error: {e}")
print(f"Playback loop error: {e}")
try:
ws.close()
except:
pass
ws = None
# 恢复 Speaker 原始配置
if speaker.i2s: speaker.i2s.deinit()
speaker._init_audio()
recorded_chunks = []
gc.collect()
# === 原有的可视化逻辑 ===
@@ -178,10 +520,7 @@ def main():
last_print = time.ticks_ms()
if display.tft:
# 调整缩放比例,让显示更敏感
# 你的日志显示安静时 Max ~2000-3000, 说话时 Max ~40000
# 我们可以把 Max 40000 映射到满格
# 调整缩放比例
bar_height = int((max_val / 40000) * 200)
if bar_height > 200: bar_height = 200
if bar_height < 0: bar_height = 0

178
websocket_client.py Normal file
View File

@@ -0,0 +1,178 @@
import usocket as socket
import ubinascii
import uos
class WebSocketError(Exception):
    """Raised on WebSocket handshake or connection failures."""
    pass
class WebSocketClient:
    """Minimal WebSocket client (RFC 6455 subset) for MicroPython.

    Supports masked text/binary sends and blocking recv with ping/pong
    and close handling.  No TLS, no fragmented-message reassembly.
    Connects eagerly in __init__.
    """

    def __init__(self, uri, timeout=5):
        # uri: "ws://host[:port][/path]"; timeout: socket timeout (s)
        self.sock = None
        self.uri = uri
        self.timeout = timeout
        self.connect()

    def connect(self):
        """Open the TCP socket and perform the WebSocket opening handshake."""
        # Parse ws://host[:port]/path
        uri = self.uri
        assert uri.startswith("ws://")
        uri = uri[5:]
        if "/" in uri:
            host, path = uri.split("/", 1)
        else:
            host, path = uri, ""
        path = "/" + path
        if ":" in host:
            host, port = host.split(":")
            port = int(port)
        else:
            port = 80  # default WebSocket (plain) port
        print(f"Connecting to {host}:{port}{path}...")
        self.sock = socket.socket()
        # Socket timeout so connect/read cannot hang forever
        self.sock.settimeout(self.timeout)
        addr_info = socket.getaddrinfo(host, port)
        addr = addr_info[0][-1]
        print(f"Resolved address: {addr}")
        try:
            self.sock.connect(addr)
        except OSError as e:
            print(f"Socket connect failed: {e}")
            if e.args[0] == 113:  # EHOSTUNREACH
                print("Hint: Check firewall settings on server or if server is running.")
            raise
        # Random Sec-WebSocket-Key for the opening handshake
        key = ubinascii.b2a_base64(uos.urandom(16)).strip()
        req = "GET {} HTTP/1.1\r\n".format(path)
        req += "Host: {}:{}\r\n".format(host, port)
        req += "Connection: Upgrade\r\n"
        req += "Upgrade: websocket\r\n"
        req += "Sec-WebSocket-Key: {}\r\n".format(key.decode())
        req += "Sec-WebSocket-Version: 13\r\n"
        req += "\r\n"
        self.sock.write(req.encode())
        # Read the HTTP response byte-by-byte until the header terminator
        header = b""
        while b"\r\n\r\n" not in header:
            chunk = self.sock.read(1)
            if not chunk:
                raise WebSocketError("Connection closed during handshake")
            header += chunk
        # Expect "101 Switching Protocols".
        # NOTE(review): the Sec-WebSocket-Accept hash is not validated.
        if b" 101 " not in header:
            raise WebSocketError("Handshake failed: " + header.decode())
        print("WebSocket connected!")

    def is_connected(self):
        # Only tracks whether close() has been called locally; a dead TCP
        # connection is not detected until a send/recv fails.
        return self.sock is not None

    def send(self, data, opcode=1):  # 1=Text, 2=Binary
        """Send one frame with the given opcode, masking per RFC 6455."""
        if not self.sock:
            print("WebSocket is not connected (send called on closed socket)")
            raise WebSocketError("Connection closed")
        if isinstance(data, str):
            data = data.encode('utf-8')
        header = bytearray()
        header.append(0x80 | opcode)  # FIN + opcode (no fragmentation)
        length = len(data)
        # Payload length: 7-bit, 16-bit, or 64-bit extended form
        if length < 126:
            header.append(0x80 | length)  # mask bit + length
        elif length < 65536:
            header.append(0x80 | 126)
            header.extend(length.to_bytes(2, 'big'))
        else:
            header.append(0x80 | 127)
            header.extend(length.to_bytes(8, 'big'))
        # Client-to-server frames MUST be masked
        mask = uos.urandom(4)
        header.extend(mask)
        masked_data = bytearray(length)
        for i in range(length):
            masked_data[i] = data[i] ^ mask[i % 4]
        self.sock.write(header)
        self.sock.write(masked_data)

    def recv(self):
        """Receive one message.

        Returns str for text frames, bytes for binary frames, or None on
        close/error/timeout.  Pings are answered with a pong and the next
        message is returned.
        """
        try:
            # Frame header: FIN/opcode byte + mask/length byte
            header = self.sock.read(2)
            if not header or len(header) < 2: return None
            b1 = header[0]
            b2 = header[1]
            fin = b1 & 0x80
            opcode = b1 & 0x0f
            mask = b2 & 0x80
            length = b2 & 0x7f
            # Extended payload lengths
            if length == 126:
                length_bytes = self.sock.read(2)
                if not length_bytes: return None
                length = int.from_bytes(length_bytes, 'big')
            elif length == 127:
                length_bytes = self.sock.read(8)
                if not length_bytes: return None
                length = int.from_bytes(length_bytes, 'big')
            # Server frames are normally unmasked, but handle both
            if mask:
                mask_key = self.sock.read(4)
                if not mask_key: return None
            # Read the full payload, looping over short reads
            data = bytearray(length)
            view = memoryview(data)
            pos = 0
            while pos < length:
                read_len = self.sock.readinto(view[pos:])
                if read_len == 0:
                    return None
                pos += read_len
            if mask:
                unmasked = bytearray(length)
                for i in range(length):
                    unmasked[i] = data[i] ^ mask_key[i % 4]
                data = unmasked
            if opcode == 1:  # Text
                return data.decode('utf-8')
            elif opcode == 2:  # Binary
                return data
            elif opcode == 8:  # Close
                self.close()
                return None
            elif opcode == 9:  # Ping -> reply Pong, return next message
                self.send(data, opcode=10)
                return self.recv()
            return data
        except Exception as e:
            # Timeouts are expected during non-blocking polling; stay quiet
            if "ETIMEDOUT" not in str(e) and "110" not in str(e):
                print(f"WS Recv Error: {e}")
            return None

    def close(self):
        """Close the underlying socket (no WebSocket close frame is sent)."""
        if self.sock:
            self.sock.close()
            self.sock = None

1
websocket_server/.env Normal file
View File

@@ -0,0 +1 @@
# SECURITY(review): a live DashScope API key is committed here — rotate this
# key immediately and keep .env out of version control (add it to .gitignore).
DASHSCOPE_API_KEY=sk-a294f382488d46a1aa0d7cd8e750729b

Binary file not shown.

View File

@@ -0,0 +1,31 @@
# WebSocket Audio Server
This is a FastAPI server that receives audio from an ESP32 via WebSocket, saves it, processes it (converts 32-bit to 16-bit), and sends it back for playback.
## Installation
1. Install dependencies:
```bash
pip install -r requirements.txt
```
## Usage
1. Start the server:
```bash
python server.py
```
Or:
```bash
uvicorn server:app --host 0.0.0.0 --port 8000
```
2. Update the IP address in `main.py` on your ESP32 to match your computer's IP address.
Look for `SERVER_IP` variable.
## Features
- Receives raw audio stream from ESP32.
- Saves raw audio to `received_audio.raw`.
- Converts 32-bit audio (from ICS-43434) to 16-bit audio (for MAX98357A).
- Streams processed audio back to ESP32 for playback.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,127 @@
import struct
import freetype
import os
# Font file and output file
FONT_FILE = "/Users/jeremygan/Desktop/python_dev/epaper2/websocket_server/GB2312.ttf"
OUTPUT_FILE = "/Users/jeremygan/Desktop/python_dev/epaper2/GB2312-16.bin"
# Font size (16x16)
FONT_SIZE = 16
def create_gb2312_font():
    """Render every GB2312 character into a packed 16x16 1bpp bitmap file.

    Output layout: 94 areas x 94 indexes, 32 bytes per glyph (2 bytes per
    row, MSB = leftmost pixel), written sequentially so a glyph's file
    offset is ((area - 1) * 94 + (index - 1)) * 32.  Unrenderable code
    points are written as 32 zero bytes so offsets stay stable.
    """
    # Load the typeface
    try:
        face = freetype.Face(FONT_FILE)
    except Exception as e:
        print(f"Error loading font: {e}")
        return
    # Render at 16x16 pixels
    face.set_pixel_sizes(FONT_SIZE, FONT_SIZE)
    print(f"Generating GB2312 font file: {OUTPUT_FILE}")
    with open(OUTPUT_FILE, 'wb') as f:
        # GB2312 uses bytes 0xA1-0xFE for both area and index (94 x 94)
        count = 0
        total_chars = 94 * 94
        # Placeholder written for missing glyphs (32 zero bytes)
        empty_char = b'\x00' * 32
        for area in range(0xA1, 0xFF):
            for index in range(0xA1, 0xFF):
                # Two-byte GB2312 code for this area/index cell
                gb_code = bytes([area, index])
                try:
                    # Decode to the Unicode character
                    char = gb_code.decode('gb2312')
                    # FT_LOAD_TARGET_MONO packs 8 pixels per byte, MSB first
                    face.load_char(char, freetype.FT_LOAD_RENDER | freetype.FT_LOAD_TARGET_MONO)
                    bitmap = face.glyph.bitmap
                    glyph_width = bitmap.width
                    glyph_rows = bitmap.rows
                    glyph_pitch = bitmap.pitch
                    # Target 16x16 buffer (32 bytes)
                    char_buffer = bytearray(32)
                    # Centre the glyph in the 16x16 box.
                    # NOTE(review): baseline alignment (via bitmap_top /
                    # ascender) would look better than simple vertical
                    # centring — confirm rendering quality on device.
                    x_off = (FONT_SIZE - glyph_width) // 2
                    y_off = (FONT_SIZE - glyph_rows) // 2
                    # Copy the FreeType bitmap bit-by-bit into the target
                    src_buf = bitmap.buffer
                    for row in range(glyph_rows):
                        # Target row, skipping anything outside the box
                        dst_row = row + y_off
                        if dst_row < 0 or dst_row >= FONT_SIZE:
                            continue
                        # pitch = source bytes per row
                        src_start = row * glyph_pitch
                        for col in range(glyph_width):
                            dst_col = col + x_off
                            if dst_col < 0 or dst_col >= FONT_SIZE:
                                continue
                            # Read the source pixel (MSB-first packing)
                            byte_idx = src_start + (col >> 3)
                            bit_idx = 7 - (col & 7)
                            pixel = (src_buf[byte_idx] >> bit_idx) & 1
                            if pixel:
                                # Set the destination pixel: 2 bytes per
                                # row, MSB = leftmost pixel
                                dst_byte_idx = dst_row * 2 + (dst_col >> 3)
                                dst_bit_idx = 7 - (dst_col & 7)
                                char_buffer[dst_byte_idx] |= (1 << dst_bit_idx)
                    f.write(char_buffer)
                    count += 1
                except Exception:
                    # Character not found or decode error: keep offsets stable
                    f.write(empty_char)
                # Progress report
                if count % 1000 == 0:
                    print(f"Processed {count} characters...")
    print(f"Done! Generated {OUTPUT_FILE} with size {os.path.getsize(OUTPUT_FILE)} bytes.")
if __name__ == "__main__":
create_gb2312_font()

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,6 @@
fastapi
uvicorn
websockets
pydub
dashscope
python-dotenv

277
websocket_server/server.py Normal file
View File

@@ -0,0 +1,277 @@
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
import uvicorn
import asyncio
import os
import subprocess
import struct
from dotenv import load_dotenv
import dashscope
from dashscope.audio.asr import Recognition, RecognitionCallback, RecognitionResult
import json
# Load environment variables (.env provides DASHSCOPE_API_KEY)
load_dotenv()
dashscope.api_key = os.getenv("DASHSCOPE_API_KEY")
app = FastAPI()
# Raw 32-bit audio received from the ESP32 (module-level: single client assumed)
audio_buffer = bytearray()
RECORDING_RAW_FILE = "received_audio.raw"
RECORDING_MP3_FILE = "received_audio.mp3"
VOLUME_GAIN = 10.0  # linear amplification applied to mic samples
FONT_FILE = "GB2312-16.bin"  # 16x16 GB2312 bitmap font served to the ESP32
class MyRecognitionCallback(RecognitionCallback):
    """Bridges DashScope ASR callbacks (worker thread) back to the client.

    DashScope invokes these callbacks off the event loop, so results are
    scheduled onto the captured asyncio loop thread-safely.
    """

    def __init__(self, websocket: WebSocket, loop: asyncio.AbstractEventLoop):
        self.websocket = websocket
        self.loop = loop

    def on_open(self) -> None:
        print("ASR Session started")

    def on_close(self) -> None:
        print("ASR Session closed")

    def on_event(self, result: RecognitionResult) -> None:
        # Runs on DashScope's worker thread, not the event-loop thread
        if result.get_sentence():
            text = result.get_sentence()['text']
            print(f"ASR Result: {text}")
            # Forward the recognition result to the client with an "ASR:" prefix
            try:
                asyncio.run_coroutine_threadsafe(
                    self.websocket.send_text(f"ASR:{text}"),
                    self.loop
                )
            except Exception as e:
                print(f"Failed to send ASR result to client: {e}")
def process_chunk_32_to_16(chunk_bytes, gain=1.0):
    """Convert 32-bit I2S mic samples to gained, clamped 16-bit PCM.

    The mic delivers 32-bit little-endian frames whose useful signal sits
    in the top 16 bits, so each 4-byte frame is reduced to the signed
    16-bit word at offset +2.  *gain* scales the sample, clamped to the
    int16 range to avoid wrap-around distortion.  A trailing partial
    frame (< 4 bytes) is ignored.

    Args:
        chunk_bytes: raw 32-bit little-endian PCM bytes.
        gain: linear volume multiplier (default 1.0).

    Returns:
        bytearray of 16-bit little-endian PCM, one sample per input frame.
    """
    n_frames = len(chunk_bytes) // 4
    # Pre-sized output: avoids the per-sample bytearray.extend() of the
    # original, which reallocated repeatedly on large chunks.
    processed_chunk = bytearray(n_frames * 2)
    for frame in range(n_frames):
        # Top 16 bits of the 32-bit little-endian frame
        sample = struct.unpack_from('<h', chunk_bytes, frame * 4 + 2)[0]
        # Apply volume gain
        sample = int(sample * gain)
        # Clamp to int16 to prevent overflow artifacts
        if sample > 32767:
            sample = 32767
        elif sample < -32768:
            sample = -32768
        # Write back as 16-bit little-endian
        struct.pack_into('<h', processed_chunk, frame * 2, sample)
    return processed_chunk
@app.websocket("/ws/audio")
async def websocket_endpoint(websocket: WebSocket):
    """Single-client audio endpoint for the ESP32.

    Protocol (client = ESP32):
      text  "START_RECORDING"  -> reset buffers, start DashScope ASR
      bytes                    -> raw 32-bit mic audio: buffered, converted
                                  to 16-bit, streamed to ASR
      text  "STOP_RECORDING"   -> stop ASR, save .raw/.mp3, echo processed
                                  audio back framed by START_/STOP_PLAYBACK
      text  "GET_FONT:0xXXXX"  -> reply "FONT_DATA:0xXXXX:<hex>" with a
                                  32-byte 16x16 GB2312 glyph
    """
    global audio_buffer
    await websocket.accept()
    print("Client connected")
    recognition = None
    processed_buffer = bytearray()
    # Captured so the ASR callback (worker thread) can schedule sends
    loop = asyncio.get_running_loop()
    try:
        while True:
            # Receive either a text command or binary audio data
            try:
                message = await websocket.receive()
            except RuntimeError as e:
                # Starlette raises this when receive() is called after disconnect
                if "Cannot call \"receive\" once a disconnect message has been received" in str(e):
                    print("Client disconnected (RuntimeError caught)")
                    break
                raise e
            if "text" in message:
                text = message["text"]
                print(f"Received text: {text}")
                if text == "START_RECORDING":
                    print("Start recording...")
                    audio_buffer = bytearray()  # reset raw buffer
                    processed_buffer = bytearray()
                    # Start realtime speech recognition
                    try:
                        callback = MyRecognitionCallback(websocket, loop)
                        recognition = Recognition(
                            model='paraformer-realtime-v2',
                            format='pcm',
                            sample_rate=16000,
                            callback=callback
                        )
                        recognition.start()
                        print("DashScope ASR started")
                    except Exception as e:
                        print(f"Failed to start ASR: {e}")
                        recognition = None
                elif text == "STOP_RECORDING":
                    print(f"Stop recording. Total raw bytes: {len(audio_buffer)}")
                    # Stop speech recognition
                    if recognition:
                        try:
                            recognition.stop()
                            print("DashScope ASR stopped")
                        except Exception as e:
                            print(f"Error stopping ASR: {e}")
                        recognition = None
                    # Use the audio already converted during streaming
                    processed_audio = processed_buffer
                    print(f"Processed audio size: {len(processed_audio)} bytes (Gain: {VOLUME_GAIN}x)")
                    # Save the raw 16-bit PCM
                    with open(RECORDING_RAW_FILE, "wb") as f:
                        f.write(processed_audio)
                    # Convert to MP3 via the ffmpeg CLI (avoids the removed
                    # audioop module on Python 3.13)
                    try:
                        # ffmpeg -y -f s16le -ar 16000 -ac 1 -i received_audio.raw received_audio.mp3
                        cmd = [
                            "ffmpeg",
                            "-y",            # overwrite output file
                            "-f", "s16le",   # input: signed 16-bit little endian
                            "-ar", "16000",  # input sample rate
                            "-ac", "1",      # input channels
                            "-i", RECORDING_RAW_FILE,
                            RECORDING_MP3_FILE
                        ]
                        print(f"Running command: {' '.join(cmd)}")
                        # Async subprocess so the event loop is not blocked
                        process = await asyncio.create_subprocess_exec(
                            *cmd,
                            stdout=asyncio.subprocess.PIPE,
                            stderr=asyncio.subprocess.PIPE
                        )
                        stdout, stderr = await process.communicate()
                        if process.returncode != 0:
                            raise subprocess.CalledProcessError(process.returncode, cmd, output=stdout, stderr=stderr)
                        print(f"Saved MP3 to {RECORDING_MP3_FILE}")
                    except subprocess.CalledProcessError as e:
                        print(f"Error converting to MP3: {e}")
                        # stderr may be bytes
                        error_msg = e.stderr.decode() if isinstance(e.stderr, bytes) else str(e.stderr)
                        print(f"FFmpeg stderr: {error_msg}")
                    except FileNotFoundError:
                        print("Error: ffmpeg not found. Please install ffmpeg.")
                    except Exception as e:
                        print(f"Error converting to MP3: {e}")
                    # Echo the processed audio back for playback
                    print("Sending audio back...")
                    await websocket.send_text("START_PLAYBACK")
                    # Chunked send
                    chunk_size = 4096
                    for i in range(0, len(processed_audio), chunk_size):
                        chunk = processed_audio[i:i+chunk_size]
                        await websocket.send_bytes(chunk)
                        # Pace the stream so the ESP32's I2S buffer is not
                        # overrun: 4096 B / 32000 B/s (16k * 2) = ~0.128 s of
                        # audio; 0.04 s per chunk is ~3x realtime.
                        await asyncio.sleep(0.04)
                    await websocket.send_text("STOP_PLAYBACK")
                    print("Audio sent back finished.")
                elif text.startswith("GET_FONT:"):
                    # Format: GET_FONT:0xA1A1
                    try:
                        print(f"Font Request Received: {text}")
                        hex_code = text.split(":")[1]
                        code = int(hex_code, 16)
                        # GB2312 code range: 0xA1A1 - 0xFEFE
                        # area  = high byte - 0xA0, index = low byte - 0xA0
                        area = (code >> 8) - 0xA0
                        index = (code & 0xFF) - 0xA0
                        if area >= 1 and index >= 1:
                            # 32 bytes per 16x16 glyph, sequential layout
                            offset = ((area - 1) * 94 + (index - 1)) * 32
                            # Locate the font file.
                            # NOTE(review): reopened per request for simplicity;
                            # cache the file contents if concurrency grows.
                            # Prefer the script's own directory
                            script_dir = os.path.dirname(os.path.abspath(__file__))
                            font_path = os.path.join(script_dir, FONT_FILE)
                            # Fallback: one level up
                            if not os.path.exists(font_path):
                                font_path = os.path.join(script_dir, "..", FONT_FILE)
                            # Fallback: current working directory
                            if not os.path.exists(font_path):
                                font_path = FONT_FILE
                            if os.path.exists(font_path):
                                print(f"Reading font from: {font_path} (Offset: {offset})")
                                with open(font_path, "rb") as f:
                                    f.seek(offset)
                                    font_data = f.read(32)
                                if len(font_data) == 32:
                                    import binascii
                                    hex_data = binascii.hexlify(font_data).decode('utf-8')
                                    response = f"FONT_DATA:{hex_code}:{hex_data}"
                                    print(f"Sending Font Response: {response[:30]}...")
                                    await websocket.send_text(response)
                                else:
                                    print(f"Error: Read {len(font_data)} bytes for font data (expected 32)")
                            else:
                                print(f"Font file not found: {font_path}")
                        else:
                            print(f"Invalid GB2312 code: {hex_code} (Area: {area}, Index: {index})")
                    except Exception as e:
                        print(f"Error handling GET_FONT: {e}")
            elif "bytes" in message:
                # Binary audio: append raw bytes, convert, and stream to ASR
                data = message["bytes"]
                audio_buffer.extend(data)
                # Convert to 16-bit PCM in realtime
                pcm_chunk = process_chunk_32_to_16(data, VOLUME_GAIN)
                processed_buffer.extend(pcm_chunk)
                if recognition:
                    try:
                        recognition.send_audio_frame(pcm_chunk)
                    except Exception as e:
                        print(f"Error sending audio frame to ASR: {e}")
    except WebSocketDisconnect:
        print("Client disconnected")
        if recognition:
            try:
                recognition.stop()
            except:
                pass
    except Exception as e:
        print(f"Error: {e}")
        if recognition:
            try:
                recognition.stop()
            except:
                pass
if __name__ == "__main__":
# 获取本机IP方便ESP32连接
import socket
hostname = socket.gethostname()
local_ip = socket.gethostbyname(hostname)
print(f"Server running on ws://{local_ip}:8000/ws/audio")
uvicorn.run(app, host="0.0.0.0", port=8000)

View File

@@ -0,0 +1,55 @@
import os
FONT_FILE = "/Users/jeremygan/Desktop/python_dev/epaper2/websocket_server/GB2312-16.bin"
def test_font(font_path=None, char_code=0xD6D0):
    """Sanity-check a GB2312 16x16 bitmap font file and dump one glyph.

    Verifies the file size matches the expected 94*94*32 bytes, seeks to
    the glyph for ``char_code`` and prints it as ASCII art ("##" for set
    pixels, ".." for clear ones).

    Args:
        font_path: Path to the GB2312-16 binary font file.  Defaults to
            the module-level ``FONT_FILE`` constant.
        char_code: GB2312 code of the glyph to render.  Defaults to
            0xD6D0 (the character "zhong", U+4E2D).

    Returns:
        The 32-byte glyph bitmap (bytes) on success, otherwise None.
    """
    # GB2312-16 geometry: 94 areas x 94 positions, 16x16 bits = 32 bytes/glyph.
    GLYPH_SIZE = 32
    CHARS_PER_AREA = 94

    if font_path is None:
        try:
            font_path = FONT_FILE
        except NameError:
            print("Error: no font path given and FONT_FILE is not defined")
            return None
    if not os.path.exists(font_path):
        print(f"Error: File not found at {font_path}")
        return None

    file_size = os.path.getsize(font_path)
    print(f"Font file size: {file_size} bytes")
    expected_size = CHARS_PER_AREA * CHARS_PER_AREA * GLYPH_SIZE
    print(f"Expected size: {expected_size} bytes")
    if file_size != expected_size:
        print(f"Warning: File size mismatch! (Diff: {file_size - expected_size})")

    # GB2312 code -> (area, position); both become 1-based after
    # subtracting the 0xA0 bias from each byte of the code.
    area = (char_code >> 8) - 0xA0
    index = (char_code & 0xFF) - 0xA0
    offset = ((area - 1) * CHARS_PER_AREA + (index - 1)) * GLYPH_SIZE
    print(f"Testing character 0x{char_code:04X}")
    print(f"Area: {area}, Index: {index}, Offset: {offset}")

    with open(font_path, "rb") as f:
        f.seek(offset)
        data = f.read(GLYPH_SIZE)
    if len(data) != GLYPH_SIZE:
        print(f"Error: Could not read {GLYPH_SIZE} bytes")
        return None

    print("Bitmap data:")
    for row in range(16):
        # Each row is 2 bytes = 16 pixels, most significant bit first.
        bits = (data[row * 2] << 8) | data[row * 2 + 1]
        print("".join("##" if (bits >> (15 - b)) & 1 else ".." for b in range(16)))
    return data
if __name__ == "__main__":
    # Run the font-file sanity check when executed as a standalone script.
    test_font()