t
This commit is contained in:
@@ -2,3 +2,4 @@
|
||||
用的是 MicroPython
|
||||
使用的是 SPI 接口的 ST7789 方形屏幕封装
|
||||
硬件是基于c++文件夹里的代码改到MicroPython上面
|
||||
websocket_server是这个esp32的服务器项目
|
||||
21
LICENSE
21
LICENSE
@@ -1,21 +0,0 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019 Ivan Belokobylskiy
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
75
audio.py
75
audio.py
@@ -42,33 +42,70 @@ class AudioPlayer:
|
||||
self.i2s = None
|
||||
|
||||
def play_tone(self, frequency, duration_ms, volume=0.5):
|
||||
"""播放指定频率的音调"""
|
||||
"""播放指定频率的音调 (优化内存版)"""
|
||||
if self.i2s is None: return
|
||||
|
||||
sample_rate = self.config.get('sample_rate', 24000)
|
||||
n_samples = int(sample_rate * duration_ms / 1000)
|
||||
|
||||
if frequency <= 0:
|
||||
# 静音处理
|
||||
time.sleep_ms(duration_ms)
|
||||
return
|
||||
|
||||
# 振幅
|
||||
amplitude = int(32767 * volume)
|
||||
|
||||
# STEREO: 每个采样 2 个声道 (L+R),每个声道 2 字节 (16-bit) -> 4 字节/帧
|
||||
buffer = bytearray(n_samples * 4)
|
||||
if frequency > 0:
|
||||
period = sample_rate // frequency
|
||||
half_period = period // 2
|
||||
# 计算单周期采样数
|
||||
period = sample_rate // frequency
|
||||
|
||||
for i in range(n_samples):
|
||||
# 方波:前半周期高电平,后半周期低电平
|
||||
sample = amplitude if (i % period) < half_period else -amplitude
|
||||
# 左声道
|
||||
struct.pack_into('<h', buffer, i * 4, sample)
|
||||
# 右声道
|
||||
struct.pack_into('<h', buffer, i * 4 + 2, sample)
|
||||
else:
|
||||
# 静音,缓冲区默认为0
|
||||
pass
|
||||
# 目标 buffer 大小约 2048 字节 (防止 buffer 只有几字节导致 underrun)
|
||||
target_size = 2048
|
||||
frame_size = 4 # 16bit stereo
|
||||
|
||||
# 计算 buffer 中包含多少个完整周期
|
||||
period_bytes = period * frame_size
|
||||
repeats = max(1, target_size // period_bytes)
|
||||
buffer_bytes = repeats * period_bytes
|
||||
|
||||
buffer = bytearray(buffer_bytes)
|
||||
|
||||
# 填充 buffer
|
||||
half_period = period // 2
|
||||
|
||||
# 预计算采样值的高低字节
|
||||
pos_val = amplitude
|
||||
neg_val = -amplitude
|
||||
|
||||
pos_low = pos_val & 0xFF
|
||||
pos_high = (pos_val >> 8) & 0xFF
|
||||
neg_low = neg_val & 0xFF
|
||||
neg_high = (neg_val >> 8) & 0xFF
|
||||
|
||||
for i in range(period * repeats):
|
||||
# 方波:前半周期高电平,后半周期低电平
|
||||
if (i % period) < half_period:
|
||||
low, high = pos_low, pos_high
|
||||
else:
|
||||
low, high = neg_low, neg_high
|
||||
|
||||
idx = i * 4
|
||||
buffer[idx] = low
|
||||
buffer[idx+1] = high
|
||||
buffer[idx+2] = low
|
||||
buffer[idx+3] = high
|
||||
|
||||
# 计算总共需要写入的数据量
|
||||
total_bytes = int((sample_rate * duration_ms / 1000) * frame_size)
|
||||
|
||||
written = 0
|
||||
try:
|
||||
# 写入多次以确保缓冲区填满并开始播放
|
||||
self.i2s.write(buffer)
|
||||
while written < total_bytes:
|
||||
to_write = min(len(buffer), total_bytes - written)
|
||||
if to_write == len(buffer):
|
||||
self.i2s.write(buffer)
|
||||
else:
|
||||
self.i2s.write(buffer[:to_write])
|
||||
written += to_write
|
||||
except Exception as e:
|
||||
print(f"Write error: {e}")
|
||||
|
||||
|
||||
10
display.py
10
display.py
@@ -1,6 +1,7 @@
|
||||
import machine
|
||||
import st7789py as st7789
|
||||
from config import CURRENT_CONFIG
|
||||
import font
|
||||
|
||||
class Display:
|
||||
def __init__(self):
|
||||
@@ -8,6 +9,7 @@ class Display:
|
||||
self.width = 240
|
||||
self.height = 240
|
||||
self._init_display()
|
||||
self.font = font.Font()
|
||||
|
||||
def _init_display(self):
|
||||
print(">>> Initializing Display...")
|
||||
@@ -41,6 +43,14 @@ class Display:
|
||||
if self.tft:
|
||||
self.tft.fill_rect(x, y, w, h, color)
|
||||
|
||||
def set_ws(self, ws):
    """Forward the WebSocket client to the font renderer, if one exists."""
    font_renderer = self.font
    if not font_renderer:
        return
    font_renderer.set_ws(ws)
|
||||
|
||||
def text(self, text, x, y, color):
    """Draw ``text`` at (x, y) in ``color``; does nothing when no panel is attached."""
    panel = self.tft
    if not panel:
        return
    self.font.text(panel, text, x, y, color)
|
||||
|
||||
def init_ui(self):
|
||||
"""初始化 UI 背景"""
|
||||
if self.tft:
|
||||
|
||||
208
font.py
Normal file
208
font.py
Normal file
@@ -0,0 +1,208 @@
|
||||
import framebuf
|
||||
import struct
|
||||
import time
|
||||
import binascii
|
||||
|
||||
class Font:
    """Text renderer for an ST7789 panel with server-supplied 16x16 glyphs.

    ASCII characters are drawn with the 8x8 font built into ``framebuf``,
    centred vertically in an 8x16 cell.  Characters above code point 127 are
    looked up as two-byte GB2312 codes; their 16x16 1-bpp bitmaps are
    requested from the server over the WebSocket (``GET_FONT:0xXXXX`` ->
    ``FONT_DATA:0xXXXX:<hex>``) and cached in ``self.cache``.
    """

    # Size in bytes of one 16x16 glyph at 1 bit per pixel.
    GLYPH_BYTES = 32

    def __init__(self, ws=None, max_cache=256):
        """Create a renderer.

        Args:
            ws: WebSocket client used to fetch glyphs, or None (ASCII only).
            max_cache: Maximum number of glyphs kept in memory.  When the
                limit is reached the cache is emptied before the next insert,
                which bounds RAM use on the microcontroller.
        """
        self.ws = ws
        self.max_cache = max_cache
        self.cache = {}  # {gb2312_code: 32-byte glyph bitmap}

    def set_ws(self, ws):
        """Replace the WebSocket client used for glyph requests."""
        self.ws = ws

    def text(self, tft, text, x, y, color, bg=0x0000):
        """Draw ``text`` on ``tft`` starting at (x, y).

        Args:
            tft: Display object exposing ``width``, ``height`` and
                ``blit_buffer(buf, x, y, w, h)``.
            text: String to draw; ``'\\n'`` starts a new 16-pixel line.
            x, y: Top-left pixel of the first character cell.
            color: Foreground colour as a 16-bit value.
            bg: Background colour as a 16-bit value.

        Lines wrap back to the starting ``x`` when fewer than 16 pixels remain
        horizontally, and drawing stops when the next line would not fit
        vertically.  Characters whose glyph cannot be obtained are drawn
        as ``'?'``.
        """
        color_bytes = struct.pack(">H", color)
        bg_bytes = struct.pack(">H", bg)
        initial_x = x

        for char in text:
            if char == '\n':
                x = initial_x
                y += 16
                continue

            # Reserve room for the widest (16 px) cell before drawing.
            if x + 16 > tft.width:
                x = initial_x
                y += 16
            if y + 16 > tft.height:
                break

            is_ascii = ord(char) <= 127
            glyph = None if is_ascii else self._load_glyph(char)

            if glyph:
                self._draw_bitmap(tft, glyph, x, y, 16, 16, color_bytes, bg_bytes)
                x += 16
            else:
                # framebuf only knows ASCII; anything else becomes '?'.
                self._draw_ascii(tft, char if is_ascii else '?', x, y, color, bg)
                x += 8

    def _load_glyph(self, char):
        """Return the 16x16 bitmap for a non-ASCII ``char``, or None.

        Looks in the cache first, then asks the server and waits (blocking,
        up to the ``_wait_for_font`` timeout) for the reply.  Any error is
        reported on the console and results in None so the caller can fall
        back to ``'?'``.
        """
        try:
            # NOTE(review): stock MicroPython may ignore the codec argument and
            # return UTF-8 bytes here, in which case len(gb) != 2 and every
            # CJK character falls back to '?' - verify on the target firmware.
            gb = char.encode('gb2312')
            if len(gb) != 2:
                return None
            code = struct.unpack('>H', gb)[0]

            cached = self.cache.get(code)
            if cached:
                return cached

            if not self.ws:
                print("WS not available for font fetch")
                return None

            hex_code = "0x{:04X}".format(code)
            print(f"Requesting font for {hex_code} ({char})")
            self.ws.send(f"GET_FONT:{hex_code}")

            data = self._wait_for_font(hex_code)
            if data:
                self._remember(code, data)
                print(f"Font loaded for {hex_code}")
            else:
                print(f"Font fetch timeout for {hex_code}")
            return data
        except Exception as e:
            print(f"Font error: {e}")
            return None

    def _remember(self, code, data):
        """Store a glyph in the cache, emptying the cache first when it is full."""
        if len(self.cache) >= self.max_cache:
            self.cache.clear()
        self.cache[code] = data

    def _parse_font_msg(self, msg):
        """Decode ``FONT_DATA:<hex code>:<hex bitmap>`` into ``(code, bytes)``.

        Returns None when the message is malformed or the bitmap is not
        exactly ``GLYPH_BYTES`` long, so bad server data never reaches the
        drawing code.
        """
        parts = msg.split(":")
        if len(parts) < 3:
            return None
        try:
            code = int(parts[1], 16)
            data = binascii.unhexlify(parts[2])
        except ValueError:
            return None
        if len(data) != self.GLYPH_BYTES:
            return None
        return code, data

    def _wait_for_font(self, target_hex_code):
        """Block for up to one second until the requested glyph arrives.

        Args:
            target_hex_code: Code that was requested, e.g. ``"0xB0A1"``.

        Returns:
            The glyph bitmap, or None on timeout, lost connection or when no
            WebSocket is configured.

        WARNING: messages read here are consumed.  Glyphs for other codes are
        cached; any other text message (for example a playback command) is
        printed and dropped because the client has no way to push it back.
        A shared message queue would be needed to avoid that.
        """
        if not self.ws:
            return None

        # MicroPython-only module; imported here because this file does not
        # import it at module level.
        import uselect

        try:
            wanted = int(target_hex_code, 16)
        except ValueError:
            return None

        # Build the poller once instead of on every loop iteration.
        poller = uselect.poll()
        poller.register(self.ws.sock, uselect.POLLIN)

        start = time.ticks_ms()
        while time.ticks_diff(time.ticks_ms(), start) < 1000:
            if not poller.poll(200):  # nothing readable within 200 ms
                continue

            try:
                msg = self.ws.recv()
            except Exception as e:
                print(f"Font recv error: {e}")
                continue

            if msg is None:
                # recv() reports both errors and a closed connection as None;
                # stop early once the client has dropped its socket.
                if getattr(self.ws, "sock", None) is None:
                    return None
                continue
            if not isinstance(msg, str):
                continue
            if not msg.startswith("FONT_DATA:"):
                print(f"Ignored msg during font fetch: {msg}")
                continue

            parsed = self._parse_font_msg(msg)
            if parsed is None:
                print("Malformed font message ignored")
                continue
            code, data = parsed
            if code == wanted:
                return data
            # A glyph we did not ask for right now: keep it for later.
            self._remember(code, data)

        return None

    def _draw_bitmap(self, tft, bitmap, x, y, w, h, color_bytes, bg_bytes):
        """Expand a 1-bpp bitmap to 16-bit pixels and blit it at (x, y).

        Bits are consumed most-significant first.  Extra bitmap bytes beyond
        ``w * h`` pixels are ignored and missing ones are painted with the
        background colour, so a wrong-sized bitmap can never overrun the
        pixel buffer.
        """
        rgb_buf = bytearray(w * h * 2)
        fg_hi, fg_lo = color_bytes[0], color_bytes[1]
        bg_hi, bg_lo = bg_bytes[0], bg_bytes[1]

        idx = 0
        for byte in bitmap[:(w * h) // 8]:
            for bit in range(7, -1, -1):
                if (byte >> bit) & 1:
                    rgb_buf[idx] = fg_hi
                    rgb_buf[idx + 1] = fg_lo
                else:
                    rgb_buf[idx] = bg_hi
                    rgb_buf[idx + 1] = bg_lo
                idx += 2

        # Only reached for a short bitmap: pad the rest with background.
        total = len(rgb_buf)
        while idx + 1 < total:
            rgb_buf[idx] = bg_hi
            rgb_buf[idx + 1] = bg_lo
            idx += 2

        tft.blit_buffer(rgb_buf, x, y, w, h)

    def _draw_ascii(self, tft, char, x, y, color, bg):
        """Draw one ASCII character in an 8x16 cell at (x, y).

        The glyph comes from ``framebuf``'s built-in 8x8 font and is placed
        4 pixels down so it sits in the vertical middle of the cell.
        """
        w, h = 8, 8
        buf = bytearray(w * h // 8)
        fb = framebuf.FrameBuffer(buf, w, h, framebuf.MONO_VLSB)
        fb.fill(0)
        fb.text(char, 0, 0, 1)

        bg_high, bg_low = (bg >> 8) & 0xFF, bg & 0xFF
        color_high, color_low = (color >> 8) & 0xFF, color & 0xFF

        # 8x16 cell pre-filled with the background colour (high byte first).
        rgb_buf = bytearray(bytes((bg_high, bg_low)) * (8 * 16))

        # MONO_VLSB: each byte is one column, bit 0 is the top pixel.
        for col in range(8):
            byte = buf[col]
            for row in range(8):
                if (byte >> row) & 1:
                    pos = ((row + 4) * 8 + col) * 2
                    rgb_buf[pos] = color_high
                    rgb_buf[pos + 1] = color_low

        tft.blit_buffer(rgb_buf, x, y, 8, 16)
|
||||
421
main.py
421
main.py
@@ -4,15 +4,195 @@ import math
|
||||
import struct
|
||||
import array
|
||||
import gc
|
||||
import network
|
||||
import st7789py as st7789
|
||||
from config import CURRENT_CONFIG
|
||||
from audio import AudioPlayer, Microphone
|
||||
from display import Display
|
||||
from websocket_client import WebSocketClient
|
||||
import uselect
|
||||
|
||||
# =============================================================================
|
||||
# Network configuration
|
||||
# NOTE(review): the Wi-Fi SSID and password below are hard-coded in a tracked
# source file - consider loading them from an untracked config instead.
# =============================================================================
|
||||
WIFI_SSID = "Tangledup-AI"
|
||||
WIFI_PASS = "djt12345678"
|
||||
# Change this to the IP address of the computer running the server
|
||||
SERVER_IP = "6.6.6.88"
|
||||
SERVER_PORT = 8000
|
||||
SERVER_URL = f"ws://{SERVER_IP}:{SERVER_PORT}/ws/audio"
|
||||
|
||||
def diagnose_wifi():
    """Print Wi-Fi diagnostics: interface state, current link and a network scan.

    The station interface is activated for the scan.  Each visible network is
    listed with its BSSID, channel and RSSI plus a coarse signal rating; the
    network whose SSID equals ``WIFI_SSID`` is tagged ``[TARGET]``.  A scan
    failure is reported on the console and never raised.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("WiFi DIAGNOSTIC INFORMATION")
    print(rule)

    wlan = network.WLAN(network.STA_IF)

    # Basic state
    print(f"WiFi Module Active: {wlan.active()}")
    print(f"Connection Status: {wlan.isconnected()}")

    if wlan.isconnected():
        print(f"Network Config: {wlan.ifconfig()}")
        print(f"Network SSID: {wlan.config('essid')}")
        print(f"Signal Strength: {wlan.status('rssi')} dBm")

    # Scan for available networks
    try:
        print("\nScanning for available networks...")
        wlan.active(True)
        time.sleep(1)

        networks = wlan.scan()
        print(f"Found {len(networks)} networks:")

        for net in networks:
            raw_ssid, raw_bssid, channel, rssi = net[0], net[1], net[2], net[3]

            # An SSID is arbitrary bytes; one that is not valid UTF-8 must not
            # abort the listing of the remaining networks.
            if raw_ssid:
                try:
                    ssid = raw_ssid.decode('utf-8')
                except Exception:
                    ssid = repr(raw_ssid)
            else:
                ssid = "Hidden"
            bssid = ':'.join('%02x' % b for b in raw_bssid)

            # Tag the network this device is configured to join.
            marker = " [TARGET]" if ssid == WIFI_SSID else ""

            print(f" {ssid}{marker}")
            print(f" BSSID: {bssid}, Channel: {channel}, RSSI: {rssi}dBm")

            # Coarse rating: the first threshold the RSSI exceeds wins.
            for floor, label in ((-50, "Excellent"), (-60, "Good"), (-70, "Fair")):
                if rssi > floor:
                    signal_desc = label
                    break
            else:
                signal_desc = "Weak"

            print(f" Signal: {signal_desc}")
            print("")

    except Exception as e:
        print(f"Network scan failed: {e}")

    print(rule + "\n")
|
||||
|
||||
|
||||
def connect_wifi(max_retries=3):
    """Connect to the configured Wi-Fi network with retries.

    Args:
        max_retries: Maximum number of connection attempts (default 3).

    Returns:
        bool: True as soon as the station interface reports a connection;
        False if the interface could not be restarted or every attempt failed
        (the interface is switched off in that case).
    """
    wlan = network.WLAN(network.STA_IF)

    # Power-cycle the interface so every call starts from a clean state.
    try:
        wlan.active(False)
        time.sleep(1)
        wlan.active(True)
        time.sleep(1)
    except Exception as e:
        print(f"WiFi module initialization error: {e}")
        return False

    for attempt in range(max_retries):
        has_more_attempts = attempt < max_retries - 1
        try:
            print(f"WiFi connection attempt {attempt + 1}/{max_retries}")

            if wlan.isconnected():
                print('Already connected to WiFi')
                print('Network config:', wlan.ifconfig())
                return True

            print(f'Connecting to WiFi {WIFI_SSID}...')
            wlan.connect(WIFI_SSID, WIFI_PASS)

            # Wait up to 20 s per attempt.  ticks_ms/ticks_diff are used, as
            # elsewhere in this file, so the timeout does not depend on the
            # wall clock.
            started = time.ticks_ms()
            while not wlan.isconnected():
                if time.ticks_diff(time.ticks_ms(), started) > 20000:
                    print("WiFi connection timeout!")
                    break
                time.sleep(0.5)
                print(".", end="")

            print("")  # end the progress-dot line

            if wlan.isconnected():
                print('WiFi connected successfully!')
                print('Network config:', wlan.ifconfig())
                return True

            print(f"Connection attempt {attempt + 1} failed")
            if has_more_attempts:
                print("Resetting WiFi module for retry...")
                wlan.disconnect()
                time.sleep(2)

        except OSError as e:
            print(f"WiFi connection error on attempt {attempt + 1}: {e}")
            if "Wifi Internal State Error" in str(e):
                print("Detected internal state error, resetting WiFi module...")
                try:
                    wlan.active(False)
                    time.sleep(2)
                    wlan.active(True)
                    time.sleep(1)
                except Exception as reset_err:
                    # Best effort: the next attempt will surface a real fault.
                    print(f"WiFi reset failed: {reset_err}")

            if has_more_attempts:
                print("Retrying in 3 seconds...")
                time.sleep(3)

        except Exception as e:
            print(f"Unexpected error on attempt {attempt + 1}: {e}")
            if has_more_attempts:
                time.sleep(2)

    print("All WiFi connection attempts failed!")
    try:
        wlan.active(False)  # switch the radio off to save power
    except Exception as e:
        print(f"WiFi power-down failed: {e}")
    return False
|
||||
|
||||
# =============================================================================
|
||||
# 硬件引脚配置 (从 config.py 获取)
|
||||
# =============================================================================
|
||||
|
||||
def print_nice_asr(text, display=None):
    """Print an ASR result in a banner and, if a panel is attached, show it on screen."""
    banner = "*" * 40
    print("\n" + banner)
    print(" ASR RESULT:")
    print(f" {text}")
    print(banner + "\n")

    if not (display and display.tft):
        return

    # Clear only the middle band (y = 40..200) so the status bar at the top
    # and the audio-level bar at the bottom are left untouched, then draw the
    # text at the top of that band.
    display.fill_rect(0, 40, 240, 160, st7789.BLACK)
    display.text(text, 0, 40, st7789.WHITE)
|
||||
|
||||
def main():
|
||||
print("\n" + "="*40)
|
||||
print("AUDIO & MIC DIAGNOSTIC V5 (Modular & Clean)")
|
||||
@@ -35,7 +215,44 @@ def main():
|
||||
speaker = AudioPlayer()
|
||||
if speaker.i2s:
|
||||
# 默认播放马里奥
|
||||
speaker.play_mario()
|
||||
# speaker.play_mario()
|
||||
|
||||
# 播放简单方波 (1kHz, 1秒)
|
||||
# 直接在 main.py 中实现分块播放,避免因 audio.py 未同步导致的 MemoryError
|
||||
print("Playing 1kHz square wave...")
|
||||
try:
|
||||
import struct
|
||||
|
||||
# 1. 参数设置
|
||||
sr = 24000 # 默认采样率
|
||||
if hasattr(speaker, 'config') and speaker.config:
|
||||
sr = speaker.config.get('sample_rate', 24000)
|
||||
freq = 1000
|
||||
duration = 1000 # ms
|
||||
vol = 10000 # 音量 (max 32767)
|
||||
|
||||
# 2. 准备缓冲区 (只生成一小段,循环播放)
|
||||
# 1kHz @ 24kHz -> 24 samples/cycle
|
||||
period = sr // freq
|
||||
# 生成约 1000 字节的 buffer (包含整数个周期)
|
||||
cycles_in_buf = 10
|
||||
buf = bytearray(period * cycles_in_buf * 4) # 16bit stereo = 4 bytes/frame
|
||||
|
||||
# 3. 填充方波数据
|
||||
for i in range(period * cycles_in_buf):
|
||||
# 方波逻辑
|
||||
sample = vol if (i % period) < (period // 2) else -vol
|
||||
# 写入左右声道 (Little Endian, 16-bit signed)
|
||||
struct.pack_into('<hh', buf, i*4, sample, sample)
|
||||
|
||||
# 4. 循环写入 I2S
|
||||
t_end = time.ticks_add(time.ticks_ms(), duration)
|
||||
while time.ticks_diff(t_end, time.ticks_ms()) > 0:
|
||||
speaker.i2s.write(buf)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Tone error: {e}")
|
||||
|
||||
else:
|
||||
print("!!! Speaker initialization failed")
|
||||
|
||||
@@ -57,7 +274,49 @@ def main():
|
||||
|
||||
# 录音状态变量
|
||||
is_recording = False
|
||||
recorded_chunks = []
|
||||
|
||||
# WebSocket 连接
|
||||
ws = None
|
||||
|
||||
# 定义连接函数
|
||||
def connect_ws():
|
||||
nonlocal ws
|
||||
# Reset existing connection object to ensure clean slate
|
||||
try:
|
||||
if ws:
|
||||
ws.close()
|
||||
except:
|
||||
pass
|
||||
ws = None
|
||||
|
||||
try:
|
||||
print(f"Connecting to WebSocket Server: {SERVER_URL}")
|
||||
ws = WebSocketClient(SERVER_URL)
|
||||
print("WebSocket connected successfully!")
|
||||
|
||||
# Pass WebSocket to display for font loading
|
||||
if display:
|
||||
display.set_ws(ws)
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"WebSocket connection failed: {e}")
|
||||
return False
|
||||
|
||||
# 先运行WiFi诊断
|
||||
print("Running WiFi diagnostics...")
|
||||
diagnose_wifi()
|
||||
|
||||
# 尝试连接WiFi
|
||||
print("Starting WiFi connection process...")
|
||||
if connect_wifi(max_retries=3):
|
||||
print("WiFi connected successfully!")
|
||||
connect_ws()
|
||||
else:
|
||||
print("WiFi connection failed after all attempts!")
|
||||
print("Continuing in offline mode without WebSocket functionality...")
|
||||
print("You can still use the device for local audio recording and visualization.")
|
||||
|
||||
|
||||
# 调试:打印一次 Boot 键状态
|
||||
print(f"Boot Button Initial State: {boot_btn.value()}")
|
||||
@@ -86,68 +345,151 @@ def main():
|
||||
if not is_recording:
|
||||
print("\n>>> Start Recording (Boot Pressed)...")
|
||||
is_recording = True
|
||||
recorded_chunks = []
|
||||
if display.tft:
|
||||
print(">>> Filling Screen WHITE")
|
||||
display.fill(st7789.WHITE)
|
||||
else:
|
||||
print(">>> Display TFT is None!")
|
||||
|
||||
# 录音
|
||||
# 尝试重连 WS
|
||||
if ws is None or not ws.is_connected():
|
||||
print(">>> WS not connected, trying to reconnect...")
|
||||
connect_ws()
|
||||
|
||||
# 发送开始录音指令
|
||||
if ws and ws.is_connected():
|
||||
try:
|
||||
ws.send("START_RECORDING")
|
||||
except Exception as e:
|
||||
print(f"WS Send Error: {e}")
|
||||
ws = None # Disconnect on error
|
||||
else:
|
||||
print(">>> Warning: No WebSocket connection! Audio will be discarded.")
|
||||
|
||||
# 录音并流式传输
|
||||
if mic.i2s:
|
||||
num_read = mic.readinto(read_buf)
|
||||
if num_read > 0:
|
||||
try:
|
||||
recorded_chunks.append(bytes(read_buf[:num_read]))
|
||||
except MemoryError:
|
||||
print("Memory Full!")
|
||||
if ws and ws.is_connected():
|
||||
try:
|
||||
# 发送二进制数据
|
||||
ws.send(read_buf[:num_read], opcode=2)
|
||||
|
||||
# 检查是否有回传的 ASR 结果 (非阻塞)
|
||||
poller = uselect.poll()
|
||||
poller.register(ws.sock, uselect.POLLIN)
|
||||
events = poller.poll(0) # 0 = return immediately
|
||||
if events:
|
||||
msg = ws.recv()
|
||||
if isinstance(msg, str) and msg.startswith("ASR:"):
|
||||
print_nice_asr(msg[4:], display)
|
||||
|
||||
except Exception as e:
|
||||
print(f"WS Send/Recv Error: {e}")
|
||||
# 如果发送失败,视为断开
|
||||
try:
|
||||
ws.close()
|
||||
except:
|
||||
pass
|
||||
ws = None
|
||||
else:
|
||||
# 如果没有 WS,就不保存了,避免内存溢出
|
||||
pass
|
||||
|
||||
continue # 跳过可视化逻辑
|
||||
|
||||
# === 按键释放处理 ===
|
||||
elif is_recording:
|
||||
print(f"\n>>> Stop Recording. Captured {len(recorded_chunks)} chunks.")
|
||||
print(f"\n>>> Stop Recording.")
|
||||
is_recording = False
|
||||
|
||||
if display.tft:
|
||||
display.init_ui()
|
||||
|
||||
# 播放录音
|
||||
if speaker.i2s and len(recorded_chunks) > 0:
|
||||
print(">>> Playing...")
|
||||
# 停止录音并等待回放
|
||||
if ws:
|
||||
try:
|
||||
cfg = speaker.config
|
||||
# 重新初始化 Speaker (16kHz Mono 16-bit) 以匹配 Mic 数据
|
||||
speaker.i2s.deinit()
|
||||
speaker.i2s = machine.I2S(
|
||||
0,
|
||||
sck=machine.Pin(cfg['bck']),
|
||||
ws=machine.Pin(cfg['ws']),
|
||||
sd=machine.Pin(cfg['sd']),
|
||||
mode=machine.I2S.TX,
|
||||
bits=16,
|
||||
format=machine.I2S.MONO,
|
||||
rate=16000,
|
||||
ibuf=20000,
|
||||
)
|
||||
print(">>> Sending STOP & Waiting for playback...")
|
||||
ws.send("STOP_RECORDING")
|
||||
|
||||
# 播放数据
|
||||
for chunk in recorded_chunks:
|
||||
# 32-bit Mono -> 16-bit Mono (取高16位)
|
||||
# chunk 是 bytes, 转为 array('h') 方便访问 16-bit word
|
||||
# 32-bit 数据: LowWord, HighWord
|
||||
# 我们需要 HighWord
|
||||
arr = array.array('h', chunk)
|
||||
samples = arr[1::2]
|
||||
speaker.i2s.write(samples)
|
||||
# 重新初始化 Speaker (16kHz Mono 16-bit)
|
||||
if speaker.i2s:
|
||||
cfg = speaker.config
|
||||
speaker.i2s.deinit()
|
||||
speaker.i2s = machine.I2S(
|
||||
0,
|
||||
sck=machine.Pin(cfg['bck']),
|
||||
ws=machine.Pin(cfg['ws']),
|
||||
sd=machine.Pin(cfg['sd']),
|
||||
mode=machine.I2S.TX,
|
||||
bits=16,
|
||||
format=machine.I2S.MONO,
|
||||
rate=16000,
|
||||
ibuf=40000,
|
||||
)
|
||||
|
||||
# 接收回放循环
|
||||
playback_timeout = 5000 # 5秒无数据则退出
|
||||
last_data_time = time.ticks_ms()
|
||||
|
||||
while True:
|
||||
# Check for data with timeout
|
||||
poller = uselect.poll()
|
||||
poller.register(ws.sock, uselect.POLLIN)
|
||||
events = poller.poll(100) # 100ms wait
|
||||
|
||||
if events:
|
||||
msg = ws.recv()
|
||||
last_data_time = time.ticks_ms()
|
||||
|
||||
if isinstance(msg, str):
|
||||
if msg == "START_PLAYBACK":
|
||||
print(">>> Server starting playback stream...")
|
||||
continue
|
||||
elif msg == "STOP_PLAYBACK":
|
||||
print(">>> Server finished playback.")
|
||||
break
|
||||
elif msg.startswith("ASR:"):
|
||||
print_nice_asr(msg[4:], display)
|
||||
|
||||
elif isinstance(msg, bytes):
|
||||
# 播放接收到的音频数据
|
||||
if speaker.i2s:
|
||||
# 使用 try-except 防止 write 阻塞导致的问题
|
||||
try:
|
||||
speaker.i2s.write(msg)
|
||||
except Exception as e:
|
||||
print(f"I2S Write Error: {e}")
|
||||
|
||||
elif msg is None:
|
||||
print("WS Connection closed or error (recv returned None)")
|
||||
try:
|
||||
ws.close()
|
||||
except:
|
||||
pass
|
||||
ws = None
|
||||
break
|
||||
else:
|
||||
# No data received in this poll window
|
||||
if time.ticks_diff(time.ticks_ms(), last_data_time) > playback_timeout:
|
||||
print("Playback timeout - no data received for 5 seconds")
|
||||
break
|
||||
|
||||
# Feed watchdog or do other small tasks if needed
|
||||
# time.sleep(0.01)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Playback error: {e}")
|
||||
print(f"Playback loop error: {e}")
|
||||
try:
|
||||
ws.close()
|
||||
except:
|
||||
pass
|
||||
ws = None
|
||||
|
||||
# 恢复 Speaker 原始配置
|
||||
if speaker.i2s: speaker.i2s.deinit()
|
||||
speaker._init_audio()
|
||||
|
||||
recorded_chunks = []
|
||||
gc.collect()
|
||||
|
||||
# === 原有的可视化逻辑 ===
|
||||
@@ -178,10 +520,7 @@ def main():
|
||||
last_print = time.ticks_ms()
|
||||
|
||||
if display.tft:
|
||||
# 调整缩放比例,让显示更敏感
|
||||
# 你的日志显示安静时 Max ~2000-3000, 说话时 Max ~40000
|
||||
# 我们可以把 Max 40000 映射到满格
|
||||
|
||||
# 调整缩放比例
|
||||
bar_height = int((max_val / 40000) * 200)
|
||||
if bar_height > 200: bar_height = 200
|
||||
if bar_height < 0: bar_height = 0
|
||||
|
||||
178
websocket_client.py
Normal file
178
websocket_client.py
Normal file
@@ -0,0 +1,178 @@
|
||||
import usocket as socket
|
||||
import ubinascii
|
||||
import uos
|
||||
|
||||
class WebSocketError(Exception):
    """Raised when the WebSocket handshake fails or the connection is closed."""
|
||||
|
||||
class WebSocketClient:
    """Minimal blocking WebSocket client (``ws://`` only) for MicroPython.

    Supports unfragmented text and binary frames, answers pings and handles
    the close frame.  The ``Sec-WebSocket-Accept`` header of the handshake
    response is not verified and fragmented messages are not reassembled.
    """

    def __init__(self, uri, timeout=5):
        """Connect to ``uri`` immediately.

        Args:
            uri: Address of the form ``ws://host[:port][/path]``.
            timeout: Socket timeout in seconds for connect, read and write.

        Raises:
            WebSocketError: If the URI is not ``ws://`` or the handshake fails.
            OSError: If name resolution or the TCP connection fails.
        """
        self.sock = None
        self.uri = uri
        self.timeout = timeout
        self.connect()

    def connect(self):
        """Open the TCP connection and perform the HTTP upgrade handshake.

        On any failure the socket is closed again and ``self.sock`` is left as
        None, so ``is_connected()`` stays truthful and nothing leaks.
        """
        uri = self.uri
        # Explicit check instead of ``assert``, which can be compiled out.
        if not uri.startswith("ws://"):
            raise WebSocketError("Unsupported URI (expected ws://): " + uri)

        uri = uri[5:]
        if "/" in uri:
            host, path = uri.split("/", 1)
        else:
            host, path = uri, ""
        path = "/" + path

        if ":" in host:
            host, port = host.split(":")
            port = int(port)
        else:
            port = 80

        print(f"Connecting to {host}:{port}{path}...")
        # Resolve before creating the socket so a DNS failure leaks nothing.
        addr = socket.getaddrinfo(host, port)[0][-1]
        print(f"Resolved address: {addr}")

        self.sock = socket.socket()
        try:
            self.sock.settimeout(self.timeout)
            try:
                self.sock.connect(addr)
            except OSError as e:
                print(f"Socket connect failed: {e}")
                if e.args and e.args[0] == 113:
                    print("Hint: Check firewall settings on server or if server is running.")
                raise
            self._handshake(host, port, path)
        except Exception:
            try:
                self.close()
            except OSError:
                pass  # the original failure is the one worth reporting
            raise

        print("WebSocket connected!")

    def _handshake(self, host, port, path):
        """Send the upgrade request and wait for the ``101`` response."""
        key = ubinascii.b2a_base64(uos.urandom(16)).strip()

        req = "GET {} HTTP/1.1\r\n".format(path)
        req += "Host: {}:{}\r\n".format(host, port)
        req += "Connection: Upgrade\r\n"
        req += "Upgrade: websocket\r\n"
        req += "Sec-WebSocket-Key: {}\r\n".format(key.decode())
        req += "Sec-WebSocket-Version: 13\r\n"
        req += "\r\n"
        self.sock.write(req.encode())

        # Read byte by byte so nothing past the header terminator (the first
        # WebSocket frame) is consumed from the socket.
        header = b""
        while not header.endswith(b"\r\n\r\n"):
            chunk = self.sock.read(1)
            if not chunk:
                raise WebSocketError("Connection closed during handshake")
            header += chunk

        if b" 101 " not in header:
            raise WebSocketError("Handshake failed: " + header.decode())

    def is_connected(self):
        """Return True while a socket is held (no liveness probe is done)."""
        return self.sock is not None

    def send(self, data, opcode=1):  # 1=Text, 2=Binary
        """Send one masked, unfragmented frame.

        Args:
            data: ``str`` (encoded as UTF-8) or a bytes-like object.
            opcode: WebSocket opcode; 1 = text, 2 = binary.

        Raises:
            WebSocketError: If the connection is already closed.
        """
        if not self.sock:
            print("WebSocket is not connected (send called on closed socket)")
            raise WebSocketError("Connection closed")

        if isinstance(data, str):
            data = data.encode('utf-8')
        length = len(data)

        header = bytearray()
        header.append(0x80 | opcode)  # FIN + opcode
        if length < 126:
            header.append(0x80 | length)  # mask bit + 7-bit length
        elif length < 65536:
            header.append(0x80 | 126)
            header.extend(length.to_bytes(2, 'big'))
        else:
            header.append(0x80 | 127)
            header.extend(length.to_bytes(8, 'big'))

        # Client-to-server frames must be masked with a fresh random key.
        mask = uos.urandom(4)
        header.extend(mask)

        masked_data = bytearray(length)
        for i in range(length):
            masked_data[i] = data[i] ^ mask[i & 3]

        self.sock.write(header)
        self.sock.write(masked_data)

    def recv(self):
        """Receive the next data frame.

        Returns:
            ``str`` for a text frame, ``bytes`` for a binary frame, the raw
            payload for any other opcode, or None when the peer closed the
            connection, a read timed out or an error occurred.  Pings are
            answered with a pong and the next frame is read instead.
        """
        try:
            while True:
                header = self.sock.read(2)
                if not header or len(header) < 2:
                    return None

                opcode = header[0] & 0x0F
                masked = header[1] & 0x80
                length = header[1] & 0x7F

                if length == 126:
                    ext = self.sock.read(2)
                    if not ext or len(ext) < 2:
                        return None
                    length = int.from_bytes(ext, 'big')
                elif length == 127:
                    ext = self.sock.read(8)
                    if not ext or len(ext) < 8:
                        return None
                    length = int.from_bytes(ext, 'big')

                mask_key = None
                if masked:
                    mask_key = self.sock.read(4)
                    if not mask_key or len(mask_key) < 4:
                        return None

                # Read the payload straight into one pre-sized buffer.
                data = bytearray(length)
                view = memoryview(data)
                pos = 0
                while pos < length:
                    read_len = self.sock.readinto(view[pos:])
                    if not read_len:  # 0 = closed, None = nothing available
                        return None
                    pos += read_len

                if mask_key:
                    for i in range(length):
                        data[i] ^= mask_key[i & 3]

                if opcode == 1:  # Text
                    return str(data, 'utf-8')
                if opcode == 2:  # Binary
                    # Immutable bytes so callers testing isinstance(msg, bytes)
                    # recognise the frame.
                    return bytes(data)
                if opcode == 8:  # Close
                    self.close()
                    return None
                if opcode == 9:  # Ping -> Pong, then read the next frame
                    self.send(data, opcode=10)
                    continue

                return data

        except Exception as e:
            # Timeouts are expected when callers poll; keep them quiet.
            if "ETIMEDOUT" not in str(e) and "110" not in str(e):
                print(f"WS Recv Error: {e}")
            return None

    def close(self):
        """Close the socket; the client reports disconnected even if close fails."""
        sock, self.sock = self.sock, None
        if sock:
            sock.close()
|
||||
1
websocket_server/.env
Normal file
1
websocket_server/.env
Normal file
@@ -0,0 +1 @@
|
||||
DASHSCOPE_API_KEY=sk-a294f382488d46a1aa0d7cd8e750729b
|
||||
BIN
websocket_server/GB2312-16.bin
Normal file
BIN
websocket_server/GB2312-16.bin
Normal file
Binary file not shown.
31
websocket_server/README.md
Normal file
31
websocket_server/README.md
Normal file
@@ -0,0 +1,31 @@
|
||||
# WebSocket Audio Server
|
||||
|
||||
This is a FastAPI server that receives audio from an ESP32 via WebSocket, saves it, processes it (converts 32-bit to 16-bit), and sends it back for playback.
|
||||
|
||||
## Installation
|
||||
|
||||
1. Install dependencies:
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
1. Start the server:
|
||||
```bash
|
||||
python server.py
|
||||
```
|
||||
Or:
|
||||
```bash
|
||||
uvicorn server:app --host 0.0.0.0 --port 8000
|
||||
```
|
||||
|
||||
2. Update the IP address in `main.py` on your ESP32 to match your computer's IP address.
|
||||
Look for `SERVER_IP` variable.
|
||||
|
||||
## Features
|
||||
|
||||
- Receives raw audio stream from ESP32.
|
||||
- Saves raw audio to `received_audio.raw`.
|
||||
- Converts 32-bit audio (from ICS-43434) to 16-bit audio (for MAX98357A).
|
||||
- Streams processed audio back to ESP32 for playback.
|
||||
BIN
websocket_server/__pycache__/server.cpython-312.pyc
Normal file
BIN
websocket_server/__pycache__/server.cpython-312.pyc
Normal file
Binary file not shown.
BIN
websocket_server/__pycache__/server.cpython-313.pyc
Normal file
BIN
websocket_server/__pycache__/server.cpython-313.pyc
Normal file
Binary file not shown.
127
websocket_server/generate_font.py
Normal file
127
websocket_server/generate_font.py
Normal file
@@ -0,0 +1,127 @@
|
||||
import struct
|
||||
import freetype
|
||||
import os
|
||||
|
||||
# Font file and output file.
# Both paths are resolved relative to this script instead of a hard-coded
# user home directory, so the generator runs from any checkout:
#   <script dir>/GB2312.ttf        -> input TrueType font
#   <script dir>/../GB2312-16.bin  -> generated bitmap font
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
FONT_FILE = os.path.join(_SCRIPT_DIR, "GB2312.ttf")
OUTPUT_FILE = os.path.join(os.path.dirname(_SCRIPT_DIR), "GB2312-16.bin")

# Font size (16x16)
FONT_SIZE = 16
|
||||
|
||||
def _pack_glyph(bitmap, size):
    """Centre a FreeType 1-bit-per-pixel bitmap in a ``size`` x ``size`` cell.

    The source bitmap is expected to come from a load with
    ``FT_LOAD_TARGET_MONO`` (8 pixels per byte, most significant bit = left
    pixel, ``bitmap.pitch`` bytes per row). The returned ``bytearray`` uses
    the same bit order, row by row, with ``ceil(size / 8)`` bytes per row.
    Pixels that would fall outside the cell are dropped.
    """
    width = bitmap.width
    rows = bitmap.rows
    pitch = bitmap.pitch
    src = bitmap.buffer  # fetch once; reused for every pixel below

    row_bytes = (size + 7) // 8
    cell = bytearray(row_bytes * size)

    # Simple centring of the glyph's bounding box inside the cell.
    x_off = (size - width) // 2
    y_off = (size - rows) // 2

    for row in range(rows):
        dst_row = row + y_off
        if not 0 <= dst_row < size:
            continue
        src_start = row * pitch
        for col in range(width):
            dst_col = col + x_off
            if not 0 <= dst_col < size:
                continue
            if (src[src_start + (col >> 3)] >> (7 - (col & 7))) & 1:
                cell[dst_row * row_bytes + (dst_col >> 3)] |= 1 << (7 - (dst_col & 7))
    return cell


def create_gb2312_font():
    """Render every GB2312 code point into a fixed-record bitmap font file.

    Reads the TrueType font at ``FONT_FILE`` and writes ``OUTPUT_FILE`` as
    94 x 94 consecutive records (areas 0xA1-0xFE, indexes 0xA1-0xFE), each a
    ``FONT_SIZE`` x ``FONT_SIZE`` monochrome bitmap (32 bytes for 16 x 16).
    Exactly one record is written per code point, so record ``n`` always
    sits at byte offset ``n * record_size``. Code points that cannot be
    decoded, have no glyph in the font, or fail to render get an all-zero
    record.

    Returns ``None``. A font that cannot be loaded is reported on stdout and
    nothing is written.
    """
    try:
        face = freetype.Face(FONT_FILE)
    except Exception as e:
        print(f"Error loading font: {e}")
        return

    face.set_pixel_sizes(FONT_SIZE, FONT_SIZE)

    print(f"Generating GB2312 font file: {OUTPUT_FILE}")

    record_size = FONT_SIZE * ((FONT_SIZE + 7) // 8)
    empty_char = bytes(record_size)
    load_flags = freetype.FT_LOAD_RENDER | freetype.FT_LOAD_TARGET_MONO

    processed = 0  # code points visited (one record each)
    rendered = 0   # code points that produced a real glyph

    with open(OUTPUT_FILE, 'wb') as f:
        for area in range(0xA1, 0xFF):
            for index in range(0xA1, 0xFF):
                record = empty_char
                try:
                    char = bytes([area, index]).decode('gb2312')
                    # load_char() silently renders the ".notdef" box for
                    # characters the font lacks, so check the charmap first
                    # and keep the blank record in that case.
                    if face.get_char_index(ord(char)):
                        face.load_char(char, load_flags)
                        record = _pack_glyph(face.glyph.bitmap, FONT_SIZE)
                        rendered += 1
                except Exception:
                    # Unassigned GB2312 code point or FreeType failure.
                    record = empty_char

                # Written outside the try so a failure can never produce
                # zero or two records for one code point.
                f.write(record)
                processed += 1

                if processed % 1000 == 0:
                    print(f"Processed {processed} characters...")

    print(f"Rendered {rendered} of {processed} code points.")
    print(f"Done! Generated {OUTPUT_FILE} with size {os.path.getsize(OUTPUT_FILE)} bytes.")


if __name__ == "__main__":
    create_gb2312_font()
|
||||
BIN
websocket_server/received_audio.mp3
Normal file
BIN
websocket_server/received_audio.mp3
Normal file
Binary file not shown.
BIN
websocket_server/received_audio.raw
Normal file
BIN
websocket_server/received_audio.raw
Normal file
Binary file not shown.
6
websocket_server/requirements.txt
Normal file
6
websocket_server/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
websockets
|
||||
pydub
|
||||
dashscope
|
||||
python-dotenv
|
||||
277
websocket_server/server.py
Normal file
277
websocket_server/server.py
Normal file
@@ -0,0 +1,277 @@
|
||||
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
||||
import uvicorn
|
||||
import asyncio
|
||||
import os
|
||||
import subprocess
|
||||
import struct
|
||||
from dotenv import load_dotenv
|
||||
import dashscope
|
||||
from dashscope.audio.asr import Recognition, RecognitionCallback, RecognitionResult
|
||||
import json
|
||||
|
||||
# Load environment variables
load_dotenv()
dashscope.api_key = os.getenv("DASHSCOPE_API_KEY")

app = FastAPI()

# Stores the audio data received from the client
audio_buffer = bytearray()
RECORDING_RAW_FILE = "received_audio.raw"
RECORDING_MP3_FILE = "received_audio.mp3"
VOLUME_GAIN = 10.0 # amplification factor
FONT_FILE = "GB2312-16.bin"
|
||||
|
||||
class MyRecognitionCallback(RecognitionCallback):
    """Forward DashScope recognition results to a WebSocket client.

    Results are sent as ``ASR:<text>`` text frames. Sending is scheduled onto
    ``loop`` with ``asyncio.run_coroutine_threadsafe`` (presumably because
    the SDK invokes these callbacks off the event-loop thread - confirm
    against the DashScope SDK).
    """

    def __init__(self, websocket: WebSocket, loop: asyncio.AbstractEventLoop):
        self.websocket = websocket
        self.loop = loop

    def on_open(self) -> None:
        print("ASR Session started")

    def on_close(self) -> None:
        print("ASR Session closed")

    def on_event(self, result: RecognitionResult) -> None:
        """Print the recognised sentence and schedule it for delivery."""
        sentence = result.get_sentence()
        if not sentence:
            return

        text = sentence['text']
        print(f"ASR Result: {text}")

        # Send the recognition result back to the client.
        coro = self.websocket.send_text(f"ASR:{text}")
        try:
            future = asyncio.run_coroutine_threadsafe(coro, self.loop)
        except Exception as e:
            # Scheduling failed (e.g. the loop is already closed); close the
            # coroutine so it is not reported as "never awaited".
            coro.close()
            print(f"Failed to send ASR result to client: {e}")
            return

        # The send itself runs later on the event loop, so its failure can
        # only be observed through the returned future.
        future.add_done_callback(self._report_send_failure)

    @staticmethod
    def _report_send_failure(future) -> None:
        """Log the exception, if any, raised by a scheduled send."""
        if future.cancelled():
            return
        error = future.exception()
        if error is not None:
            print(f"Failed to send ASR result to client: {error}")
|
||||
|
||||
def process_chunk_32_to_16(chunk_bytes, gain=1.0):
    """Reduce little-endian 32-bit frames to amplified 16-bit PCM.

    For every complete 4-byte frame the upper two bytes are read as a signed
    16-bit little-endian sample, multiplied by ``gain``, truncated to an int,
    clamped to the int16 range and re-packed little-endian. A trailing
    partial frame is ignored. Returns a ``bytearray``.
    """
    pcm = bytearray()
    frame_count = len(chunk_bytes) // 4
    for frame in range(frame_count):
        # Bytes 2 and 3 of the frame hold the most significant 16 bits.
        (value,) = struct.unpack_from('<h', chunk_bytes, frame * 4 + 2)
        # Amplify, then clamp so loud input saturates instead of wrapping.
        scaled = max(-32768, min(32767, int(value * gain)))
        pcm += struct.pack('<h', scaled)
    return pcm
|
||||
|
||||
def _start_asr(websocket, loop):
    """Start a realtime DashScope recognition session.

    Returns the started ``Recognition`` object, or ``None`` when start-up
    fails (the failure is logged, not raised).
    """
    try:
        callback = MyRecognitionCallback(websocket, loop)
        recognition = Recognition(
            model='paraformer-realtime-v2',
            format='pcm',
            sample_rate=16000,
            callback=callback
        )
        recognition.start()
        print("DashScope ASR started")
        return recognition
    except Exception as e:
        print(f"Failed to start ASR: {e}")
        return None


def _stop_asr(recognition):
    """Stop a recognition session, logging errors; no-op for ``None``."""
    if recognition is None:
        return
    try:
        recognition.stop()
        print("DashScope ASR stopped")
    except Exception as e:
        print(f"Error stopping ASR: {e}")


async def _convert_raw_to_mp3():
    """Encode ``RECORDING_RAW_FILE`` to ``RECORDING_MP3_FILE`` via ffmpeg.

    The ffmpeg command line is used (rather than pydub) to avoid the Python
    3.13 ``audioop`` problem. It runs as an asyncio subprocess so the event
    loop is not blocked. Failures are logged, never raised.
    """
    # ffmpeg -y -f s16le -ar 16000 -ac 1 -i received_audio.raw received_audio.mp3
    cmd = [
        "ffmpeg",
        "-y",  # overwrite the output file
        "-f", "s16le",  # input format: signed 16-bit little endian
        "-ar", "16000",  # input sample rate
        "-ac", "1",  # input channel count
        "-i", RECORDING_RAW_FILE,
        RECORDING_MP3_FILE
    ]
    print(f"Running command: {' '.join(cmd)}")

    try:
        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE
        )
        _, stderr = await process.communicate()
    except FileNotFoundError:
        print("Error: ffmpeg not found. Please install ffmpeg.")
        return
    except Exception as e:
        print(f"Error converting to MP3: {e}")
        return

    if process.returncode != 0:
        print(f"Error converting to MP3: ffmpeg exited with status {process.returncode}")
        print(f"FFmpeg stderr: {stderr.decode(errors='replace')}")
        return

    print(f"Saved MP3 to {RECORDING_MP3_FILE}")


async def _send_playback(websocket, pcm):
    """Stream ``pcm`` to the client between START/STOP_PLAYBACK markers."""
    print("Sending audio back...")
    await websocket.send_text("START_PLAYBACK")

    chunk_size = 4096
    for start in range(0, len(pcm), chunk_size):
        await websocket.send_bytes(pcm[start:start + chunk_size])
        # Small delay so the ESP32 receive buffer is not overrun.
        # 4096 bytes / 32000 bytes/s (16 kHz * 2 bytes) = ~0.128 s of audio,
        # so 0.04 s is roughly 3x real time: keeps the device buffered
        # without congesting it.
        await asyncio.sleep(0.04)

    await websocket.send_text("STOP_PLAYBACK")
    print("Audio sent back finished.")


def _find_font_file():
    """Return the font path: script directory, then its parent, then CWD."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    for candidate in (
        os.path.join(script_dir, FONT_FILE),
        os.path.join(script_dir, "..", FONT_FILE),
    ):
        if os.path.exists(candidate):
            return candidate
    return FONT_FILE


async def _handle_font_request(websocket, text):
    """Answer ``GET_FONT:<hex code>`` (e.g. ``GET_FONT:0xA1A1``).

    On success sends ``FONT_DATA:<hex code>:<64 hex digits>`` holding the
    32-byte glyph record. Invalid codes, a missing font file and short reads
    are logged and produce no reply.
    """
    try:
        print(f"Font Request Received: {text}")
        hex_code = text.split(":")[1]
        code = int(hex_code, 16)

        # GB2312 codes span 0xA1A1 - 0xFEFE.
        # Area number = high byte - 0xA0, position = low byte - 0xA0; both
        # must be within 1..94 or the offset would address another glyph.
        area = (code >> 8) - 0xA0
        index = (code & 0xFF) - 0xA0
        if not (1 <= area <= 94 and 1 <= index <= 94):
            print(f"Invalid GB2312 code: {hex_code} (Area: {area}, Index: {index})")
            return

        offset = ((area - 1) * 94 + (index - 1)) * 32

        # The file is opened per request for simplicity; cache the contents
        # if request volume ever makes this matter.
        font_path = _find_font_file()
        if not os.path.exists(font_path):
            print(f"Font file not found: {font_path}")
            return

        print(f"Reading font from: {font_path} (Offset: {offset})")
        with open(font_path, "rb") as f:
            f.seek(offset)
            font_data = f.read(32)

        if len(font_data) != 32:
            print(f"Error: Read {len(font_data)} bytes for font data (expected 32)")
            return

        response = f"FONT_DATA:{hex_code}:{font_data.hex()}"
        print(f"Sending Font Response: {response[:30]}...")
        await websocket.send_text(response)
    except Exception as e:
        print(f"Error handling GET_FONT: {e}")


@app.websocket("/ws/audio")
async def websocket_endpoint(websocket: WebSocket):
    """Serve one ESP32 client on ``/ws/audio``.

    Text frames are commands:

    * ``START_RECORDING`` - clear the buffers and start realtime ASR.
    * ``STOP_RECORDING`` - stop ASR, save the converted audio to
      ``RECORDING_RAW_FILE`` (plus an MP3 copy) and stream it back.
    * ``GET_FONT:<hex code>`` - reply with one glyph from ``FONT_FILE``.

    Binary frames are audio: each is appended to ``audio_buffer``, converted
    with ``process_chunk_32_to_16`` and forwarded to the ASR session if one
    is active. Any active ASR session is stopped when the handler exits,
    however it exits.
    """
    global audio_buffer
    await websocket.accept()
    print("Client connected")

    recognition = None
    processed_buffer = bytearray()
    loop = asyncio.get_running_loop()

    try:
        while True:
            # A message is either a text command or binary audio data.
            message = await websocket.receive()

            # receive() returns the disconnect message once; calling it again
            # afterwards raises RuntimeError, so leave the loop here.
            if message.get("type") == "websocket.disconnect":
                print("Client disconnected")
                break

            # Both keys may be present with one set to None, so test the
            # values rather than key membership.
            text = message.get("text")
            data = message.get("bytes")

            if text is not None:
                print(f"Received text: {text}")

                if text == "START_RECORDING":
                    print("Start recording...")
                    # Do not leak a session left over from a previous start.
                    _stop_asr(recognition)
                    audio_buffer = bytearray()  # clear the buffers
                    processed_buffer = bytearray()
                    recognition = _start_asr(websocket, loop)

                elif text == "STOP_RECORDING":
                    print(f"Stop recording. Total raw bytes: {len(audio_buffer)}")

                    _stop_asr(recognition)
                    recognition = None

                    # Use the audio that was converted as it arrived.
                    processed_audio = processed_buffer
                    print(f"Processed audio size: {len(processed_audio)} bytes (Gain: {VOLUME_GAIN}x)")

                    # Save the converted 16-bit PCM.
                    with open(RECORDING_RAW_FILE, "wb") as f:
                        f.write(processed_audio)

                    await _convert_raw_to_mp3()
                    await _send_playback(websocket, processed_audio)

                elif text.startswith("GET_FONT:"):
                    await _handle_font_request(websocket, text)

            elif data is not None:
                # Buffer the incoming audio, convert it, and feed the ASR.
                audio_buffer.extend(data)

                pcm_chunk = process_chunk_32_to_16(data, VOLUME_GAIN)
                processed_buffer.extend(pcm_chunk)

                if recognition:
                    try:
                        recognition.send_audio_frame(pcm_chunk)
                    except Exception as e:
                        print(f"Error sending audio frame to ASR: {e}")

    except WebSocketDisconnect:
        print("Client disconnected")
    except Exception as e:
        print(f"Error: {e}")
    finally:
        # Single clean-up point for every exit path.
        _stop_asr(recognition)
|
||||
|
||||
if __name__ == "__main__":
    # Print this machine's address so the ESP32 can be pointed at it.
    import socket

    try:
        local_ip = socket.gethostbyname(socket.gethostname())
    except OSError:
        # The hostname may not resolve on some machines. The address is only
        # informational, so fall back instead of aborting server start-up.
        local_ip = "127.0.0.1"
    print(f"Server running on ws://{local_ip}:8000/ws/audio")

    uvicorn.run(app, host="0.0.0.0", port=8000)
|
||||
55
websocket_server/test_font.py
Normal file
55
websocket_server/test_font.py
Normal file
@@ -0,0 +1,55 @@
|
||||
import os
|
||||
|
||||
# Default font location: next to this script, not a user-specific path.
FONT_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "GB2312-16.bin")


def _row_to_text(byte1, byte2):
    """Render one 16-pixel row (two bytes, MSB = leftmost) as '##'/'..'."""
    bits = (byte1 << 8) | byte2
    return "".join("##" if (bits >> (15 - b)) & 1 else ".." for b in range(16))


def test_font(font_file=FONT_FILE):
    """Sanity-check a GB2312 16x16 bitmap font file and print one glyph.

    Prints the file size against the expected 94 * 94 * 32 bytes (warning on
    mismatch), then reads the 32-byte record for '中' (0xD6D0) and prints it
    as 16 rows of ``##`` (set pixel) / ``..`` (clear pixel).

    Args:
        font_file: path of the font file to inspect; defaults to
            ``FONT_FILE``.

    Returns ``None``; problems are reported on stdout.
    """
    if not os.path.exists(font_file):
        print(f"Error: File not found at {font_file}")
        return

    file_size = os.path.getsize(font_file)
    print(f"Font file size: {file_size} bytes")

    # Expected size for GB2312-16 (94x94 chars * 32 bytes)
    expected_size = 94 * 94 * 32
    print(f"Expected size: {expected_size} bytes")

    if file_size != expected_size:
        print(f"Warning: File size mismatch! (Diff: {file_size - expected_size})")

    # Try to render '中' (0xD6D0)
    # Area: 0xD6 - 0xA0 = 54
    # Index: 0xD0 - 0xA0 = 48
    area = 0xD6 - 0xA0
    index = 0xD0 - 0xA0
    offset = ((area - 1) * 94 + (index - 1)) * 32

    print(f"Testing character '中' (0xD6D0)")
    print(f"Area: {area}, Index: {index}, Offset: {offset}")

    with open(font_file, "rb") as f:
        f.seek(offset)
        data = f.read(32)

    if len(data) != 32:
        print("Error: Could not read 32 bytes")
        return

    print("Bitmap data:")
    for i in range(16):
        # Each row is 2 bytes (16 bits)
        print(_row_to_text(data[i * 2], data[i * 2 + 1]))


if __name__ == "__main__":
    test_font()
|
||||
Reference in New Issue
Block a user