diff --git a/convert_img.py b/convert_img.py
index 2f09b1f..d257ab0 100644
--- a/convert_img.py
+++ b/convert_img.py
@@ -3,6 +3,12 @@
 from machine import UART
 from config import ttl_tx, ttl_rx
 from printer_driver import TsplPrinter
+def print_model_info(model_name):
+    """
+    在控制台打印当前使用的模型名称
+    """
+    print(f"\n[INFO] Current Image Generation Model: {model_name}\n")
+
 def print_bitmap(printer, data, width, height, x_offset=0, y_offset=0, invert=False):
     """
     发送位图数据到打印机
diff --git a/main.py b/main.py
index 78d6c74..ff7d0e6 100644
--- a/main.py
+++ b/main.py
@@ -6,6 +6,7 @@
 import network
 import st7789py as st7789
 from config import CURRENT_CONFIG, SERVER_URL, ttl_tx, ttl_rx
 from audio import AudioPlayer, Microphone
+import convert_img
 # Define colors that might be missing in st7789py
 DARKGREY = 0x4208
@@ -118,21 +119,22 @@ def process_message(msg, display, image_state, image_data_list, printer_uart=Non
     if isinstance(msg, (bytes, bytearray)):
         if image_state == IMAGE_STATE_RECEIVING:
             try:
-                if len(image_data_list) < 2:
+                if len(image_data_list) < 3:
                     # 异常情况,重置
                     return IMAGE_STATE_IDLE, None
 
-                img_size = image_data_list[0]
-                current_offset = image_data_list[1]
+                width = image_data_list[0]
+                height = image_data_list[1]
+                current_offset = image_data_list[2]
 
                 # Stream directly to display
                 if display and display.tft:
-                    x = (240 - img_size) // 2
-                    y = (240 - img_size) // 2
-                    display.show_image_chunk(x, y, img_size, img_size, msg, current_offset)
+                    x = (240 - width) // 2
+                    y = (240 - height) // 2
+                    display.show_image_chunk(x, y, width, height, msg, current_offset)
 
                 # Update offset
-                image_data_list[1] += len(msg)
+                image_data_list[2] += len(msg)
 
             except Exception as e:
                 print(f"Stream image error: {e}")
@@ -194,18 +196,28 @@ def process_message(msg, display, image_state, image_data_list, printer_uart=Non
         try:
             parts = msg.split(":")
             size = int(parts[1])
-            img_size = int(parts[2]) if len(parts) > 2 else 64
-            print(f"Image start, size: {size}, img_size: {img_size}")
+
+            width = 64
+            height = 64
+
+            if len(parts) >= 4:
+                width = int(parts[2])
+                height = int(parts[3])
+            elif len(parts) == 3:
+                width = int(parts[2])
+                height = int(parts[2]) # assume square
+
+            print(f"Image start, size: {size}, dim: {width}x{height}")
 
             image_data_list.clear()
-            image_data_list.append(img_size) # Store metadata at index 0
-            image_data_list.append(0) # Store current received bytes offset at index 1
+            image_data_list.append(width) # index 0
+            image_data_list.append(height) # index 1
+            image_data_list.append(0) # index 2: offset
 
             # Prepare display for streaming
             if display and display.tft:
-                # Calculate position
-                x = (240 - img_size) // 2
-                y = (240 - img_size) // 2
-                # Pre-set window (this will be done in first chunk call)
+                # Clear screen area where image will be
+                # optional, but good practice if new image is smaller
+                pass
             return IMAGE_STATE_RECEIVING, None
         except Exception as e:
diff --git a/websocket_server/image_generator.py b/websocket_server/image_generator.py
index 5ce1bd3..bdfcb2d 100644
--- a/websocket_server/image_generator.py
+++ b/websocket_server/image_generator.py
@@ -38,11 +38,11 @@ class ImageGenerator:
 2. 画面必须清晰、线条粗壮,适合低分辨率热敏打印机打印。
 3. 绝对不要有复杂的阴影、渐变、黑白线条描述。
 4. 背景必须是纯白 (White background)。
-5. 提示词内容请使用英文描述,因为绘图模型对英文理解更好,但在描述中强调 "black and white line art", "simple lines", "vector style"。
+5. 提示词内容请使用中文描述,因为绘图模型对中文生成要更准确。
 6. 尺寸比例遵循宽48mm:高30mm (约 1.6:1)。
 7. 直接输出优化后的提示词,不要包含任何解释。
 如果用户要求输入文字,则用```把文字包裹起来,文字是中文
-"black and white line art, Chinese:```中国人```"
+"房子的旁边有一个小孩,黑白线稿画作,卡通形象, 文字:```中国人```在下方。"
 """
 
         try:
@@ -52,7 +52,7 @@ class ImageGenerator:
             # Currently using Qwen-Turbo for all providers for prompt optimization
             # You can also decouple this if needed
             response = Generation.call(
-                model='qwen-turbo',
+                model='qwen3.5-plus',
                 prompt=f'{system_prompt}\n\n用户语音识别结果:{asr_text}\n\n优化后的提示词:',
                 max_tokens=200,
                 temperature=0.8
diff --git a/websocket_server/server.py b/websocket_server/server.py
index cdbfe7e..f3e5083 100644
--- a/websocket_server/server.py
+++ b/websocket_server/server.py
@@ -235,7 +235,17 @@
         await asyncio.sleep(0.2)
 
         # 同步调用图片生成函数
-        image_path = await asyncio.to_thread(generate_image, optimized_prompt, progress_callback)
+        gen_result = await asyncio.to_thread(generate_image, optimized_prompt, progress_callback)
+
+        image_path = None
+        width = 0
+        height = 0
+
+        if gen_result:
+            if isinstance(gen_result, tuple):
+                image_path, width, height = gen_result
+            else:
+                image_path = gen_result
 
         task.result = image_path
 
@@ -244,7 +254,7 @@
             await websocket.send_text("STATUS:COMPLETE:图片生成完成")
             await asyncio.sleep(0.2)
 
-            await send_image_to_client(websocket, image_path)
+            await send_image_to_client(websocket, image_path, width, height)
         else:
             task.status = "failed"
             task.error = "图片生成失败"
@@ -267,15 +277,18 @@
     return task
 
 
-async def send_image_to_client(websocket: WebSocket, image_path: str):
+async def send_image_to_client(websocket: WebSocket, image_path: str, width: int = 0, height: int = 0):
     """发送图片数据到客户端"""
     with open(image_path, 'rb') as f:
         image_data = f.read()
 
-    print(f"Sending image to ESP32, size: {len(image_data)} bytes")
+    print(f"Sending image to ESP32, size: {len(image_data)} bytes, dim: {width}x{height}")
 
     # Send start marker
-    await websocket.send_text(f"IMAGE_START:{len(image_data)}:{THUMB_SIZE}")
+    if width > 0 and height > 0:
+        await websocket.send_text(f"IMAGE_START:{len(image_data)}:{width}:{height}")
+    else:
+        await websocket.send_text(f"IMAGE_START:{len(image_data)}:{THUMB_SIZE}")
 
     # Send binary data directly
     chunk_size = 512 # Decreased chunk size for ESP32 memory stability
@@ -594,15 +607,16 @@ def generate_image(prompt, progress_callback=None, retry_count=0, max_retries=2)
 
                 from PIL import Image
                 img = Image.open(GENERATED_IMAGE_FILE)
 
-                # 缩小到THUMB_SIZE x THUMB_SIZE
-                img = img.resize((THUMB_SIZE, THUMB_SIZE), Image.LANCZOS)
+                # 缩小到 fit THUMB_SIZE x THUMB_SIZE (保持比例)
+                img.thumbnail((THUMB_SIZE, THUMB_SIZE), Image.Resampling.LANCZOS)
+                width, height = img.size
 
                 # 转换为RGB565格式的原始数据
                 # 每个像素2字节 (R5 G6 B5)
                 rgb565_data = bytearray()
-                for y in range(THUMB_SIZE):
-                    for x in range(THUMB_SIZE):
+                for y in range(height):
+                    for x in range(width):
                         r, g, b = img.getpixel((x, y))[:3]
 
                         # 转换为RGB565
@@ -619,23 +633,23 @@ def generate_image(prompt, progress_callback=None, retry_count=0, max_retries=2)
                 with open(GENERATED_THUMB_FILE, 'wb') as f:
                     f.write(rgb565_data)
 
-                print(f"Thumbnail saved to {GENERATED_THUMB_FILE}, size: {len(rgb565_data)} bytes")
+                print(f"Thumbnail saved to {GENERATED_THUMB_FILE}, size: {len(rgb565_data)} bytes, dim: {width}x{height}")
 
                 if progress_callback:
                     progress_callback(100, "图片生成完成!")
 
-                return GENERATED_THUMB_FILE
+                return GENERATED_THUMB_FILE, width, height
 
             except ImportError:
                 print("PIL not available, sending original image")
                 if progress_callback:
                     progress_callback(100, "图片生成完成!(原始格式)")
-                return GENERATED_IMAGE_FILE
+                return GENERATED_IMAGE_FILE, 0, 0
             except Exception as e:
                 print(f"Error processing image: {e}")
                 if progress_callback:
                     progress_callback(80, f"图片处理出错: {str(e)}")
-                return GENERATED_IMAGE_FILE
+                return GENERATED_IMAGE_FILE, 0, 0
 
         except Exception as e:
             print(f"Error in generate_image: {e}")