t

2026-03-05 21:56:44 +08:00
parent b430051d29
commit 9558ea4b35
4 changed files with 63 additions and 31 deletions
--- a/convert_img.py
+++ b/convert_img.py
@@ -3,6 +3,12 @@ from machine import UART
 from config import ttl_tx, ttl_rx
 from printer_driver import TsplPrinter

+def print_model_info(model_name):
+    """
+    Print the name of the model currently in use to the console.
+    """
+    print(f"\n[INFO] Current Image Generation Model: {model_name}\n")
+
 def print_bitmap(printer, data, width, height, x_offset=0, y_offset=0, invert=False):
    """
    发送位图数据到打印机
--- a/main.py
+++ b/main.py
@@ -6,6 +6,7 @@ import network
 import st7789py as st7789
 from config import CURRENT_CONFIG, SERVER_URL, ttl_tx, ttl_rx
 from audio import AudioPlayer, Microphone
+import convert_img

 # Define colors that might be missing in st7789py
 DARKGREY = 0x4208
@@ -118,21 +119,22 @@ def process_message(msg, display, image_state, image_data_list, printer_uart=Non
    if isinstance(msg, (bytes, bytearray)):
        if image_state == IMAGE_STATE_RECEIVING:
            try:
-                if len(image_data_list) < 2:
+                if len(image_data_list) < 3:
                     # 异常情况，重置
                     return IMAGE_STATE_IDLE, None
                
-                img_size = image_data_list[0]
-                current_offset = image_data_list[1]
+                width = image_data_list[0]
+                height = image_data_list[1]
+                current_offset = image_data_list[2]
                
                # Stream directly to display
                if display and display.tft:
-                    x = (240 - img_size) // 2
-                    y = (240 - img_size) // 2
-                    display.show_image_chunk(x, y, img_size, img_size, msg, current_offset)
+                    x = (240 - width) // 2
+                    y = (240 - height) // 2
+                    display.show_image_chunk(x, y, width, height, msg, current_offset)
                
                # Update offset
-                image_data_list[1] += len(msg)
+                image_data_list[2] += len(msg)
                
            except Exception as e:
                print(f"Stream image error: {e}")
@@ -194,18 +196,28 @@ def process_message(msg, display, image_state, image_data_list, printer_uart=Non
        try:
            parts = msg.split(":")
            size = int(parts[1])
-            img_size = int(parts[2]) if len(parts) > 2 else 64
-            print(f"Image start, size: {size}, img_size: {img_size}")
+            
+            width = 64
+            height = 64
+            
+            if len(parts) >= 4:
+                width = int(parts[2])
+                height = int(parts[3])
+            elif len(parts) == 3:
+                width = int(parts[2])
+                height = int(parts[2]) # assume square
+            
+            print(f"Image start, size: {size}, dim: {width}x{height}")
            image_data_list.clear()
-            image_data_list.append(img_size) # Store metadata at index 0
-            image_data_list.append(0)        # Store current received bytes offset at index 1
+            image_data_list.append(width)  # index 0
+            image_data_list.append(height) # index 1
+            image_data_list.append(0)      # index 2: offset
            
            # Prepare display for streaming
            if display and display.tft:
-                # Calculate position
-                x = (240 - img_size) // 2
-                y = (240 - img_size) // 2
-                # Pre-set window (this will be done in first chunk call)
+                # Clear screen area where image will be
+                # optional, but good practice if new image is smaller
+                pass
            
            return IMAGE_STATE_RECEIVING, None
        except Exception as e:
--- a/websocket_server/image_generator.py
+++ b/websocket_server/image_generator.py
@@ -38,11 +38,11 @@ class ImageGenerator:
 2. 画面必须清晰、线条粗壮，适合低分辨率热敏打印机打印。
 3. 绝对不要有复杂的阴影、渐变、黑白线条描述。
 4. 背景必须是纯白 (White background)。
-5. 提示词内容请使用英文描述，因为绘图模型对英文理解更好，但在描述中强调 "black and white line art", "simple lines", "vector style"。
+5. 提示词内容请使用中文描述，因为绘图模型对中文生成要更准确。
 6. 尺寸比例遵循宽48mm:高30mm (约 1.6:1)。
 7. 直接输出优化后的提示词，不要包含任何解释。
 如果用户要求输入文字，则用```把文字包裹起来，文字是中文
-"black and white line art， Chinese:```中国人```"
+"房子的旁边有一个小孩，黑白线稿画作，卡通形象， 文字:```中国人```在下方。"
 """

        try:
@@ -52,7 +52,7 @@ class ImageGenerator:
            # Currently using Qwen-Turbo for all providers for prompt optimization
            # You can also decouple this if needed
            response = Generation.call(
-                model='qwen-turbo',
+                model='qwen3.5-plus',
                prompt=f'{system_prompt}\n\n用户语音识别结果：{asr_text}\n\n优化后的提示词：',
                max_tokens=200,
                temperature=0.8
--- a/websocket_server/server.py
+++ b/websocket_server/server.py
@@ -235,7 +235,17 @@ async def start_async_image_generation(websocket: WebSocket, asr_text: str):
        await asyncio.sleep(0.2)
        
        # 同步调用图片生成函数
-        image_path = await asyncio.to_thread(generate_image, optimized_prompt, progress_callback)
+        gen_result = await asyncio.to_thread(generate_image, optimized_prompt, progress_callback)
+        
+        image_path = None
+        width = 0
+        height = 0
+        
+        if gen_result:
+            if isinstance(gen_result, tuple):
+                image_path, width, height = gen_result
+            else:
+                image_path = gen_result
        
        task.result = image_path
        
@@ -244,7 +254,7 @@ async def start_async_image_generation(websocket: WebSocket, asr_text: str):
            await websocket.send_text("STATUS:COMPLETE:图片生成完成")
            await asyncio.sleep(0.2)
            
-            await send_image_to_client(websocket, image_path)
+            await send_image_to_client(websocket, image_path, width, height)
        else:
            task.status = "failed"
            task.error = "图片生成失败"
@@ -267,15 +277,18 @@ async def start_async_image_generation(websocket: WebSocket, asr_text: str):
    return task


-async def send_image_to_client(websocket: WebSocket, image_path: str):
+async def send_image_to_client(websocket: WebSocket, image_path: str, width: int = 0, height: int = 0):
    """发送图片数据到客户端"""
    with open(image_path, 'rb') as f:
        image_data = f.read()
    
-    print(f"Sending image to ESP32, size: {len(image_data)} bytes")
+    print(f"Sending image to ESP32, size: {len(image_data)} bytes, dim: {width}x{height}")
    
    # Send start marker
-    await websocket.send_text(f"IMAGE_START:{len(image_data)}:{THUMB_SIZE}")
+    if width > 0 and height > 0:
+        await websocket.send_text(f"IMAGE_START:{len(image_data)}:{width}:{height}")
+    else:
+        await websocket.send_text(f"IMAGE_START:{len(image_data)}:{THUMB_SIZE}")
    
    # Send binary data directly
    chunk_size = 512  # Decreased chunk size for ESP32 memory stability
@@ -594,15 +607,16 @@ def generate_image(prompt, progress_callback=None, retry_count=0, max_retries=2)
            from PIL import Image
            img = Image.open(GENERATED_IMAGE_FILE)
            
-            # 缩小到THUMB_SIZE x THUMB_SIZE
-            img = img.resize((THUMB_SIZE, THUMB_SIZE), Image.LANCZOS)
+            # 缩小到 fit THUMB_SIZE x THUMB_SIZE (保持比例)
+            img.thumbnail((THUMB_SIZE, THUMB_SIZE), Image.Resampling.LANCZOS)
+            width, height = img.size
            
            # 转换为RGB565格式的原始数据
            # 每个像素2字节 (R5 G6 B5)
            rgb565_data = bytearray()
            
-            for y in range(THUMB_SIZE):
-                for x in range(THUMB_SIZE):
+            for y in range(height):
+                for x in range(width):
                    r, g, b = img.getpixel((x, y))[:3]
                    
                    # 转换为RGB565
@@ -619,23 +633,23 @@ def generate_image(prompt, progress_callback=None, retry_count=0, max_retries=2)
            with open(GENERATED_THUMB_FILE, 'wb') as f:
                f.write(rgb565_data)
            
-            print(f"Thumbnail saved to {GENERATED_THUMB_FILE}, size: {len(rgb565_data)} bytes")
+            print(f"Thumbnail saved to {GENERATED_THUMB_FILE}, size: {len(rgb565_data)} bytes, dim: {width}x{height}")
            
            if progress_callback:
                progress_callback(100, "图片生成完成!")
            
-            return GENERATED_THUMB_FILE
+            return GENERATED_THUMB_FILE, width, height
            
        except ImportError:
            print("PIL not available, sending original image")
            if progress_callback:
                progress_callback(100, "图片生成完成!(原始格式)")
-            return GENERATED_IMAGE_FILE
+            return GENERATED_IMAGE_FILE, 0, 0
        except Exception as e:
            print(f"Error processing image: {e}")
            if progress_callback:
                progress_callback(80, f"图片处理出错: {str(e)}")
-            return GENERATED_IMAGE_FILE
+            return GENERATED_IMAGE_FILE, 0, 0
            
    except Exception as e:
        print(f"Error in generate_image: {e}")