翻译

2026-02-15 22:44:25 +08:00
parent 753badd0f8
commit 9e6e9f98b6
2 changed files with 197 additions and 70 deletions
--- a/fastAPI_tarot.py
+++ b/fastAPI_tarot.py
@@ -127,7 +127,48 @@ app.openapi = custom_openapi
 app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
 import re
 # ------------------- 辅助函数 -------------------
 def is_english(text: str) -> bool:
    """
    Heuristically decide whether a prompt can be passed on as English.

    A prompt counts as English unless it contains at least one letter from a
    non-Latin script (Chinese, Japanese kana, Hangul, Cyrillic, Arabic, ...).
    ASCII text, digits, punctuation, emoji and accented Latin letters are all
    accepted, so an empty string is treated as English as well.

    :param text: the prompt to inspect
    :return: False if any non-Latin letter is present, True otherwise
    """
    # Local import keeps this helper self-contained.
    import unicodedata

    for char in text:
        # Fast path: plain ASCII and anything that is not a letter
        # (digits, punctuation, symbols, whitespace) never disqualify a prompt.
        if char.isascii() or not char.isalpha():
            continue
        # Non-ASCII letter: only Latin-script letters (e.g. accented vowels)
        # are still considered English-compatible.
        if "LATIN" not in unicodedata.name(char, ""):
            return False
    return True
 def translate_to_sam3_prompt(text: str) -> str:
    """
    Translate a non-English prompt into a concise English prompt for SAM3.

    The text is sent to the Qwen model via ``MultiModalConversation.call``
    and the reply is returned with any wrapping quotes or markdown backticks
    removed. This is a best-effort helper: on a non-200 response, an empty
    reply, or any exception, the original ``text`` is returned unchanged so
    the caller can still run segmentation with it.

    :param text: the prompt to translate
    :return: the translated English prompt, or ``text`` when translation fails
    """
    print(f"正在翻译提示词: {text}")
    try:
        messages = [
            {
                "role": "user",
                "content": [
                    {"text": f"请将以下描述翻译成简洁、精准的英文，用于图像分割模型(SAM)的提示词。直接返回英文，不要包含任何解释或其他文字。\n\n输入: {text}"}
                ]
            }
        ]
        response = MultiModalConversation.call(model=QWEN_MODEL, messages=messages)
        if response.status_code != 200:
            print(f"翻译失败: {response.code} - {response.message}")
            return text  # Fall back to the original prompt

        translated_text = response.output.choices[0].message.content[0]['text'].strip()
        # Strip only quotes / markdown backticks that WRAP the reply.
        # Removing every quote character would corrupt apostrophes inside the
        # translation (e.g. "man's hat" -> "mans hat").
        translated_text = translated_text.strip("\"'`").strip()
        print(f"翻译结果: {translated_text}")
        # An empty reply is useless as a prompt; keep the original instead.
        return translated_text or text
    except Exception as e:
        # Deliberate best-effort boundary: translation must never break the
        # request, so any failure degrades to the untranslated prompt.
        print(f"翻译异常: {e}")
        return text
 def order_points(pts):
    """
    对四个坐标点进行排序：左上，右上，右下，左下
@@ -178,10 +219,10 @@ def load_image_from_url(url: str) -> Image.Image:
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"无法下载图片: {str(e)}")
-def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RESULT_IMAGE_DIR, is_tarot: bool = True) -> list[dict]:
+def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RESULT_IMAGE_DIR, is_tarot: bool = True, cutout: bool = False) -> list[dict]:
    """
-    根据 mask 和 box 进行透视矫正并裁剪出独立的对象图片 (保留透明背景)
+    根据 mask 和 box 进行处理并保存独立的对象图片
-    返回包含文件名和元数据的列表
+    :param cutout: 如果为 True，则进行轮廓抠图（透明背景）；否则进行透视矫正（主要用于卡片）
    """
    saved_objects = []
    # Convert image to numpy array (RGB)
@@ -194,73 +235,125 @@ def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RE
        else:
            mask_np = mask.squeeze()
-        # Ensure mask is uint8 binary for OpenCV
+        # Handle box conversion
        if isinstance(box, torch.Tensor):
            box_np = box.cpu().numpy()
        else:
            box_np = np.array(box)
        # Ensure mask is uint8 binary for OpenCV/Pillow
        if mask_np.dtype == bool:
            mask_uint8 = (mask_np * 255).astype(np.uint8)
        else:
            mask_uint8 = (mask_np > 0.5).astype(np.uint8) * 255
-        # Find contours
+        if cutout:
-        contours, _ = cv2.findContours(mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+            # --- 轮廓抠图模式 (透明背景) ---
-        if not contours:
+            # 1. 准备 RGBA 原图
-            continue
+            if image.mode != "RGBA":
                img_rgba = image.convert("RGBA")
            else:
                img_rgba = image.copy()
-        # Get largest contour
+            # 2. 准备 Alpha Mask
-        c = max(contours, key=cv2.contourArea)
+            # mask_uint8 已经是 0-255，可以直接作为 Alpha 通道的基础
            # 将 Mask 缩放到和图片一样大 (一般是一样的，但以防万一)
            if mask_uint8.shape != img_rgba.size[::-1]: # size is (w, h), shape is (h, w)
                 # 如果尺寸不一致可能需要 resize，这里假设尺寸一致
                 pass
-        # Approximate contour to polygon
+            mask_img = Image.fromarray(mask_uint8, mode='L')
-        peri = cv2.arcLength(c, True)
+            
-        approx = cv2.approxPolyDP(c, 0.04 * peri, True)
+            # 3. 将 Mask 应用到 Alpha 通道
            # 创建一个新的空白透明图
            cutout_img = Image.new("RGBA", img_rgba.size, (0, 0, 0, 0))
            # 将原图粘贴上去，使用 mask 作为 mask
            cutout_img.paste(image.convert("RGB"), (0, 0), mask=mask_img)
            # 4. Crop to Box
            # box is [x1, y1, x2, y2]
            x1, y1, x2, y2 = map(int, box_np)
            # 边界检查
            w, h = cutout_img.size
            x1 = max(0, x1); y1 = max(0, y1)
            x2 = min(w, x2); y2 = min(h, y2)
            # 避免无效 crop
            if x2 > x1 and y2 > y1:
                final_img = cutout_img.crop((x1, y1, x2, y2))
            else:
                final_img = cutout_img # Fallback
            # Save
            prefix = "cutout"
            is_rotated = False # 抠图模式下不进行自动旋转
            filename = f"{prefix}_{uuid.uuid4().hex}_{i}.png"
            save_path = os.path.join(output_dir, filename)
            final_img.save(save_path)
            saved_objects.append({
                "filename": filename,
                "is_rotated_by_algorithm": is_rotated,
                "note": "Mask cutout applied. Background removed."
            })
        # If we have 4 points, use them. If not, fallback to minAreaRect
        if len(approx) == 4:
            pts = approx.reshape(4, 2)
        else:
-            rect = cv2.minAreaRect(c)
+            # --- 透视矫正模式 (原有逻辑) ---
-            pts = cv2.boxPoints(rect)
+            # Find contours
            contours, _ = cv2.findContours(mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            if not contours:
                continue
-        # Apply perspective transform
+            # Get largest contour
-        # 注意：这里我们只变换RGB部分，Alpha通道需要额外处理或者直接应用同样的变换
+            c = max(contours, key=cv2.contourArea)
        # 为了简单，我们直接对原图（假设不带Alpha）进行变换
        # 如果需要保留背景透明，需要先将原图转为RGBA，再做变换
-        # Check if original image has Alpha
+            # Approximate contour to polygon
-        if img_arr.shape[2] == 4:
+            peri = cv2.arcLength(c, True)
-            warped = four_point_transform(img_arr, pts)
+            approx = cv2.approxPolyDP(c, 0.04 * peri, True)
        else:
            # Add alpha channel from mask? 
            # 透视变换后的矩形本身就是去掉了背景的，所以不需要额外的Mask Alpha
            # 但是为了保持一致性，我们可以给变换后的图加一个全不透明的Alpha，或者保留RGB
            warped = four_point_transform(img_arr, pts)
-        # Check orientation (Portrait vs Landscape)
+            # If we have 4 points, use them. If not, fallback to minAreaRect
-        h, w = warped.shape[:2]
+            if len(approx) == 4:
-        is_rotated = False
+                pts = approx.reshape(4, 2)
            else:
                rect = cv2.minAreaRect(c)
                pts = cv2.boxPoints(rect)
-        # Enforce Portrait for Tarot cards (Standard 7x12 cm ratio approx)
+            # Apply perspective transform
-        if is_tarot and w > h:
+            # Check if original image has Alpha
-            # Rotate 90 degrees clockwise
+            if img_arr.shape[2] == 4:
-            warped = cv2.rotate(warped, cv2.ROTATE_90_CLOCKWISE)
+                warped = four_point_transform(img_arr, pts)
-            is_rotated = True
+            else:
                warped = four_point_transform(img_arr, pts)
-        # Convert back to PIL
+            # Check orientation (Portrait vs Landscape)
-        pil_warped = Image.fromarray(warped)
+            h, w = warped.shape[:2]
            is_rotated = False
-        # Save
+            # Enforce Portrait for Tarot cards (Standard 7x12 cm ratio approx)
-        prefix = "tarot" if is_tarot else "segment"
+            if is_tarot and w > h:
-        filename = f"{prefix}_{uuid.uuid4().hex}_{i}.png" 
+                # Rotate 90 degrees clockwise
-        save_path = os.path.join(output_dir, filename)
+                warped = cv2.rotate(warped, cv2.ROTATE_90_CLOCKWISE)
-        pil_warped.save(save_path)
+                is_rotated = True
-        # 正逆位判断逻辑 (基于几何只能做到这一步，无法区分上下颠倒)
+            # Convert back to PIL
-        # 这里我们假设长边垂直为正位，如果做了旋转则标记
+            pil_warped = Image.fromarray(warped)
        # 真正的正逆位需要OCR或图像识别
-        saved_objects.append({
+            # Save
-            "filename": filename,
+            prefix = "tarot" if is_tarot else "segment"
-            "is_rotated_by_algorithm": is_rotated,
+            filename = f"{prefix}_{uuid.uuid4().hex}_{i}.png" 
-            "note": "Geometric correction applied. True upright/reversed requires content analysis."
+            save_path = os.path.join(output_dir, filename)
-        })
+            pil_warped.save(save_path)
            # 正逆位判断逻辑 (基于几何只能做到这一步，无法区分上下颠倒)
            # 这里我们假设长边垂直为正位，如果做了旋转则标记
            # 真正的正逆位需要OCR或图像识别
            saved_objects.append({
                "filename": filename,
                "is_rotated_by_algorithm": is_rotated,
                "note": "Geometric correction applied. True upright/reversed requires content analysis."
            })
    return saved_objects
@@ -405,11 +498,28 @@ async def segment(
    prompt: str = Form(...),
    file: Optional[UploadFile] = File(None),
    image_url: Optional[str] = Form(None),
-    save_segment_images: bool = Form(False)
+    save_segment_images: bool = Form(False),
    cutout: bool = Form(False)
 ):
    if not file and not image_url:
        raise HTTPException(status_code=400, detail="必须提供 file (图片文件) 或 image_url (图片链接)")
    # ------------------- Prompt 翻译/优化 -------------------
    final_prompt = prompt
    if not is_english(prompt):
        # 使用 Qwen 进行翻译
        try:
            # 构建消息请求翻译
            # 注意：translate_to_sam3_prompt 是一个阻塞调用，可能会增加耗时
            # 但对于分割任务来说是可以接受的
            translated = translate_to_sam3_prompt(prompt)
            if translated:
                final_prompt = translated
        except Exception as e:
            print(f"Prompt翻译失败，使用原始Prompt: {e}")
    print(f"最终使用的 Prompt: {final_prompt}")
    try:
        if file:
            image = Image.open(file.file).convert("RGB")
@@ -422,7 +532,7 @@ async def segment(
    try:
        inference_state = processor.set_image(image)
-        output = processor.set_text_prompt(state=inference_state, prompt=prompt)
+        output = processor.set_text_prompt(state=inference_state, prompt=final_prompt)
        masks, boxes, scores = output["masks"], output["boxes"], output["scores"]
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"模型推理错误: {str(e)}")
@@ -442,21 +552,26 @@ async def segment(
            output_dir = os.path.join(RESULT_IMAGE_DIR, request_id)
            os.makedirs(output_dir, exist_ok=True)
-            saved_objects = crop_and_save_objects(image, masks, boxes, output_dir=output_dir, is_tarot=False)
+            # 传递 cutout 参数
            saved_objects = crop_and_save_objects(
                image, 
                masks, 
                boxes, 
                output_dir=output_dir, 
                is_tarot=False, 
                cutout=cutout
            )
            for obj in saved_objects:
                fname = obj["filename"]
                seg_url = str(request.url_for("static", path=f"results/{request_id}/{fname}"))
                saved_segments_info.append({
                    "url": seg_url,
-                    "filename": fname
+                    "filename": fname,
                    "note": obj.get("note", "")
                })
        except Exception as e:
             # Log error but don't fail the whole request if segmentation saving fails?
             # Or fail it? Let's fail it to be safe or include error in response.
             # Given simple requirement, I'll let it fail or just print.
             print(f"Error saving segments: {e}")
             # We can optionally raise HTTPException here too.
             raise HTTPException(status_code=500, detail=f"保存分割图片失败: {str(e)}")
    response_content = {
@@ -691,6 +806,18 @@ async def segment_face(
    if not file and not image_url:
        raise HTTPException(status_code=400, detail="必须提供 file (图片文件) 或 image_url (图片链接)")
    # ------------------- Prompt 翻译/优化 -------------------
    final_prompt = prompt
    if not is_english(prompt):
        try:
            translated = translate_to_sam3_prompt(prompt)
            if translated:
                final_prompt = translated
        except Exception as e:
            print(f"Prompt翻译失败，使用原始Prompt: {e}")
    print(f"Face Segment 最终使用的 Prompt: {final_prompt}")
    # 1. 加载图片
    try:
        if file:
@@ -709,7 +836,7 @@ async def segment_face(
        result = human_analysis_service.process_face_segmentation_and_analysis(
            processor=processor,
            image=image,
-            prompt=prompt,
+            prompt=final_prompt,
            output_base_dir=RESULT_IMAGE_DIR
        )
    except Exception as e:
--- a/static/results/seg_37fe4609bc984323a0284b404456f966.jpg
+++ b/static/results/seg_37fe4609bc984323a0284b404456f966.jpg