tarot

2026-02-15 16:37:24 +08:00
parent f981a05b32
commit 882989f252
29 changed files with 118 additions and 37 deletions
--- a/fastAPI_tarot.py
+++ b/fastAPI_tarot.py
@@ -3,6 +3,7 @@ import uuid
 import time
 import requests
 import numpy as np
 import cv2
 from typing import Optional
 from contextlib import asynccontextmanager
@@ -120,6 +121,46 @@ app.openapi = custom_openapi
 app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
 # ------------------- 辅助函数 -------------------
 def order_points(pts):
    """
    Order four 2-D points as: top-left, top-right, bottom-right, bottom-left.

    The corners are first picked with the classic extremes heuristic:
    top-left has the smallest x + y, bottom-right the largest x + y,
    top-right the smallest y - x and bottom-left the largest y - x.
    When those four picks are distinct the result is returned as is.

    The heuristic breaks down when sums or differences tie (for example a
    rectangle rotated by about 45 degrees): the same input point is then
    selected for two corners and another one is lost. In that case the
    points are instead ordered by their polar angle around the centroid
    (clockwise on screen, since the image y axis points down) and the
    sequence is rotated so that the point with the smallest x + y comes
    first.

    Args:
        pts: array-like reshapeable to (N, 2); N is expected to be 4.

    Returns:
        A float32 array of shape (4, 2) holding the ordered corners.
    """
    pts = np.asarray(pts).reshape(-1, 2)
    sums = pts.sum(axis=1)
    diffs = pts[:, 1] - pts[:, 0]
    picks = [
        int(np.argmin(sums)),   # top-left
        int(np.argmin(diffs)),  # top-right
        int(np.argmax(sums)),   # bottom-right
        int(np.argmax(diffs)),  # bottom-left
    ]
    # Distinct picks: the heuristic is unambiguous. The angular fallback is
    # only meaningful for exactly four input points.
    if len(set(picks)) == 4 or len(pts) != 4:
        return pts[picks].astype("float32")
    # Degenerate picks: walk the points in angular order around the centroid
    # so every input point is used exactly once.
    centre = pts.mean(axis=0)
    angles = np.arctan2(pts[:, 1] - centre[1], pts[:, 0] - centre[0])
    ring = pts[np.argsort(angles, kind="stable")]
    start = int(np.argmin(ring.sum(axis=1)))
    return np.roll(ring, -start, axis=0).astype("float32")
 def four_point_transform(image, pts):
    """
    Apply a perspective transform defined by four points.

    The points are ordered with order_points, the output size is taken from
    the longer of each pair of opposite edges (truncated to int), and the
    image is warped so the quadrilateral fills an axis-aligned rectangle of
    that size.

    Args:
        image: source image passed to cv2.warpPerspective.
        pts: the four corner points, forwarded to order_points.

    Returns:
        The warped image returned by cv2.warpPerspective.
    """
    def _edge(p, q):
        # Euclidean length of the segment between two corners.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Output width: the longer of the bottom and top edges.
    out_w = max(int(_edge(bottom_right, bottom_left)), int(_edge(top_right, top_left)))
    # Output height: the longer of the right and left edges.
    out_h = max(int(_edge(top_right, bottom_right)), int(_edge(top_left, bottom_left)))

    last_col = out_w - 1
    last_row = out_h - 1
    # Destination corners in the same TL, TR, BR, BL order as `corners`.
    target = np.array(
        [
            [0, 0],
            [last_col, 0],
            [last_col, last_row],
            [0, last_row],
        ],
        dtype="float32",
    )

    homography = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, homography, (out_w, out_h))
 def load_image_from_url(url: str) -> Image.Image:
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
@@ -130,13 +171,14 @@ def load_image_from_url(url: str) -> Image.Image:
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"无法下载图片: {str(e)}")
-def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RESULT_IMAGE_DIR) -> list[str]:
+def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RESULT_IMAGE_DIR) -> list[dict]:
    """
-    根据 mask 和 box 裁剪出独立的对象图片 (保留透明背景)
+    根据 mask 和 box 进行透视矫正并裁剪出独立的对象图片 (保留透明背景)
    返回包含文件名和元数据的列表
    """
-    saved_files = []
+    saved_objects = []
-    # Convert image to numpy array
+    # Convert image to numpy array (RGB)
-    img_arr = np.array(image) # RGB (H, W, 3)
+    img_arr = np.array(image)
    for i, (mask, box) in enumerate(zip(masks, boxes)):
        # Handle tensor/numpy conversions
@@ -145,44 +187,74 @@ def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RE
        else:
            mask_np = mask.squeeze()
-        if isinstance(box, torch.Tensor):
+        # Ensure mask is uint8 binary for OpenCV
-            box_np = box.cpu().numpy()
+        if mask_np.dtype == bool:
            mask_uint8 = (mask_np * 255).astype(np.uint8)
        else:
-            box_np = box
+            mask_uint8 = (mask_np > 0.5).astype(np.uint8) * 255
-        # Get coordinates
+        # Find contours
-        x1, y1, x2, y2 = map(int, box_np)
+        contours, _ = cv2.findContours(mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-        
+        if not contours:
        # Ensure coordinates are within bounds
        x1 = max(0, x1)
        y1 = max(0, y1)
        x2 = min(image.width, x2)
        y2 = min(image.height, y2)
        # Check valid crop
        if x2 <= x1 or y2 <= y1:
            continue
-        # Create Alpha channel from mask (0 or 255)
+        # Get largest contour
-        # mask_np is boolean or float 0..1. If boolean, *255 -> 0/255.
+        c = max(contours, key=cv2.contourArea)
        alpha = (mask_np * 255).astype(np.uint8)
-        # Combine RGB and Alpha
+        # Approximate contour to polygon
-        rgba = np.dstack((img_arr, alpha))
+        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.04 * peri, True)
-        # Convert back to PIL for cropping
+        # If we have 4 points, use them. If not, fallback to minAreaRect
-        pil_rgba = Image.fromarray(rgba)
+        if len(approx) == 4:
            pts = approx.reshape(4, 2)
        else:
            rect = cv2.minAreaRect(c)
            pts = cv2.boxPoints(rect)
        # Apply perspective transform
        # 注意：这里我们只变换RGB部分，Alpha通道需要额外处理或者直接应用同样的变换
        # 为了简单，我们直接对原图（假设不带Alpha）进行变换
        # 如果需要保留背景透明，需要先将原图转为RGBA，再做变换
-        # Crop to bounding box
+        # Check if original image has Alpha
-        cropped = pil_rgba.crop((x1, y1, x2, y2))
+        if img_arr.shape[2] == 4:
            warped = four_point_transform(img_arr, pts)
        else:
            # Add alpha channel from mask? 
            # 透视变换后的矩形本身就是去掉了背景的，所以不需要额外的Mask Alpha
            # 但是为了保持一致性，我们可以给变换后的图加一个全不透明的Alpha，或者保留RGB
            warped = four_point_transform(img_arr, pts)
        # Check orientation (Portrait vs Landscape)
        h, w = warped.shape[:2]
        is_rotated = False
        # Enforce Portrait for Tarot cards (Standard 7x12 cm ratio approx)
        if w > h:
            # Rotate 90 degrees clockwise
            warped = cv2.rotate(warped, cv2.ROTATE_90_CLOCKWISE)
            is_rotated = True
        # Convert back to PIL
        pil_warped = Image.fromarray(warped)
        # Save
-        filename = f"tarot_{uuid.uuid4().hex}_{i}.png" # Use png for transparency
+        filename = f"tarot_{uuid.uuid4().hex}_{i}.png" 
        save_path = os.path.join(output_dir, filename)
-        cropped.save(save_path)
+        pil_warped.save(save_path)
        saved_files.append(filename)
-    return saved_files
+        # 正逆位判断逻辑 (基于几何只能做到这一步，无法区分上下颠倒)
        # 这里我们假设长边垂直为正位，如果做了旋转则标记
        # 真正的正逆位需要OCR或图像识别
        saved_objects.append({
            "filename": filename,
            "is_rotated_by_algorithm": is_rotated,
            "note": "Geometric correction applied. True upright/reversed requires content analysis."
        })
    return saved_objects
 def generate_and_save_result(image: Image.Image, inference_state, output_dir: str = RESULT_IMAGE_DIR) -> str:
    filename = f"seg_{uuid.uuid4().hex}.jpg"
@@ -295,12 +367,21 @@ async def segment_tarot(
    # 数量正确，执行抠图
    try:
-        filenames = crop_and_save_objects(image, masks, boxes, output_dir=output_dir)
+        saved_objects = crop_and_save_objects(image, masks, boxes, output_dir=output_dir)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"抠图处理错误: {str(e)}")
-    # 生成 URL 列表
+    # 生成 URL 列表和元数据
-    card_urls = [str(request.url_for("static", path=f"results/{request_id}/{fname}")) for fname in filenames]
+    tarot_cards = []
    for obj in saved_objects:
        fname = obj["filename"]
        file_url = str(request.url_for("static", path=f"results/{request_id}/{fname}"))
        tarot_cards.append({
            "url": file_url,
            "is_rotated": obj["is_rotated_by_algorithm"],
            "orientation_status": "corrected_to_portrait" if obj["is_rotated_by_algorithm"] else "original_portrait",
            "note": obj["note"]
        })
    # 生成整体效果图
    try:
@@ -311,8 +392,8 @@ async def segment_tarot(
    return JSONResponse(content={
        "status": "success",
-        "message": f"成功识别并分割 {expected_count} 张塔罗牌",
+        "message": f"成功识别并分割 {expected_count} 张塔罗牌 (已执行透视矫正)",
-        "tarot_cards": card_urls,
+        "tarot_cards": tarot_cards,
        "full_visualization": main_file_url,
        "scores": scores.tolist() if torch.is_tensor(scores) else scores
    })
--- a/static/results/1771141606_f84c60e1/seg_755ce842193e431d88f2e9a8680ddded.jpg
+++ b/static/results/1771141606_f84c60e1/seg_755ce842193e431d88f2e9a8680ddded.jpg
--- a/static/results/1771141606_f84c60e1/tarot_8972c19bf5b44b2783c3f48827da274d_0.png
+++ b/static/results/1771141606_f84c60e1/tarot_8972c19bf5b44b2783c3f48827da274d_0.png
--- a/static/results/1771141606_f84c60e1/tarot_9ca8f077422e49488e8760de8bc8e47b_1.png
+++ b/static/results/1771141606_f84c60e1/tarot_9ca8f077422e49488e8760de8bc8e47b_1.png
--- a/static/results/1771142529_586bf911/seg_c8a0f84341534b8ea0e120b1479a28f9.jpg
+++ b/static/results/1771142529_586bf911/seg_c8a0f84341534b8ea0e120b1479a28f9.jpg
--- a/static/results/1771142550_18ba511b/seg_18be15d43b4d4c959693c74f7f010196.jpg
+++ b/static/results/1771142550_18ba511b/seg_18be15d43b4d4c959693c74f7f010196.jpg
--- a/static/results/1771142565_49ee276c/seg_e50b91eeb1094ca5af241f0a7e71a150.jpg
+++ b/static/results/1771142565_49ee276c/seg_e50b91eeb1094ca5af241f0a7e71a150.jpg
--- a/static/results/1771142565_49ee276c/tarot_90df7c6bd0534a2ba1df93aaf01f05a7_1.png
+++ b/static/results/1771142565_49ee276c/tarot_90df7c6bd0534a2ba1df93aaf01f05a7_1.png
--- a/static/results/1771142565_49ee276c/tarot_d267968c5cb34eb4a3c2b68b52bb8969_2.png
+++ b/static/results/1771142565_49ee276c/tarot_d267968c5cb34eb4a3c2b68b52bb8969_2.png
--- a/static/results/1771142565_49ee276c/tarot_ee43a2201cf747229fc2db80638c85c9_0.png
+++ b/static/results/1771142565_49ee276c/tarot_ee43a2201cf747229fc2db80638c85c9_0.png
--- a/static/results/1771144502_6378d9cb/seg_83cac064bea94c37a0c5a3cb29b38bf3.jpg
+++ b/static/results/1771144502_6378d9cb/seg_83cac064bea94c37a0c5a3cb29b38bf3.jpg
--- a/static/results/1771144502_6378d9cb/tarot_22a3707295d24bb7a88c0e8d020b6501_2.png
+++ b/static/results/1771144502_6378d9cb/tarot_22a3707295d24bb7a88c0e8d020b6501_2.png
--- a/static/results/1771144502_6378d9cb/tarot_844dbd5f5d444d798a644211d8864a27_1.png
+++ b/static/results/1771144502_6378d9cb/tarot_844dbd5f5d444d798a644211d8864a27_1.png
--- a/static/results/1771144502_6378d9cb/tarot_bb663f7440354cf4840ac766cbe778d6_0.png
+++ b/static/results/1771144502_6378d9cb/tarot_bb663f7440354cf4840ac766cbe778d6_0.png
--- a/static/results/1771144553_0a9f3e15/seg_a6e62efff2a749e4ac7da457ebebe4fc.jpg
+++ b/static/results/1771144553_0a9f3e15/seg_a6e62efff2a749e4ac7da457ebebe4fc.jpg
--- a/static/results/1771144553_0a9f3e15/tarot_4372c25cb87743f0be3fc0ebda553856_5.png
+++ b/static/results/1771144553_0a9f3e15/tarot_4372c25cb87743f0be3fc0ebda553856_5.png
--- a/static/results/1771144553_0a9f3e15/tarot_491ce3ddc28a4a86a60247ca3677fd39_6.png
+++ b/static/results/1771144553_0a9f3e15/tarot_491ce3ddc28a4a86a60247ca3677fd39_6.png
--- a/static/results/1771144553_0a9f3e15/tarot_6e000f4cd00b495690e4e9026999f330_7.png
+++ b/static/results/1771144553_0a9f3e15/tarot_6e000f4cd00b495690e4e9026999f330_7.png
--- a/static/results/1771144553_0a9f3e15/tarot_9594dbdd4c864755845490f298df64f2_0.png
+++ b/static/results/1771144553_0a9f3e15/tarot_9594dbdd4c864755845490f298df64f2_0.png
--- a/static/results/1771144553_0a9f3e15/tarot_b2b7bcc83cdb4ddfb53845a9b0351d94_1.png
+++ b/static/results/1771144553_0a9f3e15/tarot_b2b7bcc83cdb4ddfb53845a9b0351d94_1.png
--- a/static/results/1771144553_0a9f3e15/tarot_b50367bc0c2f4508a1839e19f526f255_2.png
+++ b/static/results/1771144553_0a9f3e15/tarot_b50367bc0c2f4508a1839e19f526f255_2.png
--- a/static/results/1771144553_0a9f3e15/tarot_c60d39c9ada647d6b0ee5597d8a88a8a_4.png
+++ b/static/results/1771144553_0a9f3e15/tarot_c60d39c9ada647d6b0ee5597d8a88a8a_4.png
--- a/static/results/1771144553_0a9f3e15/tarot_c8c93345201e464e94410f581ca48f48_9.png
+++ b/static/results/1771144553_0a9f3e15/tarot_c8c93345201e464e94410f581ca48f48_9.png
--- a/static/results/1771144553_0a9f3e15/tarot_cb0a5bb73c2047ec8cdf87fc545ce14d_8.png
+++ b/static/results/1771144553_0a9f3e15/tarot_cb0a5bb73c2047ec8cdf87fc545ce14d_8.png
--- a/static/results/1771144553_0a9f3e15/tarot_d365231889444c60b392bcbf89a49913_3.png
+++ b/static/results/1771144553_0a9f3e15/tarot_d365231889444c60b392bcbf89a49913_3.png
--- a/static/results/1771144590_7abda702/seg_b51947dd9106461294da5167ad58c01a.jpg
+++ b/static/results/1771144590_7abda702/seg_b51947dd9106461294da5167ad58c01a.jpg
--- a/static/results/1771144590_7abda702/tarot_0801fbbe819045e18a1d797f821c827b_1.png
+++ b/static/results/1771144590_7abda702/tarot_0801fbbe819045e18a1d797f821c827b_1.png
--- a/static/results/1771144590_7abda702/tarot_6c2a162eb7c64809a6d2593296e5d4e5_2.png
+++ b/static/results/1771144590_7abda702/tarot_6c2a162eb7c64809a6d2593296e5d4e5_2.png
--- a/static/results/1771144590_7abda702/tarot_fc015d8dc3dc4754aac8d072c4beecce_0.png
+++ b/static/results/1771144590_7abda702/tarot_fc015d8dc3dc4754aac8d072c4beecce_0.png