def order_points(pts):
    """Order four 2-D points as top-left, top-right, bottom-right, bottom-left.

    The points are sorted by their angle around the centroid, which in image
    coordinates (y axis pointing down) walks the quadrilateral clockwise on
    screen, and the walk is then rotated so that it starts at the corner with
    the smallest ``x + y``.

    Picking each corner independently via min/max of ``x + y`` and ``y - x``
    can assign the same point to two corners when those values tie (for
    example a card rotated by about 45 degrees), silently dropping a real
    corner. Sorting around the centroid always returns all four input points.

    Args:
        pts: Array-like holding exactly four (x, y) points. Shapes ``(4, 2)``
            and ``(4, 1, 2)`` (the layout produced by OpenCV contour
            functions) are both accepted.

    Returns:
        A ``float32`` array of shape ``(4, 2)`` ordered
        ``[top-left, top-right, bottom-right, bottom-left]``.

    Raises:
        ValueError: If ``pts`` cannot be reshaped to four 2-D points.
    """
    pts = np.asarray(pts, dtype="float32").reshape(4, 2)

    # Angle of every point as seen from the centroid. Because y grows
    # downwards in images, ascending angles run clockwise on screen:
    # top-left -> top-right -> bottom-right -> bottom-left.
    center = pts.mean(axis=0)
    angles = np.arctan2(pts[:, 1] - center[1], pts[:, 0] - center[0])
    ring = pts[np.argsort(angles, kind="stable")]

    # Start the walk at the point closest to the image origin (min x + y),
    # which is the conventional "top-left" corner.
    start = int(np.argmin(ring.sum(axis=1)))
    return np.roll(ring, -start, axis=0)


def _edge_length(a, b):
    """Return the Euclidean distance between two 2-D points."""
    return np.sqrt(((a - b) ** 2).sum())


def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` into an upright rectangle.

    Args:
        image: Source image as a NumPy array (any channel count accepted by
            ``cv2.warpPerspective``).
        pts: Four (x, y) corner points of the region to rectify, in any order.

    Returns:
        The perspective-corrected crop. Its width and height are the longer
        of the two opposite edge lengths of the quadrilateral, truncated to
        whole pixels and never smaller than 1.
    """
    rect = order_points(pts)
    tl, tr, br, bl = rect

    # Output width: the longer of the bottom and top edges. Clamped to 1 so
    # that a degenerate (sub-pixel) contour cannot request an empty image,
    # which cv2.warpPerspective rejects.
    max_width = max(int(_edge_length(br, bl)), int(_edge_length(tr, tl)), 1)

    # Output height: the longer of the right and left edges.
    max_height = max(int(_edge_length(tr, br)), int(_edge_length(tl, bl)), 1)

    # Destination corners, in the same order as ``rect``.
    dst = np.array(
        [
            [0, 0],
            [max_width - 1, 0],
            [max_width - 1, max_height - 1],
            [0, max_height - 1],
        ],
        dtype="float32",
    )

    # cv2 is imported at module level by this file.
    matrix = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(image, matrix, (max_width, max_height))
根据 mask 和 box 裁剪出独立的对象图片 (保留透明背景) + 根据 mask 和 box 进行透视矫正并裁剪出独立的对象图片 (保留透明背景) + 返回包含文件名和元数据的列表 """ - saved_files = [] - # Convert image to numpy array - img_arr = np.array(image) # RGB (H, W, 3) + saved_objects = [] + # Convert image to numpy array (RGB) + img_arr = np.array(image) for i, (mask, box) in enumerate(zip(masks, boxes)): # Handle tensor/numpy conversions @@ -145,44 +187,74 @@ def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RE else: mask_np = mask.squeeze() - if isinstance(box, torch.Tensor): - box_np = box.cpu().numpy() + # Ensure mask is uint8 binary for OpenCV + if mask_np.dtype == bool: + mask_uint8 = (mask_np * 255).astype(np.uint8) else: - box_np = box + mask_uint8 = (mask_np > 0.5).astype(np.uint8) * 255 - # Get coordinates - x1, y1, x2, y2 = map(int, box_np) - - # Ensure coordinates are within bounds - x1 = max(0, x1) - y1 = max(0, y1) - x2 = min(image.width, x2) - y2 = min(image.height, y2) - - # Check valid crop - if x2 <= x1 or y2 <= y1: + # Find contours + contours, _ = cv2.findContours(mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + if not contours: continue - # Create Alpha channel from mask (0 or 255) - # mask_np is boolean or float 0..1. If boolean, *255 -> 0/255. - alpha = (mask_np * 255).astype(np.uint8) + # Get largest contour + c = max(contours, key=cv2.contourArea) - # Combine RGB and Alpha - rgba = np.dstack((img_arr, alpha)) + # Approximate contour to polygon + peri = cv2.arcLength(c, True) + approx = cv2.approxPolyDP(c, 0.04 * peri, True) - # Convert back to PIL for cropping - pil_rgba = Image.fromarray(rgba) + # If we have 4 points, use them. 
If not, fallback to minAreaRect + if len(approx) == 4: + pts = approx.reshape(4, 2) + else: + rect = cv2.minAreaRect(c) + pts = cv2.boxPoints(rect) + + # Apply perspective transform + # 注意:这里我们只变换RGB部分,Alpha通道需要额外处理或者直接应用同样的变换 + # 为了简单,我们直接对原图(假设不带Alpha)进行变换 + # 如果需要保留背景透明,需要先将原图转为RGBA,再做变换 - # Crop to bounding box - cropped = pil_rgba.crop((x1, y1, x2, y2)) + # Check if original image has Alpha + if img_arr.shape[2] == 4: + warped = four_point_transform(img_arr, pts) + else: + # Add alpha channel from mask? + # 透视变换后的矩形本身就是去掉了背景的,所以不需要额外的Mask Alpha + # 但是为了保持一致性,我们可以给变换后的图加一个全不透明的Alpha,或者保留RGB + warped = four_point_transform(img_arr, pts) + + # Check orientation (Portrait vs Landscape) + h, w = warped.shape[:2] + is_rotated = False + + # Enforce Portrait for Tarot cards (Standard 7x12 cm ratio approx) + if w > h: + # Rotate 90 degrees clockwise + warped = cv2.rotate(warped, cv2.ROTATE_90_CLOCKWISE) + is_rotated = True + + # Convert back to PIL + pil_warped = Image.fromarray(warped) # Save - filename = f"tarot_{uuid.uuid4().hex}_{i}.png" # Use png for transparency + filename = f"tarot_{uuid.uuid4().hex}_{i}.png" save_path = os.path.join(output_dir, filename) - cropped.save(save_path) - saved_files.append(filename) + pil_warped.save(save_path) - return saved_files + # 正逆位判断逻辑 (基于几何只能做到这一步,无法区分上下颠倒) + # 这里我们假设长边垂直为正位,如果做了旋转则标记 + # 真正的正逆位需要OCR或图像识别 + + saved_objects.append({ + "filename": filename, + "is_rotated_by_algorithm": is_rotated, + "note": "Geometric correction applied. True upright/reversed requires content analysis." 
+ }) + + return saved_objects def generate_and_save_result(image: Image.Image, inference_state, output_dir: str = RESULT_IMAGE_DIR) -> str: filename = f"seg_{uuid.uuid4().hex}.jpg" @@ -295,12 +367,21 @@ async def segment_tarot( # 数量正确,执行抠图 try: - filenames = crop_and_save_objects(image, masks, boxes, output_dir=output_dir) + saved_objects = crop_and_save_objects(image, masks, boxes, output_dir=output_dir) except Exception as e: raise HTTPException(status_code=500, detail=f"抠图处理错误: {str(e)}") - # 生成 URL 列表 - card_urls = [str(request.url_for("static", path=f"results/{request_id}/{fname}")) for fname in filenames] + # 生成 URL 列表和元数据 + tarot_cards = [] + for obj in saved_objects: + fname = obj["filename"] + file_url = str(request.url_for("static", path=f"results/{request_id}/{fname}")) + tarot_cards.append({ + "url": file_url, + "is_rotated": obj["is_rotated_by_algorithm"], + "orientation_status": "corrected_to_portrait" if obj["is_rotated_by_algorithm"] else "original_portrait", + "note": obj["note"] + }) # 生成整体效果图 try: @@ -311,8 +392,8 @@ async def segment_tarot( return JSONResponse(content={ "status": "success", - "message": f"成功识别并分割 {expected_count} 张塔罗牌", - "tarot_cards": card_urls, + "message": f"成功识别并分割 {expected_count} 张塔罗牌 (已执行透视矫正)", + "tarot_cards": tarot_cards, "full_visualization": main_file_url, "scores": scores.tolist() if torch.is_tensor(scores) else scores }) diff --git a/static/results/seg_9061c56e4b284f60a109e405c20af31b.jpg b/static/results/1771141606_f84c60e1/seg_755ce842193e431d88f2e9a8680ddded.jpg similarity index 100% rename from static/results/seg_9061c56e4b284f60a109e405c20af31b.jpg rename to static/results/1771141606_f84c60e1/seg_755ce842193e431d88f2e9a8680ddded.jpg diff --git a/static/results/tarot_c0033d32490548b99a6ddbcf721f2d9a_0.png b/static/results/1771141606_f84c60e1/tarot_8972c19bf5b44b2783c3f48827da274d_0.png similarity index 100% rename from static/results/tarot_c0033d32490548b99a6ddbcf721f2d9a_0.png rename to 
static/results/1771141606_f84c60e1/tarot_8972c19bf5b44b2783c3f48827da274d_0.png diff --git a/static/results/1771141606_f84c60e1/tarot_9ca8f077422e49488e8760de8bc8e47b_1.png b/static/results/1771141606_f84c60e1/tarot_9ca8f077422e49488e8760de8bc8e47b_1.png new file mode 100644 index 0000000..8a95672 Binary files /dev/null and b/static/results/1771141606_f84c60e1/tarot_9ca8f077422e49488e8760de8bc8e47b_1.png differ diff --git a/static/results/1771142529_586bf911/seg_c8a0f84341534b8ea0e120b1479a28f9.jpg b/static/results/1771142529_586bf911/seg_c8a0f84341534b8ea0e120b1479a28f9.jpg new file mode 100644 index 0000000..74df5c7 Binary files /dev/null and b/static/results/1771142529_586bf911/seg_c8a0f84341534b8ea0e120b1479a28f9.jpg differ diff --git a/static/results/1771142550_18ba511b/seg_18be15d43b4d4c959693c74f7f010196.jpg b/static/results/1771142550_18ba511b/seg_18be15d43b4d4c959693c74f7f010196.jpg new file mode 100644 index 0000000..74df5c7 Binary files /dev/null and b/static/results/1771142550_18ba511b/seg_18be15d43b4d4c959693c74f7f010196.jpg differ diff --git a/static/results/1771142565_49ee276c/seg_e50b91eeb1094ca5af241f0a7e71a150.jpg b/static/results/1771142565_49ee276c/seg_e50b91eeb1094ca5af241f0a7e71a150.jpg new file mode 100644 index 0000000..74df5c7 Binary files /dev/null and b/static/results/1771142565_49ee276c/seg_e50b91eeb1094ca5af241f0a7e71a150.jpg differ diff --git a/static/results/1771142565_49ee276c/tarot_90df7c6bd0534a2ba1df93aaf01f05a7_1.png b/static/results/1771142565_49ee276c/tarot_90df7c6bd0534a2ba1df93aaf01f05a7_1.png new file mode 100644 index 0000000..097d706 Binary files /dev/null and b/static/results/1771142565_49ee276c/tarot_90df7c6bd0534a2ba1df93aaf01f05a7_1.png differ diff --git a/static/results/1771142565_49ee276c/tarot_d267968c5cb34eb4a3c2b68b52bb8969_2.png b/static/results/1771142565_49ee276c/tarot_d267968c5cb34eb4a3c2b68b52bb8969_2.png new file mode 100644 index 0000000..cff4dda Binary files /dev/null and 
b/static/results/1771142565_49ee276c/tarot_d267968c5cb34eb4a3c2b68b52bb8969_2.png differ diff --git a/static/results/1771142565_49ee276c/tarot_ee43a2201cf747229fc2db80638c85c9_0.png b/static/results/1771142565_49ee276c/tarot_ee43a2201cf747229fc2db80638c85c9_0.png new file mode 100644 index 0000000..d1d5249 Binary files /dev/null and b/static/results/1771142565_49ee276c/tarot_ee43a2201cf747229fc2db80638c85c9_0.png differ diff --git a/static/results/1771144502_6378d9cb/seg_83cac064bea94c37a0c5a3cb29b38bf3.jpg b/static/results/1771144502_6378d9cb/seg_83cac064bea94c37a0c5a3cb29b38bf3.jpg new file mode 100644 index 0000000..74df5c7 Binary files /dev/null and b/static/results/1771144502_6378d9cb/seg_83cac064bea94c37a0c5a3cb29b38bf3.jpg differ diff --git a/static/results/1771144502_6378d9cb/tarot_22a3707295d24bb7a88c0e8d020b6501_2.png b/static/results/1771144502_6378d9cb/tarot_22a3707295d24bb7a88c0e8d020b6501_2.png new file mode 100644 index 0000000..8f75017 Binary files /dev/null and b/static/results/1771144502_6378d9cb/tarot_22a3707295d24bb7a88c0e8d020b6501_2.png differ diff --git a/static/results/1771144502_6378d9cb/tarot_844dbd5f5d444d798a644211d8864a27_1.png b/static/results/1771144502_6378d9cb/tarot_844dbd5f5d444d798a644211d8864a27_1.png new file mode 100644 index 0000000..d3b6ea7 Binary files /dev/null and b/static/results/1771144502_6378d9cb/tarot_844dbd5f5d444d798a644211d8864a27_1.png differ diff --git a/static/results/1771144502_6378d9cb/tarot_bb663f7440354cf4840ac766cbe778d6_0.png b/static/results/1771144502_6378d9cb/tarot_bb663f7440354cf4840ac766cbe778d6_0.png new file mode 100644 index 0000000..2f76f47 Binary files /dev/null and b/static/results/1771144502_6378d9cb/tarot_bb663f7440354cf4840ac766cbe778d6_0.png differ diff --git a/static/results/1771144553_0a9f3e15/seg_a6e62efff2a749e4ac7da457ebebe4fc.jpg b/static/results/1771144553_0a9f3e15/seg_a6e62efff2a749e4ac7da457ebebe4fc.jpg new file mode 100644 index 0000000..fe43df1 Binary files /dev/null and 
b/static/results/1771144553_0a9f3e15/seg_a6e62efff2a749e4ac7da457ebebe4fc.jpg differ diff --git a/static/results/1771144553_0a9f3e15/tarot_4372c25cb87743f0be3fc0ebda553856_5.png b/static/results/1771144553_0a9f3e15/tarot_4372c25cb87743f0be3fc0ebda553856_5.png new file mode 100644 index 0000000..07aa574 Binary files /dev/null and b/static/results/1771144553_0a9f3e15/tarot_4372c25cb87743f0be3fc0ebda553856_5.png differ diff --git a/static/results/1771144553_0a9f3e15/tarot_491ce3ddc28a4a86a60247ca3677fd39_6.png b/static/results/1771144553_0a9f3e15/tarot_491ce3ddc28a4a86a60247ca3677fd39_6.png new file mode 100644 index 0000000..34e1141 Binary files /dev/null and b/static/results/1771144553_0a9f3e15/tarot_491ce3ddc28a4a86a60247ca3677fd39_6.png differ diff --git a/static/results/1771144553_0a9f3e15/tarot_6e000f4cd00b495690e4e9026999f330_7.png b/static/results/1771144553_0a9f3e15/tarot_6e000f4cd00b495690e4e9026999f330_7.png new file mode 100644 index 0000000..59fd758 Binary files /dev/null and b/static/results/1771144553_0a9f3e15/tarot_6e000f4cd00b495690e4e9026999f330_7.png differ diff --git a/static/results/1771144553_0a9f3e15/tarot_9594dbdd4c864755845490f298df64f2_0.png b/static/results/1771144553_0a9f3e15/tarot_9594dbdd4c864755845490f298df64f2_0.png new file mode 100644 index 0000000..fd4a74c Binary files /dev/null and b/static/results/1771144553_0a9f3e15/tarot_9594dbdd4c864755845490f298df64f2_0.png differ diff --git a/static/results/1771144553_0a9f3e15/tarot_b2b7bcc83cdb4ddfb53845a9b0351d94_1.png b/static/results/1771144553_0a9f3e15/tarot_b2b7bcc83cdb4ddfb53845a9b0351d94_1.png new file mode 100644 index 0000000..5eafb82 Binary files /dev/null and b/static/results/1771144553_0a9f3e15/tarot_b2b7bcc83cdb4ddfb53845a9b0351d94_1.png differ diff --git a/static/results/1771144553_0a9f3e15/tarot_b50367bc0c2f4508a1839e19f526f255_2.png b/static/results/1771144553_0a9f3e15/tarot_b50367bc0c2f4508a1839e19f526f255_2.png new file mode 100644 index 0000000..9cc8ba7 Binary files 
/dev/null and b/static/results/1771144553_0a9f3e15/tarot_b50367bc0c2f4508a1839e19f526f255_2.png differ diff --git a/static/results/1771144553_0a9f3e15/tarot_c60d39c9ada647d6b0ee5597d8a88a8a_4.png b/static/results/1771144553_0a9f3e15/tarot_c60d39c9ada647d6b0ee5597d8a88a8a_4.png new file mode 100644 index 0000000..dd152e3 Binary files /dev/null and b/static/results/1771144553_0a9f3e15/tarot_c60d39c9ada647d6b0ee5597d8a88a8a_4.png differ diff --git a/static/results/1771144553_0a9f3e15/tarot_c8c93345201e464e94410f581ca48f48_9.png b/static/results/1771144553_0a9f3e15/tarot_c8c93345201e464e94410f581ca48f48_9.png new file mode 100644 index 0000000..96efb5f Binary files /dev/null and b/static/results/1771144553_0a9f3e15/tarot_c8c93345201e464e94410f581ca48f48_9.png differ diff --git a/static/results/1771144553_0a9f3e15/tarot_cb0a5bb73c2047ec8cdf87fc545ce14d_8.png b/static/results/1771144553_0a9f3e15/tarot_cb0a5bb73c2047ec8cdf87fc545ce14d_8.png new file mode 100644 index 0000000..638a92a Binary files /dev/null and b/static/results/1771144553_0a9f3e15/tarot_cb0a5bb73c2047ec8cdf87fc545ce14d_8.png differ diff --git a/static/results/1771144553_0a9f3e15/tarot_d365231889444c60b392bcbf89a49913_3.png b/static/results/1771144553_0a9f3e15/tarot_d365231889444c60b392bcbf89a49913_3.png new file mode 100644 index 0000000..73e9027 Binary files /dev/null and b/static/results/1771144553_0a9f3e15/tarot_d365231889444c60b392bcbf89a49913_3.png differ diff --git a/static/results/1771144590_7abda702/seg_b51947dd9106461294da5167ad58c01a.jpg b/static/results/1771144590_7abda702/seg_b51947dd9106461294da5167ad58c01a.jpg new file mode 100644 index 0000000..5cbfb2b Binary files /dev/null and b/static/results/1771144590_7abda702/seg_b51947dd9106461294da5167ad58c01a.jpg differ diff --git a/static/results/1771144590_7abda702/tarot_0801fbbe819045e18a1d797f821c827b_1.png b/static/results/1771144590_7abda702/tarot_0801fbbe819045e18a1d797f821c827b_1.png new file mode 100644 index 0000000..d270037 Binary files 
/dev/null and b/static/results/1771144590_7abda702/tarot_0801fbbe819045e18a1d797f821c827b_1.png differ diff --git a/static/results/1771144590_7abda702/tarot_6c2a162eb7c64809a6d2593296e5d4e5_2.png b/static/results/1771144590_7abda702/tarot_6c2a162eb7c64809a6d2593296e5d4e5_2.png new file mode 100644 index 0000000..fefb525 Binary files /dev/null and b/static/results/1771144590_7abda702/tarot_6c2a162eb7c64809a6d2593296e5d4e5_2.png differ diff --git a/static/results/1771144590_7abda702/tarot_fc015d8dc3dc4754aac8d072c4beecce_0.png b/static/results/1771144590_7abda702/tarot_fc015d8dc3dc4754aac8d072c4beecce_0.png new file mode 100644 index 0000000..1b96d36 Binary files /dev/null and b/static/results/1771144590_7abda702/tarot_fc015d8dc3dc4754aac8d072c4beecce_0.png differ