This commit is contained in:
2026-02-15 16:37:24 +08:00
parent f981a05b32
commit 882989f252
29 changed files with 118 additions and 37 deletions

View File

@@ -3,6 +3,7 @@ import uuid
import time import time
import requests import requests
import numpy as np import numpy as np
import cv2
from typing import Optional from typing import Optional
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
@@ -120,6 +121,46 @@ app.openapi = custom_openapi
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static") app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
# ------------------- 辅助函数 ------------------- # ------------------- 辅助函数 -------------------
def order_points(pts):
    """Order four 2-D points as top-left, top-right, bottom-right, bottom-left.

    The primary heuristic picks the corners from coordinate sums and
    differences: the smallest ``x + y`` is the top-left, the largest is the
    bottom-right, the smallest ``y - x`` is the top-right and the largest is
    the bottom-left. When that heuristic selects the same input point for
    more than one corner (ties, e.g. a diamond-shaped quad), the points are
    instead ordered by angle around their centroid so that every input point
    appears exactly once in the result.

    Args:
        pts: Four ``(x, y)`` points. Any array-like holding exactly eight
            values is accepted, e.g. a ``(4, 2)`` array, a list of pairs or
            a ``(4, 1, 2)`` contour array.

    Returns:
        A new ``(4, 2)`` float32 array ordered ``[tl, tr, br, bl]``.

    Raises:
        ValueError: If ``pts`` does not contain exactly four 2-D points.
    """
    points = np.asarray(pts)
    if points.size != 8:
        raise ValueError(
            f"order_points expects exactly four 2-D points, got shape {points.shape}"
        )
    points = points.reshape(4, 2)

    sums = points.sum(axis=1)
    diffs = np.diff(points, axis=1).ravel()  # y - x for every point
    corner_idx = [
        int(np.argmin(sums)),   # top-left
        int(np.argmin(diffs)),  # top-right
        int(np.argmax(sums)),   # bottom-right
        int(np.argmax(diffs)),  # bottom-left
    ]

    if len(set(corner_idx)) < 4:
        # The sum/diff extrema collided, so one point would be duplicated and
        # another dropped. Sort by angle around the centroid instead; with the
        # image y axis pointing down, ascending atan2 walks the points
        # clockwise on screen. Start the walk at the smallest x + y.
        coords = points.astype("float64")
        center = coords.mean(axis=0)
        angles = np.arctan2(coords[:, 1] - center[1], coords[:, 0] - center[0])
        cyclic = np.argsort(angles, kind="stable")
        start = int(np.argmin(sums[cyclic]))
        corner_idx = np.roll(cyclic, -start).tolist()

    return points[corner_idx].astype("float32")
def four_point_transform(image, pts):
    """Apply a perspective transform that maps a quadrilateral to a rectangle.

    The four points are first ordered by ``order_points`` (top-left,
    top-right, bottom-right, bottom-left). The output width is the longer of
    the bottom and top edges and the output height is the longer of the
    right and left edges, each truncated to a whole number of pixels.

    Args:
        image: Source image array passed to ``cv2.warpPerspective``.
        pts: The four corner points of the region to rectify.

    Returns:
        The warped image produced by ``cv2.warpPerspective``.
    """

    def _edge_length(p, q):
        # Euclidean distance between two (x, y) points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Width of the output image: longer of the bottom and top edges.
    out_width = max(
        int(_edge_length(bottom_right, bottom_left)),
        int(_edge_length(top_right, top_left)),
    )

    # Height of the output image: longer of the right and left edges.
    out_height = max(
        int(_edge_length(top_right, bottom_right)),
        int(_edge_length(top_left, bottom_left)),
    )

    # Destination corners, in the same order as ``corners``.
    target = np.array(
        [
            [0, 0],
            [out_width - 1, 0],
            [out_width - 1, out_height - 1],
            [0, out_height - 1],
        ],
        dtype="float32",
    )

    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_width, out_height))
def load_image_from_url(url: str) -> Image.Image: def load_image_from_url(url: str) -> Image.Image:
try: try:
headers = {'User-Agent': 'Mozilla/5.0'} headers = {'User-Agent': 'Mozilla/5.0'}
@@ -130,13 +171,14 @@ def load_image_from_url(url: str) -> Image.Image:
except Exception as e: except Exception as e:
raise HTTPException(status_code=400, detail=f"无法下载图片: {str(e)}") raise HTTPException(status_code=400, detail=f"无法下载图片: {str(e)}")
def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RESULT_IMAGE_DIR) -> list[str]: def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RESULT_IMAGE_DIR) -> list[dict]:
""" """
根据 mask 和 box 裁剪出独立的对象图片 (保留透明背景) 根据 mask 和 box 进行透视矫正并裁剪出独立的对象图片 (保留透明背景)
返回包含文件名和元数据的列表
""" """
saved_files = [] saved_objects = []
# Convert image to numpy array # Convert image to numpy array (RGB)
img_arr = np.array(image) # RGB (H, W, 3) img_arr = np.array(image)
for i, (mask, box) in enumerate(zip(masks, boxes)): for i, (mask, box) in enumerate(zip(masks, boxes)):
# Handle tensor/numpy conversions # Handle tensor/numpy conversions
@@ -145,44 +187,74 @@ def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RE
else: else:
mask_np = mask.squeeze() mask_np = mask.squeeze()
if isinstance(box, torch.Tensor): # Ensure mask is uint8 binary for OpenCV
box_np = box.cpu().numpy() if mask_np.dtype == bool:
mask_uint8 = (mask_np * 255).astype(np.uint8)
else: else:
box_np = box mask_uint8 = (mask_np > 0.5).astype(np.uint8) * 255
# Get coordinates # Find contours
x1, y1, x2, y2 = map(int, box_np) contours, _ = cv2.findContours(mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
if not contours:
# Ensure coordinates are within bounds
x1 = max(0, x1)
y1 = max(0, y1)
x2 = min(image.width, x2)
y2 = min(image.height, y2)
# Check valid crop
if x2 <= x1 or y2 <= y1:
continue continue
# Create Alpha channel from mask (0 or 255) # Get largest contour
# mask_np is boolean or float 0..1. If boolean, *255 -> 0/255. c = max(contours, key=cv2.contourArea)
alpha = (mask_np * 255).astype(np.uint8)
# Combine RGB and Alpha # Approximate contour to polygon
rgba = np.dstack((img_arr, alpha)) peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.04 * peri, True)
# Convert back to PIL for cropping # If we have 4 points, use them. If not, fallback to minAreaRect
pil_rgba = Image.fromarray(rgba) if len(approx) == 4:
pts = approx.reshape(4, 2)
else:
rect = cv2.minAreaRect(c)
pts = cv2.boxPoints(rect)
# Apply perspective transform
# 注意这里我们只变换RGB部分Alpha通道需要额外处理或者直接应用同样的变换
# 为了简单我们直接对原图假设不带Alpha进行变换
# 如果需要保留背景透明需要先将原图转为RGBA再做变换
# Crop to bounding box # Check if original image has Alpha
cropped = pil_rgba.crop((x1, y1, x2, y2)) if img_arr.shape[2] == 4:
warped = four_point_transform(img_arr, pts)
else:
# Add alpha channel from mask?
# 透视变换后的矩形本身就是去掉了背景的所以不需要额外的Mask Alpha
# 但是为了保持一致性我们可以给变换后的图加一个全不透明的Alpha或者保留RGB
warped = four_point_transform(img_arr, pts)
# Check orientation (Portrait vs Landscape)
h, w = warped.shape[:2]
is_rotated = False
# Enforce Portrait for Tarot cards (Standard 7x12 cm ratio approx)
if w > h:
# Rotate 90 degrees clockwise
warped = cv2.rotate(warped, cv2.ROTATE_90_CLOCKWISE)
is_rotated = True
# Convert back to PIL
pil_warped = Image.fromarray(warped)
# Save # Save
filename = f"tarot_{uuid.uuid4().hex}_{i}.png" # Use png for transparency filename = f"tarot_{uuid.uuid4().hex}_{i}.png"
save_path = os.path.join(output_dir, filename) save_path = os.path.join(output_dir, filename)
cropped.save(save_path) pil_warped.save(save_path)
saved_files.append(filename)
return saved_files # 正逆位判断逻辑 (基于几何只能做到这一步,无法区分上下颠倒)
# 这里我们假设长边垂直为正位,如果做了旋转则标记
# 真正的正逆位需要OCR或图像识别
saved_objects.append({
"filename": filename,
"is_rotated_by_algorithm": is_rotated,
"note": "Geometric correction applied. True upright/reversed requires content analysis."
})
return saved_objects
def generate_and_save_result(image: Image.Image, inference_state, output_dir: str = RESULT_IMAGE_DIR) -> str: def generate_and_save_result(image: Image.Image, inference_state, output_dir: str = RESULT_IMAGE_DIR) -> str:
filename = f"seg_{uuid.uuid4().hex}.jpg" filename = f"seg_{uuid.uuid4().hex}.jpg"
@@ -295,12 +367,21 @@ async def segment_tarot(
# 数量正确,执行抠图 # 数量正确,执行抠图
try: try:
filenames = crop_and_save_objects(image, masks, boxes, output_dir=output_dir) saved_objects = crop_and_save_objects(image, masks, boxes, output_dir=output_dir)
except Exception as e: except Exception as e:
raise HTTPException(status_code=500, detail=f"抠图处理错误: {str(e)}") raise HTTPException(status_code=500, detail=f"抠图处理错误: {str(e)}")
# 生成 URL 列表 # 生成 URL 列表和元数据
card_urls = [str(request.url_for("static", path=f"results/{request_id}/{fname}")) for fname in filenames] tarot_cards = []
for obj in saved_objects:
fname = obj["filename"]
file_url = str(request.url_for("static", path=f"results/{request_id}/{fname}"))
tarot_cards.append({
"url": file_url,
"is_rotated": obj["is_rotated_by_algorithm"],
"orientation_status": "corrected_to_portrait" if obj["is_rotated_by_algorithm"] else "original_portrait",
"note": obj["note"]
})
# 生成整体效果图 # 生成整体效果图
try: try:
@@ -311,8 +392,8 @@ async def segment_tarot(
return JSONResponse(content={ return JSONResponse(content={
"status": "success", "status": "success",
"message": f"成功识别并分割 {expected_count} 张塔罗牌", "message": f"成功识别并分割 {expected_count} 张塔罗牌 (已执行透视矫正)",
"tarot_cards": card_urls, "tarot_cards": tarot_cards,
"full_visualization": main_file_url, "full_visualization": main_file_url,
"scores": scores.tolist() if torch.is_tensor(scores) else scores "scores": scores.tolist() if torch.is_tensor(scores) else scores
}) })

View File

Before

Width:  |  Height:  |  Size: 92 KiB

After

Width:  |  Height:  |  Size: 92 KiB

View File

Before

Width:  |  Height:  |  Size: 516 KiB

After

Width:  |  Height:  |  Size: 516 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 359 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 423 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 447 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 414 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 336 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 321 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 332 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 71 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 237 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 680 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 707 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 670 KiB