tarot
155
fastAPI_tarot.py
@@ -3,6 +3,7 @@ import uuid
|
||||
import time
|
||||
import requests
|
||||
import numpy as np
|
||||
import cv2
|
||||
from typing import Optional
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
@@ -120,6 +121,46 @@ app.openapi = custom_openapi
|
||||
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
|
||||
|
||||
# ------------------- 辅助函数 -------------------
|
||||
def order_points(pts):
    """
    Order four 2-D points as top-left, top-right, bottom-right, bottom-left.

    The corners are sorted by their angle around the centroid and the
    resulting ring is rotated so that it starts at the corner with the
    smallest ``x + y``. For a convex quadrilateral whose extreme corners are
    unambiguous this gives the same result as picking the min/max of
    ``x + y`` and ``y - x``, but it never assigns one input point to two
    output slots when those sums or differences tie (for example a card
    rotated by about 45 degrees).

    Args:
        pts: Four ``(x, y)`` points; anything ``np.asarray`` can turn into
            an array that reshapes to ``(4, 2)`` (so ``(4, 1, 2)`` contour
            arrays are accepted as well).

    Returns:
        A new ``float32`` array of shape ``(4, 2)`` holding the points in
        the order top-left, top-right, bottom-right, bottom-left.

    Raises:
        ValueError: If ``pts`` does not contain exactly four 2-D points.
    """
    pts = np.asarray(pts, dtype="float32").reshape(-1, 2)
    if pts.shape[0] != 4:
        raise ValueError(
            f"order_points expects exactly 4 points, got {pts.shape[0]}"
        )

    # In image coordinates (y grows downwards) an ascending arctan2 angle
    # walks the corners clockwise as seen on screen, which is exactly the
    # tl -> tr -> br -> bl direction we need.
    center = pts.mean(axis=0)
    angles = np.arctan2(pts[:, 1] - center[1], pts[:, 0] - center[0])
    ring = pts[np.argsort(angles, kind="stable")]

    # Rotate the ring so it starts at the top-left corner (smallest x + y).
    start = int(np.argmin(ring.sum(axis=1)))
    return np.roll(ring, -start, axis=0)
|
||||
|
||||
def four_point_transform(image, pts):
    """
    Warp the quadrilateral described by ``pts`` onto an upright rectangle.

    The points are first put in top-left, top-right, bottom-right,
    bottom-left order with ``order_points``. The output width is the longer
    of the bottom and top edges and the output height is the longer of the
    right and left edges, each truncated to an integer.

    Args:
        image: Source image array passed straight to ``cv2.warpPerspective``.
        pts: The four corner points of the region to rectify.

    Returns:
        The warped image of size ``(width, height)`` as computed above.
    """
    def _edge_length(p, q):
        # Euclidean distance between two (x, y) corners.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Width of the rectified image: longer of the bottom / top edges.
    out_width = max(
        int(_edge_length(bottom_right, bottom_left)),
        int(_edge_length(top_right, top_left)),
    )

    # Height of the rectified image: longer of the right / left edges.
    out_height = max(
        int(_edge_length(top_right, bottom_right)),
        int(_edge_length(top_left, bottom_left)),
    )

    # Destination corners, in the same tl, tr, br, bl order as `corners`.
    target = np.array(
        [
            [0, 0],
            [out_width - 1, 0],
            [out_width - 1, out_height - 1],
            [0, out_height - 1],
        ],
        dtype="float32",
    )

    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_width, out_height))
|
||||
|
||||
def load_image_from_url(url: str) -> Image.Image:
|
||||
try:
|
||||
headers = {'User-Agent': 'Mozilla/5.0'}
|
||||
@@ -130,13 +171,14 @@ def load_image_from_url(url: str) -> Image.Image:
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=400, detail=f"无法下载图片: {str(e)}")
|
||||
|
||||
def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RESULT_IMAGE_DIR) -> list[str]:
|
||||
def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RESULT_IMAGE_DIR) -> list[dict]:
|
||||
"""
|
||||
根据 mask 和 box 裁剪出独立的对象图片 (保留透明背景)
|
||||
根据 mask 和 box 进行透视矫正并裁剪出独立的对象图片 (保留透明背景)
|
||||
返回包含文件名和元数据的列表
|
||||
"""
|
||||
saved_files = []
|
||||
# Convert image to numpy array
|
||||
img_arr = np.array(image) # RGB (H, W, 3)
|
||||
saved_objects = []
|
||||
# Convert image to numpy array (RGB)
|
||||
img_arr = np.array(image)
|
||||
|
||||
for i, (mask, box) in enumerate(zip(masks, boxes)):
|
||||
# Handle tensor/numpy conversions
|
||||
@@ -145,44 +187,74 @@ def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RE
|
||||
else:
|
||||
mask_np = mask.squeeze()
|
||||
|
||||
if isinstance(box, torch.Tensor):
|
||||
box_np = box.cpu().numpy()
|
||||
# Ensure mask is uint8 binary for OpenCV
|
||||
if mask_np.dtype == bool:
|
||||
mask_uint8 = (mask_np * 255).astype(np.uint8)
|
||||
else:
|
||||
box_np = box
|
||||
mask_uint8 = (mask_np > 0.5).astype(np.uint8) * 255
|
||||
|
||||
# Get coordinates
|
||||
x1, y1, x2, y2 = map(int, box_np)
|
||||
|
||||
# Ensure coordinates are within bounds
|
||||
x1 = max(0, x1)
|
||||
y1 = max(0, y1)
|
||||
x2 = min(image.width, x2)
|
||||
y2 = min(image.height, y2)
|
||||
|
||||
# Check valid crop
|
||||
if x2 <= x1 or y2 <= y1:
|
||||
# Find contours
|
||||
contours, _ = cv2.findContours(mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
||||
if not contours:
|
||||
continue
|
||||
|
||||
# Create Alpha channel from mask (0 or 255)
|
||||
# mask_np is boolean or float 0..1. If boolean, *255 -> 0/255.
|
||||
alpha = (mask_np * 255).astype(np.uint8)
|
||||
# Get largest contour
|
||||
c = max(contours, key=cv2.contourArea)
|
||||
|
||||
# Combine RGB and Alpha
|
||||
rgba = np.dstack((img_arr, alpha))
|
||||
# Approximate contour to polygon
|
||||
peri = cv2.arcLength(c, True)
|
||||
approx = cv2.approxPolyDP(c, 0.04 * peri, True)
|
||||
|
||||
# Convert back to PIL for cropping
|
||||
pil_rgba = Image.fromarray(rgba)
|
||||
# If we have 4 points, use them. If not, fallback to minAreaRect
|
||||
if len(approx) == 4:
|
||||
pts = approx.reshape(4, 2)
|
||||
else:
|
||||
rect = cv2.minAreaRect(c)
|
||||
pts = cv2.boxPoints(rect)
|
||||
|
||||
# Apply perspective transform
|
||||
# 注意:这里我们只变换RGB部分,Alpha通道需要额外处理或者直接应用同样的变换
|
||||
# 为了简单,我们直接对原图(假设不带Alpha)进行变换
|
||||
# 如果需要保留背景透明,需要先将原图转为RGBA,再做变换
|
||||
|
||||
# Crop to bounding box
|
||||
cropped = pil_rgba.crop((x1, y1, x2, y2))
|
||||
# Check if original image has Alpha
|
||||
if img_arr.shape[2] == 4:
|
||||
warped = four_point_transform(img_arr, pts)
|
||||
else:
|
||||
# Add alpha channel from mask?
|
||||
# 透视变换后的矩形本身就是去掉了背景的,所以不需要额外的Mask Alpha
|
||||
# 但是为了保持一致性,我们可以给变换后的图加一个全不透明的Alpha,或者保留RGB
|
||||
warped = four_point_transform(img_arr, pts)
|
||||
|
||||
# Check orientation (Portrait vs Landscape)
|
||||
h, w = warped.shape[:2]
|
||||
is_rotated = False
|
||||
|
||||
# Enforce Portrait for Tarot cards (Standard 7x12 cm ratio approx)
|
||||
if w > h:
|
||||
# Rotate 90 degrees clockwise
|
||||
warped = cv2.rotate(warped, cv2.ROTATE_90_CLOCKWISE)
|
||||
is_rotated = True
|
||||
|
||||
# Convert back to PIL
|
||||
pil_warped = Image.fromarray(warped)
|
||||
|
||||
# Save
|
||||
filename = f"tarot_{uuid.uuid4().hex}_{i}.png" # Use png for transparency
|
||||
filename = f"tarot_{uuid.uuid4().hex}_{i}.png"
|
||||
save_path = os.path.join(output_dir, filename)
|
||||
cropped.save(save_path)
|
||||
saved_files.append(filename)
|
||||
pil_warped.save(save_path)
|
||||
|
||||
return saved_files
|
||||
# 正逆位判断逻辑 (基于几何只能做到这一步,无法区分上下颠倒)
|
||||
# 这里我们假设长边垂直为正位,如果做了旋转则标记
|
||||
# 真正的正逆位需要OCR或图像识别
|
||||
|
||||
saved_objects.append({
|
||||
"filename": filename,
|
||||
"is_rotated_by_algorithm": is_rotated,
|
||||
"note": "Geometric correction applied. True upright/reversed requires content analysis."
|
||||
})
|
||||
|
||||
return saved_objects
|
||||
|
||||
def generate_and_save_result(image: Image.Image, inference_state, output_dir: str = RESULT_IMAGE_DIR) -> str:
|
||||
filename = f"seg_{uuid.uuid4().hex}.jpg"
|
||||
@@ -295,12 +367,21 @@ async def segment_tarot(
|
||||
|
||||
# 数量正确,执行抠图
|
||||
try:
|
||||
filenames = crop_and_save_objects(image, masks, boxes, output_dir=output_dir)
|
||||
saved_objects = crop_and_save_objects(image, masks, boxes, output_dir=output_dir)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"抠图处理错误: {str(e)}")
|
||||
|
||||
# 生成 URL 列表
|
||||
card_urls = [str(request.url_for("static", path=f"results/{request_id}/{fname}")) for fname in filenames]
|
||||
# 生成 URL 列表和元数据
|
||||
tarot_cards = []
|
||||
for obj in saved_objects:
|
||||
fname = obj["filename"]
|
||||
file_url = str(request.url_for("static", path=f"results/{request_id}/{fname}"))
|
||||
tarot_cards.append({
|
||||
"url": file_url,
|
||||
"is_rotated": obj["is_rotated_by_algorithm"],
|
||||
"orientation_status": "corrected_to_portrait" if obj["is_rotated_by_algorithm"] else "original_portrait",
|
||||
"note": obj["note"]
|
||||
})
|
||||
|
||||
# 生成整体效果图
|
||||
try:
|
||||
@@ -311,8 +392,8 @@ async def segment_tarot(
|
||||
|
||||
return JSONResponse(content={
|
||||
"status": "success",
|
||||
"message": f"成功识别并分割 {expected_count} 张塔罗牌",
|
||||
"tarot_cards": card_urls,
|
||||
"message": f"成功识别并分割 {expected_count} 张塔罗牌 (已执行透视矫正)",
|
||||
"tarot_cards": tarot_cards,
|
||||
"full_visualization": main_file_url,
|
||||
"scores": scores.tolist() if torch.is_tensor(scores) else scores
|
||||
})
|
||||
|
||||
|
Before Width: | Height: | Size: 92 KiB After Width: | Height: | Size: 92 KiB |
|
Before Width: | Height: | Size: 516 KiB After Width: | Height: | Size: 516 KiB |
|
After Width: | Height: | Size: 359 KiB |
|
After Width: | Height: | Size: 90 KiB |
|
After Width: | Height: | Size: 90 KiB |
|
After Width: | Height: | Size: 90 KiB |
|
After Width: | Height: | Size: 423 KiB |
|
After Width: | Height: | Size: 447 KiB |
|
After Width: | Height: | Size: 414 KiB |
|
After Width: | Height: | Size: 90 KiB |
|
After Width: | Height: | Size: 336 KiB |
|
After Width: | Height: | Size: 321 KiB |
|
After Width: | Height: | Size: 332 KiB |
|
After Width: | Height: | Size: 71 KiB |
|
After Width: | Height: | Size: 9.4 KiB |
|
After Width: | Height: | Size: 6.5 KiB |
|
After Width: | Height: | Size: 18 KiB |
|
After Width: | Height: | Size: 14 KiB |
|
After Width: | Height: | Size: 12 KiB |
|
After Width: | Height: | Size: 6.8 KiB |
|
After Width: | Height: | Size: 14 KiB |
|
After Width: | Height: | Size: 22 KiB |
|
After Width: | Height: | Size: 5.6 KiB |
|
After Width: | Height: | Size: 9.2 KiB |
|
After Width: | Height: | Size: 237 KiB |
|
After Width: | Height: | Size: 680 KiB |
|
After Width: | Height: | Size: 707 KiB |
|
After Width: | Height: | Size: 670 KiB |