admin update

2026-02-18 00:55:37 +08:00
parent 0ab6f52525
commit 34cbeb69c3
3 changed files with 217 additions and 116 deletions
--- a/fastAPI_tarot.py
+++ b/fastAPI_tarot.py
@@ -266,7 +266,7 @@ def is_english(text: str) -> bool:
            return False
    return True

-def append_to_history(req_type: str, prompt: str, status: str, result_path: str = None, details: str = ""):
+def append_to_history(req_type: str, prompt: str, status: str, result_path: str = None, details: str = "", final_prompt: str = None, duration: float = 0.0):
    """
    记录请求历史到 history.json
    """
@@ -274,9 +274,11 @@ def append_to_history(req_type: str, prompt: str, status: str, result_path: str
        "timestamp": time.time(),
        "type": req_type,
        "prompt": prompt,
+        "final_prompt": final_prompt,
        "status": status,
        "result_path": result_path,
-        "details": details
+        "details": details,
+        "duration": duration
    }
    try:
        with open(HISTORY_FILE, "a", encoding="utf-8") as f:
@@ -371,7 +373,7 @@ def load_image_from_url(url: str) -> Image.Image:
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"无法下载图片: {str(e)}")

-def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RESULT_IMAGE_DIR, is_tarot: bool = True, cutout: bool = False) -> list[dict]:
+def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RESULT_IMAGE_DIR, is_tarot: bool = True, cutout: bool = False, perspective_correction: bool = False) -> list[dict]:
    """
    根据 mask 和 box 进行处理并保存独立的对象图片
    
@@ -382,6 +384,7 @@ def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RE
    - output_dir: 输出目录
    - is_tarot: 是否为塔罗牌模式 (会影响文件名前缀和旋转逻辑)
    - cutout: 如果为 True，则进行轮廓抠图（透明背景）；否则进行透视矫正（主要用于卡片）
+    - perspective_correction: 是否进行梯形透视矫正 (矩形矫正)
    
    返回:
    - 保存的对象信息列表
@@ -409,8 +412,8 @@ def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RE
        else:
            mask_uint8 = (mask_np > 0.5).astype(np.uint8) * 255

+        # --- 准备基础图像 ---
        if cutout:
-            # --- 轮廓抠图模式 (透明背景) ---
            # 1. 准备 RGBA 原图
            if image.mode != "RGBA":
                img_rgba = image.convert("RGBA")
@@ -421,73 +424,84 @@ def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RE
            mask_img = Image.fromarray(mask_uint8, mode='L')
            
            # 3. 将 Mask 应用到 Alpha 通道
-            cutout_img = Image.new("RGBA", img_rgba.size, (0, 0, 0, 0))
-            cutout_img.paste(image.convert("RGB"), (0, 0), mask=mask_img)
+            base_img_pil = Image.new("RGBA", img_rgba.size, (0, 0, 0, 0))
+            base_img_pil.paste(image.convert("RGB"), (0, 0), mask=mask_img)
            
-            # 4. Crop to Box
-            x1, y1, x2, y2 = map(int, box_np)
-            w, h = cutout_img.size
-            x1 = max(0, x1); y1 = max(0, y1)
-            x2 = min(w, x2); y2 = min(h, y2)
-            
-            if x2 > x1 and y2 > y1:
-                final_img = cutout_img.crop((x1, y1, x2, y2))
-            else:
-                final_img = cutout_img # Fallback
-            
-            # Save
-            prefix = "cutout"
-            is_rotated = False
-            
-            filename = f"{prefix}_{uuid.uuid4().hex}_{i}.png"
-            save_path = os.path.join(output_dir, filename)
-            final_img.save(save_path)
-            
-            saved_objects.append({
-                "filename": filename,
-                "is_rotated_by_algorithm": is_rotated,
-                "note": "Mask cutout applied. Background removed."
-            })
-
+            # Convert to numpy for potential warping
+            base_img_arr = np.array(base_img_pil)
        else:
+            base_img_pil = image.convert("RGB")
+            base_img_arr = img_arr # RGB numpy array
+
+        # --- 透视矫正 vs 简单裁剪 ---
+        final_img_pil = None
+        is_rotated = False
+        note = ""
+
+        if perspective_correction:
            # --- 透视矫正模式 (矩形矫正) ---
            contours, _ = cv2.findContours(mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-            if not contours:
-                continue
-                
-            c = max(contours, key=cv2.contourArea)
-            peri = cv2.arcLength(c, True)
-            approx = cv2.approxPolyDP(c, 0.04 * peri, True)
            
-            if len(approx) == 4:
-                pts = approx.reshape(4, 2)
+            pts = None
+            if contours:
+                c = max(contours, key=cv2.contourArea)
+                peri = cv2.arcLength(c, True)
+                approx = cv2.approxPolyDP(c, 0.04 * peri, True)
+                
+                if len(approx) == 4:
+                    pts = approx.reshape(4, 2)
+                else:
+                    rect = cv2.minAreaRect(c)
+                    pts = cv2.boxPoints(rect)
+            
+            if pts is not None:
+                warped = four_point_transform(base_img_arr, pts)
+                note = "Geometric correction applied."
            else:
-                rect = cv2.minAreaRect(c)
-                pts = cv2.boxPoints(rect)
-                
-            warped = four_point_transform(img_arr, pts)
-            
-            # Check orientation (Portrait vs Landscape)
+                # Fallback to simple crop if no contours found
+                x1, y1, x2, y2 = map(int, box_np)
+                # Ensure bounds
+                h, w = base_img_arr.shape[:2]
+                x1 = max(0, x1); y1 = max(0, y1)
+                x2 = min(w, x2); y2 = min(h, y2)
+                warped = base_img_arr[y1:y2, x1:x2]
+                note = "Correction failed, fallback to crop."
+
+            # Check orientation (Portrait vs Landscape) - Only for Tarot usually
            h, w = warped.shape[:2]
-            is_rotated = False
            
            # 强制竖屏逻辑 (塔罗牌通常是竖屏)
            if is_tarot and w > h:
                warped = cv2.rotate(warped, cv2.ROTATE_90_CLOCKWISE)
                is_rotated = True
                
-            pil_warped = Image.fromarray(warped)
+            final_img_pil = Image.fromarray(warped)
            
-            prefix = "tarot" if is_tarot else "segment"
-            filename = f"{prefix}_{uuid.uuid4().hex}_{i}.png" 
-            save_path = os.path.join(output_dir, filename)
-            pil_warped.save(save_path)
+        else:
+            # --- 简单裁剪模式 (Simple Crop) ---
+            x1, y1, x2, y2 = map(int, box_np)
+            w, h = base_img_pil.size
+            x1 = max(0, x1); y1 = max(0, y1)
+            x2 = min(w, x2); y2 = min(h, y2)
            
-            saved_objects.append({
-                "filename": filename,
-                "is_rotated_by_algorithm": is_rotated,
-                "note": "Geometric correction applied."
-            })
+            if x2 > x1 and y2 > y1:
+                final_img_pil = base_img_pil.crop((x1, y1, x2, y2))
+            else:
+                final_img_pil = base_img_pil # Fallback
+            
+            note = "Simple crop applied."
+
+        # --- 保存图片 ---
+        prefix = "cutout" if cutout else ("tarot" if is_tarot else "segment")
+        filename = f"{prefix}_{uuid.uuid4().hex}_{i}.png"
+        save_path = os.path.join(output_dir, filename)
+        final_img_pil.save(save_path)
+        
+        saved_objects.append({
+            "filename": filename,
+            "is_rotated_by_algorithm": is_rotated,
+            "note": note
+        })
        
    return saved_objects

@@ -660,6 +674,7 @@ async def segment(
    image_url: Optional[str] = Form(None, description="URL of the image"),
    save_segment_images: bool = Form(False, description="Whether to save and return individual segmented objects"),
    cutout: bool = Form(False, description="If True, returns transparent background PNGs; otherwise returns original crops"),
+    perspective_correction: bool = Form(False, description="If True, applies perspective correction (warping) to the segmented object."),
    highlight: bool = Form(False, description="If True, darkens the background to highlight the subject (周边变黑放大)."),
    confidence: float = Form(0.7, description="Confidence threshold (0.0-1.0). Default is 0.7.")

@@ -671,11 +686,13 @@ async def segment(
    - 支持自动将中文 Prompt 翻译为英文
    - 支持周边变黑放大效果 (Highlight Mode)
    - 支持手动设置置信度 (Confidence Threshold)
+    - 支持透视矫正 (Perspective Correction)

    """
    if not file and not image_url:
        raise HTTPException(status_code=400, detail="必须提供 file (图片文件) 或 image_url (图片链接)")

+    start_time = time.time()
    # 1. Prompt 处理
    final_prompt = prompt
    if not is_english(prompt):
@@ -695,7 +712,8 @@ async def segment(
        elif image_url:
            image = load_image_from_url(image_url)
    except Exception as e:
-        append_to_history("general", prompt, "failed", details=f"Image Load Error: {str(e)}")
+        duration = time.time() - start_time
+        append_to_history("general", prompt, "failed", details=f"Image Load Error: {str(e)}", final_prompt=final_prompt, duration=duration)
        raise HTTPException(status_code=400, detail=f"图片解析失败: {str(e)}")

    processor = request.app.state.processor
@@ -725,7 +743,8 @@ async def segment(
                processor.confidence_threshold = original_confidence
                
    except Exception as e:
-        append_to_history("general", prompt, "failed", details=f"Inference Error: {str(e)}")
+        duration = time.time() - start_time
+        append_to_history("general", prompt, "failed", details=f"Inference Error: {str(e)}", final_prompt=final_prompt, duration=duration)
        raise HTTPException(status_code=500, detail=f"模型推理错误: {str(e)}")

    # 4. 结果可视化与保存
@@ -738,7 +757,8 @@ async def segment(
        else:
            filename = generate_and_save_result(image, inference_state)
    except Exception as e:
-        append_to_history("general", prompt, "failed", details=f"Save Error: {str(e)}")
+        duration = time.time() - start_time
+        append_to_history("general", prompt, "failed", details=f"Save Error: {str(e)}", final_prompt=final_prompt, duration=duration)
        raise HTTPException(status_code=500, detail=f"绘图保存错误: {str(e)}")

    file_url = request.url_for("static", path=f"results/{filename}")
@@ -757,7 +777,8 @@ async def segment(
                boxes, 
                output_dir=output_dir, 
                is_tarot=False, 
-                cutout=cutout
+                cutout=cutout,
+                perspective_correction=perspective_correction
            )
            
            for obj in saved_objects:
@@ -771,7 +792,8 @@ async def segment(
        except Exception as e:
             print(f"Error saving segments: {e}")
             # Don't fail the whole request just for this part, but log it? Or fail? Usually fail.
-             append_to_history("general", prompt, "partial_success", result_path=f"results/{filename}", details="Segments save failed")
+             duration = time.time() - start_time
+             append_to_history("general", prompt, "partial_success", result_path=f"results/{filename}", details="Segments save failed", final_prompt=final_prompt, duration=duration)
             raise HTTPException(status_code=500, detail=f"保存分割图片失败: {str(e)}")

    response_content = {
@@ -784,7 +806,8 @@ async def segment(
    if save_segment_images:
         response_content["segmented_images"] = saved_segments_info

-    append_to_history("general", prompt, "success", result_path=f"results/{filename}", details=f"Detected: {len(masks)}")
+    duration = time.time() - start_time
+    append_to_history("general", prompt, "success", result_path=f"results/{filename}", details=f"Detected: {len(masks)}", final_prompt=final_prompt, duration=duration)
    return JSONResponse(content=response_content)

 # ------------------------------------------
@@ -808,13 +831,15 @@ async def segment_tarot(
    if not file and not image_url:
        raise HTTPException(status_code=400, detail="必须提供 file (图片文件) 或 image_url (图片链接)")

+    start_time = time.time()
    try:
        if file:
            image = Image.open(file.file).convert("RGB")
        elif image_url:
            image = load_image_from_url(image_url)
    except Exception as e:
-        append_to_history("tarot", f"expected: {expected_count}", "failed", details=f"Image Load Error: {str(e)}")
+        duration = time.time() - start_time
+        append_to_history("tarot", f"expected: {expected_count}", "failed", details=f"Image Load Error: {str(e)}", duration=duration)
        raise HTTPException(status_code=400, detail=f"图片解析失败: {str(e)}")

    processor = request.app.state.processor
@@ -825,7 +850,8 @@ async def segment_tarot(
        output = processor.set_text_prompt(state=inference_state, prompt="tarot card")
        masks, boxes, scores = output["masks"], output["boxes"], output["scores"]
    except Exception as e:
-        append_to_history("tarot", f"expected: {expected_count}", "failed", details=f"Inference Error: {str(e)}")
+        duration = time.time() - start_time
+        append_to_history("tarot", f"expected: {expected_count}", "failed", details=f"Inference Error: {str(e)}", duration=duration)
        raise HTTPException(status_code=500, detail=f"模型推理错误: {str(e)}")

    # 核心逻辑：判断数量
@@ -844,7 +870,8 @@ async def segment_tarot(
        except:
            file_url = None
        
-        append_to_history("tarot", f"expected: {expected_count}", "failed", result_path=f"results/{request_id}/{filename}" if file_url else None, details=f"Detected {detected_count} cards, expected {expected_count}")
+        duration = time.time() - start_time
+        append_to_history("tarot", f"expected: {expected_count}", "failed", result_path=f"results/{request_id}/{filename}" if file_url else None, details=f"Detected {detected_count} cards, expected {expected_count}", duration=duration)
        return JSONResponse(
            status_code=400, 
            content={
@@ -857,9 +884,10 @@ async def segment_tarot(

    # 数量正确，执行抠图
    try:
-        saved_objects = crop_and_save_objects(image, masks, boxes, output_dir=output_dir)
+        saved_objects = crop_and_save_objects(image, masks, boxes, output_dir=output_dir, is_tarot=True, perspective_correction=True)
    except Exception as e:
-        append_to_history("tarot", f"expected: {expected_count}", "failed", details=f"Crop Error: {str(e)}")
+        duration = time.time() - start_time
+        append_to_history("tarot", f"expected: {expected_count}", "failed", details=f"Crop Error: {str(e)}", duration=duration)
        raise HTTPException(status_code=500, detail=f"抠图处理错误: {str(e)}")

    # 生成 URL 列表和元数据
@@ -881,7 +909,8 @@ async def segment_tarot(
    except:
        main_file_url = None

-    append_to_history("tarot", f"expected: {expected_count}", "success", result_path=f"results/{request_id}/{main_filename}" if main_file_url else None, details=f"Successfully segmented {expected_count} cards")
+    duration = time.time() - start_time
+    append_to_history("tarot", f"expected: {expected_count}", "success", result_path=f"results/{request_id}/{main_filename}" if main_file_url else None, details=f"Successfully segmented {expected_count} cards", duration=duration)
    return JSONResponse(content={
        "status": "success",
        "message": f"成功识别并分割 {expected_count} 张塔罗牌 (已执行透视矫正)",
@@ -907,13 +936,15 @@ async def recognize_tarot(
    if not file and not image_url:
        raise HTTPException(status_code=400, detail="必须提供 file (图片文件) 或 image_url (图片链接)")

+    start_time = time.time()
    try:
        if file:
            image = Image.open(file.file).convert("RGB")
        elif image_url:
            image = load_image_from_url(image_url)
    except Exception as e:
-        append_to_history("tarot-recognize", f"expected: {expected_count}", "failed", details=f"Image Load Error: {str(e)}")
+        duration = time.time() - start_time
+        append_to_history("tarot-recognize", f"expected: {expected_count}", "failed", details=f"Image Load Error: {str(e)}", duration=duration)
        raise HTTPException(status_code=400, detail=f"图片解析失败: {str(e)}")

    processor = request.app.state.processor
@@ -923,7 +954,8 @@ async def recognize_tarot(
        output = processor.set_text_prompt(state=inference_state, prompt="tarot card")
        masks, boxes, scores = output["masks"], output["boxes"], output["scores"]
    except Exception as e:
-        append_to_history("tarot-recognize", f"expected: {expected_count}", "failed", details=f"Inference Error: {str(e)}")
+        duration = time.time() - start_time
+        append_to_history("tarot-recognize", f"expected: {expected_count}", "failed", details=f"Inference Error: {str(e)}", duration=duration)
        raise HTTPException(status_code=500, detail=f"模型推理错误: {str(e)}")

    detected_count = len(masks)
@@ -951,7 +983,8 @@ async def recognize_tarot(
        spread_info = recognize_spread_with_qwen(temp_raw_path)
    
    if detected_count != expected_count:
-        append_to_history("tarot-recognize", f"expected: {expected_count}", "failed", result_path=f"results/{request_id}/{main_filename}" if main_file_url else None, details=f"Detected {detected_count}, expected {expected_count}")
+        duration = time.time() - start_time
+        append_to_history("tarot-recognize", f"expected: {expected_count}", "failed", result_path=f"results/{request_id}/{main_filename}" if main_file_url else None, details=f"Detected {detected_count}, expected {expected_count}", duration=duration)
        return JSONResponse(
            status_code=400, 
            content={
@@ -965,9 +998,10 @@ async def recognize_tarot(

    # 数量正确，执行抠图 + 矫正
    try:
-        saved_objects = crop_and_save_objects(image, masks, boxes, output_dir=output_dir)
+        saved_objects = crop_and_save_objects(image, masks, boxes, output_dir=output_dir, is_tarot=True, perspective_correction=True)
    except Exception as e:
-        append_to_history("tarot-recognize", f"expected: {expected_count}", "failed", details=f"Crop Error: {str(e)}")
+        duration = time.time() - start_time
+        append_to_history("tarot-recognize", f"expected: {expected_count}", "failed", details=f"Crop Error: {str(e)}", duration=duration)
        raise HTTPException(status_code=500, detail=f"抠图处理错误: {str(e)}")

    # 遍历每张卡片进行识别
@@ -988,7 +1022,8 @@ async def recognize_tarot(
            "note": obj["note"]
        })

-    append_to_history("tarot-recognize", f"expected: {expected_count}", "success", result_path=f"results/{request_id}/{main_filename}" if main_file_url else None, details=f"Spread: {spread_info.get('spread_name', 'Unknown')}")
+    duration = time.time() - start_time
+    append_to_history("tarot-recognize", f"expected: {expected_count}", "success", result_path=f"results/{request_id}/{main_filename}" if main_file_url else None, details=f"Spread: {spread_info.get('spread_name', 'Unknown')}", duration=duration)
    return JSONResponse(content={
        "status": "success",
        "message": f"成功识别并分割 {expected_count} 张塔罗牌 (含Qwen识别结果)",
@@ -1019,6 +1054,7 @@ async def segment_face(
    if not file and not image_url:
        raise HTTPException(status_code=400, detail="必须提供 file (图片文件) 或 image_url (图片链接)")

+    start_time = time.time()
    # Prompt 翻译/优化
    final_prompt = prompt
    if not is_english(prompt):
@@ -1038,7 +1074,8 @@ async def segment_face(
        elif image_url:
            image = load_image_from_url(image_url)
    except Exception as e:
-        append_to_history("face", prompt, "failed", details=f"Image Load Error: {str(e)}")
+        duration = time.time() - start_time
+        append_to_history("face", prompt, "failed", details=f"Image Load Error: {str(e)}", final_prompt=final_prompt, duration=duration)
        raise HTTPException(status_code=400, detail=f"图片解析失败: {str(e)}")

    processor = request.app.state.processor
@@ -1056,7 +1093,8 @@ async def segment_face(
    except Exception as e:
        import traceback
        traceback.print_exc()
-        append_to_history("face", prompt, "failed", details=f"Process Error: {str(e)}")
+        duration = time.time() - start_time
+        append_to_history("face", prompt, "failed", details=f"Process Error: {str(e)}", final_prompt=final_prompt, duration=duration)
        raise HTTPException(status_code=500, detail=f"处理失败: {str(e)}")

    # 补全 URL
@@ -1069,7 +1107,8 @@ async def segment_face(
            relative_path = item.pop("relative_path")
            item["url"] = str(request.url_for("static", path=relative_path))

-    append_to_history("face", prompt, result["status"], details=f"Results: {len(result.get('results', []))}")
+    duration = time.time() - start_time
+    append_to_history("face", prompt, result["status"], details=f"Results: {len(result.get('results', []))}", final_prompt=final_prompt, duration=duration)
    return JSONResponse(content=result)

 # ==========================================
@@ -1168,6 +1207,9 @@ async def list_files(path: str = ""):
                # path is relative to results/
                # so url is /static/results/path/name
                rel_path = os.path.join("results", path, entry.name)
+                # Ensure forward slashes for URL
+                if os.sep != "/":
+                    rel_path = rel_path.replace(os.sep, "/")
                item["url"] = f"/static/{rel_path}"
                
            items.append(item)