This commit is contained in:
2026-02-15 22:32:05 +08:00
parent 08a63807f3
commit b83a19e9c2
8 changed files with 390 additions and 18 deletions

View File

@@ -23,6 +23,7 @@ from PIL import Image
from sam3.model_builder import build_sam3_image_model
from sam3.model.sam3_image_processor import Sam3Processor
from sam3.visualization_utils import plot_results
import human_analysis_service # 引入新服务
# ------------------- 配置与路径 -------------------
STATIC_DIR = "static"
@@ -92,7 +93,7 @@ async def lifespan(app: FastAPI):
app = FastAPI(
lifespan=lifespan,
title="SAM3 Segmentation API",
description="## 🔒 受 API Key 保护\n请点击右上角 **Authorize** 并输入: `123quant-speed`",
description="## 🔒 受 API Key 保护\n请点击右上角 **Authorize** 并输入: `123quant-*****`",
)
# 手动添加 OpenAPI 安全配置,让 Docs 里的锁头生效
@@ -177,7 +178,7 @@ def load_image_from_url(url: str) -> Image.Image:
except Exception as e:
raise HTTPException(status_code=400, detail=f"无法下载图片: {str(e)}")
def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RESULT_IMAGE_DIR) -> list[dict]:
def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RESULT_IMAGE_DIR, is_tarot: bool = True) -> list[dict]:
"""
根据 mask 和 box 进行透视矫正并裁剪出独立的对象图片 (保留透明背景)
返回包含文件名和元数据的列表
@@ -237,7 +238,7 @@ def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RE
is_rotated = False
# Enforce Portrait for Tarot cards (Standard 7x12 cm ratio approx)
if w > h:
if is_tarot and w > h:
# Rotate 90 degrees clockwise
warped = cv2.rotate(warped, cv2.ROTATE_90_CLOCKWISE)
is_rotated = True
@@ -246,7 +247,8 @@ def crop_and_save_objects(image: Image.Image, masks, boxes, output_dir: str = RE
pil_warped = Image.fromarray(warped)
# Save
filename = f"tarot_{uuid.uuid4().hex}_{i}.png"
prefix = "tarot" if is_tarot else "segment"
filename = f"{prefix}_{uuid.uuid4().hex}_{i}.png"
save_path = os.path.join(output_dir, filename)
pil_warped.save(save_path)
@@ -272,23 +274,73 @@ def generate_and_save_result(image: Image.Image, inference_state, output_dir: st
def recognize_card_with_qwen(image_path: str) -> dict:
"""
调用 Qwen-VL 识别塔罗牌
调用 Qwen-VL 识别塔罗牌 (采用正逆位对比策略)
"""
try:
# 确保路径是绝对路径并加上 file:// 前缀
# 确保路径是绝对路径
abs_path = os.path.abspath(image_path)
file_url = f"file://{abs_path}"
messages = [
{
"role": "user",
"content": [
{"image": file_url},
{"text": "这是一张塔罗牌。请识别它的名字中文并判断它是正位还是逆位。请以JSON格式返回包含 'name''position' 两个字段。例如:{'name': '愚者', 'position': '正位'}。不要包含Markdown代码块标记。"}
]
}
]
# -------------------------------------------------
# 优化策略生成一张旋转180度的对比图
# 让 AI 做选择题而不是判断题,大幅提高准确率
# -------------------------------------------------
try:
# 1. 打开原图
img = Image.open(abs_path)
# 2. 生成旋转图 (180度)
rotated_img = img.rotate(180)
# 3. 保存旋转图
dir_name = os.path.dirname(abs_path)
file_name = os.path.basename(abs_path)
rotated_name = f"rotated_{file_name}"
rotated_path = os.path.join(dir_name, rotated_name)
rotated_img.save(rotated_path)
rotated_file_url = f"file://{rotated_path}"
# 4. 构建对比 Prompt
# 发送两张图图1=原图, 图2=旋转图
# 询问 AI 哪一张是“正位”
messages = [
{
"role": "user",
"content": [
{"image": file_url}, # 图1 (原图)
{"image": rotated_file_url}, # 图2 (旋转180度)
{"text": """这是一张塔罗牌的两个方向:
图1原始方向
图2旋转180度后的方向
请仔细对比两张图片的牌面内容(文字方向、人物站立方向、图案逻辑):
1. 识别这张牌的名字(中文)。
2. 判断哪一张图片展示了正确的“正位”Upright状态。
- 如果图1是正位说明原图就是正位。
- 如果图2是正位说明原图是逆位。
请以JSON格式返回包含 'name''position' 两个字段。
例如:{'name': '愚者', 'position': '正位'} 或 {'name': '倒吊人', 'position': '逆位'}。
不要包含Markdown代码块标记。"""}
]
}
]
except Exception as e:
print(f"对比图生成失败,回退到单图模式: {e}")
# 回退到旧的单图模式
messages = [
{
"role": "user",
"content": [
{"image": file_url},
{"text": "这是一张塔罗牌。请识别它的名字中文并判断它是正位还是逆位。请以JSON格式返回包含 'name''position' 两个字段。例如:{'name': '愚者', 'position': '正位'}。不要包含Markdown代码块标记。"}
]
}
]
# 调用模型
response = MultiModalConversation.call(model=QWEN_MODEL, messages=messages)
if response.status_code == 200:
@@ -352,7 +404,8 @@ async def segment(
request: Request,
prompt: str = Form(...),
file: Optional[UploadFile] = File(None),
image_url: Optional[str] = Form(None)
image_url: Optional[str] = Form(None),
save_segment_images: bool = Form(False)
):
if not file and not image_url:
raise HTTPException(status_code=400, detail="必须提供 file (图片文件) 或 image_url (图片链接)")
@@ -380,13 +433,43 @@ async def segment(
raise HTTPException(status_code=500, detail=f"绘图保存错误: {str(e)}")
file_url = request.url_for("static", path=f"results/{filename}")
# New logic for saving segments
saved_segments_info = []
if save_segment_images:
try:
request_id = f"{int(time.time())}_{uuid.uuid4().hex[:8]}"
output_dir = os.path.join(RESULT_IMAGE_DIR, request_id)
os.makedirs(output_dir, exist_ok=True)
saved_objects = crop_and_save_objects(image, masks, boxes, output_dir=output_dir, is_tarot=False)
for obj in saved_objects:
fname = obj["filename"]
seg_url = str(request.url_for("static", path=f"results/{request_id}/{fname}"))
saved_segments_info.append({
"url": seg_url,
"filename": fname
})
except Exception as e:
# The caller explicitly requested segment images, so a failure to save them
# fails the whole request: log the error, then report it as HTTP 500.
print(f"Error saving segments: {e}")
# Fail the request so the client never receives a response that is missing the segments it asked for.
raise HTTPException(status_code=500, detail=f"保存分割图片失败: {str(e)}")
return JSONResponse(content={
response_content = {
"status": "success",
"result_image_url": str(file_url),
"detected_count": len(masks),
"scores": scores.tolist() if torch.is_tensor(scores) else scores
})
}
if save_segment_images:
response_content["segmented_images"] = saved_segments_info
return JSONResponse(content=response_content)
@app.post("/segment_tarot", dependencies=[Depends(verify_api_key)])
async def segment_tarot(
@@ -592,6 +675,64 @@ async def recognize_tarot(
"scores": scores.tolist() if torch.is_tensor(scores) else scores
})
@app.post("/segment_face", dependencies=[Depends(verify_api_key)])
async def segment_face(
    request: Request,
    file: Optional[UploadFile] = File(None),
    image_url: Optional[str] = Form(None),
    prompt: str = Form("face and hair"),  # default prompt also covers hair
):
    """
    Face/head detection and attribute analysis endpoint.

    This handler loads the input image, hands it to
    ``human_analysis_service.process_face_segmentation_and_analysis`` and then
    rewrites the relative paths in the service result into absolute static
    URLs.

    Intended pipeline (per the original design notes for this endpoint):
    1. Segment the head region (including hair) with SAM3.
    2. Crop and save each region.
    3. Ask Qwen-VL for gender and age.

    Raises:
        HTTPException: 400 when neither ``file`` nor ``image_url`` is given or
            the image cannot be loaded; 500 when the analysis service fails.
    """
    if not file and not image_url:
        raise HTTPException(status_code=400, detail="必须提供 file (图片文件) 或 image_url (图片链接)")

    # 1. Load the image. An uploaded file takes precedence over the URL.
    try:
        if file:
            image = Image.open(file.file).convert("RGB")
        else:
            # The guard above guarantees image_url is set on this branch.
            image = load_image_from_url(image_url)
    except HTTPException:
        # load_image_from_url already raises a well-formed HTTPException;
        # pass it through instead of nesting it inside a second one.
        raise
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"图片解析失败: {str(e)}") from e

    processor = request.app.state.processor

    # 2. Delegate segmentation + analysis to the standalone service.
    # Crops are written under the shared RESULT_IMAGE_DIR.
    # NOTE(review): this is a synchronous call inside an async handler;
    # presumably it runs model inference and blocks the event loop - confirm.
    try:
        result = human_analysis_service.process_face_segmentation_and_analysis(
            processor=processor,
            image=image,
            prompt=prompt,
            output_base_dir=RESULT_IMAGE_DIR,
        )
    except HTTPException:
        # Keep any status code chosen deliberately further down the stack.
        raise
    except Exception as e:
        # Print the full stack trace to ease debugging.
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"处理失败: {str(e)}") from e

    # 3. Complete the URLs (the service layer has no request context).
    if result["status"] == "success":
        # URL of the full-image visualization, when the service produced one.
        full_vis_rel_path = result.get("full_visualization")
        if full_vis_rel_path:
            result["full_visualization"] = str(request.url_for("static", path=full_vis_rel_path))

        for item in result["results"]:
            # item["relative_path"] is a relative path such as
            # results/xxx/xxx.jpg; replace it with a full URL and drop the
            # relative field so that only the URL is returned.
            relative_path = item.pop("relative_path")
            item["url"] = str(request.url_for("static", path=relative_path))

    return JSONResponse(content=result)
if __name__ == "__main__":
import uvicorn
# 注意:如果你的文件名不是 fastAPI_tarot.py请修改下面第一个参数