qwen3.5 优化

This commit is contained in:
2026-02-18 14:38:12 +08:00
parent 765a0aebdc
commit aee6f8804f
28 changed files with 628 additions and 55 deletions

View File

@@ -6,6 +6,8 @@ import numpy as np
import json
import torch
import cv2
import ast
import re
from PIL import Image
from dashscope import MultiModalConversation
@@ -95,6 +97,35 @@ def create_highlighted_visualization(image: Image.Image, masks, output_path: str
# Save
Image.fromarray(result_np).save(output_path)
def extract_json_from_response(text: str) -> dict:
"""
Robustly extract JSON from text, handling:
1. Markdown code blocks (```json ... ```)
2. Single quotes (Python dict style) via ast.literal_eval
"""
try:
# 1. Try to find JSON block
json_match = re.search(r'```json\s*(.*?)\s*```', text, re.DOTALL)
if json_match:
clean_text = json_match.group(1).strip()
else:
# Try to find { ... } block if no markdown
match = re.search(r'\{.*\}', text, re.DOTALL)
if match:
clean_text = match.group(0).strip()
else:
clean_text = text.strip()
# 2. Try standard JSON
return json.loads(clean_text)
except Exception as e1:
# 3. Try ast.literal_eval for single quotes
try:
return ast.literal_eval(clean_text)
except Exception as e2:
# 4. Fail
raise ValueError(f"Could not parse JSON: {e1} | {e2} | Content: {text[:100]}...")
def analyze_demographics_with_qwen(image_path: str, model_name: str = 'qwen-vl-max', prompt_template: str = None) -> dict:
"""
调用 Qwen-VL 模型分析人物的年龄和性别
@@ -131,19 +162,21 @@ def analyze_demographics_with_qwen(image_path: str, model_name: str = 'qwen-vl-m
if response.status_code == 200:
content = response.output.choices[0].message.content[0]['text']
# 清理 Markdown 代码块标记
clean_content = content.replace("```json", "").replace("```", "").strip()
try:
result = json.loads(clean_content)
result = extract_json_from_response(content)
result["model_used"] = model_name
return result
except json.JSONDecodeError:
return {"raw_analysis": clean_content}
except Exception as e:
print(f"JSON Parse Error in face analysis: {e}")
return {"raw_analysis": content, "error": str(e), "model_used": model_name}
else:
return {"error": f"API Error: {response.code} - {response.message}"}
except Exception as e:
return {"error": f"分析失败: {str(e)}"}
import asyncio
def process_face_segmentation_and_analysis(
processor,
image: Image.Image,
@@ -156,11 +189,11 @@ def process_face_segmentation_and_analysis(
核心处理逻辑:
1. SAM3 分割 (默认提示词 "head" 以包含头发)
2. 裁剪图片
3. Qwen-VL 识别性别年龄
3. Qwen-VL 识别性别年龄 (并发)
4. 返回结果
"""
# 1. SAM3 推理
# 1. SAM3 推理 (同步,因为涉及 GPU 操作)
inference_state = processor.set_image(image)
output = processor.set_text_prompt(state=inference_state, prompt=prompt)
masks, boxes, scores = output["masks"], output["boxes"], output["scores"]
@@ -179,7 +212,7 @@ def process_face_segmentation_and_analysis(
output_dir = os.path.join(output_base_dir, request_id)
os.makedirs(output_dir, exist_ok=True)
# --- 新增:生成背景变暗的整体可视化图 ---
# --- 生成可视化图 ---
vis_filename = f"seg_{uuid.uuid4().hex}.jpg"
vis_path = os.path.join(output_dir, vis_filename)
try:
@@ -188,38 +221,238 @@ def process_face_segmentation_and_analysis(
except Exception as e:
print(f"可视化生成失败: {e}")
full_vis_relative_path = None
# -------------------------------------
# ------------------
results = []
# 转换 boxes 为 numpy
# 转换 boxes 和 scores
if isinstance(boxes, torch.Tensor):
boxes_np = boxes.cpu().numpy()
else:
boxes_np = boxes
# 转换 scores 为 list
if isinstance(scores, torch.Tensor):
scores_list = scores.tolist()
else:
scores_list = scores if isinstance(scores, list) else [float(scores)]
for i, box in enumerate(boxes_np):
# 2. 裁剪 (带一点 padding 以保留完整发型)
# 2. 裁剪 (带一点 padding 以保留完整发型)
cropped_img = crop_head_with_padding(image, box, padding_ratio=0.1)
# 准备异步任务
async def run_analysis_tasks():
loop = asyncio.get_event_loop()
tasks = []
temp_results = [] # 存储 (index, filename, score) 以便后续排序组合
for i, box in enumerate(boxes_np):
# 2. 裁剪 (同步)
cropped_img = crop_head_with_padding(image, box, padding_ratio=0.1)
filename = f"face_{i}.jpg"
save_path = os.path.join(output_dir, filename)
cropped_img.save(save_path)
# 3. 准备识别任务
task = loop.run_in_executor(
None,
analyze_demographics_with_qwen,
save_path,
qwen_model,
analysis_prompt
)
tasks.append(task)
temp_results.append({
"filename": filename,
"relative_path": f"results/{request_id}/{filename}",
"score": float(scores_list[i]) if i < len(scores_list) else 0.0
})
# 等待所有任务完成
if tasks:
analysis_results = await asyncio.gather(*tasks)
else:
analysis_results = []
# 组合结果
final_results = []
for i, item in enumerate(temp_results):
item["analysis"] = analysis_results[i]
final_results.append(item)
return final_results
# 运行异步任务
# 注意:由于本函数被 FastAPI (异步环境) 中的同步或异步函数调用,
# 如果上层是 async def我们可以直接 await。
# 但由于这个函数定义没有 async且之前的调用是同步的
# 为了兼容性,我们需要检查当前是否在事件循环中。
# 然而,查看 fastAPI_tarot.py这个函数是在 async def segment_face 中被调用的。
# 但它是作为普通函数被导入和调用的。
# 为了不破坏现有签名,我们可以使用 asyncio.run() 或者在新循环中运行,
# 但这在已经运行的 loop 中是不允许的。
# 最佳方案:修改本函数为 async并在 fastAPI_tarot.py 中 await 它。
# 但这需要修改 fastAPI_tarot.py 的调用处。
# 既然我们已经修改了 fastAPI_tarot.py我们也可以顺便修改这里的签名。
# 但为了稳妥,我们可以用一种 hack
# 如果在一个正在运行的 loop 中调用,我们必须返回 awaitable 或者使用 loop.run_until_complete (会报错)
# 让我们先把这个函数改成 async然后去修改 fastAPI_tarot.py 的调用。
# 这是最正确的做法。
pass # 占位,实际代码在下面
async def process_face_segmentation_and_analysis_async(
processor,
image: Image.Image,
prompt: str = "head",
output_base_dir: str = "static/results",
qwen_model: str = "qwen-vl-max",
analysis_prompt: str = None
) -> dict:
# ... (同上逻辑,只是是 async)
# 1. SAM3 推理
inference_state = processor.set_image(image)
output = processor.set_text_prompt(state=inference_state, prompt=prompt)
masks, boxes, scores = output["masks"], output["boxes"], output["scores"]
detected_count = len(masks)
if detected_count == 0:
return {
"status": "success",
"message": "未检测到目标",
"detected_count": 0,
"results": []
}
# 保存裁剪图
request_id = f"{int(time.time())}_{uuid.uuid4().hex[:8]}"
output_dir = os.path.join(output_base_dir, request_id)
os.makedirs(output_dir, exist_ok=True)
vis_filename = f"seg_{uuid.uuid4().hex}.jpg"
vis_path = os.path.join(output_dir, vis_filename)
try:
create_highlighted_visualization(image, masks, vis_path)
full_vis_relative_path = f"results/{request_id}/{vis_filename}"
except Exception as e:
print(f"可视化生成失败: {e}")
full_vis_relative_path = None
if isinstance(boxes, torch.Tensor):
boxes_np = boxes.cpu().numpy()
else:
boxes_np = boxes
if isinstance(scores, torch.Tensor):
scores_list = scores.tolist()
else:
scores_list = scores if isinstance(scores, list) else [float(scores)]
loop = asyncio.get_event_loop()
tasks = []
results = []
for i, box in enumerate(boxes_np):
cropped_img = crop_head_with_padding(image, box, padding_ratio=0.1)
filename = f"face_{i}.jpg"
save_path = os.path.join(output_dir, filename)
cropped_img.save(save_path)
# 3. 识别
task = loop.run_in_executor(
None,
analyze_demographics_with_qwen,
save_path,
qwen_model,
analysis_prompt
)
tasks.append(task)
results.append({
"filename": filename,
"relative_path": f"results/{request_id}/{filename}",
"score": float(scores_list[i]) if i < len(scores_list) else 0.0
})
if tasks:
analysis_results = await asyncio.gather(*tasks)
else:
analysis_results = []
for i, item in enumerate(results):
item["analysis"] = analysis_results[i]
return {
"status": "success",
"message": f"成功检测并分析 {detected_count} 个人脸",
"detected_count": detected_count,
"request_id": request_id,
"full_visualization": full_vis_relative_path,
"scores": scores_list,
"results": results
}
# 保留旧的同步接口以兼容其他潜在调用者,但内部实现可能会有问题如果它在 loop 中运行
# 既然我们主要关注 fastAPI_tarot.py我们可以直接替换 process_face_segmentation_and_analysis
# 或者让它只是一个 wrapper
def process_face_segmentation_and_analysis(
processor,
image: Image.Image,
prompt: str = "head",
output_base_dir: str = "static/results",
qwen_model: str = "qwen-vl-max",
analysis_prompt: str = None
) -> dict:
"""
同步版本 (保留以兼容)
注意:如果在 async loop 中调用此函数,且此函数内部没有异步操作,则会阻塞 loop。
如果需要异步并发,请使用 process_face_segmentation_and_analysis_async
"""
# 这里我们简单地复用逻辑,但去除异步部分,退化为串行
# 1. SAM3 推理
inference_state = processor.set_image(image)
output = processor.set_text_prompt(state=inference_state, prompt=prompt)
masks, boxes, scores = output["masks"], output["boxes"], output["scores"]
detected_count = len(masks)
if detected_count == 0:
return {
"status": "success",
"message": "未检测到目标",
"detected_count": 0,
"results": []
}
request_id = f"{int(time.time())}_{uuid.uuid4().hex[:8]}"
output_dir = os.path.join(output_base_dir, request_id)
os.makedirs(output_dir, exist_ok=True)
vis_filename = f"seg_{uuid.uuid4().hex}.jpg"
vis_path = os.path.join(output_dir, vis_filename)
try:
create_highlighted_visualization(image, masks, vis_path)
full_vis_relative_path = f"results/{request_id}/{vis_filename}"
except Exception as e:
print(f"可视化生成失败: {e}")
full_vis_relative_path = None
if isinstance(boxes, torch.Tensor):
boxes_np = boxes.cpu().numpy()
else:
boxes_np = boxes
if isinstance(scores, torch.Tensor):
scores_list = scores.tolist()
else:
scores_list = scores if isinstance(scores, list) else [float(scores)]
results = []
for i, box in enumerate(boxes_np):
cropped_img = crop_head_with_padding(image, box, padding_ratio=0.1)
filename = f"face_{i}.jpg"
save_path = os.path.join(output_dir, filename)
cropped_img.save(save_path)
# 同步调用
analysis = analyze_demographics_with_qwen(save_path, model_name=qwen_model, prompt_template=analysis_prompt)
# 构造返回结果
# 注意URL 生成需要依赖外部的 request context这里只返回相对路径或文件名
# 由调用方组装完整 URL
results.append({
"filename": filename,
"relative_path": f"results/{request_id}/{filename}",
@@ -232,7 +465,8 @@ def process_face_segmentation_and_analysis(
"message": f"成功检测并分析 {detected_count} 个人脸",
"detected_count": detected_count,
"request_id": request_id,
"full_visualization": full_vis_relative_path, # 返回相对路径
"scores": scores_list, # 返回全部分数
"full_visualization": full_vis_relative_path,
"scores": scores_list,
"results": results
}
}