admin update

This commit is contained in:
2026-02-18 01:26:22 +08:00
parent 2d315948a2
commit 06f2b2928b
4 changed files with 794 additions and 125 deletions

View File

@@ -21,6 +21,7 @@ import traceback
import re import re
import asyncio import asyncio
import shutil import shutil
import subprocess
from datetime import datetime from datetime import datetime
from typing import Optional, List, Dict, Any from typing import Optional, List, Dict, Any
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
@@ -1287,12 +1288,38 @@ async def get_config(request: Request):
""" """
Get system config info Get system config info
""" """
device = "Unknown" device_str = "Unknown"
gpu_status = {}
if hasattr(request.app.state, "device"): if hasattr(request.app.state, "device"):
device = str(request.app.state.device) device_str = str(request.app.state.device)
# 获取 GPU 详细信息
if torch.cuda.is_available():
try:
device_id = torch.cuda.current_device()
props = torch.cuda.get_device_properties(device_id)
total_mem = props.total_memory
reserved_mem = torch.cuda.memory_reserved(device_id)
allocated_mem = torch.cuda.memory_allocated(device_id)
gpu_status = {
"available": True,
"name": props.name,
"total_memory": f"{total_mem / 1024**3:.2f} GB",
"reserved_memory": f"{reserved_mem / 1024**3:.2f} GB",
"allocated_memory": f"{allocated_mem / 1024**3:.2f} GB",
"memory_usage_percent": round((reserved_mem / total_mem) * 100, 1)
}
except Exception as e:
gpu_status = {"available": True, "error": str(e)}
else:
gpu_status = {"available": False, "reason": "No CUDA device detected"}
return { return {
"device": device, "device": device_str,
"gpu_status": gpu_status,
"cleanup_config": CLEANUP_CONFIG, "cleanup_config": CLEANUP_CONFIG,
"current_qwen_model": QWEN_MODEL, "current_qwen_model": QWEN_MODEL,
"available_qwen_models": AVAILABLE_QWEN_MODELS "available_qwen_models": AVAILABLE_QWEN_MODELS
@@ -1348,6 +1375,93 @@ async def update_prompts(
PROMPTS[key] = content PROMPTS[key] = content
return {"status": "success", "message": f"Prompt '{key}' updated"} return {"status": "success", "message": f"Prompt '{key}' updated"}
# ------------------------------------------
# GPU Status Helper & API
# ------------------------------------------
def get_gpu_status_smi():
    """
    Get detailed GPU status, preferring nvidia-smi with a torch fallback.

    Returns:
        dict: On success, GPU metrics — utilization (%), memory (MB),
        temperature (C), power (W), device name, driver/CUDA versions,
        the data ``source`` ("nvidia-smi" or "torch") and a ``timestamp``.
        On failure, ``{"available": False, "error": <reason>}``.
    """
    cuda_version = "Unknown"
    try:
        import torch
        if torch.version.cuda:
            cuda_version = torch.version.cuda
    except (ImportError, AttributeError):
        # torch absent or built without CUDA metadata; keep "Unknown".
        pass
    try:
        # One nvidia-smi call for all fields; CSV keeps parsing trivial.
        query_fields = ",".join([
            "utilization.gpu", "utilization.memory", "temperature.gpu",
            "power.draw", "power.limit", "memory.total", "memory.used",
            "memory.free", "name", "driver_version",
        ])
        result = subprocess.run(
            ["nvidia-smi",
             f"--query-gpu={query_fields}",
             "--format=csv,noheader,nounits"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf-8",
            timeout=5,  # don't hang the status endpoint on a stuck driver
        )
        if result.returncode != 0:
            raise RuntimeError("nvidia-smi failed")
        # Parse the first line (assuming single GPU for now, or take the first one)
        line = result.stdout.strip().split("\n")[0]
        vals = [x.strip() for x in line.split(",")]
        return {
            "available": True,
            "gpu_util": float(vals[0]),       # %
            "mem_util": float(vals[1]),       # % (memory controller utilization)
            "temperature": float(vals[2]),    # C
            "power_draw": float(vals[3]),     # W
            "power_limit": float(vals[4]),    # W
            "mem_total": float(vals[5]),      # MB
            "mem_used": float(vals[6]),       # MB
            "mem_free": float(vals[7]),       # MB
            "name": vals[8],
            "driver_version": vals[9],
            "cuda_version": cuda_version,
            "source": "nvidia-smi",
            "timestamp": time.time(),
        }
    except Exception as e:
        # nvidia-smi missing, timed out, or output unparsable
        # (e.g. "[N/A]" power fields) — fall back to torch if available.
        try:
            import torch  # local import: fallback must not NameError if top-level torch is absent
            if torch.cuda.is_available():
                device_id = torch.cuda.current_device()
                props = torch.cuda.get_device_properties(device_id)
                mem_reserved = torch.cuda.memory_reserved(device_id) / 1024**2  # MB
                mem_total = props.total_memory / 1024**2  # MB
                return {
                    "available": True,
                    "gpu_util": 0,  # torch exposes no utilization counter
                    "mem_util": (mem_reserved / mem_total) * 100,
                    "temperature": 0,
                    "power_draw": 0,
                    "power_limit": 0,
                    "mem_total": mem_total,
                    "mem_used": mem_reserved,
                    "mem_free": mem_total - mem_reserved,
                    "name": props.name,
                    "driver_version": "Unknown",
                    "cuda_version": cuda_version,
                    "source": "torch",
                    "timestamp": time.time(),
                }
        except Exception:
            # Fallback itself failed; report the original nvidia-smi error.
            pass
        return {"available": False, "error": str(e)}
@app.get("/admin/api/gpu/status", dependencies=[Depends(verify_admin)])
async def get_gpu_status_api():
    """Admin endpoint: return a real-time GPU status snapshot."""
    status = get_gpu_status_smi()
    return status
# ========================================== # ==========================================
# 10. Main Entry Point (启动入口) # 10. Main Entry Point (启动入口)
# ========================================== # ==========================================

View File

@@ -1 +1,2 @@
{"timestamp": 1771347621.2198663, "type": "general", "prompt": "正面的麻将牌", "final_prompt": "Front-facing mahjong tile", "status": "success", "result_path": "results/seg_72b3c186467d48bf8591c9699ce90ca7.jpg", "details": "Detected: 13", "duration": 2.699465274810791} {"timestamp": 1771347621.2198663, "type": "general", "prompt": "正面的麻将牌", "final_prompt": "Front-facing mahjong tile", "status": "success", "result_path": "results/seg_72b3c186467d48bf8591c9699ce90ca7.jpg", "details": "Detected: 13", "duration": 2.699465274810791}
{"timestamp": 1771348998.3732698, "type": "general", "prompt": "正面的麻将牌", "final_prompt": "Front-facing mahjong tile", "status": "success", "result_path": "results/seg_01ad9bff76274811a6b6e5a8a16cf01f.jpg", "details": "Detected: 13", "duration": 3.353431224822998}

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB