admin: add GPU status monitoring (nvidia-smi helper with torch fallback, admin API endpoint)

This commit is contained in:
2026-02-18 01:26:22 +08:00
parent 2d315948a2
commit 06f2b2928b
4 changed files with 794 additions and 125 deletions

View File

@@ -21,6 +21,7 @@ import traceback
import re
import asyncio
import shutil
import subprocess
from datetime import datetime
from typing import Optional, List, Dict, Any
from contextlib import asynccontextmanager
@@ -1287,12 +1288,38 @@ async def get_config(request: Request):
"""
Get system config info
"""
device = "Unknown"
device_str = "Unknown"
gpu_status = {}
if hasattr(request.app.state, "device"):
device = str(request.app.state.device)
device_str = str(request.app.state.device)
# 获取 GPU 详细信息
if torch.cuda.is_available():
try:
device_id = torch.cuda.current_device()
props = torch.cuda.get_device_properties(device_id)
total_mem = props.total_memory
reserved_mem = torch.cuda.memory_reserved(device_id)
allocated_mem = torch.cuda.memory_allocated(device_id)
gpu_status = {
"available": True,
"name": props.name,
"total_memory": f"{total_mem / 1024**3:.2f} GB",
"reserved_memory": f"{reserved_mem / 1024**3:.2f} GB",
"allocated_memory": f"{allocated_mem / 1024**3:.2f} GB",
"memory_usage_percent": round((reserved_mem / total_mem) * 100, 1)
}
except Exception as e:
gpu_status = {"available": True, "error": str(e)}
else:
gpu_status = {"available": False, "reason": "No CUDA device detected"}
return {
"device": device,
"device": device_str,
"gpu_status": gpu_status,
"cleanup_config": CLEANUP_CONFIG,
"current_qwen_model": QWEN_MODEL,
"available_qwen_models": AVAILABLE_QWEN_MODELS
@@ -1348,6 +1375,93 @@ async def update_prompts(
PROMPTS[key] = content
return {"status": "success", "message": f"Prompt '{key}' updated"}
# ------------------------------------------
# GPU Status Helper & API
# ------------------------------------------
def get_gpu_status_smi():
    """
    Get detailed GPU status, preferring ``nvidia-smi`` with a torch fallback.

    Returns:
        dict: One of three shapes:
            - nvidia-smi succeeded: utilization, memory, temperature, power,
              name, driver/CUDA versions, ``source == "nvidia-smi"``.
            - nvidia-smi unavailable but torch sees a CUDA device: partial
              stats (utilization/temp/power are 0 placeholders — torch does
              not expose them), ``source == "torch"``.
            - neither works: ``{"available": False, "error": <reason>}``.
    """
    cuda_version = "Unknown"
    try:
        import torch
        if torch.version.cuda:
            cuda_version = torch.version.cuda
    except Exception:
        # torch missing or built without CUDA metadata; keep "Unknown".
        pass
    try:
        # One nvidia-smi call for all fields; CSV without units keeps parsing simple.
        # Fields: utilization.gpu, utilization.memory, temperature.gpu, power.draw,
        # power.limit, memory.total, memory.used, memory.free, name, driver_version
        result = subprocess.run(
            ['nvidia-smi', '--query-gpu=utilization.gpu,utilization.memory,temperature.gpu,power.draw,power.limit,memory.total,memory.used,memory.free,name,driver_version', '--format=csv,noheader,nounits'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding='utf-8',
            timeout=5  # nvidia-smi can hang on a wedged driver; don't block the endpoint
        )
        if result.returncode != 0:
            raise RuntimeError("nvidia-smi failed")
        # Parse the first line only (single-GPU assumption; multi-GPU reports GPU 0).
        line = result.stdout.strip().split('\n')[0]
        vals = [x.strip() for x in line.split(',')]
        return {
            "available": True,
            "gpu_util": float(vals[0]),      # %
            "mem_util": float(vals[1]),      # % (memory-controller utilization)
            "temperature": float(vals[2]),   # C
            "power_draw": float(vals[3]),    # W
            "power_limit": float(vals[4]),   # W
            "mem_total": float(vals[5]),     # MB
            "mem_used": float(vals[6]),      # MB
            "mem_free": float(vals[7]),      # MB
            "name": vals[8],
            "driver_version": vals[9],
            "cuda_version": cuda_version,
            "source": "nvidia-smi",
            "timestamp": time.time()
        }
    except Exception as e:
        # nvidia-smi missing/hung/unparsable — fall back to torch if available.
        if torch.cuda.is_available():
            try:
                device_id = torch.cuda.current_device()
                props = torch.cuda.get_device_properties(device_id)
                mem_reserved = torch.cuda.memory_reserved(device_id) / 1024**2  # MB
                mem_total = props.total_memory / 1024**2  # MB
                return {
                    "available": True,
                    "gpu_util": 0,  # torch exposes no utilization counter
                    "mem_util": (mem_reserved / mem_total) * 100,
                    "temperature": 0,
                    "power_draw": 0,
                    "power_limit": 0,
                    "mem_total": mem_total,
                    "mem_used": mem_reserved,
                    "mem_free": mem_total - mem_reserved,
                    "name": props.name,
                    "driver_version": "Unknown",
                    "cuda_version": cuda_version,
                    "source": "torch",
                    "timestamp": time.time()
                }
            except Exception:
                # Fall through to the unavailable report below.
                pass
        return {"available": False, "error": str(e)}
@app.get("/admin/api/gpu/status", dependencies=[Depends(verify_admin)])
async def get_gpu_status_api():
"""
Get real-time GPU status
"""
return get_gpu_status_smi()
# ==========================================
# 10. Main Entry Point (启动入口)
# ==========================================

View File

@@ -1 +1,2 @@
{"timestamp": 1771347621.2198663, "type": "general", "prompt": "正面的麻将牌", "final_prompt": "Front-facing mahjong tile", "status": "success", "result_path": "results/seg_72b3c186467d48bf8591c9699ce90ca7.jpg", "details": "Detected: 13", "duration": 2.699465274810791}
{"timestamp": 1771348998.3732698, "type": "general", "prompt": "正面的麻将牌", "final_prompt": "Front-facing mahjong tile", "status": "success", "result_path": "results/seg_01ad9bff76274811a6b6e5a8a16cf01f.jpg", "details": "Detected: 13", "duration": 3.353431224822998}

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB