# whisper-api/src/api/health.py
# (file-listing metadata from the original paste — "54 lines, 1.7 KiB, Python" —
# converted to a comment so the module parses.)
from fastapi import APIRouter
import torch
import psutil
from src.config import settings
from src.services.whisper_service import get_model_info
# Router collecting this module's endpoints (the /health route below is
# registered on it via @router.get).
router = APIRouter()
@router.get("/health")
async def health_check():
    """Health check endpoint reporting GPU, model, and system status.

    Returns:
        dict: A payload with keys:
            - status: always "healthy" (reaching this handler implies liveness)
            - version: API version string
            - model: configured Whisper model name (from settings)
            - gpu: CUDA availability plus name and VRAM totals in GiB
            - model_status: load/download state as reported by get_model_info()
            - system: CPU and memory utilization via psutil
    """
    # GPU info — only populated when CUDA is available; device 0 is assumed
    # to be the inference device (TODO confirm for multi-GPU hosts).
    gpu_info = {"available": False}
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        # total_memory / memory_allocated are in bytes; convert to GiB.
        vram_total = torch.cuda.get_device_properties(0).total_memory / (1024**3)
        vram_allocated = torch.cuda.memory_allocated(0) / (1024**3)
        gpu_info = {
            "available": True,
            "name": gpu_name,
            "vram_total_gb": round(vram_total, 2),
            "vram_used_gb": round(vram_allocated, 2),
            "vram_free_gb": round(vram_total - vram_allocated, 2),
        }

    # System memory snapshot.
    memory = psutil.virtual_memory()

    # Model load/download state; values are read defensively with .get()
    # so a partially-populated status dict cannot raise here.
    model_status = get_model_info()

    return {
        "status": "healthy",
        "version": "1.0.0",
        "model": settings.whisper_model,
        "gpu": gpu_info,
        "model_status": {
            "loaded": model_status.get("loaded", False),
            "is_downloading": model_status.get("is_downloading", False),
            "download_percentage": model_status.get("download_percentage", 0),
            "status_message": model_status.get("status_message", "Unknown"),
            "device": model_status.get("device", "cpu"),
        },
        "system": {
            # interval=None is non-blocking: it reports CPU usage since the
            # previous call. The original interval=1 slept for a full second
            # inside this async handler, blocking the entire event loop on
            # every /health request.
            "cpu_percent": psutil.cpu_percent(interval=None),
            "memory_percent": memory.percent,
            "memory_available_gb": round(memory.available / (1024**3), 2),
        },
    }