from fastapi import APIRouter
import torch
import psutil

from src.config import settings
from src.services.whisper_service import get_model_info

router = APIRouter()


@router.get("/health")
async def health_check():
    """Health-check endpoint reporting GPU, model, and system status.

    Returns:
        dict: service status and version, the configured Whisper model name,
        GPU availability with VRAM totals (GB, 2 d.p.), the model's
        load/download state as reported by ``get_model_info()``, and host
        CPU/memory statistics.
    """
    # GPU info is only populated when CUDA is available.
    # NOTE(review): device index 0 is hard-coded — multi-GPU hosts report
    # only the first device.
    gpu_info = {"available": False}
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        vram_total = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
        vram_allocated = torch.cuda.memory_allocated(0) / (1024 ** 3)
        gpu_info = {
            "available": True,
            "name": gpu_name,
            "vram_total_gb": round(vram_total, 2),
            "vram_used_gb": round(vram_allocated, 2),
            "vram_free_gb": round(vram_total - vram_allocated, 2),
        }

    # Host memory snapshot.
    memory = psutil.virtual_memory()

    # Model load/download state from the whisper service.
    model_status = get_model_info()

    # BUG FIX: the original used psutil.cpu_percent(interval=1), which
    # sleeps for a full second inside this async handler and therefore
    # blocks the entire event loop (all in-flight requests) on every
    # health probe. interval=None returns immediately, reporting CPU
    # usage since the previous call (0.0 on the very first call).
    cpu_percent = psutil.cpu_percent(interval=None)

    return {
        "status": "healthy",
        "version": "1.0.0",
        "model": settings.whisper_model,
        "gpu": gpu_info,
        "model_status": {
            "loaded": model_status.get("loaded", False),
            "is_downloading": model_status.get("is_downloading", False),
            "download_percentage": model_status.get("download_percentage", 0),
            "status_message": model_status.get("status_message", "Unknown"),
            "device": model_status.get("device", "cpu"),
        },
        "system": {
            "cpu_percent": cpu_percent,
            "memory_percent": memory.percent,
            "memory_available_gb": round(memory.available / (1024 ** 3), 2),
        },
    }