# whisper-api/src/api/health.py
# (file-listing metadata from the original paste — "54 lines, 1.7 KiB, Python" —
# converted to a comment so the module parses.)
from fastapi import APIRouter
import torch
import psutil
from src.config import settings
from src.services.whisper_service import get_model_info
# Router collecting this module's endpoints (the /health route below is
# registered on it via @router.get).
router = APIRouter()
@router.get("/health")
async def health_check():
    """Health check endpoint reporting GPU, model, and system status.

    Returns:
        dict: A payload with keys:
            - status: always "healthy" (reaching this handler implies liveness)
            - version: API version string
            - model: configured Whisper model name (from settings)
            - gpu: CUDA availability plus name and VRAM totals in GiB
            - model_status: load/download state as reported by get_model_info()
            - system: CPU and memory utilization via psutil
    """
    # GPU info — only populated when CUDA is available; device 0 is assumed
    # to be the inference device (TODO confirm for multi-GPU hosts).
    gpu_info = {"available": False}
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        # total_memory / memory_allocated are in bytes; convert to GiB.
        vram_total = torch.cuda.get_device_properties(0).total_memory / (1024**3)
        vram_allocated = torch.cuda.memory_allocated(0) / (1024**3)
        gpu_info = {
            "available": True,
            "name": gpu_name,
            "vram_total_gb": round(vram_total, 2),
            "vram_used_gb": round(vram_allocated, 2),
            "vram_free_gb": round(vram_total - vram_allocated, 2),
        }

    # System memory snapshot.
    memory = psutil.virtual_memory()

    # Model load/download state; values are read defensively with .get()
    # so a partially-populated status dict cannot raise here.
    model_status = get_model_info()

    return {
        "status": "healthy",
        "version": "1.0.0",
        "model": settings.whisper_model,
        "gpu": gpu_info,
        "model_status": {
            "loaded": model_status.get("loaded", False),
            "is_downloading": model_status.get("is_downloading", False),
            "download_percentage": model_status.get("download_percentage", 0),
            "status_message": model_status.get("status_message", "Unknown"),
            "device": model_status.get("device", "cpu"),
        },
        "system": {
            # interval=None is non-blocking: it reports CPU usage since the
            # previous call. The original interval=1 slept for a full second
            # inside this async handler, blocking the entire event loop on
            # every /health request.
            "cpu_percent": psutil.cpu_percent(interval=None),
            "memory_percent": memory.percent,
            "memory_available_gb": round(memory.available / (1024**3), 2),
        },
    }