diff --git a/src/api/health.py b/src/api/health.py index 3754394..327e1d9 100644 --- a/src/api/health.py +++ b/src/api/health.py @@ -10,7 +10,7 @@ router = APIRouter() @router.get("/health") async def health_check(): - """Health check endpoint with GPU status""" + """Health check endpoint with GPU and model status""" # Get GPU info if available gpu_info = {"available": False} @@ -30,11 +30,21 @@ async def health_check(): # Get system info memory = psutil.virtual_memory() + # Get model status + model_status = get_model_info() + return { "status": "healthy", "version": "1.0.0", "model": settings.whisper_model, "gpu": gpu_info, + "model_status": { + "loaded": model_status.get("loaded", False), + "is_downloading": model_status.get("is_downloading", False), + "download_percentage": model_status.get("download_percentage", 0), + "status_message": model_status.get("status_message", "Unknown"), + "device": model_status.get("device", "cpu") + }, "system": { "cpu_percent": psutil.cpu_percent(interval=1), "memory_percent": memory.percent, diff --git a/src/services/whisper_service.py b/src/services/whisper_service.py index 85cd4b6..08ea3bb 100644 --- a/src/services/whisper_service.py +++ b/src/services/whisper_service.py @@ -52,7 +52,7 @@ def _download_hook(progress_bytes, total_bytes): def load_model(model_name: str = None): - """Load Whisper model""" + """Load Whisper model with automatic CPU fallback""" global _model, _model_status, _current_model_name if model_name is None: @@ -70,12 +70,21 @@ def load_model(model_name: str = None): _model_status["status_message"] = "Starting download..." _model_status["model_name"] = model_name - print(f"Loading Whisper model: {model_name}") + # Check if GPU is available, fallback to CPU if not + requested_device = settings.whisper_device + actual_device = requested_device + + if requested_device == "cuda" and not torch.cuda.is_available(): + actual_device = "cpu" + print(f"⚠️ GPU not available, falling back to CPU mode") + _model_status["status_message"] = "GPU not available, using CPU..." + + print(f"Loading Whisper model: {model_name} on {actual_device}") try: # Whisper doesn't have a direct progress callback, but we can monitor the models directory _model = whisper.load_model( model_name, - device=settings.whisper_device, + device=actual_device, download_root=settings.models_path ) _current_model_name = model_name @@ -83,7 +92,14 @@ def load_model(model_name: str = None): _model_status["is_loaded"] = True _model_status["download_percentage"] = 100 _model_status["status_message"] = "Model loaded successfully" - print(f"Model {model_name} loaded on {settings.whisper_device}") + + # Update device in status to reflect actual device used + if actual_device != requested_device: + settings.whisper_device = actual_device + print(f"✅ Model {model_name} loaded on CPU (GPU fallback)") + else: + print(f"✅ Model {model_name} loaded on {actual_device}") + except Exception as e: _model_status["is_downloading"] = False _model_status["status_message"] = f"Error: {str(e)}" diff --git a/src/templates/dashboard.html b/src/templates/dashboard.html index 876d595..df4fe43 100644 --- a/src/templates/dashboard.html +++ b/src/templates/dashboard.html @@ -151,39 +151,55 @@ // Model Status Updates async function updateModelStatus() { try { - // We need to use an API key for the model-status endpoint - // For admin panel, we'll use a simple endpoint that doesn't require auth - // or we'll skip this and use the health endpoint instead const response = await fetch('/health'); const data = await response.json(); const statusCard = document.getElementById('model-status-content'); - const progressContainer = document.getElementById('model-progress-container'); - const progressBar = document.getElementById('model-progress-bar'); - const progressText = document.getElementById('model-progress-text'); - if (data.gpu && data.gpu.available) { + // Prioritize download status over GPU status + if (data.model_status && data.model_status.is_downloading) { + // Show download status statusCard.innerHTML = ` -
Status: ✅ GPU Ready
-GPU: ${data.gpu.name}
-VRAM: ${data.gpu.vram_used_gb} GB / ${data.gpu.vram_total_gb} GB
-Model: ${data.model}
+Status: ⏳ Downloading...
+${data.model_status.status_message}
+ ++ ${data.model_status.download_percentage}% +
`; - progressContainer.style.display = 'none'; + } else if (data.model_status && data.model_status.loaded) { + // Show GPU/Model ready status + if (data.gpu && data.gpu.available) { + statusCard.innerHTML = ` +Status: ✅ Loaded & Ready
+Model: ${data.model}
+Device: ${data.model_status.device}
+GPU: ${data.gpu.name}
+VRAM: ${data.gpu.vram_used_gb} GB / ${data.gpu.vram_total_gb} GB
+ `; + } else { + statusCard.innerHTML = ` +Status: ✅ Loaded & Ready (CPU Mode)
+Model: ${data.model}
+Device: CPU
+ `; + } } else { + // Not started or loading statusCard.innerHTML = ` -Status: ⏳ Loading...
-Model is being downloaded. Please wait...
+Status: ⏸️ Not Started
+The model will be loaded on first transcription request.
`; - progressContainer.style.display = 'block'; } } catch (error) { console.error('Error fetching model status:', error); } } - // Update status every 5 seconds + // Update status every 3 seconds during download for smoother progress updateModelStatus(); - setInterval(updateModelStatus, 5000); + setInterval(updateModelStatus, 3000); {% endblock %} diff --git a/src/templates/models.html b/src/templates/models.html index 330755a..cdc1b56 100644 --- a/src/templates/models.html +++ b/src/templates/models.html @@ -38,6 +38,39 @@ + +Current mode: {{ current_status.device | upper }}
+ + {% if gpu_available %} + {% if current_status.device == 'cuda' %} + + {% else %} + + {% endif %} ++ GPU detected: {{ gpu_name }} +
+ {% else %} ++ ⚠️ No compatible GPU detected. Running in CPU mode. +
+