From c740ec16183dec0e0b21d73af9e282fe19562cd5 Mon Sep 17 00:00:00 2001 From: Dominic Ballenthin Date: Thu, 29 Jan 2026 02:10:49 +0100 Subject: [PATCH] Fix dashboard download status display, add CPU/GPU toggle and automatic fallback --- src/api/health.py | 12 +++++- src/services/whisper_service.py | 24 ++++++++++-- src/templates/dashboard.html | 50 ++++++++++++++++--------- src/templates/models.html | 33 +++++++++++++++++ src/web/routes.py | 66 ++++++++++++++++++++++++++++++++- 5 files changed, 162 insertions(+), 23 deletions(-) diff --git a/src/api/health.py b/src/api/health.py index 3754394..327e1d9 100644 --- a/src/api/health.py +++ b/src/api/health.py @@ -10,7 +10,7 @@ router = APIRouter() @router.get("/health") async def health_check(): - """Health check endpoint with GPU status""" + """Health check endpoint with GPU and model status""" # Get GPU info if available gpu_info = {"available": False} @@ -30,11 +30,21 @@ async def health_check(): # Get system info memory = psutil.virtual_memory() + # Get model status + model_status = get_model_info() + return { "status": "healthy", "version": "1.0.0", "model": settings.whisper_model, "gpu": gpu_info, + "model_status": { + "loaded": model_status.get("loaded", False), + "is_downloading": model_status.get("is_downloading", False), + "download_percentage": model_status.get("download_percentage", 0), + "status_message": model_status.get("status_message", "Unknown"), + "device": model_status.get("device", "cpu") + }, "system": { "cpu_percent": psutil.cpu_percent(interval=1), "memory_percent": memory.percent, diff --git a/src/services/whisper_service.py b/src/services/whisper_service.py index 85cd4b6..08ea3bb 100644 --- a/src/services/whisper_service.py +++ b/src/services/whisper_service.py @@ -52,7 +52,7 @@ def _download_hook(progress_bytes, total_bytes): def load_model(model_name: str = None): - """Load Whisper model""" + """Load Whisper model with automatic CPU fallback""" global _model, _model_status, 
_current_model_name if model_name is None: @@ -70,12 +70,21 @@ def load_model(model_name: str = None): _model_status["status_message"] = "Starting download..." _model_status["model_name"] = model_name - print(f"Loading Whisper model: {model_name}") + # Check if GPU is available, fallback to CPU if not + requested_device = settings.whisper_device + actual_device = requested_device + + if requested_device == "cuda" and not torch.cuda.is_available(): + actual_device = "cpu" + print(f"⚠️ GPU not available, falling back to CPU mode") + _model_status["status_message"] = "GPU not available, using CPU..." + + print(f"Loading Whisper model: {model_name} on {actual_device}") try: # Whisper doesn't have a direct progress callback, but we can monitor the models directory _model = whisper.load_model( model_name, - device=settings.whisper_device, + device=actual_device, download_root=settings.models_path ) _current_model_name = model_name @@ -83,7 +92,14 @@ def load_model(model_name: str = None): _model_status["is_loaded"] = True _model_status["download_percentage"] = 100 _model_status["status_message"] = "Model loaded successfully" - print(f"Model {model_name} loaded on {settings.whisper_device}") + + # Update device in status to reflect actual device used + if actual_device != requested_device: + settings.whisper_device = actual_device + print(f"✅ Model {model_name} loaded on CPU (GPU fallback)") + else: + print(f"✅ Model {model_name} loaded on {actual_device}") + except Exception as e: _model_status["is_downloading"] = False _model_status["status_message"] = f"Error: {str(e)}" diff --git a/src/templates/dashboard.html b/src/templates/dashboard.html index 876d595..df4fe43 100644 --- a/src/templates/dashboard.html +++ b/src/templates/dashboard.html @@ -151,39 +151,55 @@ // Model Status Updates async function updateModelStatus() { try { - // We need to use an API key for the model-status endpoint - // For admin panel, we'll use a simple endpoint that doesn't require auth - // or 
we'll skip this and use the health endpoint instead const response = await fetch('/health'); const data = await response.json(); const statusCard = document.getElementById('model-status-content'); - const progressContainer = document.getElementById('model-progress-container'); - const progressBar = document.getElementById('model-progress-bar'); - const progressText = document.getElementById('model-progress-text'); - if (data.gpu && data.gpu.available) { + // Prioritize download status over GPU status + if (data.model_status && data.model_status.is_downloading) { + // Show download status statusCard.innerHTML = ` -

Status: ✅ GPU Ready

-

GPU: ${data.gpu.name}

-

VRAM: ${data.gpu.vram_used_gb} GB / ${data.gpu.vram_total_gb} GB

-

Model: ${data.model}

+

Status: ⏳ Downloading...

+

${data.model_status.status_message}

+
+
+
+

+ ${data.model_status.download_percentage}% +

`; - progressContainer.style.display = 'none'; + } else if (data.model_status && data.model_status.loaded) { + // Show GPU/Model ready status + if (data.gpu && data.gpu.available) { + statusCard.innerHTML = ` +

Status: ✅ Loaded & Ready

+

Model: ${data.model}

+

Device: ${data.model_status.device}

+

GPU: ${data.gpu.name}

+

VRAM: ${data.gpu.vram_used_gb} GB / ${data.gpu.vram_total_gb} GB

+ `; + } else { + statusCard.innerHTML = ` +

Status: ✅ Loaded & Ready (CPU Mode)

+

Model: ${data.model}

+

Device: CPU

+ `; + } } else { + // Not started or loading statusCard.innerHTML = ` -

Status: ⏳ Loading...

-

Model is being downloaded. Please wait...

+

Status: ⏸️ Not Started

+

The model will be loaded on first transcription request.

`; - progressContainer.style.display = 'block'; } } catch (error) { console.error('Error fetching model status:', error); } } - // Update status every 5 seconds + // Update status every 3 seconds during download for smoother progress updateModelStatus(); - setInterval(updateModelStatus, 5000); + setInterval(updateModelStatus, 3000); {% endblock %} diff --git a/src/templates/models.html b/src/templates/models.html index 330755a..cdc1b56 100644 --- a/src/templates/models.html +++ b/src/templates/models.html @@ -38,6 +38,39 @@ + +
+

⚙️ Processing Mode

+

Current mode: {{ current_status.device | upper }}

+ + {% if gpu_available %} + {% if current_status.device == 'cuda' %} +
+ + +
+ {% else %} +
+ + +
+ {% endif %} +

+ GPU detected: {{ gpu_name }} +

+ {% else %} +
+

+ ⚠️ No compatible GPU detected. Running in CPU mode. +

+
+ {% endif %} +
+

Available Models

diff --git a/src/web/routes.py b/src/web/routes.py index d82a3fd..80cbae5 100644 --- a/src/web/routes.py +++ b/src/web/routes.py @@ -7,6 +7,8 @@ from fastapi.responses import HTMLResponse, RedirectResponse from fastapi.templating import Jinja2Templates from sqlalchemy.orm import Session +import torch + from src.config import settings from src.database.db import get_db from src.database.models import ApiKey, UsageLog @@ -201,12 +203,20 @@ async def manage_models(request: Request, message: Optional[str] = None, error: models = get_available_models() current_status = get_model_status() + # Check GPU availability + gpu_available = torch.cuda.is_available() + gpu_name = "" + if gpu_available: + gpu_name = torch.cuda.get_device_name(0) + return templates.TemplateResponse("models.html", { "request": request, "models": models, "current_status": current_status, "message": message, - "error": error + "error": error, + "gpu_available": gpu_available, + "gpu_name": gpu_name }) @@ -316,3 +326,57 @@ async def reload_model_admin(request: Request): url=f"/admin/models?error={str(e)}", status_code=302 )
+
+
+@router.post("/models/set-device")
+async def set_device_admin(request: Request, device: str = Form(...)):
+    """Switch the Whisper inference device between CPU and GPU.
+
+    Updates ``settings.whisper_device`` and drops the cached model; every
+    outcome is reported via a redirect back to the admin models page.
+    """
+    try:
+        check_admin_auth(request)
+    except HTTPException:
+        return RedirectResponse(url="/admin/login", status_code=302)
+
+    try:
+        # Validate device
+        if device not in ["cpu", "cuda"]:
+            return RedirectResponse(
+                url="/admin/models?error=Invalid device. Must be 'cpu' or 'cuda'",
+                status_code=302
+            )
+
+        # Check if GPU is available when switching to cuda
+        if device == "cuda" and not torch.cuda.is_available():
+            return RedirectResponse(
+                url="/admin/models?error=No compatible GPU detected. Cannot switch to GPU mode.",
+                status_code=302
+            )
+
+        settings.whisper_device = device
+
+        # Unload current model to force reload with new device
+        import gc
+        import src.services.whisper_service as whisper_service
+
+        if whisper_service._model is not None:
+            # Clear the module global itself; `from ... import _model` would only rebind a local name
+            whisper_service._model = None
+            gc.collect()
+
+            # Clear CUDA cache if switching away from GPU
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+
+        device_name = "GPU" if device == "cuda" else "CPU"
+        return RedirectResponse(
+            url=f"/admin/models?message=Switched to {device_name} mode. Model will be reloaded on next request.",
+            status_code=302
+        )
+    except Exception as e:
+        return RedirectResponse(
+            url=f"/admin/models?error={str(e)}",
+            status_code=302
+        )