From c740ec16183dec0e0b21d73af9e282fe19562cd5 Mon Sep 17 00:00:00 2001
From: Dominic Ballenthin <dominic.ballenthin@insight-it.de>
Date: Thu, 29 Jan 2026 02:10:49 +0100
Subject: [PATCH] Fix dashboard download status display, add CPU/GPU toggle and
 automatic fallback

---
 src/api/health.py               | 12 +++++-
 src/services/whisper_service.py | 24 ++++++++++--
 src/templates/dashboard.html    | 50 ++++++++++++++++---------
 src/templates/models.html       | 33 +++++++++++++++++
 src/web/routes.py               | 66 ++++++++++++++++++++++++++++++++-
 5 files changed, 162 insertions(+), 23 deletions(-)

diff --git a/src/api/health.py b/src/api/health.py
index 3754394..327e1d9 100644
--- a/src/api/health.py
+++ b/src/api/health.py
@@ -10,7 +10,7 @@ router = APIRouter()
 
 @router.get("/health")
 async def health_check():
-    """Health check endpoint with GPU status"""
+    """Health check endpoint with GPU and model status"""
     
     # Get GPU info if available
     gpu_info = {"available": False}
@@ -30,11 +30,21 @@ async def health_check():
     # Get system info
     memory = psutil.virtual_memory()
     
+    # Get model status
+    model_status = get_model_info()
+    
     return {
         "status": "healthy",
         "version": "1.0.0",
         "model": settings.whisper_model,
         "gpu": gpu_info,
+        "model_status": {
+            "loaded": model_status.get("loaded", False),
+            "is_downloading": model_status.get("is_downloading", False),
+            "download_percentage": model_status.get("download_percentage", 0),
+            "status_message": model_status.get("status_message", "Unknown"),
+            "device": model_status.get("device", "cpu")
+        },
         "system": {
             "cpu_percent": psutil.cpu_percent(interval=1),
             "memory_percent": memory.percent,
diff --git a/src/services/whisper_service.py b/src/services/whisper_service.py
index 85cd4b6..08ea3bb 100644
--- a/src/services/whisper_service.py
+++ b/src/services/whisper_service.py
@@ -52,7 +52,7 @@ def _download_hook(progress_bytes, total_bytes):
 
 
 def load_model(model_name: str = None):
-    """Load Whisper model"""
+    """Load Whisper model with automatic CPU fallback"""
     global _model, _model_status, _current_model_name
     
     if model_name is None:
@@ -70,12 +70,21 @@ def load_model(model_name: str = None):
             _model_status["status_message"] = "Starting download..."
             _model_status["model_name"] = model_name
             
-            print(f"Loading Whisper model: {model_name}")
+            # Check if GPU is available, fallback to CPU if not
+            requested_device = settings.whisper_device
+            actual_device = requested_device
+            
+            if requested_device == "cuda" and not torch.cuda.is_available():
+                actual_device = "cpu"
+                print(f"⚠️  GPU not available, falling back to CPU mode")
+                _model_status["status_message"] = "GPU not available, using CPU..."
+            
+            print(f"Loading Whisper model: {model_name} on {actual_device}")
             try:
                 # Whisper doesn't have a direct progress callback, but we can monitor the models directory
                 _model = whisper.load_model(
                     model_name,
-                    device=settings.whisper_device,
+                    device=actual_device,
                     download_root=settings.models_path
                 )
                 _current_model_name = model_name
@@ -83,7 +92,14 @@ def load_model(model_name: str = None):
                 _model_status["is_loaded"] = True
                 _model_status["download_percentage"] = 100
                 _model_status["status_message"] = "Model loaded successfully"
-                print(f"Model {model_name} loaded on {settings.whisper_device}")
+                
+                # Update device in status to reflect actual device used
+                if actual_device != requested_device:
+                    settings.whisper_device = actual_device
+                    print(f"✅ Model {model_name} loaded on CPU (GPU fallback)")
+                else:
+                    print(f"✅ Model {model_name} loaded on {actual_device}")
+                    
             except Exception as e:
                 _model_status["is_downloading"] = False
                 _model_status["status_message"] = f"Error: {str(e)}"
diff --git a/src/templates/dashboard.html b/src/templates/dashboard.html
index 876d595..df4fe43 100644
--- a/src/templates/dashboard.html
+++ b/src/templates/dashboard.html
@@ -151,39 +151,55 @@
     // Model Status Updates
     async function updateModelStatus() {
         try {
-            // We need to use an API key for the model-status endpoint
-            // For admin panel, we'll use a simple endpoint that doesn't require auth
-            // or we'll skip this and use the health endpoint instead
             const response = await fetch('/health');
             const data = await response.json();
             
             const statusCard = document.getElementById('model-status-content');
-            const progressContainer = document.getElementById('model-progress-container');
-            const progressBar = document.getElementById('model-progress-bar');
-            const progressText = document.getElementById('model-progress-text');
             
-            if (data.gpu && data.gpu.available) {
+            // Prioritize download status over GPU status
+            if (data.model_status && data.model_status.is_downloading) {
+                // Show download status
                 statusCard.innerHTML = `
-                    <p><strong>Status:</strong> <span style="color: #48bb78;">✅ GPU Ready</span></p>
-                    <p><strong>GPU:</strong> ${data.gpu.name}</p>
-                    <p><strong>VRAM:</strong> ${data.gpu.vram_used_gb} GB / ${data.gpu.vram_total_gb} GB</p>
-                    <p><strong>Model:</strong> ${data.model}</p>
+                    <p><strong>Status:</strong> <span style="color: #ed8936;">⏳ Downloading...</span></p>
+                    <p>${data.model_status.status_message}</p>
+                    <div style="background: #e2e8f0; border-radius: 10px; height: 20px; overflow: hidden; margin-top: 10px;">
+                        <div style="background: linear-gradient(90deg, #667eea, #764ba2); height: 100%; width: ${data.model_status.download_percentage}%; transition: width 0.5s ease;"></div>
+                    </div>
+                    <p style="text-align: center; margin-top: 5px; font-weight: bold; color: #667aea;">
+                        ${data.model_status.download_percentage}%
+                    </p>
                 `;
-                progressContainer.style.display = 'none';
+            } else if (data.model_status && data.model_status.loaded) {
+                // Show GPU/Model ready status
+                if (data.gpu && data.gpu.available) {
+                    statusCard.innerHTML = `
+                        <p><strong>Status:</strong> <span style="color: #48bb78;">✅ Loaded & Ready</span></p>
+                        <p><strong>Model:</strong> ${data.model}</p>
+                        <p><strong>Device:</strong> ${data.model_status.device}</p>
+                        <p><strong>GPU:</strong> ${data.gpu.name}</p>
+                        <p><strong>VRAM:</strong> ${data.gpu.vram_used_gb} GB / ${data.gpu.vram_total_gb} GB</p>
+                    `;
+                } else {
+                    statusCard.innerHTML = `
+                        <p><strong>Status:</strong> <span style="color: #48bb78;">✅ Loaded & Ready (CPU Mode)</span></p>
+                        <p><strong>Model:</strong> ${data.model}</p>
+                        <p><strong>Device:</strong> CPU</p>
+                    `;
+                }
             } else {
+                // Not started or loading
                 statusCard.innerHTML = `
-                    <p><strong>Status:</strong> <span style="color: #ed8936;">⏳ Loading...</span></p>
-                    <p>Model is being downloaded. Please wait...</p>
+                    <p><strong>Status:</strong> <span style="color: #718096;">⏸️ Not Started</span></p>
+                    <p>The model will be loaded on first transcription request.</p>
                 `;
-                progressContainer.style.display = 'block';
             }
         } catch (error) {
             console.error('Error fetching model status:', error);
         }
     }
     
-    // Update status every 5 seconds
+    // Poll the status every 3 seconds (always, not only while downloading) so progress updates smoothly
     updateModelStatus();
-    setInterval(updateModelStatus, 5000);
+    setInterval(updateModelStatus, 3000);
 </script>
 {% endblock %}
diff --git a/src/templates/models.html b/src/templates/models.html
index 330755a..cdc1b56 100644
--- a/src/templates/models.html
+++ b/src/templates/models.html
@@ -38,6 +38,39 @@
         </div>
     </div>
     
+    <!-- Device Mode Toggle -->
+    <div class="card">
+        <h2>⚙️ Processing Mode</h2>
+        <p>Current mode: <strong>{{ current_status.device | upper }}</strong></p>
+        
+        {% if gpu_available %}
+            {% if current_status.device == 'cuda' %}
+                <form method="POST" action="/admin/models/set-device">
+                    <input type="hidden" name="device" value="cpu">
+                    <button type="submit" class="btn" style="background: #4299e1;">
+                        🔄 Switch to CPU Mode
+                    </button>
+                </form>
+            {% else %}
+                <form method="POST" action="/admin/models/set-device">
+                    <input type="hidden" name="device" value="cuda">
+                    <button type="submit" class="btn" style="background: #48bb78;">
+                        🚀 Switch to GPU Mode
+                    </button>
+                </form>
+            {% endif %}
+            <p style="margin-top: 10px; font-size: 12px; color: #666;">
+                GPU detected: {{ gpu_name }}
+            </p>
+        {% else %}
+            <div style="background: #fed7d7; padding: 10px; border-radius: 5px; margin-top: 10px;">
+                <p style="color: #742a2a; margin: 0;">
+                    ⚠️ No compatible GPU detected. Running in CPU mode.
+                </p>
+            </div>
+        {% endif %}
+    </div>
+    
     <!-- Available Models -->
     <div class="card">
         <h2>Available Models</h2>
diff --git a/src/web/routes.py b/src/web/routes.py
index d82a3fd..80cbae5 100644
--- a/src/web/routes.py
+++ b/src/web/routes.py
@@ -7,6 +7,8 @@ from fastapi.responses import HTMLResponse, RedirectResponse
 from fastapi.templating import Jinja2Templates
 from sqlalchemy.orm import Session
 
+import torch
+
 from src.config import settings
 from src.database.db import get_db
 from src.database.models import ApiKey, UsageLog
@@ -201,12 +203,20 @@ async def manage_models(request: Request, message: Optional[str] = None, error:
     models = get_available_models()
     current_status = get_model_status()
     
+    # Check GPU availability
+    gpu_available = torch.cuda.is_available()
+    gpu_name = ""
+    if gpu_available:
+        gpu_name = torch.cuda.get_device_name(0)
+    
     return templates.TemplateResponse("models.html", {
         "request": request,
         "models": models,
         "current_status": current_status,
         "message": message,
-        "error": error
+        "error": error,
+        "gpu_available": gpu_available,
+        "gpu_name": gpu_name
     })
 
 
@@ -316,3 +326,57 @@ async def reload_model_admin(request: Request):
             url=f"/admin/models?error={str(e)}", 
             status_code=302
         )
+
+
+@router.post("/models/set-device")
+async def set_device_admin(
+    request: Request,
+    device: str = Form(...)
+):
+    """Switch Whisper between CPU and GPU mode (admin only).
+
+    Stores ``device`` in ``settings.whisper_device`` and clears the model
+    cached in ``whisper_service``. Always answers with a 302 redirect:
+    /admin/login if the admin check fails, otherwise /admin/models.
+    """
+    import gc
+    from urllib.parse import quote
+    import src.services.whisper_service as whisper_service
+
+    try:
+        check_admin_auth(request)
+    except HTTPException:
+        return RedirectResponse(url="/admin/login", status_code=302)
+
+    try:
+        if device not in ("cpu", "cuda"):
+            return RedirectResponse(
+                url="/admin/models?error=Invalid device. Must be 'cpu' or 'cuda'",
+                status_code=302
+            )
+
+        if device == "cuda" and not torch.cuda.is_available():
+            return RedirectResponse(
+                url="/admin/models?error=No compatible GPU detected. Cannot switch to GPU mode.",
+                status_code=302
+            )
+
+        settings.whisper_device = device
+
+        # Clear the attribute on the service module: `from ... import _model`
+        # plus `_model = None` only rebinds a local and leaves the model loaded.
+        if whisper_service._model is not None:
+            whisper_service._model = None
+            whisper_service._model_status["is_loaded"] = False
+            gc.collect()
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()  # release cached, now-unused VRAM
+
+        device_name = "GPU" if device == "cuda" else "CPU"
+        return RedirectResponse(
+            url=f"/admin/models?message=Switched to {device_name} mode. Model will be reloaded on next request.",
+            status_code=302
+        )
+    except Exception as e:
+        # Percent-encode so '&' or '#' in the text cannot break the query.
+        return RedirectResponse(url=f"/admin/models?error={quote(str(e))}", status_code=302)