Fix dashboard download status display, add CPU/GPU toggle and automatic fallback

This commit is contained in:
Dominic Ballenthin
2026-01-29 02:10:49 +01:00
parent ee9465f661
commit c740ec1618
5 changed files with 162 additions and 23 deletions

View File

@@ -10,7 +10,7 @@ router = APIRouter()
@router.get("/health") @router.get("/health")
async def health_check(): async def health_check():
"""Health check endpoint with GPU status""" """Health check endpoint with GPU and model status"""
# Get GPU info if available # Get GPU info if available
gpu_info = {"available": False} gpu_info = {"available": False}
@@ -30,11 +30,21 @@ async def health_check():
# Get system info # Get system info
memory = psutil.virtual_memory() memory = psutil.virtual_memory()
# Get model status
model_status = get_model_info()
return { return {
"status": "healthy", "status": "healthy",
"version": "1.0.0", "version": "1.0.0",
"model": settings.whisper_model, "model": settings.whisper_model,
"gpu": gpu_info, "gpu": gpu_info,
"model_status": {
"loaded": model_status.get("loaded", False),
"is_downloading": model_status.get("is_downloading", False),
"download_percentage": model_status.get("download_percentage", 0),
"status_message": model_status.get("status_message", "Unknown"),
"device": model_status.get("device", "cpu")
},
"system": { "system": {
"cpu_percent": psutil.cpu_percent(interval=1), "cpu_percent": psutil.cpu_percent(interval=1),
"memory_percent": memory.percent, "memory_percent": memory.percent,

View File

@@ -52,7 +52,7 @@ def _download_hook(progress_bytes, total_bytes):
def load_model(model_name: str = None): def load_model(model_name: str = None):
"""Load Whisper model""" """Load Whisper model with automatic CPU fallback"""
global _model, _model_status, _current_model_name global _model, _model_status, _current_model_name
if model_name is None: if model_name is None:
@@ -70,12 +70,21 @@ def load_model(model_name: str = None):
_model_status["status_message"] = "Starting download..." _model_status["status_message"] = "Starting download..."
_model_status["model_name"] = model_name _model_status["model_name"] = model_name
print(f"Loading Whisper model: {model_name}") # Check if GPU is available, fallback to CPU if not
requested_device = settings.whisper_device
actual_device = requested_device
if requested_device == "cuda" and not torch.cuda.is_available():
actual_device = "cpu"
print(f"⚠️ GPU not available, falling back to CPU mode")
_model_status["status_message"] = "GPU not available, using CPU..."
print(f"Loading Whisper model: {model_name} on {actual_device}")
try: try:
# Whisper doesn't have a direct progress callback, but we can monitor the models directory # Whisper doesn't have a direct progress callback, but we can monitor the models directory
_model = whisper.load_model( _model = whisper.load_model(
model_name, model_name,
device=settings.whisper_device, device=actual_device,
download_root=settings.models_path download_root=settings.models_path
) )
_current_model_name = model_name _current_model_name = model_name
@@ -83,7 +92,14 @@ def load_model(model_name: str = None):
_model_status["is_loaded"] = True _model_status["is_loaded"] = True
_model_status["download_percentage"] = 100 _model_status["download_percentage"] = 100
_model_status["status_message"] = "Model loaded successfully" _model_status["status_message"] = "Model loaded successfully"
print(f"Model {model_name} loaded on {settings.whisper_device}")
# Update device in status to reflect actual device used
if actual_device != requested_device:
settings.whisper_device = actual_device
print(f"✅ Model {model_name} loaded on CPU (GPU fallback)")
else:
print(f"✅ Model {model_name} loaded on {actual_device}")
except Exception as e: except Exception as e:
_model_status["is_downloading"] = False _model_status["is_downloading"] = False
_model_status["status_message"] = f"Error: {str(e)}" _model_status["status_message"] = f"Error: {str(e)}"

View File

@@ -151,39 +151,55 @@
// Model Status Updates // Model Status Updates
async function updateModelStatus() { async function updateModelStatus() {
try { try {
// We need to use an API key for the model-status endpoint
// For admin panel, we'll use a simple endpoint that doesn't require auth
// or we'll skip this and use the health endpoint instead
const response = await fetch('/health'); const response = await fetch('/health');
const data = await response.json(); const data = await response.json();
const statusCard = document.getElementById('model-status-content'); const statusCard = document.getElementById('model-status-content');
const progressContainer = document.getElementById('model-progress-container');
const progressBar = document.getElementById('model-progress-bar');
const progressText = document.getElementById('model-progress-text');
// Prioritize download status over GPU status
if (data.model_status && data.model_status.is_downloading) {
// Show download status
statusCard.innerHTML = `
<p><strong>Status:</strong> <span style="color: #ed8936;">⏳ Downloading...</span></p>
<p>${data.model_status.status_message}</p>
<div style="background: #e2e8f0; border-radius: 10px; height: 20px; overflow: hidden; margin-top: 10px;">
<div style="background: linear-gradient(90deg, #667eea, #764ba2); height: 100%; width: ${data.model_status.download_percentage}%; transition: width 0.5s ease;"></div>
</div>
<p style="text-align: center; margin-top: 5px; font-weight: bold; color: #667eea;">
${data.model_status.download_percentage}%
</p>
`;
} else if (data.model_status && data.model_status.loaded) {
// Show GPU/Model ready status
if (data.gpu && data.gpu.available) { if (data.gpu && data.gpu.available) {
statusCard.innerHTML = ` statusCard.innerHTML = `
<p><strong>Status:</strong> <span style="color: #48bb78;">✅ GPU Ready</span></p> <p><strong>Status:</strong> <span style="color: #48bb78;">✅ Loaded & Ready</span></p>
<p><strong>Model:</strong> ${data.model}</p>
<p><strong>Device:</strong> ${data.model_status.device}</p>
<p><strong>GPU:</strong> ${data.gpu.name}</p> <p><strong>GPU:</strong> ${data.gpu.name}</p>
<p><strong>VRAM:</strong> ${data.gpu.vram_used_gb} GB / ${data.gpu.vram_total_gb} GB</p> <p><strong>VRAM:</strong> ${data.gpu.vram_used_gb} GB / ${data.gpu.vram_total_gb} GB</p>
<p><strong>Model:</strong> ${data.model}</p>
`; `;
progressContainer.style.display = 'none';
} else { } else {
statusCard.innerHTML = ` statusCard.innerHTML = `
<p><strong>Status:</strong> <span style="color: #ed8936;"> Loading...</span></p> <p><strong>Status:</strong> <span style="color: #48bb78;"> Loaded & Ready (CPU Mode)</span></p>
<p>Model is being downloaded. Please wait...</p> <p><strong>Model:</strong> ${data.model}</p>
<p><strong>Device:</strong> CPU</p>
`;
}
} else {
// Not started or loading
statusCard.innerHTML = `
<p><strong>Status:</strong> <span style="color: #718096;">⏸️ Not Started</span></p>
<p>The model will be loaded on first transcription request.</p>
`; `;
progressContainer.style.display = 'block';
} }
} catch (error) { } catch (error) {
console.error('Error fetching model status:', error); console.error('Error fetching model status:', error);
} }
} }
// Update status every 5 seconds // Update status every 3 seconds during download for smoother progress
updateModelStatus(); updateModelStatus();
setInterval(updateModelStatus, 5000); setInterval(updateModelStatus, 3000);
</script> </script>
{% endblock %} {% endblock %}

View File

@@ -38,6 +38,39 @@
</div> </div>
</div> </div>
<!-- Device Mode Toggle -->
<div class="card">
<h2>⚙️ Processing Mode</h2>
<p>Current mode: <strong>{{ current_status.device | upper }}</strong></p>
{% if gpu_available %}
{% if current_status.device == 'cuda' %}
<form method="POST" action="/admin/models/set-device">
<input type="hidden" name="device" value="cpu">
<button type="submit" class="btn" style="background: #4299e1;">
🔄 Switch to CPU Mode
</button>
</form>
{% else %}
<form method="POST" action="/admin/models/set-device">
<input type="hidden" name="device" value="cuda">
<button type="submit" class="btn" style="background: #48bb78;">
🚀 Switch to GPU Mode
</button>
</form>
{% endif %}
<p style="margin-top: 10px; font-size: 12px; color: #666;">
GPU detected: {{ gpu_name }}
</p>
{% else %}
<div style="background: #fed7d7; padding: 10px; border-radius: 5px; margin-top: 10px;">
<p style="color: #742a2a; margin: 0;">
⚠️ No compatible GPU detected. Running in CPU mode.
</p>
</div>
{% endif %}
</div>
<!-- Available Models --> <!-- Available Models -->
<div class="card"> <div class="card">
<h2>Available Models</h2> <h2>Available Models</h2>

View File

@@ -7,6 +7,8 @@ from fastapi.responses import HTMLResponse, RedirectResponse
from fastapi.templating import Jinja2Templates from fastapi.templating import Jinja2Templates
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
import torch
from src.config import settings from src.config import settings
from src.database.db import get_db from src.database.db import get_db
from src.database.models import ApiKey, UsageLog from src.database.models import ApiKey, UsageLog
@@ -201,12 +203,20 @@ async def manage_models(request: Request, message: Optional[str] = None, error:
models = get_available_models() models = get_available_models()
current_status = get_model_status() current_status = get_model_status()
# Check GPU availability
gpu_available = torch.cuda.is_available()
gpu_name = ""
if gpu_available:
gpu_name = torch.cuda.get_device_name(0)
return templates.TemplateResponse("models.html", { return templates.TemplateResponse("models.html", {
"request": request, "request": request,
"models": models, "models": models,
"current_status": current_status, "current_status": current_status,
"message": message, "message": message,
"error": error "error": error,
"gpu_available": gpu_available,
"gpu_name": gpu_name
}) })
@@ -316,3 +326,57 @@ async def reload_model_admin(request: Request):
url=f"/admin/models?error={str(e)}", url=f"/admin/models?error={str(e)}",
status_code=302 status_code=302
) )
@router.post("/models/set-device")
async def set_device_admin(
request: Request,
device: str = Form(...)
):
"""Switch between CPU and GPU mode"""
try:
check_admin_auth(request)
except HTTPException as e:
return RedirectResponse(url="/admin/login", status_code=302)
try:
# Validate device
if device not in ["cpu", "cuda"]:
return RedirectResponse(
url="/admin/models?error=Invalid device. Must be 'cpu' or 'cuda'",
status_code=302
)
# Check if GPU is available when switching to cuda
if device == "cuda" and not torch.cuda.is_available():
return RedirectResponse(
url="/admin/models?error=No compatible GPU detected. Cannot switch to GPU mode.",
status_code=302
)
# Update settings
settings.whisper_device = device
# Unload current model to force reload with new device
from src.services.whisper_service import _model, _current_model_name
import gc
if _model is not None:
# Clear the model from memory
_model = None
gc.collect()
# Clear CUDA cache if switching away from GPU
if torch.cuda.is_available():
torch.cuda.empty_cache()
device_name = "GPU" if device == "cuda" else "CPU"
return RedirectResponse(
url=f"/admin/models?message=Switched to {device_name} mode. Model will be reloaded on next request.",
status_code=302
)
except Exception as e:
return RedirectResponse(
url=f"/admin/models?error={str(e)}",
status_code=302
)